## Tubo de ensaio
This is a sandbox notebook so I can test out some code and functions.

In [1]:
# Imports
import numpy as np

In [14]:
nums = np.linspace(0,10,11)
nums[::2]

array([ 0.,  2.,  4.,  6.,  8., 10.])

In [7]:
A = np.ones((3,3))
B = (np.array(A)*2)**2
n = np.shape(B)[0]
C = B
C[::n] = 0
D = C+np.transpose(C)/D.max()
print(A,B,C,D)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]] [[0. 0. 0.]
 [4. 4. 4.]
 [4. 4. 4.]] [[0. 0. 0.]
 [4. 4. 4.]
 [4. 4. 4.]] [[0.  0.5 0.5]
 [4.  4.5 4.5]
 [4.  4.5 4.5]]


In [6]:
idx = D>0
D[idx]/np.sum(D[idx])

array([0.08333333, 0.08333333, 0.08333333, 0.16666667, 0.16666667,
       0.08333333, 0.16666667, 0.16666667])

In [24]:
bools = np.array([False, False, False, True, True, False, True, False])
np.where(bools)[0][0]

3

### Checking the pickle data from the experiments
Files from Dimitris Mariatos. The file was created automatically by DLC Live.

In [2]:
import pickle
import pandas as pd
from preprocess import h5_to_numpy
import h5py

fdis = 'C:\\Users\\olimp\\Documents\\ki_paper\\data\\375529_7\\thedata63.pkl'
fpose = 'C:\\Users\\olimp\\Documents\\ki_paper\\data\\375529_7\\blackfly_375529_2021-07-25_7_DLC.hdf5'
# distractions
dis = pd.read_pickle(fdis)
dis_columns = ['x','y','edges','opacity','radius','orientation','x','y','edges',
               'opacity','radius','orientation','frame_time']
# pose
#pose = h5py.File(fpose,'r')
pose = pd.read_hdf(fpose) #open the hdf5 file from DLC live
pose.columns = ['snout_x','snout_y','snout_likelihood','left_ear_x','left_ear_y',
                'left_ear_likelihood','right_ear_x','right_ear_y','right_ear_likelihood',
                'tail_x','tail_y','tail_likelihood','frame_time','pose_time']

print(np.shape(dis))
ft = -1
for i in range(1,20):
    print(f'{dis_columns[ft]} line {i}: {dis[i][ft]}')

(257, 15)
frame_time line 1: 1627213591.232682
frame_time line 2: 1627213594.1510882
frame_time line 3: 1627213603.3174865
frame_time line 4: 1627213603.317511
frame_time line 5: 1627213619.397348
frame_time line 6: 1627213620.7130973
frame_time line 7: 1627213621.4963899
frame_time line 8: 1627213621.4964213
frame_time line 9: 1627213658.59921
frame_time line 10: 1627213659.348163
frame_time line 11: 1627213659.970906
frame_time line 12: 1627213659.9709349
frame_time line 13: 1627213715.1696396
frame_time line 14: 1627213716.6367238
frame_time line 15: 1627213717.9677708
frame_time line 16: 1627213717.967794
frame_time line 17: 1627213765.7220788
frame_time line 18: 1627213767.932468
frame_time line 19: 1627213768.5088735


In [3]:
def dfs_to_numpy(fpose):
    '''
    Takes a pose h5 dictionary generated by DLC Live (different from DLC standard) 
    and outputs a matrix with the shape: (# of pose landmarks * 2-3, depending on dims) X (# of frames)
    '''
    pose = pd.read_hdf(fpose) #open the hdf5 file from DLC live
    pose.columns = ['snout_x','snout_y','snout_likelihood','left_ear_x','left_ear_y',
                    'left_ear_likelihood','right_ear_x','right_ear_y','right_ear_likelihood',
                    'tail_x','tail_y','tail_likelihood','frame_time','pose_time']
    pose = np.transpose(pose.to_numpy())
    n_cols = np.shape(pose)[0]
    v_cols = np.arange(1,n_cols+1)
    mask = np.ones(n_cols, dtype=bool)
    mask = [not i%3==0 and i<=np.shape(pose)[1]-2 for i in v_cols] #ignore likelyhood, frame and pose time
    return pose[mask] #don't forget you might need to transpose this matx

In [4]:
pose_matx = dfs_to_numpy(fpose)
np.shape(pose_matx)

(10, 88325)

In [72]:
def mask_trials(fdis, fpose):
    '''
    From two data frames, one from DLC live for pose and the other regarding the distractions,
    yield two masks that match the number of pose frames:
        -Validation mask: tracks the sequences of frames between the mouse pressing the task button
        and it crossing the middle of the arena. The time stamps for such events are provided by
        DLC live.
        -Train mask: same thing but for sequences that occure between the mouse crossing the middle
        of the arena and collecting its due reward.
    '''
    dis = pd.read_pickle(fdis)
    pose = pd.read_hdf(fpose) #open the hdf5 file from DLC live
    pose_frames = pose['frame_time']
    dis_ti = [dis[i][-1] for i in range(1,np.shape(dis)[0],4)]
    dis_tm = [dis[i][-1] for i in range(2,np.shape(dis)[0],4)]
    dis_tf = [dis[i][-1] for i in range(3,np.shape(dis)[0],4)]
    assert len(dis_ti) == len(dis_tm) == len(dis_tf)    
    
    val_mask = np.zeros(len(pose_frames), dtype=bool)
    train_mask = np.zeros(len(pose_frames), dtype=bool)
    j = 0
    for i in range(len(val_mask)):
        ti = dis_ti[j]; tm = dis_tm[j]; tf = dis_tf[j]
        if pose_frames[i] >= ti and pose_frames[i] <= tm:
            val_mask[i] = True
        elif pose_frames[i] >= tm and pose_frames[i] <= tf:
            train_mask[i] = True
        elif pose_frames[i] > tf:
            j+=1
            if j == len(dis_ti):
                print(f'Last trial ends at frame# {i} out of {len(pose_frames)}.')
                break
    return val_mask, train_mask 

In [73]:
val_mask, train_mask = mask_trials(fdis, fpose)
print(np.sum(val_mask))
print(np.sum(train_mask))

Last trial ends at frame# 85865 out of 88325.
8742
3095


In [30]:
# index 6 (x of the 2nd stimulus) has an issue
for i in range(15):
    print(f'index {i}: {obj[5][i]}')

index 0: 0.15628890185484168
index 1: 0.3641397746060996
index 2: 4
index 3: 0.0
index 4: 0.025
index 5: -10
index 6: [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 '0']
 [-0.07862812716697404 0.4578981528397668 128 0.0 array(0.05) -10
  array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '0'], dtype=object)
  0.14501461569509855 0.39166111607133236 3 0.0 array(0.025) -10 0
  1627213591.232682]
 [-0.07862812716697404 0.4578981528397668 128 1.0 array(0.05) -10
  array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '0'],
         [-0.07862812716697404, 0.4578981528397668, 128, 0.0, array(0.05),
          -10,
          array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '0'], dtype=object),
          0.14501461569509855, 0.39166111607133236, 3, 0.0, array(0.025),
          -10, 0, 1627213591.232682]], dtype=object)
  0.14501461569509855 0.39166111607133236 3 1.0 array(0.025) -10 0
  1627213594.1510882]
 [-0.07862812716697404 0.4578981528397668 128 1.0 array(0.05) -10
  array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

### Test embedding algorithms

In [8]:
# Open results
import pickle

# Select a directory
res_dir = "C:\\Users\\jhflc\\OneDrive\\Documentos\\Projects\\KI_article\\results\\19h38m_16_01_2022\\results_dict.pickle"
file = open(res_dir,'rb')
obj = pickle.load(file)
# Get the expression matrix to use:
exp_matx = obj['Preprocess']['Data']['Exp_matrix']

In [20]:
a = (0,10)

### Code junkyard

In [51]:
def function1(pickle,hdf5):
    '''reads the stimuli file from psychopy and the pose file from dlc
    and merges them together. returns Two Dataframes merged1 and merged2
    (one for every stimulus,there are two stimuli appearing in each trial)'''
    #Begin by opening the stimulus data
    k = read1(pickle) #open the pickle file with stimuli data
    k.columns=['x','y','edges','opacity','radius','orientation','delete','x','y','edges','opacity','radius','orientation','laser','frame_time']
    k = k.drop([0])
    k.frame_time = k.frame_time.astype(float) #change the frame time into float     
    k1=k.iloc[:,0:6] # first stimuli
    k2=k.iloc[:,7:13] # second stimuli
    k3 =k.iloc[:,13] # laser
    k4 = k.iloc[:,14] # frame time
    stim1 = pd.concat([k1, k3, k4], axis=1) #stimulus we are analysing
    stim2 = pd.concat([k2, k3, k4], axis=1) #send stimuli to analyse
    #Open the Pose file
    df = pd.read_hdf(hdf5) #open the hdf5 file from DLC live
    df.columns=['snout_x','snout_y','snout_likelihood','left_ear_x','left_ear_y',
            'left_ear_likelihood','right_ear_x','right_ear_y','right_ear_likelihood',
            'tail_x','tail_y','tail_likelihood','frame_time','pose_time'] #rename the columns from the DLC live file 
    dlc = df['frame_time']
    Psychopy = stim2['frame_time']
    dlc = dlc.astype(float)
    Psychopy = Psychopy.astype(float)
    Psychopy = Psychopy.reset_index()
    Psychopy = Psychopy.drop(columns=['index'])
    #merge stim1 and stim2 with df and create two dfs merged1 and merged2 (one for every stimulus)
    merged1=frame_merger(stim1,df)
    merged2=frame_merger(stim2,df)
    return [merged1,merged2]

def read1(name1):
#k4 = k4 != 100
    pkl_file = open(name1, 'rb')
    dot = pickle.load(pkl_file)
    dot = pd.DataFrame(dot)
    return dot

def frame_merger(stim,df):
    stim['dlc_idx'] = np.nan # makes new coloumn in k for where the minimum's value is 
    for i in stim.index:
        a = df.frame_time - stim.frame_time.loc[i]
        idx = a.abs().idxmin()
        stim.loc[i,'dlc_idx'] = idx

In [18]:
def scan_hdf5(path, recursive=True, tab_step=2):
    def scan_node(g, tabs=0):
        print(' ' * tabs, g.name)
        for k, v in g.items():
            if isinstance(v, h5py.Dataset):
                print(' ' * tabs + ' ' * tab_step + ' -', v.name)
            elif isinstance(v, h5py.Group) and recursive:
                scan_node(v, tabs=tabs + tab_step)
    with h5py.File(path, 'r') as f:
        scan_node(f)
        
scan_hdf5(fpose)

 /
   /df_with_missing
     - /df_with_missing/axis0_label0
     - /df_with_missing/axis0_label1
     - /df_with_missing/axis0_level0
     - /df_with_missing/axis0_level1
     - /df_with_missing/axis1
     - /df_with_missing/block0_items_label0
     - /df_with_missing/block0_items_label1
     - /df_with_missing/block0_items_level0
     - /df_with_missing/block0_items_level1
     - /df_with_missing/block0_values
     - /df_with_missing/block1_items_label0
     - /df_with_missing/block1_items_label1
     - /df_with_missing/block1_items_level0
     - /df_with_missing/block1_items_level1
     - /df_with_missing/block1_values
