In [1]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from os.path import join
from utils import loadmat
import os
import pandas as pd
import time

## POSTPROCESS EGOVIDEO DATA FOR EACH SESSION
## helper functions
def find_exp(date, task):
    """Return the experiment numbers (0-9) that have a data-structure file on disk.

    Parameters
    ----------
    date : str
        Session date string; its last two characters are moved to the front
        and prefixed with "20" to form the stamp used in the file name
        (e.g. "082824" -> "20240828").
    task : str
        Task name, used to pick the ``../../fixation_{task}`` directory.

    Returns
    -------
    list of int
        Every ``exp`` in ``range(10)`` whose
        ``tg_<stamp>_<exp>_data_structure.mat`` file exists, in ascending order.
    """
    stamp = "20" + date[-2:] + date[0:-2]
    session_dir = f"../../fixation_{task}/{date}"
    return [
        exp
        for exp in range(10)
        if os.path.exists(f"{session_dir}/tg_{stamp}_{exp}_data_structure.mat")
    ]

# Recording sessions and the monkey recorded in each one (parallel lists).
dates = ["082824", "082924", "083024", "012925", "013025", "013125", "020425", "021125"]
monkey_ids = ["T", "T", "T", "A", "A", "A", "A", "A"]

monitor_latency = 0.03

# Output directory. Assigned once, outside the loop, because later cells read
# `save_dir` and must not depend on the loop body having executed.
save_dir = "./preprocessed/"

# zip() silently truncates to the shorter list, which would drop sessions.
if len(dates) != len(monkey_ids):
    raise ValueError("dates and monkey_ids must have the same length")

# construct base dataframe: one row per (session, neuron) pair
all_dates = np.array([], dtype=object)
all_ids = np.array([], dtype=int)

for date, monkey_id in zip(dates, monkey_ids):
    # path to this session's pickled neural data
    neural_path = f"../../neural_data/{date}.pickle"

    # open neural data
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(neural_path, "rb") as input_file:
        neural_data = pickle.load(input_file)

    # neuron ids are taken to run 0..max(ids) of the first entry of the pickle
    n_neurons = np.max(neural_data[0]['ids']) + 1
    all_ids = np.concatenate((all_ids, np.arange(0, n_neurons)))
    all_dates = np.concatenate((all_dates, np.repeat(date, n_neurons)))

df = pd.DataFrame({"session_id": (all_dates), "neuron_id": (all_ids)})

In [2]:
# Preview the first rows of the base table (columns: session_id, neuron_id).
df.head()

Unnamed: 0,session_id,neuron_id
0,82824,0
1,82824,1
2,82824,2
3,82824,3
4,82824,4


In [3]:
# add waveforms and waveform features
import pandas as pd
import numpy as np
from kilosort.io import load_ops

# general function for adding properties to neuron table
def property_check_add(df, requires, returns):
    """Validate prerequisite columns and create any missing output columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Neuron table; modified in place when output columns are missing.
    requires : iterable of str
        Column names that must already exist in ``df``.
    returns : iterable of str
        Column names the calling stage will fill. Each one that is absent is
        added to ``df`` and initialised to ``None``.

    Returns
    -------
    bool
        ``True`` when every name in ``returns`` was already a column (the stage
        has run before), ``False`` when at least one had to be created.

    Raises
    ------
    AssertionError
        If a name in ``requires`` is not a column of ``df``. No column is added
        in that case.
    """
    existing = df.columns.tolist()

    # Prerequisites are checked before anything is written to the frame.
    assert all(name in existing for name in requires)

    absent = [name for name in returns if name not in existing]
    for name in absent:
        df[name] = None

    return not absent
    
def add_waveform_loc(df, force=True):
    """Attach each neuron's template waveform and probe position, in place.

    For every unique ``session_id`` in ``df`` the Kilosort4 output directory is
    read (``templates.npy`` and ``ops.npy``). For each template the channel
    with the largest summed squared amplitude is selected; the template on that
    channel, its sample times and the probe x/y coordinate of that channel are
    written to the session's rows.

    The channel index is used as-is for both ``templates`` and
    ``probe['xc']``/``probe['yc']``. An earlier version passed it through a
    hard-coded ``np.arange(383)`` lookup, which was an identity map that raised
    IndexError (and therefore skipped the whole session) whenever the best
    channel index was 383 or larger.

    Parameters
    ----------
    df : pandas.DataFrame
        Neuron table with ``session_id`` and ``neuron_id`` columns.
    force : bool, default True
        Recompute even if the output columns already exist. Note that the
        default here is ``True``.

    Notes
    -----
    Fills ``x_coord_neu``, ``y_coord_neu``, ``template`` and
    ``template_times``. Sessions that fail for any reason are reported with
    ``print`` and skipped; their rows keep whatever values they had.
    """
    requires = ['session_id', 'neuron_id']
    returns = [ 'x_coord_neu', 'y_coord_neu', 'template_times', 'template']

    processed = property_check_add(df, requires, returns)
    if processed and not force:
        return

    for session_id in np.unique(df["session_id"]):
        try:
            print(f"processing {session_id}")

            kilosort_path = f"../../../RigD/Targaryen/SGL_DATA/{session_id}_g0/{session_id}_g0_imec0/kilosort4/"
            if not os.path.exists(kilosort_path + "templates.npy"):
                raise ValueError(f"path {kilosort_path} does not exist")

            # indexed below as (template, sample, channel)
            templates = np.load(kilosort_path + "templates.npy")
            ops = load_ops(kilosort_path  + 'ops.npy')
            probe = ops['probe']

            # sample times, in milliseconds assuming ops['fs'] is in Hz -- TODO confirm
            timepoints = (np.arange(ops['nt']) / ops['fs']) * 1000

            # channel carrying the most energy for each template
            chan_best = (templates**2).sum(axis=1).argmax(axis=-1)
            x_coord, y_coord = probe['xc'][chan_best], probe['yc'][chan_best]

            # one waveform per template, taken on its best channel
            waveforms = templates[np.arange(templates.shape[0]), :, chan_best]
            times = np.tile(timepoints, (waveforms.shape[0], 1))

            index = (df['session_id'] == session_id)
            if np.sum(index) != len(waveforms):
                raise ValueError("kilosort path has different number of sorted neurons than RF mapping")

            # Values are assigned positionally: the i-th row of this session
            # receives template i. NOTE(review): assumes rows are ordered by
            # neuron_id and that neuron_id equals the template index -- confirm.
            row_labels = index[index].index
            df.loc[index, 'x_coord_neu'] = pd.Series(list(x_coord), index=row_labels)
            df.loc[index, 'y_coord_neu'] = pd.Series(list(y_coord), index=row_labels)
            df.loc[index, 'template'] = pd.Series(list(waveforms), index=row_labels)
            df.loc[index, 'template_times'] = pd.Series(list(times), index=row_labels)

        except Exception as e:
            # best effort: report and carry on with the next session
            print(f"skipping {session_id}... see exception text below")
            print(e)

def add_waveform_trough_to_peak(df, force=False):
    """Compute the signed trough-to-peak interval of every template, in place.

    For each row whose ``template`` is a NumPy array, the time of the global
    maximum minus the time of the global minimum is written to
    ``template_trough_to_peak``, multiplied by 1000 (milliseconds to
    microseconds, given ``template_times`` in milliseconds). The value is
    negative when the maximum occurs before the minimum. Rows without an array
    template are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Neuron table with ``session_id``, ``neuron_id``, ``template_times`` and
        ``template`` columns.
    force : bool, default False
        Recompute even if ``template_trough_to_peak`` already exists.
    """
    requires = ['session_id', 'neuron_id', 'template_times', 'template']
    returns = ['template_trough_to_peak']

    already_done = property_check_add(df, requires, returns)
    if already_done and not force:
        return

    for row_label, record in df.iterrows():
        trace = record['template']
        sample_times = record['template_times']
        if not isinstance(trace, np.ndarray):
            continue

        # global extrema of the waveform (no ordering is imposed between them)
        trough_at = np.argmin(trace)
        peak_at = np.argmax(trace)

        lag_ms = sample_times[peak_at] - sample_times[trough_at]
        df.loc[row_label, 'template_trough_to_peak'] = lag_ms * 1000
    
def add_waveform_type(df, force=False):
    """Label each neuron's waveform from its trough-to-peak interval, in place.

    Writes ``template_type`` according to ``template_trough_to_peak``:

    * ``'AS'`` when the interval is below 0,
    * ``'FS'`` when it is strictly between 0 and 200,
    * ``'RS'`` when it is above 200.

    Rows whose interval is exactly 0 or exactly 200, or is missing, match none
    of the rules and keep their current ``template_type`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Neuron table with ``session_id``, ``neuron_id`` and
        ``template_trough_to_peak`` columns.
    force : bool, default False
        Relabel even if ``template_type`` already exists.
    """
    requires = ['session_id', 'neuron_id', 'template_trough_to_peak']
    returns = ['template_type']

    already_done = property_check_add(df, requires, returns)
    if already_done and not force:
        return

    interval = df['template_trough_to_peak']
    rules = (
        ('AS', interval < 0),
        ('RS', interval > 200),
        ('FS', (interval > 0) & (interval < 200)),
    )
    # the three masks are mutually exclusive, so each row gets at most one label
    for label, mask in rules:
        df.loc[mask, 'template_type'] = label
    
# Run the waveform stages in dependency order: templates and probe positions
# first, then the trough-to-peak interval derived from the templates, then the
# waveform label derived from that interval. force=True recomputes every stage
# even if its columns already exist.
add_waveform_loc(df, force=True)
add_waveform_trough_to_peak(df, force=True)
add_waveform_type(df, force=True)

df.head()

processing 012925
processing 013025
processing 013125
processing 020425
processing 021125
processing 082824
processing 082924
processing 083024


Unnamed: 0,session_id,neuron_id,x_coord_neu,y_coord_neu,template_times,template,template_trough_to_peak,template_type
0,82824,0,11.0,20.0,"[0.0, 0.03333333333333333, 0.06666666666666667...","[0.22548859, 0.19542037, 0.18996716, 0.1756644...",366.666667,RS
1,82824,1,11.0,20.0,"[0.0, 0.03333333333333333, 0.06666666666666667...","[-0.03260965, -0.052709486, -0.06782218, -0.07...",200.0,FS
2,82824,2,11.0,0.0,"[0.0, 0.03333333333333333, 0.06666666666666667...","[-0.27105498, -0.2881631, -0.3233422, -0.30338...",-466.666667,AS
3,82824,3,114.0,20.0,"[0.0, 0.03333333333333333, 0.06666666666666667...","[-0.14516835, -0.16483098, -0.19752464, -0.184...",-366.666667,AS
4,82824,4,11.0,0.0,"[0.0, 0.03333333333333333, 0.06666666666666667...","[0.25014895, 0.25289863, 0.26891083, 0.2699676...",433.333333,RS


In [4]:
# add receptive fields
from tqdm import trange
import pandas as pd
import numpy as np
from os.path import join
import os

def add_receptive_fields(df, force=False):
    """Attach receptive-field (RF) maps and RF centres to the neuron table, in place.

    For every unique ``session_id`` in ``df`` three files are read from the
    preprocessed directory: ``<session>_rf.npy`` (indexed as probe x time x
    neuron), ``<session>_rf.csv`` (``coords_x`` / ``coords_y`` per probe) and
    ``times_rf.npy``. Activity is averaged over the window
    ``0.03 < t < 0.13`` and accumulated on a regular grid with a spacing of 3
    that spans the probe coordinates.

    Columns written for each neuron:

    * ``rf_x_cond_label`` / ``rf_y_cond_label`` - grid coordinates along x / y,
      in ascending order.
    * ``rf`` - mean response per grid cell, first axis x, second axis y, with
      the y axis reversed (column 0 holds the largest y). Grid cells that
      received no probe are 0/0 and therefore NaN.
    * ``rf_norm`` - ``rf`` rescaled as ``(rf - min) / max(rf - min)``.
    * ``x_coord_rf`` / ``y_coord_rf`` - coordinates of the grid cell with the
      largest *summed* response, or NaN when the summed response is not
      positive.

    NOTE(review): ``rf_y_cond_label`` is ascending while the y axis of ``rf``
    is reversed; confirm that consumers expect this.

    Parameters
    ----------
    df : pandas.DataFrame
        Neuron table with ``session_id`` and ``neuron_id`` columns.
    force : bool, default False
        Recompute even if the output columns already exist.

    Notes
    -----
    Sessions that fail for any reason are reported with ``print`` and skipped.
    """
    requires = ['session_id', 'neuron_id']
    returns = ['rf_x_cond_label', 'rf_y_cond_label',\
              'rf', 'rf_norm', 'x_coord_rf', 'y_coord_rf']

    processed = property_check_add(df, requires, returns)
    if processed and not force:
        return

    base_path = "../../fixation_egovideo/neurips/preprocessed/"

    # response window applied to the values of times_rf.npy
    time_start = 0.03
    time_end = 0.13

    rf_step = 3 # range was always 3 dva steps

    for session_id in np.unique(df["session_id"]):
        try:
            print(f"processing {session_id}")

            spike_train = np.load(join(base_path, f"{session_id}_rf.npy"))
            df_cond = pd.read_csv(join(base_path, f"{session_id}_rf.csv"), index_col=False)
            probe_coords = np.column_stack((df_cond['coords_x'], df_cond['coords_y']))
            times = np.load(join(base_path, "times_rf.npy"))

            n_probes, _, n_neurons = spike_train.shape
            in_window = np.logical_and(times > time_start, times < time_end)
            spike_train_fr = np.mean(spike_train[:, in_window, :], 1)

            ymin = int(np.min(df_cond['coords_y']))
            ymax = int(np.max(df_cond['coords_y']))
            xmin = int(np.min(df_cond['coords_x']))
            xmax = int(np.max(df_cond['coords_x']))

            y_range = np.arange(ymin, ymax + rf_step, rf_step)
            x_range = np.arange(xmin, xmax + rf_step, rf_step)
            grid_shape = (len(x_range), len(y_range))

            # Grid cell of every probe. This depends only on the probe
            # coordinates, so it is computed once rather than once per neuron.
            x_idx = np.array(
                [int(probe_coords[j][0]/rf_step - xmin/rf_step) for j in range(n_probes)],
                dtype=int,
            )
            y_idx = np.array(
                [int(probe_coords[j][1]/rf_step - ymin/rf_step) for j in range(n_probes)],
                dtype=int,
            )

            # number of probes that landed in each grid cell (same for all neurons)
            fr_cnt = np.zeros(grid_shape)
            np.add.at(fr_cnt, (x_idx, y_idx), 1)

            rfs_norm = []
            rfs_cent = []
            rfs = []

            for i in range(n_neurons):
                # summed response per grid cell; np.add.at accumulates over
                # repeated indices, unlike plain fancy-index assignment
                fr_sum = np.zeros(grid_shape)
                np.add.at(fr_sum, (x_idx, y_idx), spike_train_fr[:, i])

                if np.sum(fr_sum) > 0:
                    # centre = cell with the largest summed response, located
                    # before the y axis is reversed
                    r, c = np.unravel_index(np.argmax(fr_sum), fr_sum.shape)
                    # mean response per cell; fliplr reverses the second (y)
                    # axis so that column 0 corresponds to the largest y
                    fr_mean = np.fliplr(fr_sum/fr_cnt)
                    rfs.append(fr_mean)
                    # convert grid index to coordinate: min + step*index (r is x, c is y)
                    rfs_cent.append([r*rf_step+xmin, c*rf_step+ymin])
                    rfs_norm.append((fr_mean - np.min(fr_mean))/(np.max((fr_mean - np.min(fr_mean)))))
                else:
                    # no positive summed response: keep the raw sums and mark the centre missing
                    rfs.append(fr_sum)
                    rfs_cent.append([np.nan,np.nan])
                    rfs_norm.append(fr_sum)

            rf_x_cond_label = np.tile(np.array(x_range)[None, :], (n_neurons, 1))
            rf_y_cond_label = np.tile(np.array(y_range)[None, :], (n_neurons, 1))

            rf_norm = np.array(rfs_norm)
            rf = np.array(rfs)
            rf_center_x = np.array(np.array(rfs_cent)[:, 0])
            rf_center_y = np.array(np.array(rfs_cent)[:, 1])

            for i in range(n_neurons):
                index = (df['session_id'] == session_id) & (df['neuron_id'] == i)
                true_index = index[index].index

                # wrapping each value in a one-element Series stores the array
                # as a single cell instead of broadcasting it over the row
                df.loc[index, 'rf_x_cond_label'] = pd.Series([rf_x_cond_label[i]], index=true_index)
                df.loc[index, 'rf_y_cond_label'] = pd.Series([rf_y_cond_label[i]], index=true_index)

                df.loc[index, 'rf'] = pd.Series([rf[i]], index=true_index)
                df.loc[index, 'rf_norm'] = pd.Series([rf_norm[i]], index=true_index)
                df.loc[index, 'x_coord_rf'] = pd.Series([rf_center_x[i]], index=true_index)
                df.loc[index, 'y_coord_rf'] = pd.Series([rf_center_y[i]], index=true_index)
        except Exception as e:
            # best effort: report and carry on with the next session
            print(f"skipping {session_id}... see exception text below")
            print(e)

# Compute receptive fields for every session; force=True recomputes them even
# if the RF columns already exist. Then preview the enriched table.
add_receptive_fields(df, force=True)
df.head()

processing 012925
processing 013025
processing 013125
processing 020425
processing 021125
processing 082824
processing 082924
processing 083024


Unnamed: 0,session_id,neuron_id,x_coord_neu,y_coord_neu,template_times,template,template_trough_to_peak,template_type,rf_x_cond_label,rf_y_cond_label,rf,rf_norm,x_coord_rf,y_coord_rf
0,82824,0,11.0,20.0,"[0.0, 0.03333333333333333, 0.06666666666666667...","[0.22548859, 0.19542037, 0.18996716, 0.1756644...",366.666667,RS,"[-3, 0, 3, 6, 9, 12, 15]","[-15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",,
1,82824,1,11.0,20.0,"[0.0, 0.03333333333333333, 0.06666666666666667...","[-0.03260965, -0.052709486, -0.06782218, -0.07...",200.0,FS,"[-3, 0, 3, 6, 9, 12, 15]","[-15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15]","[[0.0015384615384615385, 0.002857142857142857,...","[[0.2884615384615385, 0.5357142857142858, 0.40...",12.0,-9.0
2,82824,2,11.0,0.0,"[0.0, 0.03333333333333333, 0.06666666666666667...","[-0.27105498, -0.2881631, -0.3233422, -0.30338...",-466.666667,AS,"[-3, 0, 3, 6, 9, 12, 15]","[-15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0007692...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5769230...",6.0,-9.0
3,82824,3,114.0,20.0,"[0.0, 0.03333333333333333, 0.06666666666666667...","[-0.14516835, -0.16483098, -0.19752464, -0.184...",-366.666667,AS,"[-3, 0, 3, 6, 9, 12, 15]","[-15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15]","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",6.0,-3.0
4,82824,4,11.0,0.0,"[0.0, 0.03333333333333333, 0.06666666666666667...","[0.25014895, 0.25289863, 0.26891083, 0.2699676...",433.333333,RS,"[-3, 0, 3, 6, 9, 12, 15]","[-15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15]","[[0.0023076923076923075, 0.0014285714285714286...","[[0.2937062937062937, 0.18181818181818185, 0.1...",9.0,-3.0


In [5]:
# Load the per-neuron video metadata table from save_dir, using the first CSV
# column as the index. `save_dir` is assigned in the first cell, so that cell
# must have been run beforehand.
neu_vid = pd.read_csv(join(save_dir, f"neuron_data_video.csv"), index_col=0)
neu_vid.head()

Unnamed: 0,neuron_id,session_id,monkey_id,px_per_deg,fix_coord_deg
0,0,82824,T,10.075732,[-5 0]
1,1,82824,T,10.075732,[-5 0]
2,2,82824,T,10.075732,[-5 0]
3,3,82824,T,10.075732,[-5 0]
4,4,82824,T,10.075732,[-5 0]


In [6]:
# Cast session ids to int so the merge keys have the same dtype on both sides
# (the preview of neu_vid shows session_id without its leading zero, e.g.
# 82824). The cast drops leading zeros: "012925" becomes 12925.
df['session_id'] = df['session_id'].astype(int)
# merge() defaults to an inner join, so neurons missing from either table are
# dropped. NOTE(review): this cell rebinds `df`; running it a second time
# merges the already-merged table again.
df = df.merge(neu_vid, left_on=["session_id", "neuron_id"], right_on=["session_id", "neuron_id"])

In [7]:
import pickle
import os 

# Persist the merged neuron table as a pickle inside save_dir.
with open(join(save_dir, f"neuron_data.pickle"), 'wb') as handle:
    pickle.dump(df, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Delete the intermediate CSV that was read into `neu_vid`.
# NOTE(review): after this runs, the cell that loads neuron_data_video.csv
# fails on a re-run until the CSV is regenerated.
os.remove(join(save_dir, f"neuron_data_video.csv"))