In [1]:
import pandas as pd
import numpy as np
import ast
import pickle
from pathlib import Path
import multirecording_spikeanalysis as spike

In [2]:
# Spreadsheet columns needed by the rest of the notebook.
# NOTE: 'condition ' is spelled with a trailing space, and the same spelling
# is used when the column is read back from each row later on.
cols = [
    'condition ',
    'session_dir',
    'all_subjects',
    'tone_start_timestamp',
    'tone_stop_timestamp',
]

# Read only those columns from the per-video trial label workbook.
df = pd.read_excel(
    'rce_pilot_2_per_video_trial_labels.xlsx',
    engine='openpyxl',
    usecols=cols,
)

In [3]:
def _as_subject_list(raw):
    """Parse a string cell with ``ast.literal_eval``; return anything else unchanged."""
    if isinstance(raw, str):
        return ast.literal_eval(raw)
    return raw


# Rows with any missing value are discarded first.
df2 = df.dropna()

# Work on a copy so the column assignment below does not touch df2.
df3 = df2.copy()
df3['all_subjects'] = df3['all_subjects'].apply(_as_subject_list)

# Keep rows listing fewer than three subjects (the author's way of ignoring
# novel sessions for now).
has_fewer_than_three_subjects = df3['all_subjects'].apply(lambda subjects: len(subjects) < 3)
df4 = df3[has_fewer_than_three_subjects]

In [4]:
# Trial-level labels that are copied to every subject unchanged.
shared_conditions = ['rewarded', 'omission', 'both_rewarded', 'tie']


def _label_for_subject(trial_condition, subject):
    """Return the condition label to record for one subject of a trial.

    Labels listed in ``shared_conditions`` pass through untouched. Any other
    value is compared, as text, with the subject: a match is recorded as
    'win' and a mismatch as 'lose'.
    """
    if trial_condition in shared_conditions:
        return trial_condition
    if str(trial_condition) == str(subject):
        return 'win'
    return 'lose'


def _records_for_trial(trial):
    """Yield one record dict per subject listed in a trial row."""
    trial_session = trial['session_dir']
    trial_subjects = trial['all_subjects']
    trial_condition = trial['condition ']

    # Keep only the text before '_subj_'; the subject suffix is rebuilt
    # separately for every subject of the trial.
    recording_prefix = trial_session.split('_subj_')[0]

    for subject in trial_subjects:
        # The recording key uses '-' where the subject id has '.'.
        subject_tag = subject.replace('.', '-')
        yield {
            'session_dir': trial_session,
            'subject': subject,
            'subj_recording': f"{recording_prefix}_subj_{subject_tag}",
            'condition': _label_for_subject(trial_condition, subject),
            'tone_start_timestamp': trial['tone_start_timestamp'],
            'tone_stop_timestamp': trial['tone_stop_timestamp'],
        }


# One flat list of per-subject records across all trials.
new_df_data = [
    record
    for _, trial in df4.iterrows()
    for record in _records_for_trial(trial)
]

# Build the frame and drop rows that are exact repeats.
new_df = pd.DataFrame(new_df_data).drop_duplicates()

In [5]:
# Condition labels for which every recording gets a (possibly empty) slot.
condition_labels = ['rewarded', 'win', 'lose', 'omission', 'both_rewarded', 'tie']

# Map each subj_recording to {condition: [(start, stop), ...]}.
timestamp_dicts = {}
for _, trial in new_df.iterrows():
    # Timestamps are truncated to int and floor-divided by 20
    # (presumably a 20 kHz sample-to-millisecond conversion; TODO confirm).
    tone_window = (
        int(trial['tone_start_timestamp']) // 20,
        int(trial['tone_stop_timestamp']) // 20,
    )
    per_condition = timestamp_dicts.setdefault(
        trial['subj_recording'],
        {label: [] for label in condition_labels},
    )
    per_condition[trial['condition']].append(tone_window)

# Turn the collected lists into int64 arrays in place. A condition with no
# trials becomes an empty 1-D array (shape (0,)), not an (n, 2) one.
for per_condition in timestamp_dicts.values():
    for label, windows in per_condition.items():
        per_condition[label] = np.array(windows, dtype=np.int64)

In [6]:
# Show the per-subject trial table; long frames are abbreviated in the rendered output.
new_df

Unnamed: 0,session_dir,subject,subj_recording,condition,tone_start_timestamp,tone_stop_timestamp
0,20230612_101430_standard_comp_to_training_D1_s...,1.3,20230612_101430_standard_comp_to_training_D1_s...,lose,982229.0,1182226.0
1,20230612_101430_standard_comp_to_training_D1_s...,1.4,20230612_101430_standard_comp_to_training_D1_s...,win,982229.0,1182226.0
2,20230612_101430_standard_comp_to_training_D1_s...,1.3,20230612_101430_standard_comp_to_training_D1_s...,win,3382227.0,3582224.0
3,20230612_101430_standard_comp_to_training_D1_s...,1.4,20230612_101430_standard_comp_to_training_D1_s...,lose,3382227.0,3582224.0
4,20230612_101430_standard_comp_to_training_D1_s...,1.3,20230612_101430_standard_comp_to_training_D1_s...,lose,5682225.0,5882222.0
...,...,...,...,...,...,...
1267,20230625_112913_standard_comp_to_both_rewarded...,1.4,20230625_112913_standard_comp_to_both_rewarded...,both_rewarded,67804057.0,68004058.0
1268,20230625_112913_standard_comp_to_both_rewarded...,1.1,20230625_112913_standard_comp_to_both_rewarded...,both_rewarded,69704075.0,69904080.0
1269,20230625_112913_standard_comp_to_both_rewarded...,1.4,20230625_112913_standard_comp_to_both_rewarded...,both_rewarded,69704075.0,69904080.0
1270,20230625_112913_standard_comp_to_both_rewarded...,1.1,20230625_112913_standard_comp_to_both_rewarded...,both_rewarded,70704093.0,70904092.0


In [10]:
# Load spike_clusters.npy from the phy/ folder of the 2023-06-12 "subj_1-3"
# recording (presumably one cluster id per detected spike; confirm against
# the phy export).
spike_clusters = np.load('export/updated_phys/non-novel/20230612_101430_standard_comp_to_training_D1_subj_1-3_t3b3L_box2_merged.rec/phy/spike_clusters.npy')

# Print the array; numpy abbreviates long arrays.
print(spike_clusters)


[85 41 55 ... 14 26 87]


In [11]:
# Number of entries along the first axis of the cluster array.
spike_clusters.shape[0]

483418

In [12]:
# Load spike_times.npy from the phy/ folder of the same 2023-06-12 "subj_1-3"
# recording; the printed output shows one value per row (a column array).
spike_times = np.load('export/updated_phys/non-novel/20230612_101430_standard_comp_to_training_D1_subj_1-3_t3b3L_box2_merged.rec/phy/spike_times.npy')

# Print the array; numpy abbreviates long arrays.
print(spike_times)

[[     689]
 [    1754]
 [    1829]
 ...
 [68293021]
 [68293168]
 [68293331]]


In [13]:
# Number of rows in the spike-time array.
spike_times.shape[0]

483418

In [16]:
# Peek at the first ten rows of the spike-time array.
spike_times[:10]

array([[ 689],
       [1754],
       [1829],
       [2676],
       [2935],
       [3071],
       [3496],
       [3542],
       [3613],
       [3772]], dtype=int64)