In [1]:
import pandas as pd
import numpy as np
import ast

# Spreadsheet columns needed downstream.
# NOTE(review): 'condition ' carries a trailing space; presumably it matches the
# header in the workbook -- confirm before "fixing" it.
cols = [
    'condition ',
    'session_dir',
    'all_subjects',
    'tone_start_timestamp',
    'tone_stop_timestamp',
]


def _parse_subjects(cell):
    """Evaluate a string cell as a Python literal; return any non-string cell unchanged."""
    if isinstance(cell, str):
        return ast.literal_eval(cell)
    return cell


# Read the selected columns, drop every row that has a missing value in any of
# them, then parse `all_subjects` on an independent copy of the cleaned frame.
df = pd.read_excel(
    'rce_pilot_2_per_video_trial_labels.xlsx',
    usecols=cols,
    engine='openpyxl',
)
df2 = df.dropna()
df3 = df2.copy()
df3['all_subjects'] = df3['all_subjects'].apply(_parse_subjects)

# Conditions that are copied to every subject of the trial unchanged. Any other
# condition value is compared with each subject ID to derive 'win' / 'lose'.
shared_conditions = ('rewarded', 'omission', 'both_rewarded', 'tie')

# Collect one record per (trial row, subject) pair for the new DataFrame
new_df_data = []

for _, row in df3.iterrows():
    session_dir = row['session_dir']
    subjects = row['all_subjects']
    condition = row['condition ']

    # Keep only the part of session_dir that precedes the first '_subj_';
    # everything after it is replaced by the per-subject suffix built below.
    base_session_dir = session_dir.split('_subj_')[0]

    for subject in subjects:
        # Coerce once to str: subject IDs parsed as numbers (e.g. 1.3) have no
        # .replace(), and the win/lose comparison below is string-based anyway.
        subject_id = str(subject)
        subject_formatted = subject_id.replace('.', '-')
        subj_recording = f"{base_session_dir}_subj_{subject_formatted}"

        if condition in shared_conditions:
            subject_condition = condition
        elif str(condition) == subject_id:
            # The condition cell names this subject.
            subject_condition = 'win'
        else:
            subject_condition = 'lose'

        new_df_data.append({
            'session_dir': session_dir,
            'subject': subject,
            'subj_recording': subj_recording,
            'condition': subject_condition,
            'tone_start_timestamp': row['tone_start_timestamp'],
            'tone_stop_timestamp': row['tone_stop_timestamp']
        })


# Convert the records to a DataFrame and discard exact duplicate rows
new_df = pd.DataFrame(new_df_data).drop_duplicates()

# Integer divisor applied to the raw tone timestamps before they are stored.
# NOTE(review): presumably a sampling-rate conversion (e.g. 20 kHz -> 1 kHz) -- confirm.
TIMESTAMP_DIVISOR = 20

# Every recording gets an entry for each of these labels, even when it has no
# trials of that kind.
CONDITION_LABELS = ('rewarded', 'win', 'lose', 'omission', 'both_rewarded', 'tie')

# Prepare timestamp_dicts from new_df:
#   {subj_recording: {condition: [(start, stop), ...]}}
timestamp_dicts = {}
for record in new_df.itertuples(index=False):
    key = record.subj_recording
    if key not in timestamp_dicts:
        timestamp_dicts[key] = {label: [] for label in CONDITION_LABELS}

    tone_window = (
        int(record.tone_start_timestamp) // TIMESTAMP_DIVISOR,
        int(record.tone_stop_timestamp) // TIMESTAMP_DIVISOR,
    )
    timestamp_dicts[key][record.condition].append(tone_window)

# Convert each list of (start, stop) pairs to an int64 array of shape (n, 2).
# reshape(-1, 2) keeps that two-column shape for conditions with no trials,
# which would otherwise come out as a 1-D array of shape (0,).
for subj_recording in timestamp_dicts:
    for condition in timestamp_dicts[subj_recording]:
        timestamp_dicts[subj_recording][condition] = np.array(
            timestamp_dicts[subj_recording][condition], dtype=np.int64
        ).reshape(-1, 2)

In [2]:
# Show the nested mapping: subj_recording -> condition -> array of (start, stop) rows
timestamp_dicts

{'20230612_101430_standard_comp_to_training_D1_subj_1-3': {'rewarded': array([[1849109, 1859109],
         [1909109, 1919109],
         [1964109, 1974109],
         [2029109, 2039109],
         [2084109, 2094109],
         [2134109, 2144109],
         [2184109, 2194109],
         [2289109, 2299109],
         [2409109, 2419109],
         [2529109, 2539109],
         [2639109, 2649109],
         [2724109, 2734109],
         [2844109, 2854109],
         [2904109, 2914109],
         [2969109, 2979109],
         [3029109, 3039109],
         [3104108, 3114109],
         [3204108, 3214109],
         [3299108, 3309108],
         [3349108, 3359108]], dtype=int64),
  'win': array([[ 169111,  179111],
         [ 479111,  489111],
         [ 649110,  659110],
         [ 754110,  764110],
         [ 804110,  814110],
         [ 884110,  894110],
         [ 949110,  959110],
         [1014110, 1024110],
         [1134110, 1144110],
         [1309110, 1319110],
         [1379110, 1389110],
         [

In [None]:
ephys title:
20230612_101430_standard_comp_to_training_D1_subj_1-3_t3b3L_box2_merged.rec

dict title:
20230612_101430_standard_comp_to_training_D1_subj_1-3