# Example of how to use the `generate_event_dataframe` method of `SpikeAnalysis_MultiRecording`

In [1]:
import pandas as pd
import numpy as np
import ast
import pickle
from pathlib import Path
import matplotlib.pyplot as plt
import spikeanal as spike

# Spreadsheet columns to read (the header 'condition ' carries a trailing space)
cols = ['condition ', 'session_dir', 'all_subjects', 'tone_start_timestamp', 'tone_stop_timestamp']

# Load the data
df = pd.read_excel(
    'rce_pilot_2_per_video_trial_labels.xlsx',
    usecols=cols,
    engine='openpyxl',
)

# Keep only the rows in which every requested column has a value
df2 = df.dropna()


def _parse_subjects(raw):
    """Turn a string such as "['1.1', '1.2']" into the Python object it spells; pass anything else through."""
    return ast.literal_eval(raw) if isinstance(raw, str) else raw


# Work on a copy whose 'all_subjects' entries are real Python objects rather than strings
df3 = df2.assign(all_subjects=df2['all_subjects'].apply(_parse_subjects))

# Keep rows listing fewer than three subjects (novel sessions are ignored for now)
fewer_than_three_subjects = df3['all_subjects'].apply(lambda subjects: len(subjects) < 3)
df4 = df3[fewer_than_three_subjects]

# Conditions that are stored unchanged for every subject of a trial; any other
# value is compared with the subject ID to label the trial 'win' or 'lose'
shared_conditions = ['rewarded', 'omission', 'both_rewarded', 'tie']

# One record per (trial row, subject); converted to a DataFrame after the loop
new_df_data = []

for _, trial in df4.iterrows():
    trial_condition = trial['condition ']
    # Drop '_subj_' and everything after it so each subject's own suffix can be appended
    session_prefix = trial['session_dir'].split('_subj_')[0]

    for subject in trial['all_subjects']:
        if trial_condition in shared_conditions:
            subject_condition = trial_condition
        elif str(trial_condition) == str(subject):
            subject_condition = 'win'
        else:
            subject_condition = 'lose'

        new_df_data.append({
            'session_dir': trial['session_dir'],
            'subject': subject,
            # The recording key writes the subject ID with '-' in place of '.'
            'subj_recording': f"{session_prefix}_subj_{subject.replace('.', '-')}",
            'condition': subject_condition,
            'tone_start_timestamp': trial['tone_start_timestamp'],
            'tone_stop_timestamp': trial['tone_stop_timestamp'],
        })

# Build the table and discard exact duplicate rows
new_df = pd.DataFrame(new_df_data).drop_duplicates()

# Every recording gets an entry for each of these labels, even if it stays empty
event_labels = ['rewarded', 'win', 'lose', 'omission', 'both_rewarded', 'tie']

# Map each subject recording to {event label: [(start, stop), ...]}
timestamp_dicts = {}
for _, trial in new_df.iterrows():
    # Both tone timestamps are floor-divided by 20 before being stored
    # NOTE(review): presumably a unit conversion (e.g. 20 kHz samples -> ms) — confirm
    tone_window = (
        int(trial['tone_start_timestamp']) // 20,
        int(trial['tone_stop_timestamp']) // 20,
    )
    per_label = timestamp_dicts.setdefault(
        trial['subj_recording'], {label: [] for label in event_labels}
    )
    per_label[trial['condition']].append(tone_window)

# Replace each list of (start, stop) tuples with an int64 numpy array
for per_label in timestamp_dicts.values():
    for label, windows in per_label.items():
        per_label[label] = np.array(windows, dtype=np.int64)
        

# Relative directory of the recordings, assembled with pathlib so the path
# separators suit the host platform (HiPerGator or Windows)
ephys_path = Path('.').joinpath('export', 'updated_phys', 'non-novel', 'all_non_novel')

# Build the recording collection from that directory (passed as a plain string)
ephys_data = spike.EphysRecordingCollection(str(ephys_path))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
20230612_101430_standard_comp_to_training_D1_subj_1-3_t3b3L_box2_merged.rec
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
20230617_115521_standard_comp_to_omission_D1_subj_1-1_t1b3L_box1_merged.rec
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
20230617_115521_standard_comp_to_omission_D1_subj_1-2_t2b2L_box2_merged.rec
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
20230618_100636_standard_comp_to_omission_D2_subj_1-1_t1b2L_box2_merged.rec
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
20230618_100636_standard_comp_to_omission_D2_subj_1-4_t4b3L_box1_merged.rec
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
20230619_115321_standard_comp_to_omission_D3_subj_1-4_t3b3L_box2_merged.rec
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
20230620_114347_standard_comp_to_omission_D4_subj_1-1_t1b2L_box_2_merged.rec
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
20230620_114347_standard_comp_to_omission_D4_subj_1-2_t3b3L_box_1_merged.rec
<class

In [2]:
# Attach the per-condition event windows and a subject ID to each recording.
# A recording name is matched to a timestamp_dicts key by cutting it off right
# after the subject ID that follows 'subj_'.
SUBJ_TAG = 'subj_'
SUBJECT_ID_LEN = 3  # the subject ID is read as exactly three characters, e.g. '1-3'

unmatched_recordings = []
for recording in ephys_data.collection.keys():
    tag_pos = recording.find(SUBJ_TAG)
    if tag_pos == -1:
        # str.find returns -1 when the tag is absent; slicing from that offset
        # would yield a meaningless key, so record the name as unmatched instead
        unmatched_recordings.append(recording)
        continue

    subject_start = tag_pos + len(SUBJ_TAG)
    subject_end = subject_start + SUBJECT_ID_LEN
    # Everything up to and including the subject ID forms the lookup key
    recording_key_without_suffix = recording[:subject_end]

    if recording_key_without_suffix not in timestamp_dicts:
        unmatched_recordings.append(recording)
        continue

    ephys_recording = ephys_data.collection[recording]
    # Assign the corresponding timestamp_dicts dictionary to event_dict
    ephys_recording.event_dict = timestamp_dicts[recording_key_without_suffix]
    # The subject is the three characters right after 'subj_'
    ephys_recording.subject = recording[subject_start:subject_end]

if unmatched_recordings:
    # These recordings were left untouched: no event_dict or subject was assigned here
    print(f"Warning: no trial timestamps found for {len(unmatched_recordings)} recording(s):")
    for unmatched in unmatched_recordings:
        print(f"  {unmatched}")

spike_analysis = spike.SpikeAnalysis_MultiRecording(ephys_data, timebin = 100, smoothing_window=250, ignore_freq = 0.5)

All set to analyze


In [3]:
# Build the combined event table from spike_analysis.
# NOTE(review): judging by the columns shown in the output further down, each row
# appears to describe one unit during one event, with pre-event and event
# timebin values — confirm against the spikeanal implementation.
event_and_pre_event_spikes_df = spike_analysis.generate_event_dataframe()

In [4]:
# Save the full event table; the context manager closes the file handle even if pickling fails
with open("event_and_pre_event_spikes_df.pkl", "wb") as pkl_file:
    pickle.dump(event_and_pre_event_spikes_df, pkl_file)
# To load the pickle later (only unpickle files you created yourself — unpickling can run arbitrary code):
# with open('event_and_pre_event_spikes_df.pkl', 'rb') as pkl_file:
#     event_spikes = pickle.load(pkl_file)

## Optional steps:

In [5]:
# This step is just to simplify the recording names and pull out the Subject to a new column
def rename_recording_and_extract_subject(ex_string):
    """Shorten a recording name and pull out its subject ID.

    The short name runs from just after '_comp_to_' through the three-character
    subject ID that follows '_subj_', with every '-' replaced by '_'.

    Parameters
    ----------
    ex_string : str
        Recording name, e.g.
        '20230617_115521_standard_comp_to_omission_D1_subj_1-1_t1b3L_box1_merged.rec'.
        A name without '_comp_to_' (for instance one already shortened by this
        function) is kept from its first character, so applying the function
        twice gives the same result as applying it once.

    Returns
    -------
    tuple of (str, str)
        (new_name, subj), e.g. ('omission_D1_subj_1_1', '1_1').

    Raises
    ------
    ValueError
        If ex_string contains no '_subj_' marker, because no subject can be
        extracted from it.
    """
    comp_tag = '_comp_to_'
    subj_tag = '_subj_'
    subj_id_len = 3  # subject IDs are read as exactly three characters, e.g. '1-1'

    subj_start = ex_string.find(subj_tag)
    if subj_start == -1:
        raise ValueError(f"no '{subj_tag}' marker in recording name: {ex_string!r}")
    id_start = subj_start + len(subj_tag)
    subj_end = id_start + subj_id_len

    comp_pos = ex_string.find(comp_tag)
    # str.find returns -1 when the tag is missing; start at 0 in that case
    # rather than slicing from a bogus offset
    name_start = comp_pos + len(comp_tag) if comp_pos != -1 else 0

    new_name = ex_string[name_start:subj_end].replace('-', '_')
    subj = ex_string[id_start:subj_end].replace('-', '_')
    return new_name, subj

# Shorten every recording name and store the extracted subject in its own column
renamed_pairs = event_and_pre_event_spikes_df['Recording'].apply(rename_recording_and_extract_subject)
event_and_pre_event_spikes_df['Recording'], event_and_pre_event_spikes_df['Subject'] = zip(*renamed_pairs)

# Put the identifying columns first and keep all other columns in their existing order
columns_order = ['Recording', 'Subject', 'Event name', 'Event number', 'Unit number']
remaining_columns = [
    name for name in event_and_pre_event_spikes_df.columns if name not in columns_order
]
event_and_pre_event_spikes_df = event_and_pre_event_spikes_df[columns_order + remaining_columns]

# Split the table into one DataFrame per recording, keyed '<Recording>_events_df'
unique_recordings = event_and_pre_event_spikes_df['Recording'].unique()
df_dict = {
    f"{recording_name}_events_df": event_and_pre_event_spikes_df[
        event_and_pre_event_spikes_df['Recording'] == recording_name
    ]
    for recording_name in unique_recordings
}

# List the available keys
for df_name in df_dict:
    print(df_name)

training_D1_subj_1_3_events_df
omission_D1_subj_1_1_events_df
omission_D1_subj_1_2_events_df
omission_D2_subj_1_1_events_df
omission_D2_subj_1_4_events_df
omission_D3_subj_1_4_events_df
omission_D4_subj_1_1_events_df
omission_D4_subj_1_2_events_df
omission_D5_subj_1_4_events_df
both_rewarded_D1_subj_1_1_events_df
both_rewarded_D1_subj_1_2_events_df
both_rewarded_D3_subj_1_2_events_df
both_rewarded_D3_subj_1_4_events_df
both_rewarded_D4_subj_1_1_events_df
both_rewarded_D4_subj_1_4_events_df


## Example of how to access a single recording

In [6]:
# Look up one recording's table by its '<Recording>_events_df' key; as the last
# expression in the cell, the resulting DataFrame is displayed
df_dict['omission_D1_subj_1_1_events_df']

Unnamed: 0,Recording,Subject,Event name,Event number,Unit number,Pre-event timebin 1,Pre-event timebin 2,Pre-event timebin 3,Pre-event timebin 4,Pre-event timebin 5,...,Event timebin 91,Event timebin 92,Event timebin 93,Event timebin 94,Event timebin 95,Event timebin 96,Event timebin 97,Event timebin 98,Event timebin 99,Event timebin 100
760,omission_D1_subj_1_1,1_1,win,1,6,5.88,5.84,5.92,5.92,5.96,...,7.96,8.08,8.16,8.20,8.20,8.44,8.56,8.88,8.88,8.88
761,omission_D1_subj_1_1,1_1,win,2,6,5.56,5.60,5.56,5.60,5.64,...,4.76,4.76,4.76,4.76,4.76,4.72,4.72,4.72,4.76,4.76
762,omission_D1_subj_1_1,1_1,win,3,6,4.52,4.48,4.48,4.44,4.36,...,3.24,3.28,3.32,3.36,3.40,3.36,3.32,3.32,3.32,3.36
763,omission_D1_subj_1_1,1_1,win,4,6,7.68,7.80,7.80,7.88,7.92,...,5.48,5.44,5.40,5.40,5.40,5.40,5.40,5.40,5.40,5.40
764,omission_D1_subj_1_1,1_1,win,5,6,3.60,3.60,3.60,3.60,3.60,...,2.08,2.08,2.08,2.08,2.08,2.08,2.08,2.04,2.04,2.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1112,omission_D1_subj_1_1,1_1,lose,2,17,0.36,0.36,0.36,0.36,0.36,...,0.36,0.36,0.36,0.36,0.36,0.36,0.36,0.36,0.36,0.36
1113,omission_D1_subj_1_1,1_1,lose,1,3,0.04,0.04,0.04,0.04,0.04,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
1114,omission_D1_subj_1_1,1_1,lose,2,3,2.68,2.68,2.68,2.68,2.68,...,1.28,1.28,1.24,1.20,1.16,1.16,1.16,1.12,1.08,1.04
1115,omission_D1_subj_1_1,1_1,lose,1,80,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00


In [7]:
# pickle.dump(df_dict['omission_D1_subj_1_1_events_df'], open("omission_D1_subj_1_1_events_df.pkl", "wb"))