# Matching putative pyramidal cells vs. interneurons worked
## Next step: restarting the z-score method

In [1]:
import pandas as pd
import numpy as np
import ast
import pickle
from pathlib import Path
import matplotlib.pyplot as plt
import multirecording_spikeanalysis as spike

# Columns of the session spreadsheet that this notebook reads.
cols = [
    'condition',
    'session_dir',
    'all_subjects',
    'tone_start_timestamp',
    'tone_stop_timestamp',
]


def _parse_subjects(cell):
    """Evaluate a string cell as a Python literal; return non-string cells unchanged."""
    return ast.literal_eval(cell) if isinstance(cell, str) else cell


def _is_not_novel(subjects):
    """Return True when fewer than three subjects are listed."""
    return len(subjects) < 3


# Read only the needed columns from the combined workbook.
df = pd.read_excel('combined_excel_file.xlsx', usecols=cols, engine='openpyxl')

# Discard rows with any missing value, then continue on an independent copy.
df2 = df.dropna()
df3 = df2.copy()

# The subject list arrives as text; turn it back into a real list.
df3['all_subjects'] = df3['all_subjects'].apply(_parse_subjects)

# Novel sessions (three or more subjects listed) are ignored for now.
df4 = df3[df3['all_subjects'].apply(_is_not_novel)]

def _outcome_for(condition, subject):
    """Return the per-subject outcome label for one trial.

    The four session-level labels are passed through unchanged. Any other
    value is compared (as text) with the subject: a match means that subject
    won, anything else means it lost.
    """
    if condition in ['rewarded', 'omission', 'both_rewarded', 'tie']:
        return condition
    if str(condition) == str(subject):
        return 'win'
    return 'lose'


# One record per (trial row, subject); gathered in a list and converted once.
new_df_data = []

for _, session in df4.iterrows():
    # Keep only the part of session_dir before '_subj_'; the subject suffix
    # is rebuilt below for each subject individually.
    recording_prefix = session['session_dir'].split('_subj_')[0]

    new_df_data.extend([
        {
            'session_dir': session['session_dir'],
            'subject': animal,
            # Dots in the subject ID become dashes in the recording name.
            'subj_recording': f"{recording_prefix}_subj_{animal.replace('.', '-')}",
            'condition': _outcome_for(session['condition'], animal),
            'tone_start_timestamp': session['tone_start_timestamp'],
            'tone_stop_timestamp': session['tone_stop_timestamp'],
        }
        for animal in session['all_subjects']
    ])

# Build the per-subject table and remove exact duplicate rows.
new_df = pd.DataFrame(new_df_data).drop_duplicates()

# Collect, for every subject recording, the tone windows of each trial outcome.
timestamp_dicts = {}
for _, trial in new_df.iterrows():
    # NOTE(review): the integer division by 20 presumably rescales raw sample
    # indices to the time unit the spike-analysis code expects - confirm.
    tone_window = (
        int(trial['tone_start_timestamp']) // 20,
        int(trial['tone_stop_timestamp']) // 20,
    )

    # First time a recording is seen, give it an empty list for every outcome.
    windows_by_condition = timestamp_dicts.setdefault(
        trial['subj_recording'],
        {cond: [] for cond in ['rewarded', 'win', 'lose', 'omission', 'both_rewarded', 'tie']},
    )
    windows_by_condition[trial['condition']].append(tone_window)

# Replace each list of (start, stop) tuples with an int64 numpy array.
# Outcomes that never occurred become empty arrays of shape (0,).
for windows_by_condition in timestamp_dicts.values():
    for cond, windows in windows_by_condition.items():
        windows_by_condition[cond] = np.array(windows, dtype=np.int64)

In [2]:
# Assemble the recordings directory from its components so the same cell
# works on both HiPerGator and Windows.
ephys_path = Path('.').joinpath('recordings', 'from_cyborg')

# The collection is handed the directory as a plain string.
ephys_data = spike.EphysRecordingCollection(str(ephys_path))

<class 'numpy.ndarray'>
20230612_101430_standard_comp_to_training_D1_subj_1-3_t3b3L_box2_merged.rec
<class 'numpy.ndarray'>
20230612_101430_standard_comp_to_training_D1_subj_1-4_t4b2L_box1_merged.rec
<class 'numpy.ndarray'>
20230612_112630_standard_comp_to_training_D1_subj_1-2_t2b2L_box1_merged.rec
<class 'numpy.ndarray'>
20230613_105657_standard_comp_to_training_D2_subj_1-1_t1b2L_box1_merged.rec
<class 'numpy.ndarray'>
20230613_105657_standard_comp_to_training_D2_subj_1-4_t4b3L_box2_merged.rec
<class 'numpy.ndarray'>
20230614_114041_standard_comp_to_training_D3_subj_1-1_t1b3L_box1_merged.rec
<class 'numpy.ndarray'>
20230614_114041_standard_comp_to_training_D3_subj_1-2_t2b2L_box2_merged.rec
<class 'numpy.ndarray'>
20230616_111904_standard_comp_to_training_D4_subj_1-2_t2b2L_box2_merged.rec
<class 'numpy.ndarray'>
20230616_111904_standard_comp_to_training_D4_subj_1-4_t4b3L_box1_merged.rec
<class 'numpy.ndarray'>
20230617_115521_standard_comp_to_omission_D1_subj_1-1_t1b3L_box1_merged.rec


In [3]:
# Attach the tone-event timestamps and the subject ID to every recording that
# has a matching entry in timestamp_dicts. Recordings that cannot be matched
# are reported instead of being skipped silently.
SUBJECT_MARKER = 'subj_'
SUBJECT_ID_LENGTH = 3  # number of characters taken after 'subj_' (e.g. '1-3')

unmatched_recordings = []
for recording in ephys_data.collection.keys():
    marker_pos = recording.find(SUBJECT_MARKER)
    if marker_pos == -1:
        # str.find returns -1 when the marker is missing; slicing from that
        # position would yield a meaningless key and subject.
        unmatched_recordings.append(recording)
        continue

    subject_start = marker_pos + len(SUBJECT_MARKER)
    subject_end = subject_start + SUBJECT_ID_LENGTH

    # Recording name truncated right after the subject ID; this is the form
    # used for the keys of timestamp_dicts.
    recording_key_without_suffix = recording[:subject_end]
    if recording_key_without_suffix not in timestamp_dicts:
        unmatched_recordings.append(recording)
        continue

    ephys_data.collection[recording].event_dict = timestamp_dicts[recording_key_without_suffix]
    ephys_data.collection[recording].subject = recording[subject_start:subject_end]

if unmatched_recordings:
    print(f"{len(unmatched_recordings)} recording(s) have no entry in timestamp_dicts; no event_dict/subject assigned:")
    for unmatched in unmatched_recordings:
        print(f"  {unmatched}")

In [4]:
# Set up the multi-recording spike analysis on the loaded collection.
# NOTE(review): timebin and smoothing_window are presumably in milliseconds
# and ignore_freq in Hz - confirm against multirecording_spikeanalysis.
spike_analysis = spike.SpikeAnalysis_MultiRecording(
    ephys_data,
    timebin=100,
    smoothing_window=250,
    ignore_freq=0.5,
)

All set to analyze


In [5]:
# Wilcoxon comparison of 'win' events against baseline across the collection;
# the resulting rows are labelled "10s win vs 10s baseline".
# NOTE(review): the two 10s are presumably the event and baseline window
# lengths in seconds - confirm the argument order in the library.
win_df = spike_analysis.wilcox_baseline_v_event_collection(
    'win',
    10,
    10,
    plot=False,
)

Wilcoxon can't be done on 20230613_105657_standard_comp_to_training_D2_subj_1-4_t4b3L_box2_merged.rec win, because <6 samples
Wilcoxon can't be done on 20230617_115521_standard_comp_to_omission_D1_subj_1-2_t2b2L_box2_merged.rec win, because <6 samples
Wilcoxon can't be done on 20230619_115321_standard_comp_to_omission_D3_subj_1-4_t3b3L_box2_merged.rec win, because <6 samples
Wilcoxon can't be done on 20230620_114347_standard_comp_to_omission_D4_subj_1-2_t3b3L_box_1_merged.rec win, because <6 samples
Wilcoxon can't be done on 20230621_111240_standard_comp_to_omission_D5_subj_1-4_t3b3L_box1_merged.rec win, because <6 samples
Wilcoxon can't be done on 20230622_110832_standard_comp_to_both_rewarded_D1_subj_1-1_t1b3L_box1_merged.rec win, because <6 samples
Wilcoxon can't be done on 20230624_105855_standard_comp_to_both_rewarded_D3_subj_1-4_t3b3L_box1_merged.rec win, because <6 samples
Wilcoxon can't be done on 20230625_112913_standard_comp_to_both_rewarded_D4_subj_1-4_t3b3L_box1_merged.rec 

In [6]:
# Wilcoxon comparison of 'lose' events against baseline across the collection;
# the resulting rows are labelled "10s lose vs 10s baseline".
# NOTE(review): the two 10s are presumably the event and baseline window
# lengths in seconds - confirm the argument order in the library.
lose_df = spike_analysis.wilcox_baseline_v_event_collection(
    'lose',
    10,
    10,
    plot=False,
)

Wilcoxon can't be done on 20230613_105657_standard_comp_to_training_D2_subj_1-1_t1b2L_box1_merged.rec lose, because <6 samples
Wilcoxon can't be done on 20230617_115521_standard_comp_to_omission_D1_subj_1-1_t1b3L_box1_merged.rec lose, because <6 samples
Wilcoxon can't be done on 20230620_114347_standard_comp_to_omission_D4_subj_1-1_t1b2L_box_2_merged.rec lose, because <6 samples
Wilcoxon can't be done on 20230622_110832_standard_comp_to_both_rewarded_D1_subj_1-2_t3b3L_box1_merged.rec lose, because <6 samples
Wilcoxon can't be done on 20230624_105855_standard_comp_to_both_rewarded_D3_subj_1-2_t1b2L_box1_merged.rec lose, because <6 samples
Wilcoxon can't be done on 20230625_112913_standard_comp_to_both_rewarded_D4_subj_1-1_t1b2L_box1_merged.rec lose, because <6 samples
Wilcoxon can't be done on 20240320_171038_alone_comp_subj_4-3_t5b5_merged.rec lose, because <6 samples
Wilcoxon can't be done on 20240322_120625_alone_comp_subj_3-4_t5b5_merged.rec lose, because <6 samples
Wilcoxon can't b

In [7]:
# Keep the 'win' rows whose comparison is not labelled 'not significant',
# then drop every row that still contains a missing value.
win_is_significant = win_df['event1 vs event2'].ne('not significant')
win_df_sig = win_df.loc[win_is_significant].dropna()

In [8]:
# Keep the 'lose' rows whose comparison is not labelled 'not significant',
# then drop every row that still contains a missing value.
lose_is_significant = lose_df['event1 vs event2'].ne('not significant')
lose_df_sig = lose_df.loc[lose_is_significant].dropna()

In [9]:
# Stack the significant win and lose units into one table with a fresh
# 0..n-1 index.
significant_tables = [win_df_sig, lose_df_sig]
sig_units = pd.concat(significant_tables, ignore_index=True)

In [10]:
# Load the per-unit cluster table.
# NOTE(review): unpickling can execute arbitrary code - only load pickle
# files produced by a trusted pipeline.
umap_pickle_path = r"./Newest_UMAP/umap_df_detail.pkl"
putatives = pd.read_pickle(umap_pickle_path)

In [11]:
# Preview the table loaded from umap_df_detail.pkl (pandas truncates the display).
putatives

Unnamed: 0,x,y,waveform,cluster_id,cluster_color,firing_rate,recording_name,unit_id
0,11.056281,6.725037,"[0.1099006, 0.116567135, 0.12440342, 0.1342621...",6,"[0.8538, 0.2217, 0.02677, 1.0]",4.813648,20240320_142408_alone_comp_subj_3-3_t5b5_merge...,45.0
1,4.670065,5.406483,"[0.068219796, 0.072843686, 0.07739481, 0.08312...",1,"[0.27698, 0.46153, 0.93309, 1.0]",0.820515,20240320_142408_alone_comp_subj_3-3_t5b5_merge...,60.0
2,9.244576,7.548844,"[0.09019028, 0.09590861, 0.10189682, 0.1097604...",7,"[0.4796, 0.01583, 0.01055, 1.0]",0.843159,20240320_142408_alone_comp_subj_3-3_t5b5_merge...,67.0
3,7.081917,5.954050,"[0.084707275, 0.09079722, 0.095418565, 0.10189...",0,"[0.18995, 0.07176, 0.23217, 1.0]",1.958041,20240320_142408_alone_comp_subj_3-3_t5b5_merge...,109.0
4,8.892169,3.198857,"[0.060261134, 0.06250456, 0.064926274, 0.06951...",3,"[0.38127, 0.98909, 0.42386, 1.0]",7.644411,20240320_142408_alone_comp_subj_3-3_t5b5_merge...,124.0
...,...,...,...,...,...,...,...,...
1023,8.960026,7.276891,"[0.082149416, 0.08867837, 0.09511313, 0.102331...",7,"[0.4796, 0.01583, 0.01055, 1.0]",11.014126,20240409_142051_comp_novel_subj_3-4_t4b4_merge...,236.0
1024,6.746817,2.921816,"[-0.020411404, -0.021038724, -0.021981757, -0....",3,"[0.38127, 0.98909, 0.42386, 1.0]",4.094002,20240409_142051_comp_novel_subj_3-4_t4b4_merge...,238.0
1025,9.150778,7.406654,"[0.09007283, 0.09648524, 0.10219027, 0.1087899...",7,"[0.4796, 0.01583, 0.01055, 1.0]",1.999230,20240409_142051_comp_novel_subj_3-4_t4b4_merge...,243.0
1026,10.318449,10.482354,"[0.116127074, 0.12621383, 0.13441177, 0.141879...",6,"[0.8538, 0.2217, 0.02677, 1.0]",0.862059,20240409_142051_comp_novel_subj_3-4_t4b4_merge...,269.0


In [12]:
# Stack every win and lose Wilcoxon row (significant or not) into one table
# with a fresh 0..n-1 index.
wilcoxon_tables = [win_df, lose_df]
all_neurons = pd.concat(wilcoxon_tables, ignore_index=True)

In [13]:
# Preview the combined win + lose Wilcoxon results (pandas truncates the display).
all_neurons

Unnamed: 0,original unit id,Wilcoxon Stat,p value,event1 vs event2,Recording,Subject,Event
0,65,37.0,0.587891,not significant,20230612_101430_standard_comp_to_training_D1_s...,1-3,10s win vs 10s baseline
1,17,30.0,0.518555,not significant,20230612_101430_standard_comp_to_training_D1_s...,1-3,10s win vs 10s baseline
2,75,37.0,0.587891,not significant,20230612_101430_standard_comp_to_training_D1_s...,1-3,10s win vs 10s baseline
3,24,3.0,0.001221,increases,20230612_101430_standard_comp_to_training_D1_s...,1-3,10s win vs 10s baseline
4,58,28.0,0.423828,not significant,20230612_101430_standard_comp_to_training_D1_s...,1-3,10s win vs 10s baseline
...,...,...,...,...,...,...,...
688,81,23.0,0.695312,not significant,20240323_165815_alone_comp_subj_4-2_t5b5_merge...,4-2,10s lose vs 10s baseline
689,75,7.0,0.148438,not significant,20240323_165815_alone_comp_subj_4-2_t5b5_merge...,4-2,10s lose vs 10s baseline
690,68,15.5,0.496094,not significant,20240323_165815_alone_comp_subj_4-2_t5b5_merge...,4-2,10s lose vs 10s baseline
691,4,19.0,0.431641,not significant,20240323_165815_alone_comp_subj_4-2_t5b5_merge...,4-2,10s lose vs 10s baseline


In [14]:
# Attach each unit's cluster information to its Wilcoxon result, matching on
# recording name and unit ID. A left join keeps every row of all_neurons;
# units with no match in putatives get NaN in the columns that come from it.
merged_df = pd.merge(
    all_neurons,
    putatives,
    how='left',
    left_on=['Recording', 'original unit id'],
    right_on=['recording_name', 'unit_id'],
)

# Create the 'putative' column based on conditions
def classify_neuron(cluster_id, interneuron_cluster_id=5):
    """Translate a cluster ID into a putative cell-type label.

    Parameters
    ----------
    cluster_id : scalar
        Cluster ID of one unit. A missing value (NaN, None, pd.NA) means the
        unit has no cluster assignment.
    interneuron_cluster_id : int, optional
        Cluster ID that is labelled as interneurons. Defaults to 5, the value
        this notebook has always used.
        NOTE(review): why cluster 5 is the interneuron cluster is not shown
        in this notebook - confirm against the clustering results.

    Returns
    -------
    str
        'n/a' for a missing cluster ID, 'interneuron' when the ID equals
        ``interneuron_cluster_id``, and 'pyramidal' for every other ID.
    """
    if pd.isna(cluster_id):
        return 'n/a'
    if cluster_id == interneuron_cluster_id:
        return 'interneuron'
    return 'pyramidal'

# Label every merged row with its putative cell type.
putative_labels = merged_df['cluster_id'].apply(classify_neuron)
merged_df['putative'] = putative_labels

# Keep the original Wilcoxon columns plus the new label; the remaining
# columns brought in by the merge are dropped.
wilcoxon_columns = [
    'original unit id',
    'Wilcoxon Stat',
    'p value',
    'event1 vs event2',
    'Recording',
    'Subject',
    'Event',
]
result_df = merged_df[wilcoxon_columns + ['putative']]

# Quick sanity check of the first rows.
print(result_df.head())

   original unit id  Wilcoxon Stat   p value event1 vs event2  \
0                65           37.0  0.587891  not significant   
1                17           30.0  0.518555  not significant   
2                75           37.0  0.587891  not significant   
3                24            3.0  0.001221        increases   
4                58           28.0  0.423828  not significant   

                                           Recording Subject  \
0  20230612_101430_standard_comp_to_training_D1_s...     1-3   
1  20230612_101430_standard_comp_to_training_D1_s...     1-3   
2  20230612_101430_standard_comp_to_training_D1_s...     1-3   
3  20230612_101430_standard_comp_to_training_D1_s...     1-3   
4  20230612_101430_standard_comp_to_training_D1_s...     1-3   

                     Event   putative  
0  10s win vs 10s baseline  pyramidal  
1  10s win vs 10s baseline  pyramidal  
2  10s win vs 10s baseline  pyramidal  
3  10s win vs 10s baseline  pyramidal  
4  10s win vs 10s baseli

In [15]:
# Preview the Wilcoxon results together with the 'putative' label.
result_df

Unnamed: 0,original unit id,Wilcoxon Stat,p value,event1 vs event2,Recording,Subject,Event,putative
0,65,37.0,0.587891,not significant,20230612_101430_standard_comp_to_training_D1_s...,1-3,10s win vs 10s baseline,pyramidal
1,17,30.0,0.518555,not significant,20230612_101430_standard_comp_to_training_D1_s...,1-3,10s win vs 10s baseline,pyramidal
2,75,37.0,0.587891,not significant,20230612_101430_standard_comp_to_training_D1_s...,1-3,10s win vs 10s baseline,pyramidal
3,24,3.0,0.001221,increases,20230612_101430_standard_comp_to_training_D1_s...,1-3,10s win vs 10s baseline,pyramidal
4,58,28.0,0.423828,not significant,20230612_101430_standard_comp_to_training_D1_s...,1-3,10s win vs 10s baseline,pyramidal
...,...,...,...,...,...,...,...,...
688,81,23.0,0.695312,not significant,20240323_165815_alone_comp_subj_4-2_t5b5_merge...,4-2,10s lose vs 10s baseline,pyramidal
689,75,7.0,0.148438,not significant,20240323_165815_alone_comp_subj_4-2_t5b5_merge...,4-2,10s lose vs 10s baseline,pyramidal
690,68,15.5,0.496094,not significant,20240323_165815_alone_comp_subj_4-2_t5b5_merge...,4-2,10s lose vs 10s baseline,pyramidal
691,4,19.0,0.431641,not significant,20240323_165815_alone_comp_subj_4-2_t5b5_merge...,4-2,10s lose vs 10s baseline,pyramidal


In [16]:
# Isolate the rows that did not receive a putative cell-type label.
has_no_label = result_df['putative'].eq('n/a')
na_rows = result_df.loc[has_no_label]

In [17]:
# Inspect the unlabeled rows.
# NOTE(review): these look like placeholder rows (unit id 0, NaN statistics)
# for recordings where the Wilcoxon test was skipped for having <6 samples,
# rather than real units that failed to match - confirm.
na_rows

Unnamed: 0,original unit id,Wilcoxon Stat,p value,event1 vs event2,Recording,Subject,Event,putative
57,0,,,,20230613_105657_standard_comp_to_training_D2_s...,1-4,10s win vs 10s baseline,
139,0,,,,20230617_115521_standard_comp_to_omission_D1_s...,1-2,10s win vs 10s baseline,
168,0,,,,20230619_115321_standard_comp_to_omission_D3_s...,1-4,10s win vs 10s baseline,
177,0,,,,20230620_114347_standard_comp_to_omission_D4_s...,1-2,10s win vs 10s baseline,
178,0,,,,20230621_111240_standard_comp_to_omission_D5_s...,1-4,10s win vs 10s baseline,
179,0,,,,20230622_110832_standard_comp_to_both_rewarded...,1-1,10s win vs 10s baseline,
224,0,,,,20230624_105855_standard_comp_to_both_rewarded...,1-4,10s win vs 10s baseline,
242,0,,,,20230625_112913_standard_comp_to_both_rewarded...,1-4,10s win vs 10s baseline,
260,0,,,,20240320_171038_alone_comp_subj_4-2_t6b6_merge...,4-2,10s win vs 10s baseline,
283,0,,,,20240322_120625_alone_comp_subj_3-3_t6b6_merge...,3-3,10s win vs 10s baseline,


In [18]:
# Recording names of the rows that have no putative label.
all_n_recs_na = na_rows.loc[:, 'Recording']

In [19]:
# Show the full (untruncated) name of the first unlabeled recording.
# .iloc is positional, which matters here: all_n_recs_na keeps the index
# labels of the filtered rows, so it does not start at label 0.
all_n_recs_na.iloc[0]

'20230613_105657_standard_comp_to_training_D2_subj_1-4_t4b3L_box2_merged.rec'

In [20]:
# Recording names of every unit in the putatives table, for comparison with
# the recording names of the unlabeled rows.
putative_recs_na = putatives.loc[:, 'recording_name']

In [21]:
# Show the full (untruncated) name of the first recording in the putatives
# table. Positional .iloc is used, as for all_n_recs_na, so the lookup does
# not depend on the index containing the label 0.
putative_recs_na.iloc[0]

'20240320_142408_alone_comp_subj_3-3_t5b5_merged.rec'

In [23]:
# Write the labeled results to a scratch CSV in the working directory.
# The row index is written as well, since `index` is left at its default.
csv_path = 'temp_csv.csv'
result_df.to_csv(csv_path)