In [4]:
'''
Author: Conor Lane, April 2023. contact conor.lane1995@gmail.com
Script to match the suite2P detected cells across two recordings.  
Inputs: Suite2P Fneu.npy and iscell.npy outputs for the first analyzed recording (recording_1, iscell_1) and the combined recording 1 and 2 suite2P output.
Performs pairwise correlation on cells of recording 1 and first half of combined trace to match up cells between recordings. 
Outputs: Dataframe containing the list of ROI numbers detected as cells, (Note: zero-indexing, doesn't start at 1) matched with the second recording.  

'''

"\nAuthor: Conor Lane, April 2023. contact conor.lane1995@gmail.com\nScript to match the suite2P detected cells across two recordings.  \nInputs: Suite2P Fneu.npy and iscell.npy outputs for the first analyzed recording (recording_1, iscell_1) and the combined recording 1 and 2 suite2P output.\nPerforms pairwise correlation on cells of recording 1 and first half of combined trace to match up cells between recordings. \nOutputs: Dataframe containing the list of ROI numbers detected as cells, (Note: zero-indexing, doesn't start at 1) matched with the second recording.  \n\n"

In [5]:
import numpy as np
import sklearn.metrics as sklearn
import pandas as pd

#Input desired threshold for classing cells as sufficiently correlated.  Input length of each recording in frames. 
pairwise_threshold = 0.9
rec_length_in_frames = 9000

In [6]:
#Load the required files directtly - recording_1 is the first recording, recording_2 is the combined 1 & 2 suite2P output.  iscell.npy 1 and 2 are for the same suite2P
# outputs, relatively.

recording_1 = "C:/Users/Conor/Documents/Imaging_Data/Two-Photon/Psilocybin_Project/Evoked_Cohort_Mice/ID276/ID276_07032023_Psilocybin/TSeries-03072023-1303-063/suite2p/plane0/Fneu.npy"
traces_1 = np.load(recording_1)
iscell_1 = "C:/Users/Conor/Documents/Imaging_Data/Two-Photon/Psilocybin_Project/Evoked_Cohort_Mice/ID276/ID276_07032023_Psilocybin/TSeries-03072023-1303-063/suite2p/plane0/iscell.npy"
cells_bool_1 = np.load(iscell_1)

recording_2 = "C:/Users/Conor/Documents/Imaging_Data/Two-Photon/Psilocybin_Project/Evoked_Cohort_Mice/ID276/ID276_07032023_Psilocybin/Combined/suite2p/plane0/Fneu.npy"
traces_2 = np.load(recording_2)
iscell_2 = "C:/Users/Conor/Documents/Imaging_Data/Two-Photon/Psilocybin_Project/Evoked_Cohort_Mice/ID276/ID276_07032023_Psilocybin/Combined/suite2p/plane0/iscell.npy"
cells_bool_2 = np.load(iscell_2)
save_loc = "C:/Users/Conor/Documents/Imaging_Data/Two-Photon/Psilocybin_Project/Evoked_Cohort_Mice/ID276/ID276_07032023_Psilocybin/Combined/suite2p/plane0/"

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/Conor/Documents/Imaging_Data/Two-Photon/Psilocybin_Project/Evoked_Cohort_Mice/ID276/ID276_07032023_Psilocybin/Combined/suite2p/plane0/Fneu.npy'

In [None]:
# Extract the index positions of detected cells from the first Fneu.npy output.  Used later to match correct cell numbers with rec_2 matches.

df = pd.DataFrame(cells_bool_1)
df = df[df[0]==1].reset_index()
df['cell_number_rec1'] = df['index']


In [None]:
# Extract the ROIs from both traces that suite2P detected as cells
cells_1 = traces_1[cells_bool_1[:,0]==1]
cells_2 = traces_2[cells_bool_2[:,0]==1]


# Take only the first half of the combined trace for each cell (this is recording 1)
cells_2 = np.array([cells_2[cell][:rec_length_in_frames] for cell in range(len(cells_2))])



In [None]:
# Run pairwise correlation on the first recording's cells, against the first half of the combined recording.

pairwise = 1-sklearn.pairwise_distances(cells_1,cells_2, metric='correlation')

In [None]:
#Find indices for day 2 (the cell in rec_2 that is most correlated with each rec_1 cell)
idx2 = pairwise.argmax(axis=1)

# Produces a boolean array for each cell's most correlated cell and whether it meets the threshold
mask2 = pairwise.max(axis=1) >= pairwise_threshold


#Print the percentage of matched cells
print(mask2.sum()/mask2.shape[0])

# Select only the indexes of the cells where the match was above threshold.
cell_matches = idx2[mask2==True]
cell_matched_1 =np.array(df['cell_number_rec1'])
cell_matched_1 = cell_matched_1[mask2==True]


0.6313559322033898


In [None]:
# Place the cells from recording 1 and their recording 2 matches into a DataFrame for neatness. 

cleaned_up = pd.DataFrame(columns=['cells_rec_1','cells_rec_2'])
cleaned_up['cells_rec_2'] = cell_matches.tolist()
cleaned_up['cells_rec_1'] = cell_matched_1
print(cleaned_up)

In [None]:
#Optional: convert dataframe into cell numbers for use in the other scripts. Note, every cell is now one number higher than its suite2p output. 
matched_cell_numbers = cleaned_up + 1
print(matched_cell_numbers)
matched_cell_numbers.to_csv(save_loc + "matched_cell_numbers.csv")

     cells_rec_1  cells_rec_2
0              1            3
1              3           16
2              4            8
3              5            6
4              6            2
..           ...          ...
442         1577          521
443         1579          744
444         1631          752
445         1632          765
446         2058          766

[447 rows x 2 columns]
