In [1]:
import numpy as np
import pandas as pd
import scipy.io as io
import os

In [16]:
# Input root: holds one sub-directory per entry of `chunks`, each containing
# the per-cluster CSV files that are read by the relabelling cell.
PATH = 'clusterData_no_light_FINAL_23_02_22/'

# MATLAB file loaded with scipy.io.loadmat; its 'sPV' variable supplies the labels.
MAT = 'CelltypeClassification.mat'

# Output root: relabelled CSVs are written here, mirroring the layout of PATH.
DEST = 'clusterData_no_light_FINAL_06_03_22_PV/'

# Sub-directory names that exist under PATH and are created under DEST.
# NOTE(review): presumably chunk sizes used when the features were extracted - confirm.
chunks = [0, 25, 50, 100, 200, 400, 800, 1600]

In [3]:
# Build the output tree: DEST itself plus one sub-directory per chunk.
# makedirs(..., exist_ok=True) is a no-op for directories that already exist
# and also creates any missing parents, so this cell can be re-run safely and
# has no gap between "check" and "create".
os.makedirs(DEST, exist_ok=True)
for chunk in chunks:
    chunk_dir = f"{DEST}{chunk}/"
    os.makedirs(chunk_dir, exist_ok=True)

In [4]:
# Read the MATLAB classification file and keep only its 'sPV' variable.
# Later cells index this object by field name ('filename', 'shankclu',
# 'act', 'exc', 'inh'), each followed by [0][0] to reach the per-unit data.
cell_class_mat = io.loadmat(file_name=MAT)['sPV']

In [5]:
def find_indices_in_filenames(target_name, cell_class_mat):
    """
    Locates the first contiguous run of entries that belong to one recording

    input:
    target_name: string; recording name to look for
    cell_class_mat: spv information; cell_class_mat['filename'][0][0] must be
                    an indexable sequence with one recording name per unit

    return:
    start_index, end_index: integer tuple; half-open range [start, end) of the
    first run of entries equal to target_name. If the name never occurs the
    scan starts at 0 and stops immediately, giving the empty range (0, 0).
    """
    names = cell_class_mat['filename'][0][0]
    total = len(names)

    # position of the first entry equal to target_name (stays 0 if absent)
    first = 0
    for position, name in enumerate(names):
        if name == target_name:
            first = position
            break

    # advance until the name changes or the sequence ends
    stop = first
    while stop < total:
        if names[stop] != target_name:
            break
        stop += 1

    return first, stop


def find_cluster_index_in_shankclu_vector(start_index, end_index, shank_num, clu_num, cell_class_mat):
    """
    Searches a slice of the spv for one (shank, cluster) pair

    input:
    start_index: int; first position to inspect (inclusive)
    end_index: int; position at which to stop (exclusive)
    shank_num: int; shank number to match against column 0 of each entry
    clu_num: int; cluster ID to match against column 1 of each entry
    cell_class_mat: spv information; cell_class_mat['shankclu'][0][0] must be
                    an indexable sequence of (shank, cluster) pairs

    return:
    index: integer; position of the first matching entry inside
    [start_index, end_index), None if no entry matches
    """
    pairs = cell_class_mat['shankclu'][0][0]
    hits = (
        position
        for position in range(start_index, end_index)
        if pairs[position][0] == shank_num and pairs[position][1] == clu_num
    )
    # first hit wins; None signals "not found" to the caller
    return next(hits, None)

In [20]:
# Rewrite the `label` column of every positively-labelled cluster CSV using the
# act/exc/inh flags stored in `cell_class_mat`, and save the result under DEST.

# The flag arrays are the same for every file, so look them up once.
act_flags = cell_class_mat['act'][0][0]
exc_flags = cell_class_mat['exc'][0][0]
inh_flags = cell_class_mat['inh'][0][0]

for chunk in chunks:
    files = os.listdir(f"{PATH}{chunk}/")
    for file in sorted(files):
        # Ignore directory entries that are not cluster CSVs (checkpoint
        # folders, OS metadata files, ...); they cannot be parsed below.
        if not file.endswith('.csv'):
            continue

        temp = pd.read_csv(f"{PATH}{chunk}/{file}")
        # Files whose current label is negative are skipped and therefore
        # never copied to DEST. Only the first row is inspected.
        if temp['label'].iloc[0] < 0:
            continue

        # File names look like <recording>_<shank>_<cluster>.csv; the recording
        # part may itself contain underscores, hence the split from the right.
        file_name, shank_part, clu_part = file.rsplit('_', 2)
        shank_num = int(shank_part)
        clu_num = int(clu_part.split('.')[-2])

        start_index, end_index = find_indices_in_filenames(file_name, cell_class_mat)
        clu_index = find_cluster_index_in_shankclu_vector(start_index, end_index, shank_num, clu_num, cell_class_mat)
        if clu_index is None:
            # Indexing the flag arrays with None would not fail here but
            # further down with an unrelated-looking error, so stop early.
            raise ValueError(
                f"{file}: recording '{file_name}', shank {shank_num}, cluster {clu_num} "
                f"not found in {MAT}"
            )

        is_act = act_flags[clu_index][0]
        is_exc = exc_flags[clu_index][0]
        is_inh = inh_flags[clu_index][0]

        # Precedence when several flags are set: exc, then act, then inh.
        if is_exc == 1:
            label_new = 1
        elif is_act == 1:
            label_new = 0
        elif is_inh == 1:
            label_new = -4
        else:
            raise AssertionError(
                f"{file}: unit has none of the exc/act/inh flags set "
                f"(exc={is_exc}, act={is_act}, inh={is_inh})"
            )

        # Every row gets the same label; it is stored as a float so the column
        # is written as 1.0 / 0.0 / -4.0.
        temp['label'] = float(label_new)

        temp.to_csv(path_or_buf=f"{DEST}{chunk}/{file}", index=False)

1 es04feb12_1_2_12.csv
1 es04feb12_1_2_13.csv
1 es04feb12_1_2_16.csv
1 es04feb12_1_2_2.csv
1 es04feb12_1_2_21.csv
2 es04feb12_1_2_22.csv
1 es04feb12_1_2_23.csv
1 es04feb12_1_2_24.csv
1 es04feb12_1_2_25.csv
1 es04feb12_1_2_4.csv
1 es04feb12_1_2_5.csv
1 es04feb12_1_2_6.csv
1 es04feb12_1_2_8.csv
1 es04feb12_1_2_9.csv
1 es04feb12_1_4_10.csv
1 es04feb12_1_4_12.csv
1 es04feb12_1_4_13.csv
1 es04feb12_1_4_17.csv
1 es04feb12_1_4_19.csv
1 es04feb12_1_4_2.csv
1 es04feb12_1_4_20.csv
2 es04feb12_1_4_24.csv
1 es04feb12_1_4_3.csv
1 es04feb12_1_4_9.csv
1 es09feb12_2_2_11.csv
2 es09feb12_2_2_13.csv
2 es09feb12_2_2_14.csv
1 es09feb12_2_2_15.csv
1 es09feb12_2_2_4.csv
1 es09feb12_2_2_8.csv
2 es09feb12_2_4_8.csv
1 es09feb12_3_2_2.csv
1 es09feb12_3_2_4.csv
2 es09feb12_3_2_8.csv
2 es09feb12_3_3_2.csv
2 es09feb12_3_4_4.csv
2 es20may12_1_4_18.csv
2 es21may12_1_1_10.csv
2 es21may12_1_1_3.csv
2 es21may12_1_1_9.csv
1 es25nov11_12_1_19.csv
1 es25nov11_12_1_21.csv
1 es25nov11_12_2_6.csv
1 es25nov11_12_3_11.csv
1 es

1 m649r1_17_4_8.csv
1 m649r1_17_4_9.csv
1 m649r1_19_2_14.csv
1 m649r1_19_2_18.csv
2 m649r1_19_2_19.csv
1 m649r1_19_2_2.csv
1 m649r1_19_2_20.csv
1 m649r1_19_2_3.csv
1 m649r1_19_3_15.csv
1 m649r1_19_3_8.csv
1 m649r1_19_3_9.csv
2 m649r1_19_4_14.csv
1 m649r1_19_4_18.csv
1 m649r1_19_4_19.csv
1 m649r1_19_4_2.csv
1 m649r1_19_4_20.csv
1 m649r1_19_4_21.csv
1 m649r1_19_4_23.csv
1 m649r1_19_4_24.csv
1 m649r1_19_4_25.csv
1 m649r1_19_4_26.csv
1 m649r1_19_4_27.csv
1 m649r1_19_4_28.csv
1 m649r1_19_4_29.csv
1 m649r1_19_4_3.csv
1 m649r1_19_4_5.csv
1 m649r1_21_2_11.csv
1 m649r1_21_2_16.csv
1 m649r1_21_2_18.csv
1 m649r1_21_2_2.csv
1 m649r1_21_2_20.csv
1 m649r1_21_2_22.csv
1 m649r1_21_2_23.csv
2 m649r1_21_2_25.csv
1 m649r1_21_2_26.csv
1 m649r1_21_2_29.csv
1 m649r1_21_2_3.csv
1 m649r1_21_2_30.csv
1 m649r1_21_2_31.csv
1 m649r1_21_2_4.csv
1 m649r1_21_2_5.csv
1 m649r1_21_2_6.csv
1 m649r1_21_2_9.csv
1 m649r1_21_3_14.csv
1 m649r1_21_3_2.csv
1 m649r1_21_3_7.csv
1 m649r1_21_3_9.csv
1 m649r1_21_4_10.csv
1 m649r1_2