In [4]:
import os
import numpy as np
import pandas as pd

from constants import SPATIAL, feature_names_org

In [2]:
# --- V1 configuration -------------------------------------------------------
# Chunk sizes to process; each one names a sub-directory of the data folder.
chunks = [0, 25, 50, 100, 200, 400, 800, 1600]
# Columns that are carried through unchanged instead of being summarised.
drop = ['num_spikes', 'max_abs', 'name', 'region', 'label']
# Input directory and matching output directory for every chunk size.
PATH = [f'cluster_data/clusterData_no_light_29_03_22/{cs}' for cs in chunks]
# NOTE(review): the destination suffix is `_v2` although this is the V1
# section. The V2 configuration further down uses the same directories, so for
# chunk sizes present in both lists the V2 output overwrites the V1 output.
# Confirm this is intended.
DEST = [f'cluster_data/clusterData_no_light_29_03_22/{cs}_chunks_improved_v2' for cs in chunks]

# Create the output directories. `makedirs(..., exist_ok=True)` also creates
# missing parent directories and is a no-op when the directory already exists,
# so the cell is safe to re-run.
for d in DEST:
    os.makedirs(d, exist_ok=True)

### V1 — keep every row and append per-file summary statistics for each feature

In [3]:
# V1: for every input CSV, keep each original row and, next to every feature
# column, add five columns holding that feature's per-file mean, standard
# deviation and 25/50/75 % quantiles (the same value repeated on every row).
# The metadata columns listed in `drop` are appended unchanged at the end.

# Suffixes of the summary columns, in the order they follow each raw feature.
STAT_SUFFIXES = ('avg', 'std', 'q25', 'q50', 'q75')

for p, d in zip(PATH, DEST):
    for file in sorted(os.listdir(p)):
        df = pd.read_csv(os.path.join(p, file))

        n = df.shape[0]
        md = df[drop]               # metadata columns, passed through as-is
        df = df.drop(columns=drop)  # every remaining column is summarised
        nc = len(df.columns)

        # One Series per statistic (indexed by feature column), in the same
        # order as STAT_SUFFIXES. Note pandas' `std` uses ddof=1 by default,
        # so a file with a single row yields NaN for the std columns.
        stats = (
            df.mean(),
            df.std(),
            df.quantile(0.25),
            df.quantile(0.5),
            df.quantile(0.75),
        )

        # Header layout: c, c_avg, c_std, c_q25, c_q50, c_q75 for every
        # feature c, followed by the metadata column names.
        new_headers = []
        for c in df.columns:
            new_headers += [f'{c}'] + [f'{c}_{s}' for s in STAT_SUFFIXES]
        new_headers += list(md.columns)

        # Each feature occupies `width` adjacent output columns. The metadata
        # block starts at `n_feat_cols`; deriving both from the data (instead
        # of a literal 5/6) keeps the layout correct if `drop` changes.
        width = 1 + len(STAT_SUFFIXES)
        n_feat_cols = nc * width
        new_df = np.zeros((n, n_feat_cols + len(drop)), object)
        new_df[:, 0:n_feat_cols:width] = df.to_numpy()
        for k, stat in enumerate(stats, start=1):
            # The (nc,) statistic vector is broadcast down all n rows.
            new_df[:, k:n_feat_cols:width] = stat.to_numpy()
        new_df[:, n_feat_cols:] = md.to_numpy()

        new_df = pd.DataFrame(new_df, columns=new_headers)

        # The frame's columns already are `new_headers`, so no explicit
        # `header=` argument is needed.
        new_df.to_csv(os.path.join(d, file), index=False)


### V2 — one summary row per file (statistics of the features selected via `SPATIAL`)

In [15]:
# --- V2 configuration -------------------------------------------------------
# Chunk sizes to process; each one names a sub-directory of the data folder.
chunks = [0, 1, 5, 10, 25, 50, 100, 200, 400, 800, 1600]
# Metadata columns copied (from the first row of each file) into the output.
drop = ['max_abs', 'name', 'region', 'label']
# Input directory and matching output directory for every chunk size.
PATH = [f'cluster_data/clusterData_no_light_29_03_22/{cs}' for cs in chunks]
DEST = [f'cluster_data/clusterData_no_light_29_03_22/{cs}_chunks_improved_v2' for cs in chunks]

# Create the output directories. `makedirs(..., exist_ok=True)` also creates
# missing parent directories and is a no-op when the directory already exists,
# so the cell is safe to re-run.
for d in DEST:
    os.makedirs(d, exist_ok=True)

In [23]:
# V2: collapse every input CSV into a single row holding, for each selected
# feature, its per-file mean, standard deviation and 25/50/75 % quantiles,
# followed by the metadata columns listed in `drop` (taken from the first row).

# Suffixes of the summary columns, in the order they are written per feature.
STAT_SUFFIXES = ('avg', 'std', 'q25', 'q50', 'q75')

for p, d in zip(PATH, DEST):
    for file in sorted(os.listdir(p)):
        df = pd.read_csv(os.path.join(p, file))

        # Drop the `num_spikes` COLUMN if present. The keyword matters:
        # `df.drop([...])` without `columns=` targets row labels, which with
        # errors='ignore' silently does nothing.
        df = df.drop(columns=['num_spikes'], errors='ignore')
        md = df[drop].head(1)  # metadata of the first row represents the file

        # Keep only the features addressed by SPATIAL, skipping its last entry.
        # NOTE(review): presumably the last index in SPATIAL is not a feature
        # column (e.g. the label) — confirm against `constants`.
        df = df[[feature_names_org[i] for i in SPATIAL[:-1]]]
        nc = len(df.columns)

        # One Series per statistic (indexed by feature column), in the same
        # order as STAT_SUFFIXES. Note pandas' `std` uses ddof=1 by default,
        # so a file with a single row yields NaN for the std columns.
        stats = (
            df.mean(),
            df.std(),
            df.quantile(0.25),
            df.quantile(0.5),
            df.quantile(0.75),
        )

        # Header layout: c_avg, c_std, c_q25, c_q50, c_q75 for every feature
        # c, followed by the metadata column names.
        new_headers = []
        for c in df.columns:
            new_headers += [f'{c}_{s}' for s in STAT_SUFFIXES]
        new_headers += list(md.columns)

        # Each feature occupies `width` adjacent output columns; the metadata
        # block starts at `n_feat_cols`.
        width = len(STAT_SUFFIXES)
        n_feat_cols = nc * width
        new_df = np.zeros((1, n_feat_cols + len(drop)), object)
        for k, stat in enumerate(stats):
            new_df[:, k:n_feat_cols:width] = stat.to_numpy()
        new_df[:, n_feat_cols:] = md.to_numpy()

        new_df = pd.DataFrame(new_df, columns=new_headers)

        # The frame's columns already are `new_headers`, so no explicit
        # `header=` argument is needed.
        new_df.to_csv(os.path.join(d, file), index=False)