In [None]:
# Author: Banafsheh Khazali
# Update: feb 06, 2023

In [None]:
import numpy as np
import os
import csv
import pandas as pd

In [None]:
# -p keeps this cell re-runnable; the absolute path matches the /content/rawData path the later cells read from.
!mkdir -p /content/rawData

In [None]:
# -p keeps this cell re-runnable; the absolute path matches the /content/csvData path the later cells use.
!mkdir -p /content/csvData

In [None]:
# -p keeps this cell re-runnable; the neuron-name file is later read from /content/names/neuron_names.txt.
!mkdir -p /content/names

In [None]:
# -p keeps this cell re-runnable; the absolute path matches the /content/new_csv path the later cells use.
!mkdir -p /content/new_csv

In [None]:
# -p keeps this cell re-runnable; the absolute path matches the /content/labeled_csv path the later cells use.
!mkdir -p /content/labeled_csv

In [None]:
# -p keeps this cell re-runnable; the absolute path matches the /content/synth_csv path the later cells use.
!mkdir -p /content/synth_csv

## Reading Data

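We start by going through all .npy files, which hold the output voltages of all neurons, and truncate each one to at most 600 rows (time steps). Recordings that already have 600 or fewer rows are left unchanged, so the files end up with the common shape (600, 279) only if every recording has at least 600 rows and 279 columns.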



In [None]:
def uniform_shape(array, max_rows=600):
    """Return `array` truncated to at most `max_rows` leading rows.

    Only the first axis is shortened; all other axes are left untouched.
    Arrays that already have `max_rows` rows or fewer are returned as-is
    (as a slice of the input, no padding is performed).

    Parameters
    ----------
    array : numpy.ndarray
        Array with at least one dimension.
    max_rows : int, optional
        Upper bound on the number of rows to keep. Defaults to 600, the
        value this notebook uses for every recording.

    Returns
    -------
    numpy.ndarray
        The first `min(array.shape[0], max_rows)` rows of `array`.

    Raises
    ------
    ValueError
        If `max_rows` is negative (a negative slice bound would silently
        drop rows from the end instead of capping the length).
    """
    if max_rows < 0:
        raise ValueError("max_rows must be non-negative")
    min_rows = min(array.shape[0], max_rows)
    return array[:min_rows]

# Directory holding the raw .npy recordings.
folder = "/content/rawData"
files = [name for name in os.listdir(folder) if name.endswith(".npy")]

# Trim each recording and write it back over the file it was loaded from.
for npy_name in files:
    npy_path = os.path.join(folder, npy_name)
    np.save(npy_path, uniform_shape(np.load(npy_path)))

Here folder is the path to the folder containing the .npy files, and files is a list of all .npy files in that folder. The function uniform_shape takes in a numpy array and returns a new numpy array with the same number of columns and at most 600 rows. The code then loops through all the .npy files in files, loads the numpy array, calls the uniform_shape function, and saves the uniform array back to the same file.

## Converting .npy to .CSV

Then we go over all the .npy files in a folder, convert them to .csv files, and store the new .csv files in a new folder.

In [None]:
import numpy as np
import os
import csv

# Source folder with the .npy recordings and destination folder for the CSVs.
npy_folder = "/content/rawData"
csv_folder = "/content/csvData"

# Create the destination if needed; exist_ok makes the cell safe to re-run.
os.makedirs(csv_folder, exist_ok=True)

files = [f for f in os.listdir(npy_folder) if f.endswith(".npy")]

for file in files:
    npy_file_path = os.path.join(npy_folder, file)
    array = np.load(npy_file_path)
    # Same base name as the .npy file, with a .csv extension.
    csv_file_path = os.path.join(csv_folder, os.path.splitext(file)[0] + ".csv")
    # newline="" is required by the csv module: the writer emits its own line
    # terminators, and without it some platforms write an extra blank line
    # after every row.
    with open(csv_file_path, "w", newline="") as f:
        writer = csv.writer(f)
        # One CSV line per array row; no header row is written.
        writer.writerows(array)


Here npy_folder is the path to the folder containing the .npy files and csv_folder is the path to the folder where the new .csv files should be stored. If the csv_folder does not exist, the code creates it using the os module's makedirs method. The code then loops through all the .npy files in the npy_folder, loads the numpy array, and writes the contents of the numpy array to a .csv file in the csv_folder with the same name as the .npy file but with the .csv extension.

Now let's look at the first DataFrame:

In [None]:


# Sort the listing so that "the first file" is the same on every run
# (os.listdir returns entries in arbitrary order).
files = sorted(f for f in os.listdir(csv_folder) if f.endswith(".csv"))

if not files:
    print("No csv files found in the folder.")
else:
    first_file = files[0]
    first_file_path = os.path.join(csv_folder, first_file)
    # The converted files hold values only (no header row), so header=None
    # stops pandas from consuming the first time step as column names.
    df = pd.read_csv(first_file_path, header=None)
    # Preview inside this branch: `df` only exists when a file was found, so an
    # unconditional `df.head()` would raise NameError or show a stale frame.
    display(df.head())


Unnamed: 0,1.2485823877402336,0.07716476565292019,0.3502820685320868,0.054057907797447034,0.03825069273840749,1.2485838704857064,0.07482795466566532,0.03217123080850717,0.04728306971111774,0.8667117192590219,...,126.29272593939231,50.58200690928512,-38.79807536056665,-0.7853309556859791,0.16564984068950975,-4.229991906865283,-0.14314701440922306,-199.11049708844163,2.02109280608683,-49.94797598404795
0,1.168058,0.079594,0.33473,0.076796,0.038254,1.168059,0.068584,0.02126,0.044577,0.795493,...,133.7704,51.410054,-38.974394,-0.758087,0.152038,-4.554971,-0.140579,-177.867538,1.76677,-51.593009
1,1.092726,0.081452,0.319349,0.098102,0.038388,1.092727,0.063031,0.011418,0.042195,0.727566,...,140.775488,52.228434,-39.174033,-0.731541,0.147179,-4.859585,-0.13862,-155.160785,1.517008,-53.266524
2,1.022252,0.082774,0.304281,0.118047,0.03866,1.022253,0.058181,0.002729,0.040148,0.663711,...,147.293813,53.03548,-39.390151,-0.705714,0.153862,-5.145822,-0.137221,-131.136887,1.272727,-54.962951
3,0.956323,0.083595,0.289683,0.136709,0.039073,0.956324,0.054025,-0.004791,0.038445,0.603596,...,153.196175,53.795387,-39.621274,-0.680616,0.167432,-5.423186,-0.136332,-106.040107,1.034368,-56.688393
4,0.894646,0.083947,0.275622,0.154168,0.039618,0.894647,0.050495,-0.011301,0.037066,0.544421,...,158.080416,54.373936,-39.88223,-0.656254,0.169896,-5.703682,-0.13591,-80.272301,0.801434,-58.463308


Here csv_folder is the path to the folder containing the .csv files, and files is a list of all .csv files. If there are no .csv files in the folder, the code prints a message saying so. Otherwise, the code takes the first file in the list, reads it into a pandas dataframe using the pandas library's read_csv method, and prints the first 5 rows of the dataframe using the head method.

As can be seen, the neuron names are not included, so we use a text file containing the names of all neurons and assign them to the columns:


In [None]:
import os
import pandas as pd
from datetime import datetime, timedelta

# Input folder (header-less CSVs), output folder, and the neuron-name list.
folder_path = '/content/csvData'
new_csv = "/content/new_csv"
names_path = '/content/names/neuron_names.txt'

# Make sure the output folder exists so to_csv below cannot fail on a
# missing directory.
os.makedirs(new_csv, exist_ok=True)

# The name list is identical for every recording, so parse it once up front
# instead of re-reading the file on every loop iteration.
with open(names_path) as f:
    ncolumns = []
    for line in f:
        # Strip the list/repr decoration around each name. NOTE(review): this
        # removes every lowercase "u" (presumably a Python 2 u'...' prefix),
        # which is only safe if the names themselves contain no lowercase "u".
        name = line.replace(",\n", "")
        for junk in ("u", "'", " ", "[", "]", "\n"):
            name = name.replace(junk, "")
        # Skip lines that are empty after cleaning (e.g. a trailing blank
        # line) so they do not become bogus column names.
        if name:
            ncolumns.append(name)

for filename in os.listdir(folder_path):
    if not filename.endswith(".csv"):
        continue

    # header=None: the files in csvData have no header row. With the default
    # header handling pandas would turn the first time step into column names
    # and that row would be lost from every recording.
    neurons = pd.read_csv(os.path.join(folder_path, filename), header=None)

    # Read the clock once per file so consecutive rows are exactly one second
    # apart; calling datetime.now() per row lets the timestamps drift.
    start = datetime.now()
    time_index = [start + timedelta(seconds=i) for i in range(len(neurons))]
    neurons.index = pd.Index(time_index, name='time')

    # Raises ValueError if the number of names differs from the column count.
    neurons.columns = ncolumns
    # The 'time' index is written as the first CSV column.
    neurons.to_csv(os.path.join(new_csv, "new" + filename))


In [None]:
# Sort the listing so that "the first file" is the same on every run
# (os.listdir returns entries in arbitrary order).
files = sorted(f for f in os.listdir(new_csv) if f.endswith(".csv"))

if not files:
    print("No csv files found in the folder.")
else:
    first_file = files[0]
    first_file_path = os.path.join(new_csv, first_file)
    df = pd.read_csv(first_file_path)
    # Preview inside this branch: `df` is only (re)assigned when a file was
    # found, so an unconditional `df.head()` would raise NameError on a fresh
    # kernel or show a stale frame from an earlier cell.
    display(df.head())

Unnamed: 0,time,IL2DL,IL2VL,IL2L,URADL,IL1VL,IL2DR,IL1DL,OLLL,IL1L,...,PVR,PVWL,PVWR,PLNL,PHCR,PHCL,PVNR,PLMR,PVNL,PLML
0,2023-02-06 23:13:28.125270,1.168063,0.077986,0.333078,0.073777,0.036716,1.168062,0.069093,0.025196,0.04403,...,130.746072,50.650398,-38.553274,-0.764498,0.135759,-4.392542,-0.141109,-178.224388,1.758427,-50.667363
1,2023-02-06 23:13:29.125284,1.092732,0.079999,0.317767,0.095264,0.036935,1.092731,0.063621,0.015476,0.041705,...,137.723167,51.450891,-38.745477,-0.737786,0.127717,-4.692679,-0.139057,-156.072318,1.513499,-52.292527
2,2023-02-06 23:13:30.125287,1.022258,0.081472,0.302834,0.115286,0.037304,1.022257,0.058866,0.006899,0.039761,...,144.244179,52.24283,-38.95334,-0.711794,0.128016,-4.975976,-0.137559,-132.554765,1.273613,-53.95171
3,2023-02-06 23:13:31.125289,0.95633,0.08244,0.288439,0.134022,0.0378,0.956329,0.054749,-0.000628,0.038134,...,150.212633,53.008158,-39.171071,-0.686537,0.141649,-5.247997,-0.13659,-107.922509,1.039416,-55.63016
4,2023-02-06 23:13:32.125290,0.894654,0.082936,0.274585,0.151606,0.038412,0.894653,0.051217,-0.007165,0.036785,...,155.157465,53.605008,-39.416315,-0.662019,0.145827,-5.52705,-0.13609,-82.677905,0.810778,-57.351033


## Labeling the data

In [None]:


# Input: CSV files that already carry the neuron names as column headers.
folder_path = '/content/new_csv'

# Output: the same files with an extra 'label' column.
modified_folder_path = '/content/labeled_csv'

# Everything in the input folder, then only the entries ending in .csv.
files = os.listdir(folder_path)
csvs = [entry for entry in files if entry.endswith('.csv')]

for csv_name in csvs:
    source_path = os.path.join(folder_path, csv_name)
    target_path = os.path.join(modified_folder_path, csv_name)

    # Load the recording and mark every row with the constant label 1.
    df = pd.read_csv(source_path).assign(label=1)

    # Write under the same file name, without the pandas row index.
    df.to_csv(target_path, index=False)


In [None]:
# Sort the listing so that "the first file" is the same on every run
# (os.listdir returns entries in arbitrary order).
files = sorted(f for f in os.listdir(modified_folder_path) if f.endswith(".csv"))

if not files:
    print("No csv files found in the folder.")
else:
    first_file = files[0]
    first_file_path = os.path.join(modified_folder_path, first_file)
    df = pd.read_csv(first_file_path)
    # Preview inside this branch: `df` is only (re)assigned when a file was
    # found, so an unconditional `df.head()` would raise NameError on a fresh
    # kernel or show a stale frame from an earlier cell.
    display(df.head())

Unnamed: 0,time,IL2DL,IL2VL,IL2L,URADL,IL1VL,IL2DR,IL1DL,OLLL,IL1L,...,PVWL,PVWR,PLNL,PHCR,PHCL,PVNR,PLMR,PVNL,PLML,label
0,2023-02-06 23:13:28.125270,1.168063,0.077986,0.333078,0.073777,0.036716,1.168062,0.069093,0.025196,0.04403,...,50.650398,-38.553274,-0.764498,0.135759,-4.392542,-0.141109,-178.224388,1.758427,-50.667363,1
1,2023-02-06 23:13:29.125284,1.092732,0.079999,0.317767,0.095264,0.036935,1.092731,0.063621,0.015476,0.041705,...,51.450891,-38.745477,-0.737786,0.127717,-4.692679,-0.139057,-156.072318,1.513499,-52.292527,1
2,2023-02-06 23:13:30.125287,1.022258,0.081472,0.302834,0.115286,0.037304,1.022257,0.058866,0.006899,0.039761,...,52.24283,-38.95334,-0.711794,0.128016,-4.975976,-0.137559,-132.554765,1.273613,-53.95171,1
3,2023-02-06 23:13:31.125289,0.95633,0.08244,0.288439,0.134022,0.0378,0.956329,0.054749,-0.000628,0.038134,...,53.008158,-39.171071,-0.686537,0.141649,-5.247997,-0.13659,-107.922509,1.039416,-55.63016,1
4,2023-02-06 23:13:32.125290,0.894654,0.082936,0.274585,0.151606,0.038412,0.894653,0.051217,-0.007165,0.036785,...,53.605008,-39.416315,-0.662019,0.145827,-5.52705,-0.13609,-82.677905,0.810778,-57.351033,1


Great! The data is ready! Let's go to the next stage, which is classification.

## Generate Synthetic Data

In [None]:
import os
import pandas as pd
import random

# path to the folder containing the original csv files
csv_path = '/content/labeled_csv'

# path to the folder where the synthetic csv files should be saved
save_path = '/content/synth_csv'

# number of synthetic variants per original file, and the largest relative
# change applied to a column (0.1 = up to +/-10 %)
n_variants = 50
max_rel_change = 0.1

# Dedicated, seeded generator: re-running the cell reproduces the same
# synthetic files without touching the global `random` state.
rng = random.Random(0)

# Make sure the output folder exists before writing into it.
os.makedirs(save_path, exist_ok=True)

# Sorted so the seeded draws are assigned to files in a stable order.
for filename in sorted(os.listdir(csv_path)):
    if not filename.endswith(".csv"):
        continue

    # Read each original once and derive all variants from it, instead of
    # re-reading the same file for every variant.
    original = pd.read_csv(os.path.join(csv_path, filename))

    # Perturb every neuron column. The labeled files are written without a
    # row index, so their columns are 'time', the neurons, then 'label';
    # slicing with [2:-1] therefore skipped the first neuron as well as
    # 'time'. Selecting by name avoids that off-by-one.
    feature_cols = [c for c in original.columns if c not in ("time", "label")]

    for i in range(n_variants):
        df = original.copy()

        # One random factor per column: each column is rescaled by a constant
        # in [1 - max_rel_change, 1 + max_rel_change]. NOTE(review): this is
        # a per-column scaling, not independent per-sample noise; confirm
        # that this is the intended augmentation.
        for col in feature_cols:
            factor = rng.uniform(-max_rel_change, max_rel_change)
            df[col] = original[col] + factor * original[col]

        # save the synthetic data to a new csv file
        new_filename = f"syn_{i}_{filename}"
        df.to_csv(os.path.join(save_path, new_filename), index=False)

## Save the data to Google Drive

In [None]:
# List the current working directory (presumably to check that the data folders are present).
!ls

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/gdrive')

# Copy folder from Colab to Google Drive
!cp -r /content/synth_csv /content/gdrive/Research/CODES/classification_data
