In [12]:
import numpy as np
import math
import os
import pandas as pd 

# Preprocessing of Brain Datas

In [17]:
def get_duration(name):
    '''Given the name of the film, this function returns its duration in seconds'''
    
    if name == "AfterTheRain_TC_48":
        dur = 496
    elif name == "BetweenViewings_TC_48":
        dur = 808
    elif name == "BigBuckBunny_TC_48":
        dur = 490
    elif name == "Chatter_TC_48":
        dur = 405
    elif name == "FirstBite_TC_48":
        dur = 599
    elif name == "LessonLearned_TC_48":
        dur = 667
    elif name == "Payload_TC_48":
        dur = 1008
    elif name == "Sintel_TC_48":
        dur = 722
    elif name == "Spaceman_TC_48":
        dur = 805
    elif name == "Superhero_TC_48":
        dur = 1028
    elif name == "TearsOfSteel_TC_48":
        dur = 588
    elif name == "TheSecretNumber_TC_48":
        dur = 784
    elif name == "ToClaireFromSonny_TC_48":
        dur = 402
    elif name == "YouAgain_TC_48":
        dur = 798
    elif name == "Rest_TC_48":
        dur = 0
    return dur


In [20]:
def preprocess_brain_file(name,data_path,nb):
    '''This function preprocess one brain data file, by removing the irrelevant data (that is, data that 
    have not been recorded during the film),and by adding a 6 seconds delay and averaging over 13 seconds
    Inputs:
        name : Name of the film, str
        data_path : location of the .csv data file, str
        nb : int, indicates the number of brain data from the same film already preprocessed
    Output:
        The function creates a .csv file with the preprocessed brain data
    '''
    
    ''' 
    Read the data from the .csv file, 
    skip the first 74 rows corresponding to the time before the beginning of the film and the 6 seconds delay
    '''
    sample_freq = 1.3 # Sampling rate
    duration = get_duration(name) #duration of the film
    n_samples = math.floor(duration/sample_freq)# orresponding number of fMRI samples
    
    X = np.genfromtxt(data_path, delimiter=",",skip_header=77) #skip 74 = round((93.9+6)/1.3) rows that correspond to the 90s rest period and then the 6 seconds delay

    ''' Delete the final rows, that correspond to data recorded after the end of the film (based on its duration)'''
    skip_end = X.shape[0]-n_samples
    X = X[:-skip_end, :]
    N = X.shape[0] # number of remaining samples
    D = X.shape[1] # number of features, i.e. brain regions, should be 400 or 48 resp.

    '''Average over 10 datapoints, i.e. average over 13 seconds'''
    new_N = int(N/10)
    X_averaged = np.zeros([new_N,D])

    for i in range(new_N):
        means = np.mean(X[i:i+10],axis=0)
        X_averaged[i,:] = means
    
    '''Output .csv files, with preprocessed data'''
    output_path = "results_48_delay/"+name+"_average_"+nb+".csv"
    pd.DataFrame(X_averaged).to_csv(output_path,header=None, index=None)

In [21]:
dirr = "Z:/" # location of the server
# Scan over all files in the server, read and preprocess all relevant .csv files
for root, subdirectories, files in os.walk(dirr):
    for subdirectory in subdirectories:
        for _,subdirectory2,_ in os.walk(dirr+'/'+subdirectory):
            for subdirectory22 in subdirectory2:
                for _,_,filess in os.walk(dirr+subdirectory+'/'+subdirectory22):
                    for file in filess:
                        if file.endswith("_48.csv"): #If working with the 400 brain regions file, change 48 to 400
                            print(file)
                            name = file[:-4]
                            nb = 1
                            filename="results_48_delay/"+name+"_average_"+"{}.csv"
                            while os.path.isfile(filename.format(nb)):
                                nb += 1
                            filename = filename.format(nb)
                            data_path=dirr+subdirectory+'/'+subdirectory22+'/'+file
                            preprocess_brain_file(name,data_path,str(nb))
        

@Recently-Snapshot
schaefer_2018
sub-S01
BigBuckBunny_TC_48.csv
FirstBite_TC_48.csv
Rest_TC_48.csv
YouAgain_TC_48.csv
AfterTheRain_TC_48.csv
LessonLearned_TC_48.csv
Payload_TC_48.csv
TheSecretNumber_TC_48.csv
BetweenViewings_TC_48.csv
Chatter_TC_48.csv
Spaceman_TC_48.csv
ToClaireFromSonny_TC_48.csv
Sintel_TC_48.csv
Superhero_TC_48.csv
TearsOfSteel_TC_48.csv
sub-S02
AfterTheRain_TC_48.csv
Rest_TC_48.csv
TearsOfSteel_TC_48.csv
YouAgain_TC_48.csv
BigBuckBunny_TC_48.csv
Chatter_TC_48.csv
Superhero_TC_48.csv
TheSecretNumber_TC_48.csv
BetweenViewings_TC_48.csv
LessonLearned_TC_48.csv
Payload_TC_48.csv
FirstBite_TC_48.csv
Sintel_TC_48.csv
Spaceman_TC_48.csv
ToClaireFromSonny_TC_48.csv
sub-S03
Payload_TC_48.csv
Rest_TC_48.csv
Spaceman_TC_48.csv
BetweenViewings_TC_48.csv
LessonLearned_TC_48.csv
Superhero_TC_48.csv
TheSecretNumber_TC_48.csv
AfterTheRain_TC_48.csv
FirstBite_TC_48.csv
Sintel_TC_48.csv
TearsOfSteel_TC_48.csv
BigBuckBunny_TC_48.csv
Chatter_TC_48.csv
ToClaireFromSonny_TC_48.csv
YouAg