In [1]:
import csv
import operator
import time
import random
import math
import functools
import seaborn
import scipy.io
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from statsmodels.tsa.stattools import adfuller, kpss
from os import listdir
from os.path import dirname, join

In [21]:
def find_stationary_location(A):
    res = 1
    A = A[::-1]
    for i in range(len(A)):
        if A[i] == 0:
            return len(A)-i+1
    return res

## Reading in FMRI files

In [39]:
files = listdir('E:/projects/run2')
print("Total number of files loaded: "+str(len(files)))

# construct a dictionary to relate numbering back to original files
filename_to_num = {}
for i in range(len(files)):
    filename_to_num[i] = files[i]

Total number of files loaded: 820


In [23]:
# loop through all files to extract ROI time-series from each subject and store in the tensor matrix subjects[]
subjects = []
# define file directory
input_files_dir = join('E:','/projects','run2')
output_files_dir = join('E:','/projects','/connectome','results')
for file in files:
    file_name = join(input_files_dir,file)
    temp = scipy.io.loadmat(file_name)  
    subjects.append(temp['tc'])


## Creating Files for Storage of Results

In [62]:
# create two big .csv's one for positive time and one for negative time, each stores the time needed for the regions to reach 
# their final stationarity for all subjects
df = pd.DataFrame(list())
region_nums = [None]
for i in range(1,161):
    region_nums.append('region '+str(i))
df.to_csv('E:/projects/connectome/results/positive_time/positive_times_combined_results.csv')
df.to_csv('E:/projects/connectome/results/negative_time/negative_times_combined_results.csv')
with open('E:/projects/connectome/results/positive_time/positive_times_combined_results.csv', 'w',newline='') as file:
    writer = csv.writer(file)
    writer.writerow(region_nums)
with open('E:/projects/connectome/results/negative_time/negative_times_combined_results.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(region_nums)

In [63]:
# create files to store results for each subject within regions at different time steps
time_steps = [None]
for i in range(1, 121):
    step_num ='step ' + str(i)
    time_steps.append(step_num)

for i in range(len(subjects)):
    df = pd.DataFrame(list())
    sub_file = 'E:/projects/connectome/results/positive_time/'+'sub_'+str(i+1)+'_positive_time.csv'
    df.to_csv(sub_file)
    with open(sub_file, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(time_steps)
for i in range(len(subjects)):
    df = pd.DataFrame(list())
    sub_file = 'E:/projects/connectome/results/negative_time/'+'sub_'+str(i+1)+'_negative_time.csv'
    df.to_csv(sub_file)
    with open(sub_file, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(time_steps)

In [64]:
# finding how long it takes for the time series to stationarize for each region of each subject. 
# The increment is 10 time points.
increment = 10
for i in range(len(subjects)): 
    start_time = time.time()
    positive_times_for_each_subject = ['subject '+str(i+1)]
    negative_times_for_each_subject = ['subject '+str(i+1)]
    pos_time_sub_file = 'E:/projects/connectome/results/positive_time/'+'sub_'+str(i+1)+'_positive_time.csv'
    neg_time_sub_file = 'E:/projects/connectome/results/negative_time/'+'sub_'+str(i+1)+'_negative_time.csv'
    
    for region in range(160):
        pos_time_region_res = ['region_'+str(region+1)]
        neg_time_region_res = ['region_'+str(region+1)]
        for step in range(len(subjects[0][:,0])//increment):
            end = 10*(step+1)
            ts = subjects[i][:,region][:end]
            reversed_ts = subjects[i][:,region][-end:]
            ADF_test = adfuller(ts)
            reversed_ADF_test = adfuller(reversed_ts)
            if ADF_test[4]['1%'] > ADF_test[0]:
                pos_time_region_res.append(1)
            else:
                pos_time_region_res.append(0)
            if reversed_ADF_test[4]['1%'] > reversed_ADF_test[0]:
                neg_time_region_res.append(1)
            else:
                neg_time_region_res.append(0)
        with open(pos_time_sub_file, 'a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(pos_time_region_res)
        with open(neg_time_sub_file, 'a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(neg_time_region_res)
        
        
        # skip the first entry because this entry is actually 'region_num'
        positive_times_for_each_subject.append(find_stationary_location(pos_time_region_res[1:])*10)
        negative_times_for_each_subject.append(find_stationary_location(neg_time_region_res[1:])*10)
    
    # write to large_data_set_result.csv for result storage after each subject is processed
    with open('E:/projects/connectome/results/positive_time/positive_times_combined_results.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(positive_times_for_each_subject)
    with open('E:/projects/connectome/results/negative_time/negative_times_combined_results.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(negative_times_for_each_subject)
    print("subject "+str(i) +" completed"+", %s seconds taken" % (time.time() - start_time))

subject 0 completed, 389.1613805294037 seconds taken
subject 1 completed, 389.2807140350342 seconds taken
subject 2 completed, 386.82291173934937 seconds taken
subject 3 completed, 385.5043740272522 seconds taken
subject 4 completed, 381.2720642089844 seconds taken
subject 5 completed, 391.8977761268616 seconds taken
subject 6 completed, 384.0817952156067 seconds taken
subject 7 completed, 394.90519881248474 seconds taken
subject 8 completed, 402.8086516857147 seconds taken
subject 9 completed, 417.12638783454895 seconds taken
subject 10 completed, 403.8723769187927 seconds taken
subject 11 completed, 426.22848987579346 seconds taken
subject 12 completed, 435.14026045799255 seconds taken
subject 13 completed, 385.7294428348541 seconds taken
subject 14 completed, 391.93221378326416 seconds taken
subject 15 completed, 380.55209517478943 seconds taken
subject 16 completed, 383.5803020000458 seconds taken
subject 17 completed, 384.45870089530945 seconds taken
subject 18 completed, 387.3900

subject 150 completed, 387.4423305988312 seconds taken
subject 151 completed, 387.3047139644623 seconds taken
subject 152 completed, 388.82727241516113 seconds taken
subject 153 completed, 389.80229783058167 seconds taken
subject 154 completed, 388.75448656082153 seconds taken
subject 155 completed, 386.2682044506073 seconds taken
subject 156 completed, 384.3972125053406 seconds taken
subject 157 completed, 384.8939964771271 seconds taken
subject 158 completed, 382.2841753959656 seconds taken
subject 159 completed, 387.3984100818634 seconds taken
subject 160 completed, 387.8932418823242 seconds taken
subject 161 completed, 387.7380576133728 seconds taken
subject 162 completed, 387.8424913883209 seconds taken
subject 163 completed, 389.27316999435425 seconds taken
subject 164 completed, 385.4630026817322 seconds taken
subject 165 completed, 387.5965383052826 seconds taken
subject 166 completed, 387.1064500808716 seconds taken
subject 167 completed, 386.77377104759216 seconds taken
subje

subject 299 completed, 381.839120388031 seconds taken
subject 300 completed, 380.42489743232727 seconds taken
subject 301 completed, 381.6341598033905 seconds taken
subject 302 completed, 382.5486707687378 seconds taken
subject 303 completed, 381.72592639923096 seconds taken
subject 304 completed, 378.8214330673218 seconds taken
subject 305 completed, 379.3532497882843 seconds taken
subject 306 completed, 380.4844331741333 seconds taken
subject 307 completed, 385.99195885658264 seconds taken
subject 308 completed, 383.14305448532104 seconds taken
subject 309 completed, 380.0237612724304 seconds taken
subject 310 completed, 382.9537105560303 seconds taken
subject 311 completed, 384.2986385822296 seconds taken
subject 312 completed, 386.4136154651642 seconds taken
subject 313 completed, 383.4212143421173 seconds taken
subject 314 completed, 382.05188965797424 seconds taken
subject 315 completed, 381.67710304260254 seconds taken
subject 316 completed, 385.90415596961975 seconds taken
subj

subject 447 completed, 443.5069456100464 seconds taken
subject 448 completed, 443.65525794029236 seconds taken
subject 449 completed, 445.32544207572937 seconds taken
subject 450 completed, 406.71178913116455 seconds taken
subject 451 completed, 389.04916620254517 seconds taken
subject 452 completed, 388.3617904186249 seconds taken
subject 453 completed, 388.3051142692566 seconds taken
subject 454 completed, 389.1372973918915 seconds taken
subject 455 completed, 392.0074164867401 seconds taken
subject 456 completed, 389.50251054763794 seconds taken
subject 457 completed, 442.3489019870758 seconds taken
subject 458 completed, 423.8197195529938 seconds taken
subject 459 completed, 391.35103273391724 seconds taken
subject 460 completed, 412.0062584877014 seconds taken
subject 461 completed, 444.8066625595093 seconds taken
subject 462 completed, 450.9652318954468 seconds taken
subject 463 completed, 450.53224658966064 seconds taken
subject 464 completed, 454.17681789398193 seconds taken
su

subject 595 completed, 384.4421079158783 seconds taken
subject 596 completed, 383.1957585811615 seconds taken
subject 597 completed, 401.8583688735962 seconds taken
subject 598 completed, 389.4772882461548 seconds taken
subject 599 completed, 388.0709080696106 seconds taken
subject 600 completed, 387.448184967041 seconds taken
subject 601 completed, 386.41655564308167 seconds taken
subject 602 completed, 385.78229427337646 seconds taken
subject 603 completed, 382.1885287761688 seconds taken
subject 604 completed, 385.04998660087585 seconds taken
subject 605 completed, 385.28346061706543 seconds taken
subject 606 completed, 385.59382128715515 seconds taken
subject 607 completed, 384.74951696395874 seconds taken
subject 608 completed, 383.09384775161743 seconds taken
subject 609 completed, 386.13937187194824 seconds taken
subject 610 completed, 390.17600560188293 seconds taken
subject 611 completed, 390.4269349575043 seconds taken
subject 612 completed, 383.74524760246277 seconds taken
s

subject 744 completed, 386.59690284729004 seconds taken
subject 745 completed, 386.37556648254395 seconds taken
subject 746 completed, 386.94749879837036 seconds taken
subject 747 completed, 385.38965129852295 seconds taken
subject 748 completed, 386.2135474681854 seconds taken
subject 749 completed, 387.0757141113281 seconds taken
subject 750 completed, 386.22037982940674 seconds taken
subject 751 completed, 388.30414152145386 seconds taken
subject 752 completed, 386.8723464012146 seconds taken
subject 753 completed, 387.79619240760803 seconds taken
subject 754 completed, 388.0532867908478 seconds taken
subject 755 completed, 387.50381803512573 seconds taken
subject 756 completed, 387.7517604827881 seconds taken
subject 757 completed, 388.7201886177063 seconds taken
subject 758 completed, 387.2362759113312 seconds taken
subject 759 completed, 388.50031304359436 seconds taken
subject 760 completed, 388.89461612701416 seconds taken
subject 761 completed, 388.04944610595703 seconds taken