# Summary

In this notebook we save .pkl files for faster loading. Additionally, we build the multiscale data set.

In [1]:
#import
import pandas as pd
import numpy as np
import sys
import os

# Put the sibling "code" folder first on the import path so the local modules
# imported below can be found (presumably MSCASTLE and utils live there).
# The path is assembled with os.path.join: a hard-coded backslash is only a
# separator on Windows, so r"..\code" pointed at a non-existent entry elsewhere.
sys.path.insert(0, os.path.join("..", "code"))

from copy import deepcopy
from MSCASTLE import MSCASTLE
from utils import save_obj,load_obj


# Directory layout, expressed relative to the notebook location.
data_dir = "../data/"                          # root data folder (holds AAL89.csv)
raw_dir = data_dir + "TimeSeriesAAL/"          # raw .txt time series read below
ts_dir = data_dir + "TimeSeriesDiffRegion/"    # per-hemisphere pickles written below
processed = data_dir + "processed/"            # region names, index arrays, stacked arrays
diffreg = processed + "diff_regions/"          # not used in the cells visible here

# Run switches.
replace = False  # set True to overwrite existing results
verbose = False  # set True to print info

# Transform into DataFrames and save as .pkl

In [2]:
# Read the region table; the first CSV row provides the column names.
names = pd.read_csv(data_dir + 'AAL89.csv', header=0)

# Build a "<Region>-<Side>" label for every row (a missing side renders as "nan").
names['Name'] = [
    str(region) + '-' + str(side)
    for region, side in zip(names['Region'], names['Side'])
]

# Positional overwrite: row 88, column position 2 becomes 'Vermis'.
# NOTE(review): this relies on the column order of AAL89.csv - confirm that
# position 2 is the column that is meant to receive this value.
names.iloc[88, 2] = 'Vermis'

names

Unnamed: 0,Region,Side,Lobe,Function,Color,Name
0,PrecGy,L,Central,Motor Network,green,PrecGy-L
1,PrecGy,R,Central,Motor Network,green,PrecGy-R
2,FontSup,L,Frontal - Lateral,Cognitive functions | Attention,red,FontSup-L
3,FontSup,R,Frontal - Lateral,Cognitive functions | Attention,red,FontSup-R
4,FrontSupOrb,L,Frontal - Medial,Emotion | Working memory,red,FrontSupOrb-L
...,...,...,...,...,...,...
84,Cereb III - VI,L,Cerebellum,Motor control | Coordination,yellow,Cereb III - VI-L
85,Cereb III - VI,R,Cerebellum,Motor control | Coordination,yellow,Cereb III - VI-R
86,Cereb VII - X,L,Cerebellum,Motor control | Coordination,yellow,Cereb VII - X-L
87,Cereb VII - X,R,Cerebellum,Motor control | Coordination,yellow,Cereb VII - X-R


In [3]:
# Rows whose 'Side' is missing are included in BOTH hemisphere selections.
unsided = names['Side'].isna()
right = names.loc[(names['Side'] == 'R') | unsided].index
left = names.loc[(names['Side'] == 'L') | unsided].index

# Persist the table and the two positional index arrays only when overwriting
# is explicitly enabled.
if replace:
    save_obj(names, 'region_names', processed)
    save_obj(right.values, 'index_right_regions', data_dir=processed)
    save_obj(left.values, 'index_left_regions', data_dir=processed)

# Show both selections together with their sizes.
right, left, len(right), len(left)

(Int64Index([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33,
             35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
             69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 88],
            dtype='int64'),
 Int64Index([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
             34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66,
             68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88],
            dtype='int64'),
 45,
 45)

In [4]:
# Split every raw time-series file into a left and a right table and,
# when `replace` is set, store each one as its own pickle in ts_dir.
for filename in os.listdir(raw_dir):
    f = os.path.join(raw_dir, filename)

    # Anything that is not a regular file (e.g. a sub-directory) is ignored.
    if not os.path.isfile(f):
        continue

    if verbose:
        print("\n\nStarting {}".format(filename))

    # Base name used for the output pickles: everything before '.txt'.
    nn = filename.split('.txt')[0]

    #load data (space-separated values, no header row)
    df = pd.read_csv(raw_dir + filename, sep=' ', header=None)

    # Column selection is positional, driven by the index arrays computed above.
    left_df = df.iloc[:, left].copy()
    right_df = df.iloc[:, right].copy()

    # Each side must be 1200 rows by 45 columns, and both sides must agree.
    assert left_df.shape == (1200, 45), 'Something wrong'
    assert left_df.shape == right_df.shape, 'Something wrong'

    if replace:
        save_obj(left_df, nn + '_L', data_dir=ts_dir)
        save_obj(right_df, nn + '_R', data_dir=ts_dir)

# Multiscale data set

In [5]:
# Problem dimensions.
J = 5      # passed as `ndetails` to MSCASTLE in a later cell
S = 100    # presumably the number of subjects - TODO confirm (unused in the visible cells)
N = load_obj('index_left_regions', processed).shape[0]  # length of the saved left-index array
T = 1200   # matches the row count asserted when the raw files were split

# Empty placeholders for the stacked arrays, named ts_<ss|ms>_<exam>_<side>:
# ss = single-scale, ms = multiscale, exam 0/1, side L/R.
# Each name gets its own array object; they are populated in a later cell.
ts_ss_0_L, ts_ss_0_R, ts_ss_1_L, ts_ss_1_R = (np.array([]) for _ in range(4))
ts_ms_0_L, ts_ms_0_R, ts_ms_1_L, ts_ms_1_R = (np.array([]) for _ in range(4))

In [6]:
# Wavelet settings handed to MSCASTLE in the next cell:
#   transform -> wavelet transform, wv -> wavelet family
transform, wv = 'swt', 'db5'

In [7]:
# Build the stacked single-scale (ss) and multiscale (ms) arrays from the
# per-hemisphere pickles stored in ts_dir.
#
# Results (one per exam and side): ts_ss_<exam>_<side> and ts_ms_<exam>_<side>,
# each stacked along a new leading axis, one entry per file of that group.
# A group that receives no file stays an empty array, np.array([]).
#
# Differences from the previous implementation, all deliberate:
#   * Files are visited in sorted order. os.listdir returns entries in arbitrary
#     order, which made the entry order irreproducible and did not guarantee
#     that entry s of a left array and entry s of the matching right array came
#     from the same base file name.
#   * Pieces are collected in lists and stacked once at the end. The old
#     "if not arr.any()" emptiness test also fired for an all-zero accumulator
#     (silently discarding what had been collected), and np.append in a loop
#     copies the whole accumulator on every iteration.
#   * The cell is idempotent: re-running it no longer appends duplicates.
#   * Entries that are not '<...>_<subj>_<...>_<Exam0|Exam1>..._<L|R>.pkl' files
#     are skipped instead of crashing or being binned as "right" by default.

_EXAMS = ('Exam0', 'Exam1')
_SIDES = ('L', 'R')


def _stack_or_empty(parts):
    """Stack a list of equally shaped arrays on a new axis 0; empty list -> np.array([])."""
    return np.stack(parts, axis=0) if parts else np.array([])


ss_parts = {(exam, hemi): [] for exam in _EXAMS for hemi in _SIDES}
ms_parts = {(exam, hemi): [] for exam in _EXAMS for hemi in _SIDES}

for filename in sorted(os.listdir(ts_dir)):
    f = os.path.join(ts_dir, filename)

    if not os.path.isfile(f):
        continue

    # load_obj is called with the name stripped of '.pkl', so only such files apply.
    nn, ext = os.path.splitext(filename)
    if ext != '.pkl':
        continue

    fields = nn.split('_')
    if len(fields) < 4:
        if verbose: print("\n\nSkipping {} (unexpected name)".format(filename))
        continue
    subj = fields[1]
    test = fields[3]
    side = fields[-1]

    if (test, side) not in ss_parts:
        if verbose: print("\n\nSkipping {} (unknown exam/side)".format(filename))
        continue

    if verbose: print("\n\nStarting {}".format(filename))

    df = load_obj(nn, data_dir=ts_dir)

    ################ SINGLE-SCALE ################
    # Transposed so that the table's columns come first and its rows last.
    ss_parts[(test, side)].append(df.T.values)

    ################ MULTISCALE ################
    # Drop 8 rows at each end before the transform - presumably so the length
    # becomes a multiple of 2**J as the stationary wavelet transform needs;
    # TODO confirm.
    cut_df = df.iloc[8:-8].copy()
    # NOTE: this rebinds the notebook-level T and N, kept for compatibility
    # with any later cell that reads them.
    T, N = cut_df.shape
    mscastle = MSCASTLE(cut_df.values, multiscale=True, transform=transform, wavelet=wv, lag=0, ndetails=J)
    # Swap the last two axes of the transform output before stacking.
    ms_parts[(test, side)].append(np.transpose(mscastle.Y, (0, 2, 1)))

ts_ss_0_L = _stack_or_empty(ss_parts[('Exam0', 'L')])
ts_ss_0_R = _stack_or_empty(ss_parts[('Exam0', 'R')])
ts_ss_1_L = _stack_or_empty(ss_parts[('Exam1', 'L')])
ts_ss_1_R = _stack_or_empty(ss_parts[('Exam1', 'R')])

ts_ms_0_L = _stack_or_empty(ms_parts[('Exam0', 'L')])
ts_ms_0_R = _stack_or_empty(ms_parts[('Exam0', 'R')])
ts_ms_1_L = _stack_or_empty(ms_parts[('Exam1', 'L')])
ts_ms_1_R = _stack_or_empty(ms_parts[('Exam1', 'R')])

In [8]:
# Sanity check: report every entry whose per-row mean (taken over the last
# axis) is not numerically zero. Nothing is printed when all checks pass.
#
# The loops used to be bounded by the RIGHT array's length while also indexing
# the LEFT array, which raised IndexError (left shorter) or silently left
# entries unchecked (left longer). Each array is now checked within its own
# bounds; the printed messages and their order are unchanged.

ZERO_MEAN_TOL = 1.e-5  # largest |mean| still accepted as zero


def _has_nonzero_mean(arr, idx, tol=ZERO_MEAN_TOL):
    """Return True if arr[idx] exists and any of its axis-1 means exceeds tol in magnitude."""
    if len(idx) > arr.ndim or any(i >= n for i, n in zip(idx, arr.shape)):
        return False
    return bool(np.any(np.abs(arr[idx].mean(axis=1)) > tol))


def _check_zero_mean(left_ts, right_ts, per_scale=False):
    """Print the entries of the left/right arrays whose means are not ~0.

    With per_scale=False the arrays are indexed as arr[s]; with per_scale=True
    as arr[s, j] for j >= 1 (index 0 on the second axis is not checked).
    """
    n_subjects = max(left_ts.shape[0], right_ts.shape[0])
    for s in range(n_subjects):
        if not per_scale:
            if _has_nonzero_mean(left_ts, (s,)):
                print("Left {}".format(s))
            if _has_nonzero_mean(right_ts, (s,)):
                print("Right {}".format(s))
            continue

        n_scales = max((a.shape[1] for a in (left_ts, right_ts) if a.ndim > 1), default=0)
        for j in range(1, n_scales):
            if _has_nonzero_mean(left_ts, (s, j)):
                print("Left, scale {}, subject {}".format(j, s))
            if _has_nonzero_mean(right_ts, (s, j)):
                print("Right, scale {}, subject {}".format(j, s))


#check zero-mean of input ts
_check_zero_mean(ts_ss_0_L, ts_ss_0_R)
_check_zero_mean(ts_ss_1_L, ts_ss_1_R)

#check zero-mean of wavelet details
_check_zero_mean(ts_ms_0_L, ts_ms_0_R, per_scale=True)
_check_zero_mean(ts_ms_1_L, ts_ms_1_R, per_scale=True)

In [9]:
# Write the stacked arrays to `processed`; nothing happens unless overwriting
# has been enabled through `replace`.
if replace:
    #single scale (S,K,T): stored exactly as built
    for label, arr in (
        ("0.0_ts_ss_0_L", ts_ss_0_L),
        ("0.0_ts_ss_0_R", ts_ss_0_R),
        ("0.0_ts_ss_1_L", ts_ss_1_L),
        ("0.0_ts_ss_1_R", ts_ss_1_R),
    ):
        save_obj(arr, label, processed)

    #multiscale (J+1,S,K,T): the first two axes are exchanged before saving
    for label, arr in (
        ("0.0_ts_ms_0_L", ts_ms_0_L),
        ("0.0_ts_ms_0_R", ts_ms_0_R),
        ("0.0_ts_ms_1_L", ts_ms_1_L),
        ("0.0_ts_ms_1_R", ts_ms_1_R),
    ):
        save_obj(np.transpose(arr, (1, 0, 2, 3)), label, processed)