In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

In [14]:
# load the data from csv files
def load_from_fragments(name):
    path = 'data/delta_filtered_' + name + '.csv'
    data = pd.read_csv(path)
    return data

In [15]:
data_029b0RMT = load_from_fragments('029b0RMT')
data_117h0RMT = load_from_fragments('117h0RMT')

In [16]:
def get_fragment_correlation(data, fragment):
    data = data[data['fragment'] == fragment]
    data = data.drop(columns=['fragment'])
    print(data.shape)
    # get the pearson correlation matrix
    corr = data.corr()
    return corr

In [17]:
# create a dataframe where we append the correlation matrices for each fragment
def get_correlation_matrix(data):
    fragments = data['fragment'].unique()
    fragments.sort()
    correlation_matrix = pd.DataFrame()
    for fragment in fragments:
        corr = get_fragment_correlation(data, fragment)
        correlation_matrix = pd.concat([correlation_matrix, corr])
    return correlation_matrix

In [18]:
df_data_029b0RMT = get_correlation_matrix(data_029b0RMT)
df_data_117h0RMT = get_correlation_matrix(data_117h0RMT)

(2501, 42)
(2500, 42)
(1347, 42)
(1348, 42)
(1348, 42)
(1347, 42)
(1348, 42)
(1348, 42)
(1347, 42)
(1348, 42)
(1348, 42)
(1348, 42)
(2500, 42)
(2500, 42)
(2501, 44)
(2500, 44)
(1120, 44)
(1121, 44)
(1121, 44)
(1121, 44)
(1121, 44)
(1120, 44)
(1121, 44)
(1121, 44)
(1121, 44)
(1121, 44)
(2500, 44)
(2500, 44)


In [19]:
df_data_029b0RMT.to_csv('data/correlation_matrix_029b0RMT.csv')
df_data_117h0RMT.to_csv('data/correlation_matrix_117h0RMT.csv')

In [20]:
df_data_029b0RMT

Unnamed: 0,TLL01,TLL02,TLL03,TLL04,TBAL3,TBAL4,TBPL1,TBPL2,TBPL3,TBPL4,...,TBPR3,TBPR4,TBAR1,TBAR2,TBAR3,TBAR4,TLR01,TLR02,TLR03,TLR04
TLL01,1.000000,0.950418,0.917991,0.865616,0.545696,0.699463,0.477446,0.625248,0.536402,0.672009,...,0.810219,0.795296,0.781106,0.666458,0.739332,0.804885,0.862739,0.802840,0.823099,0.628996
TLL02,0.950418,1.000000,0.946645,0.896689,0.556112,0.708679,0.419424,0.606819,0.564799,0.650340,...,0.763246,0.765176,0.755693,0.648190,0.698985,0.783793,0.841659,0.788770,0.799302,0.614288
TLL03,0.917991,0.946645,1.000000,0.934032,0.556939,0.722323,0.400731,0.669874,0.608155,0.683206,...,0.747320,0.744851,0.742293,0.611891,0.694506,0.772279,0.847881,0.770905,0.775984,0.600369
TLL04,0.865616,0.896689,0.934032,1.000000,0.532686,0.657874,0.405061,0.596643,0.578996,0.654217,...,0.724766,0.746846,0.724942,0.617300,0.666755,0.746444,0.833980,0.715908,0.731132,0.604019
TBAL3,0.545696,0.556112,0.556939,0.532686,1.000000,0.864885,0.571856,0.690044,0.723291,0.778958,...,0.313057,0.330387,0.479682,0.315727,0.373290,0.390657,0.423683,0.341850,0.423406,0.341380
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TBAR4,0.600260,0.588246,0.548944,0.563537,0.293462,0.453264,0.305257,0.411873,0.308818,0.368603,...,0.589900,0.692415,0.490844,0.471864,0.522643,1.000000,0.573857,0.582701,0.518176,0.286237
TLR01,0.839841,0.815574,0.782066,0.782367,0.474555,0.635701,0.319044,0.506579,0.359734,0.421100,...,0.563711,0.438688,0.575930,0.522049,0.516252,0.573857,1.000000,0.634796,0.854760,0.567504
TLR02,0.638757,0.687175,0.643313,0.625781,0.345522,0.510858,0.163649,0.377522,0.370704,0.390961,...,0.512780,0.549577,0.468164,0.380880,0.411877,0.582701,0.634796,1.000000,0.659288,0.379770
TLR03,0.777208,0.759626,0.715439,0.708261,0.430643,0.591505,0.307650,0.503683,0.329088,0.385661,...,0.655022,0.445155,0.597639,0.556249,0.493935,0.518176,0.854760,0.659288,1.000000,0.659480


In [21]:
df_data_117h0RMT

Unnamed: 0,TLL01,TLL02,TLL03,TLL04,TBAL1,TBAL2,TBAL3,TBAL4,TBPL1,TBPL2,...,TBPR3,TBPR4,TBAR1,TBAR2,TBAR3,TBAR4,TLR01,TLR02,TLR03,TLR04
TLL01,1.000000,0.615789,0.112597,0.521136,0.446883,0.341216,0.295841,0.414056,0.563504,0.210395,...,0.280410,-0.007709,0.345764,0.199917,0.090565,0.323938,0.015046,-0.095216,0.096709,0.267661
TLL02,0.615789,1.000000,0.471827,0.547980,0.358760,0.240196,0.222380,0.424861,0.340921,0.141427,...,0.342110,-0.068069,0.327740,0.234516,0.161320,0.322662,-0.231290,-0.216353,0.046579,0.218101
TLL03,0.112597,0.471827,1.000000,0.429423,0.084162,0.221130,0.208068,0.344741,0.162180,0.157216,...,0.225221,-0.121897,0.230103,0.206354,0.201464,0.252696,-0.178729,-0.065566,0.146693,0.195579
TLL04,0.521136,0.547980,0.429423,1.000000,0.705834,0.604840,0.524008,0.653054,0.701751,0.423781,...,0.119501,-0.080468,0.261989,0.223171,0.226996,0.208889,0.102393,0.054269,0.184537,0.296229
TBAL1,0.446883,0.358760,0.084162,0.705834,1.000000,0.630144,0.552530,0.576129,0.826086,0.504760,...,-0.036915,0.131923,0.015846,-0.011180,0.027649,0.055572,0.195922,0.141268,0.142656,0.124435
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TBAR4,0.086347,0.170838,-0.085525,-0.224073,0.000885,-0.172820,-0.007767,-0.083505,0.010533,0.041536,...,0.651904,0.133872,-0.510955,-0.430666,-0.228636,1.000000,-0.273002,-0.115475,0.344056,0.455281
TLR01,0.192436,-0.027114,0.098165,0.593563,0.322672,0.521101,-0.030254,0.265622,0.403458,0.023889,...,-0.302619,-0.147519,0.661790,0.712451,0.760776,-0.273002,1.000000,0.843198,0.070458,0.238080
TLR02,0.089603,0.005558,0.119545,0.485000,0.213784,0.406642,-0.064400,0.235875,0.216908,-0.117182,...,-0.172220,-0.166878,0.511496,0.623650,0.789792,-0.115475,0.843198,1.000000,0.236718,0.367847
TLR03,0.014479,0.136462,0.074715,0.068218,0.100465,-0.028903,0.090261,-0.005085,0.167065,0.041229,...,0.331575,0.140952,-0.122944,0.000742,0.107038,0.344056,0.070458,0.236718,1.000000,0.773572


In [22]:
# for each fragment (each the number of columns)
# Binarize the matrix by picking up threshold such a way all the channels are recruited in the network.

def binarize(data, threshold):
    data = data.applymap(lambda x: 1 if x > threshold else 0)
    return data

# binarize the data such that ratio of GNN to the total number of channels (columns) is 1
def binarize_data(data):
    # start from 1 and reduce the threshold by 0.01 until the ratio is 1
    threshold = 1
    while True:
        data_binarized = binarize(data, threshold)
        # convert the data_binarized to a networkx graph
        G = nx.from_pandas_adjacency(data_binarized)
        # check if the graph is connected
        if nx.is_connected(G):
            break
        threshold -= 0.01
    print(threshold)
    # return the binarized data and the threshold
    return data_binarized, threshold


In [23]:
# get the number of columns in df_data_029b0RMT
num_channels = df_data_029b0RMT.shape[1]
print(num_channels)

df_data_binarized_029b0RMT = pd.DataFrame()

# select num_channels rows sequentially from the data
for i in range(0, df_data_029b0RMT.shape[0], num_channels):
    data = df_data_029b0RMT.iloc[i:i+num_channels]
    data_binarized, threshold = binarize_data(data)
    df_data_binarized_029b0RMT = pd.concat([df_data_binarized_029b0RMT, data_binarized])
    print('Fragment:', i//num_channels, 'Threshold:', threshold)

df_data_binarized_029b0RMT.to_csv('data/binarized_029b0RMT.csv')

42
0.5999999999999996
Fragment: 0 Threshold: 0.5999999999999996
0.38999999999999946
Fragment: 1 Threshold: 0.38999999999999946
0.36999999999999944
Fragment: 2 Threshold: 0.36999999999999944
0.4599999999999995
Fragment: 3 Threshold: 0.4599999999999995
0.4599999999999995
Fragment: 4 Threshold: 0.4599999999999995
0.5199999999999996
Fragment: 5 Threshold: 0.5199999999999996
0.38999999999999946
Fragment: 6 Threshold: 0.38999999999999946
0.49999999999999956
Fragment: 7 Threshold: 0.49999999999999956
0.48999999999999955
Fragment: 8 Threshold: 0.48999999999999955
0.4599999999999995
Fragment: 9 Threshold: 0.4599999999999995
0.5099999999999996
Fragment: 10 Threshold: 0.5099999999999996
0.3099999999999994
Fragment: 11 Threshold: 0.3099999999999994
0.3499999999999994
Fragment: 12 Threshold: 0.3499999999999994
0.4399999999999995
Fragment: 13 Threshold: 0.4399999999999995


In [24]:
# get the number of columns in df_data_117h0RMT
num_channels = df_data_117h0RMT.shape[1]
print(num_channels)

df_data_binarized_117h0RMT = pd.DataFrame()

# select num_channels rows sequentially from the data
for i in range(0, df_data_117h0RMT.shape[0], num_channels):
    data = df_data_117h0RMT.iloc[i:i+num_channels]
    data_binarized, threshold = binarize_data(data)
    df_data_binarized_117h0RMT = pd.concat([df_data_binarized_117h0RMT, data_binarized])
    print('Fragment:', i//num_channels, 'Threshold:', threshold)

df_data_binarized_117h0RMT.to_csv('data/binarized_117h0RMT.csv')

44
0.3399999999999994
Fragment: 0 Threshold: 0.3399999999999994
0.47999999999999954
Fragment: 1 Threshold: 0.47999999999999954
0.4299999999999995
Fragment: 2 Threshold: 0.4299999999999995
0.4099999999999995
Fragment: 3 Threshold: 0.4099999999999995
0.38999999999999946
Fragment: 4 Threshold: 0.38999999999999946
0.5399999999999996
Fragment: 5 Threshold: 0.5399999999999996
0.46999999999999953
Fragment: 6 Threshold: 0.46999999999999953
0.49999999999999956
Fragment: 7 Threshold: 0.49999999999999956
0.6199999999999997
Fragment: 8 Threshold: 0.6199999999999997
0.5199999999999996
Fragment: 9 Threshold: 0.5199999999999996
0.46999999999999953
Fragment: 10 Threshold: 0.46999999999999953
0.5799999999999996
Fragment: 11 Threshold: 0.5799999999999996
0.1999999999999993
Fragment: 12 Threshold: 0.1999999999999993
0.3099999999999994
Fragment: 13 Threshold: 0.3099999999999994
