In [82]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

In [83]:
# load the data from csv files
def load_from_fragments(name):
    """Read ``data/delta_filtered_<name>.csv`` into a DataFrame.

    The path is relative to the current working directory and the file is
    parsed with the default ``pd.read_csv`` settings.
    """
    # join() keeps the original contract: a non-string `name` raises TypeError.
    csv_path = ''.join(('data/delta_filtered_', name, '.csv'))
    return pd.read_csv(csv_path)

In [84]:
# Load both datasets; the calls run in the same order as before.
data_029b0RMT, data_117h0RMT = (
    load_from_fragments('029b0RMT'),
    load_from_fragments('117h0RMT'),
)

In [85]:
def get_fragment_correlation(data, fragment):
    """Return the pairwise correlation matrix for one fragment of ``data``.

    Rows whose ``fragment`` column equals ``fragment`` are selected, the
    ``fragment`` column itself is dropped, and the shape of the remaining
    frame is printed before ``DataFrame.corr()`` (Pearson by default) is
    applied to it. The input frame is not modified.
    """
    is_selected = data['fragment'] == fragment
    channels = data[is_selected].drop(columns=['fragment'])
    print(channels.shape)
    return channels.corr()

In [86]:
# create a dataframe where we append the correlation matrices for each fragment
def get_correlation_matrix(data):
    """Stack the per-fragment correlation matrices of ``data`` row-wise.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``fragment`` column; each distinct value is passed to
        ``get_fragment_correlation`` together with ``data``.

    Returns
    -------
    pandas.DataFrame
        The correlation matrices concatenated along the rows, in ascending
        order of the fragment value. An empty DataFrame is returned when
        ``data`` has no fragments.
    """
    # sort_values works for any sortable dtype, unlike the in-place
    # ndarray.sort() which requires unique() to hand back a NumPy array.
    fragments = data['fragment'].drop_duplicates().sort_values()

    # Collect first and concatenate once: growing a frame inside the loop
    # re-copies every earlier block on each iteration and starts from an
    # empty frame, which newer pandas versions warn about.
    matrices = [get_fragment_correlation(data, fragment) for fragment in fragments]
    if not matrices:
        # pd.concat raises on an empty list; keep the old "empty frame" result.
        return pd.DataFrame()
    return pd.concat(matrices)

In [87]:
# Build the stacked correlation matrices for both datasets. Each call prints
# one shape line per fragment, so the 029b0RMT lines come first.
df_data_029b0RMT, df_data_117h0RMT = (
    get_correlation_matrix(data_029b0RMT),
    get_correlation_matrix(data_117h0RMT),
)

(2501, 42)
(2500, 42)
(1347, 42)
(1348, 42)
(1348, 42)
(1347, 42)
(1348, 42)
(1348, 42)
(1347, 42)
(1348, 42)
(1348, 42)
(1348, 42)
(2500, 42)
(2500, 42)
(2501, 74)
(2500, 74)
(1120, 74)
(1121, 74)
(1121, 74)
(1121, 74)
(1121, 74)
(1120, 74)
(1121, 74)
(1121, 74)
(1121, 74)
(1121, 74)
(2500, 74)
(2500, 74)


In [88]:
# Persist the stacked per-fragment correlation matrices. With the default
# to_csv settings the row labels are written as the first CSV column.
df_data_029b0RMT.to_csv('data/correlation_matrix_029b0RMT.csv')
df_data_117h0RMT.to_csv('data/correlation_matrix_117h0RMT.csv')

In [89]:
# Show the stacked correlation matrices for the 029b0RMT data (the notebook
# renders the last expression of the cell).
df_data_029b0RMT

Unnamed: 0,TBAL3,TBAL4,TBAR1,TBAR2,TBAR3,TBAR4,TBPL1,TBPL2,TBPL3,TBPL4,...,TR01,TR02,TR03,TR04,TR05,TR06,TR07,TR08,TR09,TR10
TBAL3,1.000000,0.864885,0.479682,0.315727,0.373290,0.390657,0.571856,0.690044,0.723291,0.778958,...,0.190026,0.297138,0.205708,0.258579,0.165265,0.269748,0.303348,0.423357,0.468510,0.456061
TBAL4,0.864885,1.000000,0.662560,0.521054,0.576456,0.607257,0.601851,0.682370,0.596864,0.733939,...,0.351828,0.417489,0.299274,0.343855,0.210825,0.358856,0.424683,0.566889,0.633490,0.621553
TBAR1,0.479682,0.662560,1.000000,0.901407,0.845346,0.814636,0.455730,0.543774,0.496449,0.631789,...,0.416076,0.482099,0.333544,0.372873,0.158555,0.298197,0.447944,0.602933,0.687981,0.652871
TBAR2,0.315727,0.521054,0.901407,1.000000,0.743456,0.760200,0.379714,0.397835,0.375269,0.450217,...,0.481572,0.451959,0.352963,0.346083,0.234479,0.340675,0.427455,0.529863,0.622855,0.611980
TBAR3,0.373290,0.576456,0.845346,0.743456,1.000000,0.869901,0.437791,0.437036,0.389058,0.541408,...,0.366726,0.525122,0.364300,0.389726,0.215917,0.231699,0.519014,0.637547,0.642154,0.573811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TR06,0.398765,0.365322,0.024218,0.069280,-0.047615,0.077905,0.283721,0.343713,0.345758,0.316878,...,0.382760,0.036970,-0.054264,-0.100699,0.778058,1.000000,0.753759,0.410773,0.569239,0.722423
TR07,0.308656,0.323633,0.148968,0.121547,0.134195,0.311001,0.147062,0.222000,0.337900,0.329943,...,0.432492,0.389202,0.344567,0.341203,0.857145,0.753759,1.000000,0.767561,0.734757,0.732722
TR08,0.231095,0.285867,0.206496,0.031031,0.183370,0.394590,0.011283,0.128425,0.278383,0.288971,...,0.282562,0.450818,0.426520,0.479955,0.547638,0.410773,0.767561,1.000000,0.905674,0.753916
TR09,0.355450,0.361299,0.288296,0.102192,0.143802,0.321425,0.122495,0.255871,0.386940,0.383337,...,0.231840,0.290668,0.216210,0.263837,0.498228,0.569239,0.734757,0.905674,1.000000,0.904500


In [90]:
# For each fragment (a block of rows as long as the number of columns),
# binarize the matrix, picking the threshold in such a way that all the channels are recruited into the network.

def binarize(data, threshold):
    """Return a 0/1 frame marking the entries of ``data`` above ``threshold``.

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric frame to threshold; it is not modified.
    threshold : float
        Cut-off value. The comparison is strict (``x > threshold``).

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``data`` with int64 values: 1 where the
        entry is strictly greater than ``threshold``, 0 otherwise. NaN
        entries compare as False and therefore map to 0.
    """
    # Vectorised comparison replaces DataFrame.applymap, which is deprecated
    # in recent pandas releases and calls a Python lambda once per cell.
    return (data > threshold).astype('int64')

# binarize the data such that ratio of GNN to the total number of channels (columns) is 1
def binarize_data(data, step=0.01, min_threshold=-1.0):
    """Binarize ``data`` at the first threshold that gives a connected graph.

    The threshold starts at 1 and is lowered by ``step`` per iteration. At
    each value ``data`` is binarized with ``binarize``, turned into a graph
    with ``nx.from_pandas_adjacency`` and tested with ``nx.is_connected``;
    the search stops at the first threshold that passes.

    Parameters
    ----------
    data : pandas.DataFrame
        Square matrix whose index and columns carry the same labels, as
        ``nx.from_pandas_adjacency`` requires.
    step : float, optional
        Amount by which the threshold is lowered each iteration. Must be
        positive. Defaults to 0.01.
    min_threshold : float, optional
        Lower bound of the search. One threshold below this bound is still
        tried so that entries equal to the bound can be included. Defaults
        to -1.0.

    Returns
    -------
    tuple
        ``(data_binarized, threshold)``: the 0/1 frame and the threshold that
        produced it. The threshold is also printed.

    Raises
    ------
    ValueError
        If ``step`` is not positive, or if no threshold down to
        ``min_threshold`` yields a connected graph (for example when a row
        is entirely NaN, so that node can never gain an edge).
    """
    if step <= 0:
        raise ValueError('step must be positive, got {!r}'.format(step))

    threshold = 1
    n_steps = 0
    while True:
        data_binarized = binarize(data, threshold)
        # convert the binarized matrix to a networkx graph
        G = nx.from_pandas_adjacency(data_binarized)
        if nx.is_connected(G):
            break
        if threshold < min_threshold:
            # Nothing left to lower: previously this looped forever.
            raise ValueError(
                'no threshold >= {!r} yields a connected graph'.format(min_threshold)
            )
        n_steps += 1
        # Recompute from the step counter rather than subtracting repeatedly,
        # so float error does not accumulate (0.6, not 0.5999999999999996).
        threshold = round(1 - n_steps * step, 10)
    print(threshold)
    # return the binarized data and the threshold
    return data_binarized, threshold


0.5999999999999996


In [None]:
# get the number of columns in df_data_029b0RMT
num_channels = df_data_029b0RMT.shape[1]
print(num_channels)

# The stacked frame is walked in consecutive blocks of num_channels rows;
# each block is binarized on its own.
binarized_blocks_029b0RMT = []
for i in range(0, df_data_029b0RMT.shape[0], num_channels):
    data = df_data_029b0RMT.iloc[i:i+num_channels]
    data_binarized, threshold = binarize_data(data)
    binarized_blocks_029b0RMT.append(data_binarized)
    print('Fragment:', i//num_channels, 'Threshold:', threshold)

# Concatenate once at the end instead of growing a frame inside the loop,
# which re-copies all earlier blocks on every iteration and starts from an
# empty frame. pd.concat rejects an empty list, hence the fallback.
df_data_binarized_029b0RMT = (
    pd.concat(binarized_blocks_029b0RMT)
    if binarized_blocks_029b0RMT
    else pd.DataFrame()
)

df_data_binarized_029b0RMT.to_csv('data/binarized_029b0RMT.csv')

In [None]:
# get the number of columns in df_data_117h0RMT
num_channels = df_data_117h0RMT.shape[1]
print(num_channels)

# The stacked frame is walked in consecutive blocks of num_channels rows;
# each block is binarized on its own.
binarized_blocks_117h0RMT = []
for i in range(0, df_data_117h0RMT.shape[0], num_channels):
    data = df_data_117h0RMT.iloc[i:i+num_channels]
    data_binarized, threshold = binarize_data(data)
    binarized_blocks_117h0RMT.append(data_binarized)
    print('Fragment:', i//num_channels, 'Threshold:', threshold)

# Concatenate once at the end instead of growing a frame inside the loop,
# which re-copies all earlier blocks on every iteration and starts from an
# empty frame. pd.concat rejects an empty list, hence the fallback.
df_data_binarized_117h0RMT = (
    pd.concat(binarized_blocks_117h0RMT)
    if binarized_blocks_117h0RMT
    else pd.DataFrame()
)

df_data_binarized_117h0RMT.to_csv('data/binarized_117h0RMT.csv')