In [1]:
import pandas as pd
from MatrixVectorizer import *
import torch

In [2]:
# A_LR_train = pd.read_csv("../data/lr_train.csv")
# A_HR_train = pd.read_csv("../data/hr_train.csv")
# A_LR_test = pd.read_csv("../data/lr_test.csv")

In [3]:
LR_size = 160
HR_size = 268

In [4]:
MatrixVectorizer = MatrixVectorizer()

In [5]:
# num_subject = A_LR_train.shape[0]
# A_LR_train_matrix = np.zeros((num_subject, LR_size, LR_size)) #torch.zeros((num_subject, LR_size, LR_size))
# for i in range(num_subject):
#     A_LR_train_matrix[i] = MatrixVectorizer.anti_vectorize(A_LR_train.iloc[i], LR_size) # torch.from_numpy(MatrixVectorizer.anti_vectorize(A_LR_train.iloc[i], LR_size))

# A_HR_train_matrix = np.zeros((num_subject, HR_size, HR_size)) #torch.zeros((num_subject, LR_size, LR_size))
# for i in range(num_subject):
#     A_HR_train_matrix[i] = MatrixVectorizer.anti_vectorize(A_HR_train.iloc[i], HR_size) 

# num_subject = len(A_LR_test)
# A_LR_test_matrix = np.zeros((num_subject, LR_size, LR_size)) #torch.zeros((num_subject, LR_size, LR_size))
# for i in range(num_subject):
#     A_LR_test_matrix[i] = MatrixVectorizer.anti_vectorize(A_LR_test.iloc[i], LR_size) 

# np.save('A_LR_train_matrix.npy', A_LR_train_matrix)
# np.save('A_HR_train_matrix.npy', A_HR_train_matrix)
# np.save('A_LR_test_matrix.npy', A_LR_test_matrix)

In [6]:
A_LR_train_matrix = np.load('A_LR_train_matrix.npy')
A_HR_train_matrix = np.load('A_HR_train_matrix.npy')
A_LR_test_matrix = np.load("A_LR_test_matrix.npy")

print(A_LR_train_matrix.shape)
print(A_HR_train_matrix.shape)
print(A_LR_test_matrix.shape)

(167, 160, 160)
(167, 268, 268)
(112, 160, 160)


In [7]:
"""Main function of Graph Super-Resolution Network (GSR-Net) framework 
   for predicting high-resolution brain connectomes from low-resolution connectomes. 
    
    ---------------------------------------------------------------------
    
    This file contains the implementation of the training and testing process of our GSR-Net model.
        train(model, optimizer, subjects_adj, subjects_ground_truth, args)

                Inputs:
                        model:        constructor of our GSR-Net model:  model = GSRNet(ks,args)
                                      ks:   array that stores reduction rates of nodes in Graph U-Net pooling layers
                                      args: parsed command line arguments

                        optimizer:    constructor of our model's optimizer (borrowed from PyTorch)  

                        subjects_adj: (n × l x l) tensor stacking LR connectivity matrices of all training subjects
                                       n: the total number of subjects
                                       l: the dimensions of the LR connectivity matrices

                        subjects_ground_truth: (n × h x h) tensor stacking LR connectivity matrices of all training subjects
                                                n: the total number of subjects
                                                h: the dimensions of the LR connectivity matrices

                        args:          parsed command line arguments, to learn more about the arguments run: 
                                       python demo.py --help
                Output:
                        for each epoch, prints out the mean training MSE error


            
        test(model, test_adj,test_ground_truth,args)

                Inputs:
                        test_adj:      (n × l x l) tensor stacking LR connectivity matrices of all testing subjects
                                        n: the total number of subjects
                                        l: the dimensions of the LR connectivity matrices

                        test_ground_truth:      (n × h x h) tensor stacking LR connectivity matrices of all testing subjects
                                                 n: the total number of subjects
                                                 h: the dimensions of the LR connectivity matrices

                        see train method above for model and args.

                Outputs:
                        for each epoch, prints out the mean testing MSE error


    To evaluate our framework we used 5-fold cross-validation strategy.

    ---------------------------------------------------------------------
    Copyright 2020 Megi Isallari, Istanbul Technical University.
    All rights reserved.
    """


import torch
import numpy as np
import torch.optim as optim
from sklearn.model_selection import KFold
from preprocessing import *
from model import *
from train import *
import argparse



epochs = 200


parser = argparse.ArgumentParser(description='GSR-Net')
parser.add_argument('--epochs', type=int, default=epochs, metavar='no_epochs',
                help='number of episode to train ')
parser.add_argument('--lr', type=float, default=0.0001, metavar='lr',
                help='learning rate (default: 0.0001 using Adam Optimizer)')
parser.add_argument('--splits', type=int, default=3, metavar='n_splits',
                help='no of cross validation folds')
parser.add_argument('--lmbda', type=int, default=16, metavar='L',
                help='self-reconstruction error hyperparameter')
parser.add_argument('--lr_dim', type=int, default=LR_size, metavar='N',
                help='adjacency matrix input dimensions')
parser.add_argument('--hr_dim', type=int, default=HR_size, metavar='N',
                help='super-resolved adjacency matrix output dimensions')
parser.add_argument('--hidden_dim', type=int, default=280, metavar='N',
                help='hidden GraphConvolutional layer dimensions')
parser.add_argument('--padding', type=int, default=26, metavar='padding',
                help='dimensions of padding')

# Create an empty Namespace to hold the default arguments
args = parser.parse_args([]) 
print(args)

Namespace(epochs=200, lr=0.0001, splits=3, lmbda=16, lr_dim=160, hr_dim=268, hidden_dim=280, padding=26)


In [8]:
# SIMULATING THE DATA: EDIT TO ENTER YOUR OWN DATA
X = A_LR_train_matrix #np.random.normal(0, 0.5, (167, 160, 160))
Y = A_HR_train_matrix #np.random.normal(0, 0.5, (167, 288, 288))
print(X.shape)
print(Y.shape)

(167, 160, 160)
(167, 268, 268)


In [9]:
device = get_device()
print(device)

cpu


In [10]:
cv = KFold(n_splits=args.splits, random_state=42, shuffle=True)
print("Torch: ")

ks = [0.9, 0.7, 0.6, 0.5]

best_model_fold_list = []
data_fold_list = []
i = 0
for train_index, test_index in cv.split(X):

    print(f"----- Fold {i} -----")

    model = GSRNet(ks, args).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    subjects_adj, test_adj, subjects_ground_truth, test_ground_truth = X[
        train_index], X[test_index], Y[train_index], Y[test_index]
    data_fold_list.append((subjects_adj, test_adj, subjects_ground_truth, test_ground_truth))


    ##################
    # subjects_adj = subjects_adj[:1]
    # subjects_ground_truth = subjects_ground_truth[:1]
    ##################

    return_model = train(model, optimizer, subjects_adj, subjects_ground_truth, args, test_adj, test_ground_truth)
    test(return_model, test_adj, test_ground_truth, args)
    best_model_fold_list.append(return_model)

    i += 1

    # break
    

Torch: 
Epoch: 0, Train Loss: 0.108893, Train Error: 0.235015, Test Error: 0.200097
Epoch: 1, Train Loss: 0.076798, Train Error: 0.201993, Test Error: 0.187153
Epoch: 2, Train Loss: 0.069928, Train Error: 0.194683, Test Error: 0.183119
Epoch: 3, Train Loss: 0.066835, Train Error: 0.191793, Test Error: 0.181274
Epoch: 4, Train Loss: 0.065099, Train Error: 0.190318, Test Error: 0.180233
Epoch: 5, Train Loss: 0.064108, Train Error: 0.189582, Test Error: 0.179738
Epoch: 6, Train Loss: 0.063480, Train Error: 0.189144, Test Error: 0.179513
Epoch: 7, Train Loss: 0.063043, Train Error: 0.188854, Test Error: 0.179449
Epoch: 8, Train Loss: 0.062712, Train Error: 0.188622, Test Error: 0.179400
Epoch: 9, Train Loss: 0.062443, Train Error: 0.188413, Test Error: 0.179294
Epoch: 10, Train Loss: 0.062215, Train Error: 0.188219, Test Error: 0.179194
Epoch: 11, Train Loss: 0.062005, Train Error: 0.188023, Test Error: 0.179081
Epoch: 12, Train Loss: 0.061794, Train Error: 0.187808, Test Error: 0.178938
E

In [11]:
from MatrixVectorizer import MatrixVectorizer

from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import pearsonr
from scipy.spatial.distance import jensenshannon
import torch
import networkx as nx

def evaluate(pred_matrices, gt_matrices):

    num_test_samples = gt_matrices.shape[0]

    # Initialize lists to store MAEs for each centrality measure
    mae_bc = []
    mae_ec = []
    mae_pc = []

    # # Iterate over each test sample
    # for i in range(num_test_samples):
    #     # Convert adjacency matrices to NetworkX graphs
    #     pred_graph = nx.from_numpy_array(pred_matrices[i], edge_attr="weight")
    #     gt_graph = nx.from_numpy_array(gt_matrices[i], edge_attr="weight")

    #     # Compute centrality measures
    #     pred_bc = nx.betweenness_centrality(pred_graph, weight="weight")
    #     pred_ec = nx.eigenvector_centrality(pred_graph, weight="weight")
    #     pred_pc = nx.pagerank(pred_graph, weight="weight")

    #     gt_bc = nx.betweenness_centrality(gt_graph, weight="weight")
    #     gt_ec = nx.eigenvector_centrality(gt_graph, weight="weight")
    #     gt_pc = nx.pagerank(gt_graph, weight="weight")

    #     # Convert centrality dictionaries to lists
    #     pred_bc_values = list(pred_bc.values())
    #     pred_ec_values = list(pred_ec.values())
    #     pred_pc_values = list(pred_pc.values())

    #     gt_bc_values = list(gt_bc.values())
    #     gt_ec_values = list(gt_ec.values())
    #     gt_pc_values = list(gt_pc.values())

    #     # Compute MAEs
    #     mae_bc.append(mean_absolute_error(pred_bc_values, gt_bc_values))
    #     mae_ec.append(mean_absolute_error(pred_ec_values, gt_ec_values))
    #     mae_pc.append(mean_absolute_error(pred_pc_values, gt_pc_values))

    # # Compute average MAEs
    # avg_mae_bc = sum(mae_bc) / len(mae_bc)
    # avg_mae_ec = sum(mae_ec) / len(mae_ec)
    # avg_mae_pc = sum(mae_pc) / len(mae_pc)

    # vectorize and flatten
    pred_1d = MatrixVectorizer.vectorize(pred_matrices).flatten()
    gt_1d = MatrixVectorizer.vectorize(gt_matrices).flatten()

    mae = mean_absolute_error(pred_1d, gt_1d)
    pcc = pearsonr(pred_1d, gt_1d)[0]
    js_dis = jensenshannon(pred_1d, gt_1d)

    print("MAE: ", mae)
    print("PCC: ", pcc)
    print("Jensen-Shannon Distance: ", js_dis)
    # print("Average MAE betweenness centrality:", avg_mae_bc)
    # print("Average MAE eigenvector centrality:", avg_mae_ec)
    # print("Average MAE PageRank centrality:", avg_mae_pc)
    # return mae, pcc, js_dis, avg_mae_bc, avg_mae_ec, avg_mae_pc



In [12]:
for i in range(args.splits):
    _, test_adjs, _, gt_matrices = data_fold_list[i]
    model = best_model_fold_list[i]
    model.eval()
    pred_matrices = np.zeros(gt_matrices.shape)
    with torch.no_grad():
        for j, test_adj in enumerate(test_adjs):
            pred_matrices[j], _, _, _ = model(torch.from_numpy(test_adj))
    evaluate(pred_matrices, gt_matrices)

MAE:  0.14835803751452897
PCC:  0.5928507311234573
Jensen-Shannon Distance:  0.29957283150860553
MAE:  0.1578448729087319
PCC:  0.5642614149988106
Jensen-Shannon Distance:  0.3078476609803951
MAE:  0.15251106211067725
PCC:  0.586215228686752
Jensen-Shannon Distance:  0.29401755777854227


In [13]:
args.epochs = 110

final_model = GSRNet(ks, args).to(device)
optimizer = optim.Adam(final_model.parameters(), lr=args.lr)

# subjects_adj, test_adj, subjects_ground_truth, test_ground_truth = X[
#     train_index], X[test_index], Y[train_index], Y[test_index]
# data_fold_list.append((subjects_adj, test_adj, subjects_ground_truth, test_ground_truth))


##################
# subjects_adj = subjects_adj[:1]
# subjects_ground_truth = subjects_ground_truth[:1]
##################

final_model = train(final_model, optimizer, X, Y, args)

Epoch: 0, Train Loss: 0.097118, Train Error: 0.222979
Epoch: 1, Train Loss: 0.070837, Train Error: 0.195681
Epoch: 2, Train Loss: 0.066570, Train Error: 0.191245
Epoch: 3, Train Loss: 0.064896, Train Error: 0.189831
Epoch: 4, Train Loss: 0.064007, Train Error: 0.189172


In [None]:
output_pred_list = []
for i in range(A_LR_test_matrix.shape[0]):
    output_pred = final_model(A_LR_test_matrix[i])
    output_pred = MatrixVectorizer.vectorize(output_pred).tolist()
    output_pred_list += output_pred

In [None]:
df = pd.DataFrame({
    "ID": [i+1 for i in range(len(output_pred_list))],
    "Predicted": output_pred_list
})

df

In [None]:
df.to_csv("test.csv", index=False)