In [1]:
#Imports
#Dataprep
import matplotlib.pyplot as plt
import numpy as np
from nilearn import datasets, plotting, image
from nilearn.maskers import NiftiMapsMasker
from sklearn.decomposition import FastICA
import pandas as pd
import tarfile
import gzip
#GNN required
import torch
import torch.nn.functional as F
from torch.nn import Linear
import torch.nn as nn
from torch_geometric.nn import GCNConv
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

## Data Clean from HCP Data notebook

In [2]:
hcp_file = 'Data/netmats_3T_HCP1200_MSMAll_ICAd100_ts2.tar.gz'

In [3]:
# Read Network Matricies
data_raw = []

with gzip.open(hcp_file, 'rb') as gz_file:
    with tarfile.open(fileobj=gz_file, mode='r') as tar:
        
        # Extract a text file
        for file in tar.getnames():
            if file.endswith('.txt'):
                # Read the content and load it into a NumPy array
                subjects = np.loadtxt(tar.extractfile(file), dtype=float)
                
                ICAd = int(np.sqrt(subjects.shape[1]))
                
                # Convert into matrix
                matrix = np.array([np.reshape(i, (ICAd, ICAd)) for i in subjects])
                data_raw.append(matrix)
net_mat2 = data_raw[1]

In [4]:
id_file = 'Data/IDs/subjectIDs.txt'
id_recon1_file = 'Data/IDs/subjectIDs_recon1.txt'
id_recon2_file = 'Data/IDs/subjectIDs_recon2.txt'
subject_IDs = np.loadtxt(id_file, dtype=int)
subject_data = pd.read_csv('Data/subjects.csv', index_col = 'Subject')
subject_data = subject_data[['Gender', 'Age']]
net_mat_data = pd.Series(dict(zip(subject_IDs, net_mat2))).to_frame().rename(columns = {0 : 'netmat'})
data = subject_data.join(net_mat_data, how = 'right')

In [5]:
data

Unnamed: 0,Gender,Age,netmat
100206,M,26-30,"[[0.0, 0.61676, 9.5727, -5.4959, 0.34639, 3.00..."
100307,F,26-30,"[[0.0, -0.29664, 17.317, -9.0467, -0.28723, 1...."
100408,M,31-35,"[[0.0, 1.6486, 6.6189, -8.8877, 1.4337, 1.006,..."
100610,M,26-30,"[[0.0, -0.90275, 7.7215, -8.3907, 3.3144, 2.93..."
101006,F,31-35,"[[0.0, -0.088768, 9.4979, -10.412, 1.0646, 4.3..."
...,...,...,...
992673,F,31-35,"[[0.0, -0.11536, 7.1338, -5.5322, 0.34004, 1.6..."
992774,M,31-35,"[[0.0, 0.25353, 8.0265, -6.2072, 4.1589, 1.582..."
993675,F,26-30,"[[0.0, -1.0378, 10.709, -3.3224, -0.090704, 2...."
994273,M,26-30,"[[0.0, 0.64613, 11.471, -5.5137, 1.9357, 4.259..."


## Pytorch data preparing

In [44]:
data['Gender'] = data['Gender'].replace({"M":0, "F":1})

In [49]:
data['Gender']

100206    0
100307    1
100408    0
100610    0
101006    1
         ..
992673    1
992774    0
993675    1
994273    0
996782    1
Name: Gender, Length: 1003, dtype: int64

In [45]:
num_components = data['netmat'].iloc[0][0].size #100 for ICA100
edges = torch.tensor([[i, j] for i in range(num_components) for j in range(num_components)], dtype=torch.long).t().contiguous()

In [50]:
labels = torch.tensor(data['Gender'].values)
labels

tensor([0, 1, 0,  ..., 1, 0, 1])

In [58]:
netmats = data['netmat']

# Custom dataset class
class CorrelationDataset(torch.utils.data.Dataset):
    def __init__(self, netmats):
        self.netmats = netmats
        self.labels = labels

    def __len__(self):
        return len(self.netmats)

    def __getitem__(self, idx):
        nmap = self.netmats.iloc[idx]

        # Convert the correlation matrix to an edge list
        edge_index = edges

        # Create a graph data object
        graph_data = Data(x=nmap, edge_index=edge_index)

        return graph_data

# Create the custom dataset
custom_dataset = CorrelationDataset(netmats)
loader = DataLoader(custom_dataset, batch_size=1, shuffle=True)

In [59]:
#one data point
t_data = Data(x=torch.tensor(data['netmat'].iloc[0], dtype=torch.float32), edge_index=edges)
graphs = Data(x=None, edge_index=edges, y=labels)

In [113]:
class GCN(nn.Module):
    def __init__(self,):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(100, 25)
        self.conv2 = GCNConv(25, 16)
        self.classifier = Linear(16, 1)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        print(f"size of x[0]: {len(x[0])}") #PRINT
        x = torch.tensor(np.array(x[0]))
        print(f"x[0]: {len(x[0])}") #PRINT
        x = x.view(x.size(0), -1) #reshape
        print(x) #PRINT
        
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        x = self.classifier(x.float())
        return x

In [114]:
model = GCN()

In [115]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [116]:
len(custom_dataset)

1003

In [117]:
test = iter(loader)
print(next(test).x[0])

[[ 0.       -0.84163   6.8928   ... -1.1801    0.42183  -1.1576  ]
 [-0.84163   0.        0.78394  ...  0.38457   0.1311   -0.22615 ]
 [ 6.8928    0.78394   0.       ...  1.9666   -0.030353 -1.1921  ]
 ...
 [-1.1801    0.38457   1.9666   ...  0.        0.46366   2.9007  ]
 [ 0.42183   0.1311   -0.030353 ...  0.46366   0.        0.28287 ]
 [-1.1576   -0.22615  -1.1921   ...  2.9007    0.28287   0.      ]]


In [118]:
model(next(test))

size of x[0]: 100
x[0]: 100
tensor([[ 0.0000, -1.1206,  8.9821,  ...,  0.6712,  0.6850,  0.0866],
        [-1.1206,  0.0000,  0.5039,  ..., -2.1170, -0.5375, -0.3916],
        [ 8.9821,  0.5039,  0.0000,  ...,  0.5997, -0.6837, -1.8861],
        ...,
        [ 0.6712, -2.1170,  0.5997,  ...,  0.0000,  3.3838,  5.4309],
        [ 0.6850, -0.5375, -0.6837,  ...,  3.3838,  0.0000,  0.1153],
        [ 0.0866, -0.3916, -1.8861,  ...,  5.4309,  0.1153,  0.0000]],
       dtype=torch.float64)


RuntimeError: expected scalar type Double but found Float

In [119]:
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for i_data in loader:
        print(i_data)
        optimizer.zero_grad()
        out = model(i_data)
        loss = criterion(out.view(-1), i_data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    average_loss = total_loss / len(loader)
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss:.4f}')

DataBatch(x=[1], edge_index=[2, 10000], batch=[100], ptr=[2])
size of x[0]: 100
x[0]: 100
tensor([[ 0.0000, -0.5410, 13.6740,  ...,  0.9629, -1.7354,  0.1116],
        [-0.5410,  0.0000,  2.2294,  ...,  0.1051, -0.5452, -1.7550],
        [13.6740,  2.2294,  0.0000,  ...,  0.0930,  0.1709,  0.3577],
        ...,
        [ 0.9629,  0.1051,  0.0930,  ...,  0.0000,  0.2170,  2.0699],
        [-1.7354, -0.5452,  0.1709,  ...,  0.2170,  0.0000,  0.0786],
        [ 0.1116, -1.7550,  0.3577,  ...,  2.0699,  0.0786,  0.0000]],
       dtype=torch.float64)


RuntimeError: expected scalar type Double but found Float