In [1]:
import math
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

import pandas as pd

from entities.graphs.data_reader import read_record
from entities.graphs.graph_builder import RawAndPearson, MomentsAndPearson

In [2]:
PATH = "C:/Projects/TFM/dataset/AD_MCI_HC_WINDOWED"
INDEX_PATH = "C:/Projects/TFM/dataset/AD_MCI_HC_WINDOWED/data.csv"

In [3]:
hyperparams = {
    "batch_size": 64,
    "lr": 0.1,
    "momentum": 0.9,
    "epochs": 100
}

In [4]:
from sklearn.model_selection import train_test_split

# Removing MCI patients to retrieve only HC and AD subjects for binary classification
indices = pd.read_csv(INDEX_PATH, index_col="Unnamed: 0")
indices = indices.drop(indices[indices.label == "MCI"].index)
indices_hc = indices[indices.label == 'HC'].sample(frac=0.4)
indices_ad = indices[indices.label == 'AD']
indices = pd.concat([indices_hc, indices_ad])

train_data, test_data = train_test_split(indices, shuffle=True)

In [5]:

class EEGDataset(Dataset):
    def __init__(self, indices, builder, transform=None):
        self.index_df = indices
        self.transform = transform
        self.builder = builder
        
    def __len__(self):
        return len(self.index_df)
    
    def __getitem__(self, idx):
        current_path = self.index_df.iloc[idx]["path"]
        raw_data = read_record(current_path)
        label = self.index_df.iloc[idx]["label"]
        data = self.builder.build(raw_data, label)
        sample = {
            "x": data.x,
            "edge_attr": data.edge_attr,
            "edge_index": data.edge_index,
            "label": data.label
        }
        return sample

In [6]:
transform = transforms.Compose(
    [transforms.ToTensor()]
) 
# Split using tensorflow: Shuffling not that good, more useful when using three classes instead of 2
"""eeg_dataset = EEGDataset(INDEX_PATH, PATH, RawAndPearson(normalize_nodes=True, normalize_edges=True), transform=transform)
train_size = math.floor(len(eeg_dataset) * 0.7)
test_szie = len(eeg_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(eeg_dataset, [train_size, test_szie])"""
#builder = RawAndPearson(normalize_nodes=True, normalize_edges=True)
builder = MomentsAndPearson()
train_dataset = EEGDataset(train_data, builder, transform=transform)
test_dataset = EEGDataset(test_data, builder, transform=transform)

In [7]:
for i, sample in enumerate(train_dataset):
    print(f"Element {i}: Nodes: {sample['x'].size()}, Edge attributes: {sample['edge_attr'].size()}, Label: {sample['label']}")
    if i == 10: break

Element 0: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 0
Element 1: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 0
Element 2: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 0
Element 3: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 0
Element 4: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 1
Element 5: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 0
Element 6: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 1
Element 7: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 0
Element 8: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 0
Element 9: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 1
Element 10: Nodes: torch.Size([19, 6]), Edge attributes: torch.Size([19, 19]), Label: 1


In [8]:
train_dataset = DataLoader(
    train_dataset, 
    batch_size=hyperparams["batch_size"],
    shuffle=True
)

test_dataset = DataLoader(
    test_dataset, 
    batch_size=hyperparams["batch_size"],
    shuffle=True
)

In [9]:
for i_batch, sample_batched in enumerate(train_dataset):
    print(i_batch, sample_batched["x"].size(), sample_batched["edge_attr"].size(), sample_batched["label"].size())
    if i_batch == 3: break

0 torch.Size([64, 19, 6]) torch.Size([64, 19, 19]) torch.Size([64])
1 torch.Size([64, 19, 6]) torch.Size([64, 19, 19]) torch.Size([64])
2 torch.Size([64, 19, 6]) torch.Size([64, 19, 19]) torch.Size([64])
3 torch.Size([64, 19, 6]) torch.Size([64, 19, 19]) torch.Size([64])


In [10]:
# Define the network model: 
class EEGSmall(nn.Module):

    def __init__(self, batch_size=8, **kwargs):
        super(EEGSmall, self).__init__()
       
        #self.batch_norm = BatchNorm1d(1280, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        
        # Fully connected layers
        input_size = kwargs.get("input_size", -1)
        output_size = kwargs.get("output_size", 2)
        
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, output_size)
        
        # Xavier initializacion for fully connected layers
        self.fc1.apply(lambda x: nn.init.xavier_normal_(x.weight, gain=1) if isinstance(x, nn.Linear) else None)
        self.fc2.apply(lambda x: nn.init.xavier_normal_(x.weight, gain=1) if isinstance(x, nn.Linear) else None)
        self.fc3.apply(lambda x: nn.init.xavier_normal_(x.weight, gain=1) if isinstance(x, nn.Linear) else None)
        
        
    def forward(self, x, edge_index, edge_weigth, batch):
       
        # Perform batch normalization
        #x = F.leaky_relu(self.batch_norm(x), negative_slope=0.01)
        #x = F.dropout(batch_norm_out, p=0.2, training=self.training)
        # Global add pooling
        #mean_pool = global_add_pool(x, batch=batch)
        
        #print("Original nodes size: ", x.size())
        #print(batch.shape)
        x = x.view(x.shape[0], -1)
        #print("Nodes size after view: ", x.size())
        # Apply fully connected layters
        out = F.leaky_relu(self.fc1(x), negative_slope=0.01)
        #out = F.dropout(out, p = 0.2, training=self.training)
        
        out = F.leaky_relu(self.fc2(out), negative_slope=0.01)
        #out = F.dropout(out, p = 0.2, training=self.training)
        
        out = self.fc3(out)
        return out

In [11]:
# Create the model: 
N_CHANNELS = 19
N_FEATURES = 6
N_CLASSES = 2

input_size = N_CHANNELS * N_FEATURES

model = EEGSmall(input_size=input_size, output_size=N_CLASSES)
model = model.double()

In [19]:
import numpy as np

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print("Trainable parameters: ", params)
model

Trainable parameters:  9506


EEGSmall(
  (fc1): Linear(in_features=114, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=2, bias=True)
)

In [20]:
# Define loss, optimizer and scheduler (if used)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=hyperparams["lr"], momentum=hyperparams["momentum"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', verbose=True, patience=3)

In [21]:
# Nightmare cell where everything goes wrong.
# Here I try to train the network but there are only two possible scenarios: 

#   1. The accuracy is bullshit (very often)
#   2. The network throws an error that i do not fucking know how to solve

# Have luck!
# fails_counter = 5  ----> Add 1 whenever you fail ---- 10/06/2022
from sklearn.metrics import roc_auc_score, balanced_accuracy_score
from numpy import mean
import numpy as np

torch.autograd.set_detect_anomaly(True)

auroc_train_history = []
auroc_test_history = []
balACC_train_history = []

balACC_test_history = []
loss_train_history = []
loss_test_history = []

train_accs = []
test_accs = []

_NUM_EPOCHS = 120
_DEVICE = torch.device("cpu")


def train():
    model.train()
    running_loss = 0.0
    batch_loss = []
    for i, data in enumerate(train_dataset):  # Iterate in batches over the training dataset.

        #data.batch = data.batch.view(data.batch.shape[0], -1)
        #print(data['x'].shape)
        optimizer.zero_grad()  # Clear gradients.
        out = model(data['x'], data['edge_index'], data['edge_attr'], data)  # Perform a single forward pass.
        #print("Output shape: ", out.size())
        loss = criterion(out, data['label'])  # Compute the loss.
        print("Prediction: ", out, "Target: ", data["label"])
        batch_loss.append(loss.item())
        loss.backward()  # Derive gradients.
        print("Loss: ", loss.item())
          # Update parameters based on gradients.
        optimizer.step()
        
        running_loss += loss.item()
        if i%100 == 99:
            print(f'Epoch: {epoch + 1} - Iteration: {i + 1:5d} loss: {running_loss / 100:.3f}')
            running_loss = 0.0
    print(f"Training epoch {epoch}: {mean(batch_loss):.3f} loss")

def test(loader):
    model.eval()

    correct = 0
    y_probs_train = torch.empty(0, 2).to(_DEVICE)
    y_true_train, y_pred_train = [], []
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data['x'], data['edge_index'], data['edge_attr'], data)
            y_batch = data['label'].to(device=_DEVICE, non_blocking=True)
            
            pred = out.argmax(dim=1)  # Use the class with highest probability.
            
            
            correct += int((pred == data['label']).sum())  # Check against ground-truth labels.
            y_pred_train += pred.cpu().numpy().tolist()
            
            y_probs_train = torch.cat((y_probs_train, out.data), 0)
            y_true_train += y_batch.cpu().numpy().tolist()
            
    y_probs_train = torch.nn.functional.softmax(y_probs_train, dim=1).cpu().numpy()
    y_true_train = np.array(y_true_train)

    return correct / len(loader.dataset), y_true_train, y_probs_train, y_pred_train  # Derive ratio of correct predictions.


for epoch in range(hyperparams["epochs"]):
    train()
    
    train_acc, y_true_train, y_probs_train, y_pred_train = test(train_dataset)
    test_acc, y_true_test, y_probs_test, y_pred_test = test(test_dataset)
    train_accs.append(train_acc)
    test_accs.append(test_acc)

    balACC_train_history.append(balanced_accuracy_score(y_true_train, y_pred_train))
    balACC_test_history.append(balanced_accuracy_score(y_true_test, y_pred_test))

    print(f"Train Bal.ACC: {balACC_train_history[-1]:.3f}, test Bal.ACC: {balACC_test_history[-1]:.3f}")
    
    
    print(
        f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}'
    )
    scheduler.step(test_acc)

  File "c:\users\lokix\appdata\local\programs\python\python39\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\users\lokix\appdata\local\programs\python\python39\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "c:\Users\lokix\Envs\tfm\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "c:\Users\lokix\Envs\tfm\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
    app.start()
  File "c:\Users\lokix\Envs\tfm\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
    self.io_loop.start()
  File "c:\Users\lokix\Envs\tfm\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "c:\users\lokix\appdata\local\programs\python\python39\lib\asyncio\base_events.py", line 596, in run_forever
    self._run_once()
  File "c:\users\lokix\appdata\local\programs\python\python39\lib\async

RuntimeError: Function 'LogSoftmaxBackward0' returned nan values in its 0th output.

In [4]:
import torch
from itertools import product
from torch import nn
from torch_geometric.nn import GCNConv


lstm = nn.LSTM(1280, 512, 1)
lstm2 = nn.LSTM(512, 256, 3) 
fc1 = nn.Linear(256, 128)
conv = GCNConv(128, 64)

edge_index = torch.tensor(
            [[a, b] for a, b in product(range(19), range(19))]
        ).t().contiguous()

input = torch.rand(1, 19, 1280)
print(input.size())

output, hidden = lstm(input)
print(output.size())
print("Hidden state output LSTM 1 gate: ")
print(len(hidden))
print(hidden[0].size())
print("\n\n")


output, hidden = lstm2(output)
print(output.size())
print("Hidden state output LSTM 3 gates: ")
print(len(hidden))
print(hidden[0].size())

print("\n\n")
output = fc1(output)
print(output.size())

output = conv(output, edge_index)
print(output.size())


torch.Size([1, 19, 1280])
torch.Size([1, 19, 512])
Hidden state output LSTM 1 gate: 
2
torch.Size([1, 19, 512])



torch.Size([1, 19, 256])
Hidden state output LSTM 3 gates: 
2
torch.Size([3, 19, 256])



torch.Size([1, 19, 128])
torch.Size([1, 19, 64])


In [2]:
import random
import torch
import numpy as np
from autogl.datasets import build_dataset_from_name, utils
from autogl.solver import AutoGraphClassifier
from autogl.module import Acc
from autogl.backend import DependentBackend
if DependentBackend.is_pyg():
    from autogl.datasets.utils.conversion import to_pyg_dataset as convert_dataset
else:
    from autogl.datasets.utils.conversion import to_dgl_dataset as convert_dataset


In [3]:
from torch.utils.data import Dataset
from entities.graphs.graph_builder import RawAndPearson, MomentsAndPearson, MomentsAndPLI, RawAndPLI, PSDAndCSD, PSDAndPearson, OfflineGeneric
from entities.graphs.data_reader import read_record

In [4]:
class BaseDataset(Dataset):
    def __init__(self, indices, builder, transform=None, target_transform=None):
        self.indices = indices
        self.builder = builder
        self.transform = transform
        self.target_transform = target_transform
        

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        current_path = self.indices.iloc[idx]["path"]
        raw_data = read_record(current_path)
        label = self.indices.iloc[idx]["label"]
        data = self.builder.build(raw_data, label)
        return data
    
class OfflineDataset(Dataset):
    def __init__(self, node_indices, edge_indices, builder, transform=None, target_transform=None):
        self.node_indices = node_indices
        self.edge_indices = edge_indices
        self.builder = builder
        self.transform = transform
        self.target_transform = target_transform
        

    def __len__(self):
        return len(self.node_indices)

    def __getitem__(self, idx):
        current_path_nodes = self.node_indices.iloc[idx]["path"]
        computed_nodes = read_record(current_path_nodes)
        
        current_path_edges = self.edge_indices.iloc[idx]["path"]
        computed_edges = read_record(current_path_edges)
        
        label = self.node_indices.iloc[idx]["label"]
        data = self.builder.build(computed_nodes, computed_edges, label)
        
        return data

In [5]:
from sklearn.model_selection import train_test_split
import pandas as pd

MODE = "OFFLINE"

if MODE == "OFFLINE":

    NODE_INDEX_PATH = "C:/Projects/TFM/dataset/AD_MCI_HC_PSD/data.csv"
    EDGE_INDEX_PATH = "C:/Projects/TFM/dataset/AD_MCI_HC_PEARSON/data.csv"

    node_indices = pd.read_csv(NODE_INDEX_PATH, index_col="Unnamed: 0")
    edge_indices = pd.read_csv(EDGE_INDEX_PATH, index_col="Unnamed: 0")

    node_indices = node_indices.drop(node_indices[node_indices.label == "MCI"].index)
    node_indices_hc = node_indices[node_indices.label == 'HC'].sample(frac=0.4)
    node_indices_ad = node_indices[node_indices.label == 'AD']
    node_indices = pd.concat([node_indices_hc, node_indices_ad])

    node_train_indices, node_test_indices = train_test_split(node_indices, shuffle=True)
    edge_train_indices, edge_test_indices = edge_indices.iloc[node_train_indices.index], edge_indices.iloc[node_test_indices.index]

    builder = OfflineGeneric(th=None)
    
    train_dataset = OfflineDataset(node_train_indices, edge_train_indices, builder)
    test_dataset = OfflineDataset(node_test_indices, edge_test_indices, builder)
    
if MODE == "ONLINE":
    indices = pd.read_csv(INDEX_PATH, index_col="Unnamed: 0")
    indices = indices.drop(indices[indices.label == "MCI"].index)
    indices_hc = indices[indices.label == 'HC'].sample(frac=0.4)
    indices_ad = indices[indices.label == 'AD']
    indices = pd.concat([indices_hc, indices_ad])

    train_data, test_data = train_test_split(indices, shuffle=True)

    builder = RawAndPearson(normalize_nodes=True, normalize_edges=False, th=0)
    #builder = MomentsAndPearson(th=0)
    #builder = MomentsAndPLI()
    #builder = RawAndPLI(normalize_nodes=True, normalize_edges=False)
    #builder = PSDAndCSD()
    #builder = PSDAndPearson(th=0.5)

    train_dataset = BaseDataset(train_data, builder)
    test_dataset = BaseDataset(test_data, builder)
    train_data

In [10]:
from autogl.module.model.pyg import AutoGIN
# from autogl.module.model.dgl import AutoGIN  # dgl version
model = AutoGIN(
                num_features=6,
                num_classes=2,
                num_graph_features=0,
                init=False
            ).from_hyper_parameter({
                # hp from model
                "num_layers": 5,
                "hidden": [64,64,64,64],
                "dropout": 0.5,
                "act": "relu",
                "eps": "False",
                "mlp_layers": 2,
                "neighbor_pooling_type": "sum",
                "graph_pooling_type": "sum"
            }).model
            
model = model.double()

In [11]:
from torch_geometric.loader import DataLoader

_BATCH_SIZE = 64
train_dataloader = DataLoader(train_dataset, batch_size=_BATCH_SIZE, shuffle=True)#sampler=weighted_sampler)
test_dataloader = DataLoader(test_dataset, batch_size=_BATCH_SIZE, shuffle=True)

In [13]:
import torch.nn.functional as F

# Define the loss optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
device = torch.device('cpu')
# Training
for epoch in range(100):
    model.train()
    for data in train_dataloader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, data.label)
        loss.backward()
        optimizer.step()

In [16]:
def test(model, loader):
    model.eval()

    correct = 0
    for data in loader:
        data = data.to(device)
        output = model(data)
        pred = output.max(dim=1)[1]
        correct += pred.eq(data.label).sum().item()
    return correct / len(loader.dataset)

acc = test(model, test_dataloader)


0.5198501872659176

In [None]:
solver = AutoGraphClassifier(
            feature_module=None,
            graph_models=[],
            hpo_module='random',
            ensemble_module=None,
            device=device, max_evals=1,
            trainer_hp_space = fixed(
                **{
                    # hp from trainer
                    "max_epoch": 50,
                    "batch_size": 64,
                    "early_stopping_round": 50 + 1,
                    "lr": 0.01,
                    "weight_decay": 0,
                }
            ),
            model_hp_spaces=[
                fixed(**{
                    # hp from model
                    "num_layers": 5,
                    "hidden": [64,64,64,64],
                    "dropout": 0.5,
                    "act": "relu",
                    "eps": "False",
                    "mlp_layers": 2,
                    "neighbor_pooling_type": "sum",
                    "graph_pooling_type": "sum"
                }) if args.model == 'gin' else fixed(**{
                    "ratio": 0.8,
                    "dropout": 0.5,
                    "act": "relu"
                }),
            ]
        )

# fit auto model
solver.fit(dataset, evaluation_method=['acc'])