In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from oauth2client.client import GoogleCredentials
from google.colab import auth
import gspread
auth.authenticate_user()
gc = gspread.authorize(GoogleCredentials.get_application_default())

In [4]:
root_path = '/content/drive/MyDrive/Innolab' 

In [5]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import h5py
from sklearn.model_selection import train_test_split
from math import ceil

import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, Tensor
from torch.optim import Adam
from torchsummary import summary


### Prepare dataset

In [6]:
os.chdir(root_path + '/Matlab')
mat_files_names = os.listdir()
conn_matrices = []
worked = []

for i in mat_files_names:
        
    with h5py.File(i, 'r') as f:
        conn_matrices.append(np.array(f.get("Z")))
        worked.append(i)

In [7]:
worksheet = gc.open_by_url('https://docs.google.com/spreadsheets/d/1SKLgfrrpKEyo7d967XHsyYdPuuCo8GWXPQbWb5qjskk/edit#gid=685049469')
sheet = worksheet.worksheet('Tabelle1')
rows = sheet.get_all_values()
df = pd.DataFrame.from_records(rows)
df.columns = df.iloc[0]
df = df.iloc[1:]

In [8]:
has_alzheimer = df.prmdiag.isin(['2','3'])
no_alzheimer = df.prmdiag.isin(['0'])

df.loc[has_alzheimer, 'target'] = 1
df.loc[no_alzheimer, 'target'] = 0
df.dropna(subset=['target'], axis = 0, inplace = True)

In [9]:
ind = df.index.values.tolist()

In [10]:
train_ind = np.random.choice(ind, size= int(len(ind) * 0.8), replace=False).tolist()
test_ind = list(set(ind) - set(train_ind))

In [11]:
train_matrices = []
for i in train_ind:
    train_matrices.append(torch.from_numpy(np.nan_to_num(conn_matrices[i-1])))

In [12]:
test_matrices = []
for i in test_ind:
    test_matrices.append(torch.from_numpy(np.nan_to_num(conn_matrices[i-1])))

In [13]:
feature_train = torch.stack(train_matrices)
feature_test = torch.stack(test_matrices)

In [14]:
feature_train = torch.reshape(feature_train, (-1,1,246,246))
feature_test = torch.reshape(feature_test, (-1,1,246,246))

In [16]:
class ConnectomeTrainDataset(Dataset):
    def __init__(self):
        self.label = df['target'][train_ind].reset_index()['target'].astype('float32')
        self.feature = feature_train
        self.n_samples = feature_train.shape[0]

    def __len__(self):
        return self.n_samples

    def __getitem__(self, idx):
        return self.feature[idx].float(), self.label[idx]

In [17]:
class ConnectomeTestDataset(Dataset):
    def __init__(self):
        self.label = df['target'][test_ind].reset_index()['target'].astype('float32')
        self.feature = feature_test
        self.n_samples = feature_test.shape[0]

    def __len__(self):
        return self.n_samples

    def __getitem__(self, idx):
        return self.feature[idx].float(), self.label[idx]

In [18]:
train_dataset = ConnectomeTrainDataset()
test_dataset = ConnectomeTestDataset()

In [19]:
train_dataset[0][0]

tensor([[[ 0.0000,  0.9720,  0.9620,  ..., -0.1857, -0.0156, -0.2422],
         [ 0.9720,  0.0000,  0.8611,  ..., -0.3752, -0.1386, -0.3591],
         [ 0.9620,  0.8611,  0.0000,  ..., -0.2071,  0.1315, -0.0667],
         ...,
         [-0.1857, -0.3752, -0.2071,  ...,  0.0000, -0.0116,  0.4322],
         [-0.0156, -0.1386,  0.1315,  ..., -0.0116,  0.0000,  0.2506],
         [-0.2422, -0.3591, -0.0667,  ...,  0.4322,  0.2506,  0.0000]]])

In [22]:
torch.save(train_dataset, '/content/drive/MyDrive/Innolab/train.pt')
torch.save(test_dataset, '/content/drive/MyDrive/Innolab/test.pt')

### Train Model

In [37]:
model = nn.Sequential(
    nn.Conv2d(
        in_channels=1,
        out_channels=32,
        kernel_size=(3, 3),
    ),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Conv2d(32,32,(3,3)),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=4),
    nn.Conv2d(32,32,(3,3)),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(25088,32),
    nn.ReLU(),
    nn.Linear(32,1),
    nn.Sigmoid()
)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [38]:
model.to(device)

Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (7): ReLU()
  (8): Flatten(start_dim=1, end_dim=-1)
  (9): Linear(in_features=25088, out_features=32, bias=True)
  (10): ReLU()
  (11): Linear(in_features=32, out_features=1, bias=True)
  (12): Sigmoid()
)

In [39]:
summary(model, input_size=(1, 246, 246), device=str(device))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 244, 244]             320
              ReLU-2         [-1, 32, 244, 244]               0
         MaxPool2d-3         [-1, 32, 122, 122]               0
            Conv2d-4         [-1, 32, 120, 120]           9,248
              ReLU-5         [-1, 32, 120, 120]               0
         MaxPool2d-6           [-1, 32, 30, 30]               0
            Conv2d-7           [-1, 32, 28, 28]           9,248
              ReLU-8           [-1, 32, 28, 28]               0
           Flatten-9                [-1, 25088]               0
           Linear-10                   [-1, 32]         802,848
             ReLU-11                   [-1, 32]               0
           Linear-12                    [-1, 1]              33
          Sigmoid-13                    [-1, 1]               0
Total params: 821,697
Trainable params:

##### Model Parameters

In [40]:
loss = (
    nn.BCELoss()
)

optimizer = (
    Adam(model.parameters(), weight_decay=0.001)
)

In [41]:
epochs = 20
batch_size = 8
num_workers = 4

max_batches = ceil(len(train_dataset) / batch_size)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

  cpuset_checked))


In [42]:
for ep in range(1, epochs + 1):

    total_loss = 0
    num_correct = 0

    for batch_idx, (x, y) in enumerate(train_loader):

        # Push tensors to device
        x = x.to(device)
        y = y.to(device)

        y_hat = model(x).squeeze()
        batch_loss = loss(y_hat ,y)
        #set gradients to zero
        optimizer.zero_grad()
        #backpropagte the error
        batch_loss.backward()
        #apply gradients
        optimizer.step()

        # Print progress every 10 batches
        if batch_idx % 10 == 0:
            print('BATCH:\t({:5} / {:5})\tLOSS:\t{:.3f}'
                  .format(batch_idx, max_batches, float(batch_loss) / batch_size), end='\r')

        total_loss += float(batch_loss)
        num_correct += int(torch.sum(torch.round(y_hat) == y))

    print('EPOCH:\t{:5}\tLOSS:\t{:.3f}\tACCURACY:\t{:.3f}'
          .format(ep, total_loss / len(train_dataset), num_correct / len(train_dataset),
                  end='\r'))

  cpuset_checked))


EPOCH:	    1	LOSS:	0.088	ACCURACY:	0.468
EPOCH:	    2	LOSS:	0.087	ACCURACY:	0.495
EPOCH:	    3	LOSS:	0.087	ACCURACY:	0.508
EPOCH:	    4	LOSS:	0.087	ACCURACY:	0.492
EPOCH:	    5	LOSS:	0.086	ACCURACY:	0.529
EPOCH:	    6	LOSS:	0.087	ACCURACY:	0.521
EPOCH:	    7	LOSS:	0.087	ACCURACY:	0.527
EPOCH:	    8	LOSS:	0.087	ACCURACY:	0.457
EPOCH:	    9	LOSS:	0.087	ACCURACY:	0.497
EPOCH:	   10	LOSS:	0.087	ACCURACY:	0.495
EPOCH:	   11	LOSS:	0.087	ACCURACY:	0.532
EPOCH:	   12	LOSS:	0.086	ACCURACY:	0.513
EPOCH:	   13	LOSS:	0.086	ACCURACY:	0.532
EPOCH:	   14	LOSS:	0.086	ACCURACY:	0.532
EPOCH:	   15	LOSS:	0.086	ACCURACY:	0.548
EPOCH:	   16	LOSS:	0.084	ACCURACY:	0.593
EPOCH:	   17	LOSS:	0.082	ACCURACY:	0.657
EPOCH:	   18	LOSS:	0.080	ACCURACY:	0.649
EPOCH:	   19	LOSS:	0.077	ACCURACY:	0.702
EPOCH:	   20	LOSS:	0.074	ACCURACY:	0.707


In [43]:
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1024,
    num_workers=4,
)

total_loss = 0
num_correct = 0

for batch_idx, (x, y) in enumerate(train_loader):
    # Push tensors to device
    x = x.to(device)
    y = y.to(device)

    with torch.no_grad():
      y_hat = model(x).squeeze()
      batch_loss = loss(y_hat,y)

    total_loss += float(batch_loss)
    num_correct += int(torch.sum(torch.round(y_hat) == y))

print('EVALUATION LOSS:\t{:.3f}\tEVALUATION: ACCURACY:\t{:.3f}'
          .format(total_loss / len(train_dataset), num_correct / len(train_dataset),
                  end='\r'))

  cpuset_checked))


EVALUATION LOSS:	0.069	EVALUATION: ACCURACY:	0.769
