In [1]:
###-------------------------------------------------------------------------------------------------------------------
#         imports
###-------------------------------------------------------------------------------------------------------------------
import sys
sys.path.append('../code/')
from training import *
from models import *
from utils import * 
from torch.utils.data import DataLoader, TensorDataset
import torch
import torch.nn as nn
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from pathlib import Path

## Data path ##
# The data folder is expected at <parent of the current working directory>/DATA.
# The resolved Path is kept under its own name so DATA_PATH is only ever a string.
data_dir = (Path.cwd().parent / "DATA").resolve() # TODO : adapt to server
print(f"Data path: {data_dir}")
DATA_PATH = str(data_dir)

Data path: C:\Users\Cyril\Desktop\Code\MIPLab-TeamCEE-DeepLearningforBiomed\DATA


In [2]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules (here the local training / models / utils files) are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
###-------------------------------------------------------------------------------------------------------------------
#         hyperparameters
###-------------------------------------------------------------------------------------------------------------------

# Hyperparameters for this run. The whole dict is also handed to training_loop,
# so the values recorded here should match what the model is actually built with.
config = {
    # general
    "epochs": 100,
    "batch_size": 4,
    "lr": 1e-3,

    # model
    "d_model_input": 400,
    "d_model_intermediate": 512,
    # Number of task classes. The data contains 9 distinct task_id values
    # (REST1, REST2, EMOTION, GAMBLING, LANGUAGE, MOTOR, RELATIONAL, SOCIAL, WM),
    # and the model is instantiated with a 9-way task head; the previous value
    # of 8 did not match either.
    "d_model_task_output": 9,
    "d_model_fingerprint_output": None, # needs to be determined from data
    "dropout" : 0.1,
    "attention_dropout" : 0.1,
    "num_heads": 4,
    "num_layers": 0, # TBA?

    # optimizer
    "lambda_si": 0.5,
    "lambda_td": 0.5
}

In [4]:
###-------------------------------------------------------------------------------------------------------------------
#         subject ID list
###-------------------------------------------------------------------------------------------------------------------

# Numeric subject identifiers (95 entries, five per row). A subset of this
# list is passed to get_df_raw_data when loading the data, so the order of
# the entries matters for index-based selection.
IDs = [
    100307, 117122, 131722, 153025, 211720,
    100408, 118528, 133019, 154734, 212318,
    101107, 118730, 133928, 156637, 214423,
    101309, 118932, 135225, 159340, 221319,
    101915, 120111, 135932, 160123, 239944,
    103111, 122317, 136833, 161731, 245333,
    103414, 122620, 138534, 162733, 280739,
    103818, 123117, 139637, 163129, 298051,
    105014, 123925, 140925, 176542, 366446,
    105115, 124422, 144832, 178950, 397760,
    106016, 125525, 146432, 188347, 414229,
    108828, 126325, 147737, 189450, 499566,
    110411, 127630, 148335, 190031, 654754,
    111312, 127933, 148840, 192540, 672756,
    111716, 128127, 149337, 196750, 751348,
    113619, 128632, 149539, 198451, 756055,
    113922, 129028, 149741, 199655, 792564,
    114419, 130013, 151223, 201111, 856766,
    115320, 130316, 151526, 208226, 857263,
]

In [5]:
###-------------------------------------------------------------------------------------------------------------------
#         joining train and test dataframes from all subjects
###-------------------------------------------------------------------------------------------------------------------

# Load a three-subject subset: the entries at positions 0, 5 and 10 of IDs.
selected_ids = [IDs[position] for position in (0, 5, 10)]
data_df_train, data_df_test = get_df_raw_data(DATA_PATH, selected_ids)
display(data_df_train.head(10))

Unnamed: 0,subject_id,task_id,mat
0,100307,REST1,"[[0.21854491103466994, 0.07509374392964863, 0...."
1,100307,REST2,"[[0.2509722712619662, 0.06429771271159306, 0.1..."
2,100307,EMOTION,"[[0.27626702525883573, 0.03827488524289221, 0...."
3,100307,GAMBLING,"[[0.2356709594115424, 0.03883497545044236, 0.1..."
4,100307,LANGUAGE,"[[0.2317390561241142, 0.06537822245634475, 0.0..."
5,100307,MOTOR,"[[0.2141270371266362, 0.040754342863046, 0.084..."
6,100307,RELATIONAL,"[[0.2709434948110919, 0.08915439190003989, 0.1..."
7,100307,SOCIAL,"[[0.27075755129825896, 0.07942572217389814, 0...."
8,100307,WM,"[[0.28122430896568573, 0.12358947079320645, 0...."
9,100408,REST1,"[[0.31926332845816463, 0.18973934066088882, 0...."


In [6]:
# Number of distinct subjects in the training frame; used further down as the
# size of the model's subject output (output_size_subjects).
NUM_SUBJECTS = data_df_train["subject_id"].nunique(dropna=False)
print(f"Number of subjects: {NUM_SUBJECTS}")

Number of subjects: 3


In [7]:
###-------------------------------------------------------------------------------------------------------------------
#         label encoding
###-------------------------------------------------------------------------------------------------------------------

# Integer label encoding for subjects and tasks.
# (An earlier one-hot variant of this cell, written against the old dict-based
# loader, has been removed; integer codes are what the datasets below consume.)
enc_labels = LabelEncoder()
enc_tasks = LabelEncoder()

# Encoders are fitted on the training frame only. fit_transform is equivalent
# to calling fit and then transform on the same values.
enc_train_label_encodings = enc_labels.fit_transform(data_df_train["subject_id"].tolist())
enc_train_task_encodings = enc_tasks.fit_transform(data_df_train["task_id"].tolist())

# The test frame reuses the fitted encoders so codes agree across both splits.
enc_test_label_encodings = enc_labels.transform(data_df_test["subject_id"].tolist())
enc_test_task_encodings = enc_tasks.transform(data_df_test["task_id"].tolist())

# Store the codes next to the raw identifiers (adds two columns to each frame).
data_df_train["enc_label_id"] = enc_train_label_encodings
data_df_train["enc_task_id"] = enc_train_task_encodings
data_df_test["enc_label_id"] = enc_test_label_encodings
data_df_test["enc_task_id"] = enc_test_task_encodings

# To map codes back to the original values:
# enc_labels.inverse_transform(...) / enc_tasks.inverse_transform(...)

In [8]:
# Preview the first ten training rows again, now including the enc_label_id
# and enc_task_id columns added by the label-encoding cell.
display(data_df_train.head(10))

Unnamed: 0,subject_id,task_id,mat,enc_label_id,enc_task_id
0,100307,REST1,"[[0.21854491103466994, 0.07509374392964863, 0....",0,5
1,100307,REST2,"[[0.2509722712619662, 0.06429771271159306, 0.1...",0,6
2,100307,EMOTION,"[[0.27626702525883573, 0.03827488524289221, 0....",0,0
3,100307,GAMBLING,"[[0.2356709594115424, 0.03883497545044236, 0.1...",0,1
4,100307,LANGUAGE,"[[0.2317390561241142, 0.06537822245634475, 0.0...",0,2
5,100307,MOTOR,"[[0.2141270371266362, 0.040754342863046, 0.084...",0,3
6,100307,RELATIONAL,"[[0.2709434948110919, 0.08915439190003989, 0.1...",0,4
7,100307,SOCIAL,"[[0.27075755129825896, 0.07942572217389814, 0....",0,7
8,100307,WM,"[[0.28122430896568573, 0.12358947079320645, 0....",0,8
9,100408,REST1,"[[0.31926332845816463, 0.18973934066088882, 0....",1,5


In [9]:
###-------------------------------------------------------------------------------------------------------------------
#         initializing dataloader objects
###-------------------------------------------------------------------------------------------------------------------

def _build_loader(frame, batch_size, shuffle):
    """Wrap one dataframe into a ``TensorDataset`` and a ``DataLoader`` over it.

    Args:
        frame: dataframe providing the columns ``mat``, ``enc_label_id`` and
            ``enc_task_id``.
        batch_size: number of samples per batch.
        shuffle: whether the loader shuffles the samples.

    Returns:
        ``(dataset, loader)``. Every sample is the triple
        ``(matrix, subject_code, task_code)``.
    """
    matrices = torch.tensor(np.array(frame["mat"].tolist()).astype(np.float32))
    # Class-index targets are pinned to int64: the integer width produced by the
    # label encoder can differ between platforms, and nn.CrossEntropyLoss
    # expects long targets when given class indices.
    subject_codes = torch.tensor(frame["enc_label_id"].to_numpy(), dtype=torch.long)
    task_codes = torch.tensor(frame["enc_task_id"].to_numpy(), dtype=torch.long)

    dataset = TensorDataset(matrices, subject_codes, task_codes)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return dataset, loader


# Training batches are shuffled; the test loader keeps the dataframe order.
train_dataset, train_loader = _build_loader(data_df_train, config["batch_size"], shuffle=True)
test_dataset, test_loader = _build_loader(data_df_test, config["batch_size"], shuffle=False)

In [10]:
###-------------------------------------------------------------------------------------------------------------------
#         initializing model
###-------------------------------------------------------------------------------------------------------------------

# Size the task head from the fitted task encoder rather than a hard-coded
# literal, so it always matches the number of task classes in the data
# (mirrors how NUM_SUBJECTS sizes the subject head).
NUM_TASKS = len(enc_tasks.classes_)

model = MRIVisionTransformers(
        output_size_tasks = NUM_TASKS,
        output_size_subjects = NUM_SUBJECTS,
        input_size = config["d_model_input"],
        num_heads = config["num_heads"],
        dropout = config["dropout"],
        attention_dropout = config["attention_dropout"]
)

# Shape smoke test: push one random sample with the same per-sample shape as
# the training matrices through the model. no_grad avoids building an autograd
# graph for a throwaway forward pass; the model's train/eval mode is untouched.
x = torch.randn(1, *train_dataset.tensors[0].shape[1:])
with torch.no_grad():
    y = model(x)

# x_si, x_td, attn_weights
print(y[0].size())
print(y[1].size())
print(y[2].size())

torch.Size([1, 3])
torch.Size([1, 9])
torch.Size([1, 400, 400])


In [11]:
###-------------------------------------------------------------------------------------------------------------------
#         training
###-------------------------------------------------------------------------------------------------------------------

# Training runs on CPU for now.
# NOTE(review): switching to "cuda" requires confirming that training_loop
# moves both the model and the batches to `device`.
device = "cpu"

optimizer = torch.optim.AdamW(model.parameters(), lr=config["lr"])
criterion = nn.CrossEntropyLoss()

# The epoch count is passed explicitly and the full config dict is passed as well.
training_loop(
    config["epochs"],
    model,
    train_loader,
    test_loader,
    criterion,
    optimizer,
    device,
    config,
)

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 None
[34m[1mwandb[0m: Currently logged in as: [33mc-achard[0m. Use [1m`wandb login --relogin`[0m to force relogin
DEBUG:git.cmd:Popen(['git', 'cat-file', '--batch-check'], cwd=c:\Users\Cyril\Desktop\Code\MIPLab-TeamCEE-DeepLearningforBiomed, universal_newlines=False, shell=None, istream=<valid stream>)


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

DEBUG:training:SI loss : 34.34313121863774
DEBUG:training:TD loss : 35.08243611880711


Epoch: 1/100 - loss_total: 34.7128- Acc: SI 0.3333 / TD 0.0000- val-loss_total: 30.0611 - val-acc: SI 0.6667 TD 0.0000(15.15s/epoch)


DEBUG:training:SI loss : 20.942123906952993
DEBUG:training:TD loss : 26.07610947745187


Epoch: 2/100 - loss_total: 23.5091- Acc: SI 0.3333 / TD 0.0000- val-loss_total: 11.8013 - val-acc: SI 1.0000 TD 0.0000(14.33s/epoch)


DEBUG:training:SI loss : 5.507892233984811
DEBUG:training:TD loss : 9.467350687299456


Epoch: 3/100 - loss_total: 7.4876- Acc: SI 0.6667 / TD 0.0000- val-loss_total: 8.1839 - val-acc: SI 0.0000 TD 0.0000(13.15s/epoch)


DEBUG:training:SI loss : 4.738003496612821
DEBUG:training:TD loss : 7.887131043842861


Epoch: 4/100 - loss_total: 6.3126- Acc: SI 0.3333 / TD 0.3333- val-loss_total: 6.1790 - val-acc: SI 0.3333 TD 0.0000(13.29s/epoch)


DEBUG:training:SI loss : 0.4535096193264638
DEBUG:training:TD loss : 3.872344970703125


Epoch: 5/100 - loss_total: 2.1629- Acc: SI 0.6667 / TD 0.0000- val-loss_total: 4.1052 - val-acc: SI 0.0000 TD 0.0000(14.34s/epoch)


DEBUG:training:SI loss : 0.6509033528023532
DEBUG:training:TD loss : 2.786150710923331


Epoch: 6/100 - loss_total: 1.7185- Acc: SI 1.0000 / TD 0.3333- val-loss_total: 3.1323 - val-acc: SI 0.0000 TD 0.0000(16.15s/epoch)


DEBUG:training:SI loss : 0.5169490594416857
DEBUG:training:TD loss : 1.962415188550949


Epoch: 7/100 - loss_total: 1.2397- Acc: SI 1.0000 / TD 0.6667- val-loss_total: 3.9482 - val-acc: SI 0.0000 TD 0.0000(14.77s/epoch)


DEBUG:training:SI loss : 0.5420748053916863
DEBUG:training:TD loss : 0.8059181187834058


Epoch: 8/100 - loss_total: 0.6740- Acc: SI 1.0000 / TD 1.0000- val-loss_total: 3.6267 - val-acc: SI 0.0000 TD 0.3333(13.30s/epoch)


DEBUG:training:SI loss : 0.23554155443395888
DEBUG:training:TD loss : 0.6389163125838552


Epoch: 9/100 - loss_total: 0.4372- Acc: SI 1.0000 / TD 0.6667- val-loss_total: 3.6155 - val-acc: SI 0.0000 TD 0.0000(13.28s/epoch)


DEBUG:training:SI loss : 0.23226335870900325
DEBUG:training:TD loss : 0.6496180616585272


Epoch: 10/100 - loss_total: 0.4409- Acc: SI 0.6667 / TD 1.0000- val-loss_total: 2.9531 - val-acc: SI 0.3333 TD 0.3333(18.78s/epoch)


DEBUG:training:SI loss : 0.20370155388289796
DEBUG:training:TD loss : 0.35363257250615526


Epoch: 11/100 - loss_total: 0.2787- Acc: SI 0.6667 / TD 1.0000- val-loss_total: 3.5183 - val-acc: SI 0.0000 TD 0.0000(16.08s/epoch)


DEBUG:training:SI loss : 0.09563975045706943
DEBUG:training:TD loss : 0.21655552142432757


Epoch: 12/100 - loss_total: 0.1561- Acc: SI 1.0000 / TD 1.0000- val-loss_total: 2.9584 - val-acc: SI 0.3333 TD 0.0000(13.72s/epoch)


DEBUG:training:SI loss : 0.06322576471835159
DEBUG:training:TD loss : 0.15258048541311706


Epoch: 13/100 - loss_total: 0.1079- Acc: SI 1.0000 / TD 1.0000- val-loss_total: 2.9895 - val-acc: SI 0.3333 TD 0.0000(16.18s/epoch)


DEBUG:training:SI loss : 0.04730315689130293
DEBUG:training:TD loss : 0.2575010082551411


Epoch: 14/100 - loss_total: 0.1524- Acc: SI 1.0000 / TD 1.0000- val-loss_total: 2.9290 - val-acc: SI 0.3333 TD 0.0000(15.98s/epoch)
