In [1]:
import sys
    
try:
    from utils import data_loading
except:
    sys.path.append('../utils')
    sys.path.append('../')
    from utils import data_loading

In [12]:
from utils import data_loading, model
import os
import numpy as np
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import matplotlib
from matplotlib import pyplot as plt
from nilearn import datasets
from nilearn import plotting
import torch
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names
from torchvision import transforms
from sklearn.decomposition import IncrementalPCA
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr as corr
from sklearn.decomposition import PCA
import random

# Create concatenated lists including X samples * 8 subjects
brain_concat = []
images_concat = []
ids_concat = []

for subj in range(1,2+1):
    lh, rh, images, id_list  = data_loading.load_subject_data(subj, 0, 100, include_subject_id=True)
    brain_concat.extend(np.concatenate((lh, rh), axis=1)) ### investigate whether concat of lh and rh results in what we want
    images_concat += images
    ids_concat += id_list

# Create dataset with concatenated hemispheres
dataset = data_loading.CustomDataset(images_list = images_concat, outputs_list = brain_concat, id_list = ids_concat, transform=transforms.ToTensor(), PCA = PCA(n_components = 100))
print('\nDataset made up of ', len(dataset), 'truples? of data\n--------')
print('Shape of 1st element:', dataset[0][0].shape)
print('Type of 2nd element:', type(dataset[0][1]))
print('Shape of 3rd element:', dataset[0][2].shape, '\n\n')

from torch.utils.data import Subset

# Create a train and validation subset of the variable dataset with torch
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Custom subset class to preserve .id_list attribute when splitting into train and val
class CustomSubset(Subset):
    def __init__(self, dataset, indices, id_list):
        super(CustomSubset, self).__init__(dataset, indices)
        self.id_list = id_list

# Use the CustomSubset class for the train and validation subsets
train_dataset = CustomSubset(dataset, range(0, train_size), ids_concat[:train_size])
val_dataset = CustomSubset(dataset, range(train_size, len(dataset)), ids_concat[train_size:])




----------------
Loading subject data with subject ID 1...
Current project directory: /Users/emilykruger/Documents/GitHub/aml_project_2023


----------------
Loading subject data with subject ID 2...
Current project directory: /Users/emilykruger/Documents/GitHub/aml_project_2023

----------------
Initialize CustomDataset

Number of samples:  200
Transform:  ToTensor()
PCA:  PCA(n_components=100)
-------
Data loaded

Data:  200 *  ([<PIL.Image.Image image mode=RGB size=425x425 at 0x7FB53CFD2EE0>, 1], array([-0.8617882 , -0.20318632, -0.62639767, ..., -0.41889378,
       -0.60231453, -0.67537224], dtype=float32))
Output_concat:  200 * 39548 :  [-0.8617882  -0.20318632 -0.62639767 ... -0.41889378 -0.60231453
 -0.67537224]

Dataset made up of  200 truples? of data
--------
Shape of 1st element: torch.Size([3, 425, 425])
Type of 2nd element: <class 'int'>
Shape of 3rd element: torch.Size([100]) 




In [15]:
# Put train dataset into a loader with 2 batches and put test data in val loader
train_sampler = data_loading.SubjectSampler(train_dataset)
val_sampler = data_loading.SubjectSampler(val_dataset)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, sampler=train_sampler)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=16, sampler=val_sampler)

# Initialize model, trainer, optimizer and loss function
reg_model = model.ResNet1HeadID(100)
trainer = model.Trainer()
optimizer = torch.optim.Adam
loss = torch.nn.MSELoss()
trainer.compile(reg_model, optimizer, learning_rate=0.1, loss_fn=loss)

trainer.fitID(num_epochs = 2, train_loader=train_loader, val_loader=val_loader)

Epoch 1/2: 100%|██████████| 10/10 [02:37<00:00, 15.73s/it]


Training Loss: 150866.4048553467
Validation Loss: 223941457084416.0


Epoch 2/2: 100%|██████████| 10/10 [06:32<00:00, 39.28s/it]


Training Loss: 15951.304989624023
Validation Loss: 3453166416.0
