In [None]:
# Autoreload modules
%load_ext autoreload
%autoreload 2

import torch
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
    
# Make the project's local modules importable
import sys,os
sys.path.append(os.path.realpath('../Modules'))

from dataloader.dataset import ADNI3Channels
from dataloader.dataloader import ADNILoader
from dataloader.transforms import Transforms

from model.model import ViT
from model.train import Trainer

from matplotlib import pyplot as plt
from utils.image import save_fig

# Dataset and Dataloader Setup

In [None]:
# Class-id <-> diagnosis-label lookup tables; the reverse map is derived from
# the forward one so the two can never disagree.
id2label = {0: "CN", 1: "MCI", 2: "AD"}
label2id = {name: class_id for class_id, name in id2label.items()}

transforms = Transforms(image_size=(384, 384), p=0.5)

# Every split, including training, is built with the eval transforms.
split_dirs = ("../Data/Training/", "../Data/Validation/", "../Data/Test/")
train_ds, valid_ds, test_ds = (
    ADNI3Channels(split_dir, transforms=transforms.eval())
    for split_dir in split_dirs
)

In [None]:
# Sanity-check the first training sample: shape, label, split sizes, value range.
image, label = train_ds[0]

print("Image shape:", image.shape)
print("Label:", id2label[label.item()], "\n")

print("Number of training samples:", len(train_ds))
print("Number of validation samples:", len(valid_ds))
print("Number of test samples:", len(test_ds), "\n")

# Show each of the three leading-axis slices of the sample in its own panel.
fig, axes = plt.subplots(ncols=3, figsize=(6, 2), dpi=300)
for channel_idx, channel_ax in enumerate(axes):
    channel_ax.imshow(image[channel_idx, :, :])
    channel_ax.axis("off")

print("Min pixel value =", image.min().item())
print("Max pixel value =", image.max().item())

In [None]:
# Constructor arguments shared by every ADNILoader instance below.
kwargs = {'train_ds': train_ds, 'valid_ds': valid_ds, 'test_ds': test_ds}

train_dataloader = ADNILoader(**kwargs).train_dataloader()
valid_dataloader = ADNILoader(**kwargs).validation_dataloader()
test_dataloader = ADNILoader(**kwargs).test_dataloader()

# Pull a single training batch and print the shapes of its first two elements.
batch = next(iter(train_dataloader))
for position in (0, 1):
    print(batch[position].shape)

# Loading Model

In [None]:
# Instantiate the ViT wrapper on the first CUDA device.
model = ViT(pretrained=True, model_name="google/vit-base-patch32-384", device="cuda:0")

# Restore saved weights.
# NOTE(review): presumably "acc" selects the best-accuracy checkpoint — confirm in model.model.ViT.
model.load_best_state_file("acc", "../ViT/Best models/", "ViT_Pretrained")

# Trainer configuration; the trainer is only used for evaluation in this notebook.
kwargs = {
    "epochs": 100,
    "model": model,
    "train_dataloader": train_dataloader,
    "valid_dataloader": valid_dataloader,
    "test_dataloader": test_dataloader,
}
trainer = Trainer(**kwargs)

In [None]:
# Run the trainer's test routine on the test dataloader only.
# The train/validation calls are left disabled for quick re-enabling.
# NOTE(review): what `trainer.test` reports is defined in model.train.Trainer,
# which is not visible from this notebook.
# trainer.test(trainer.train_dataloader)
# trainer.test(trainer.valid_dataloader)
trainer.test(trainer.test_dataloader)

# Saving Hidden States

In [None]:
def save_hidden_states(dataloader, net=None):
    """Collect the flattened last hidden state and the label of every sample.

    Args:
        dataloader: Iterable yielding ``(x, y)`` batches of tensors.
        net: Optional model to run; defaults to the notebook-level ``model``.
            It must expose ``.device`` and ``.eval()`` and, when called on a
            batch, return a 3-tuple whose last element is an indexable
            sequence of hidden-state tensors.

    Returns:
        tuple: ``(last_hidden_states, targets)`` as float64 NumPy arrays of
        shape ``(n_samples, n_features)`` and ``(n_samples, 1)``. For an empty
        dataloader the arrays have zero rows.
    """
    net = model if net is None else net

    # Per-batch results are gathered in lists and concatenated once at the end;
    # calling np.append inside the loop re-copies the whole array every batch.
    feature_batches = []
    target_batches = []

    net.eval()
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(net.device), y.to(net.device)
            _, _, hidden_states = net(x)

            # Flatten using the real batch length rather than
            # dataloader.batch_size: the final batch may be smaller, and a
            # fixed-size reshape would then raise.
            last_layer = hidden_states[-1]
            feature_batches.append(
                last_layer.reshape(last_layer.shape[0], -1).cpu().numpy()
            )
            target_batches.append(y.unsqueeze(dim=1).cpu().numpy())

    if not feature_batches:
        # Keep the historical empty-result shape.
        # NOTE(review): 145 * 768 presumably is tokens x hidden size of the
        # ViT checkpoint used in this notebook — confirm.
        return np.empty((0, 145 * 768)), np.empty((0, 1))

    # Cast to float64 so callers get the same dtype as before.
    last_hidden_states = np.concatenate(feature_batches, axis=0).astype(np.float64, copy=False)
    targets = np.concatenate(target_batches, axis=0).astype(np.float64, copy=False)

    return last_hidden_states, targets

In [None]:
# Extract flattened hidden states and labels for each split, in train/valid/test order.
(
    (train_last_hidden_states, train_targets),
    (valid_last_hidden_states, valid_targets),
    (test_last_hidden_states, test_targets),
) = (
    save_hidden_states(split_loader)
    for split_loader in (train_dataloader, valid_dataloader, test_dataloader)
)

# PCA

In [None]:
# Standardise every feature to zero mean and unit variance.
# The scaler statistics are learned from the training split only and then
# reused for validation and test. Refitting per split would scale each split
# with its own mean/std, putting the three splits in different feature spaces
# and letting held-out statistics leak into preprocessing.
standard_scaler = StandardScaler()
train_last_hidden_states = standard_scaler.fit_transform(train_last_hidden_states)
valid_last_hidden_states = standard_scaler.transform(valid_last_hidden_states)
test_last_hidden_states = standard_scaler.transform(test_last_hidden_states)

In [None]:
# Project the hidden states onto two principal components.
# The components are fitted on the training split only; validation and test
# are projected onto those same axes so the scatter plots of the different
# splits are directly comparable. Refitting per split would give each plot
# its own, unrelated pair of axes.
pca = PCA(n_components=2)

train_pca = pca.fit_transform(train_last_hidden_states)
# Fraction of training-set variance captured by each of the two components.
print(pca.explained_variance_ratio_)

valid_pca = pca.transform(valid_last_hidden_states)
test_pca = pca.transform(test_last_hidden_states)

In [None]:
def _pca_frame(split_targets, split_pca):
    """Build a tidy frame with the class target next to its two PCA coordinates.

    Args:
        split_targets: Array of shape ``(n_samples, 1)`` holding class ids.
        split_pca: Array of shape ``(n_samples, 2)`` holding PCA coordinates.

    Returns:
        pandas.DataFrame with columns ``target``, ``principal component 1``
        and ``principal component 2``.
    """
    # np.concatenate is used instead of np.concat: the latter only exists in
    # NumPy >= 2.0 and raises AttributeError on older installations.
    return pd.DataFrame(
        np.concatenate([split_targets, split_pca], axis=1),
        columns=["target", "principal component 1", "principal component 2"],
    )


train_df = _pca_frame(train_targets, train_pca)
valid_df = _pca_frame(valid_targets, valid_pca)
test_df = _pca_frame(test_targets, test_pca)

In [None]:
# Plot styling per class: position i in each list describes class id targets[i].
targets, colors, markers = (
    [0, 1, 2],
    ['r', 'g', 'b'],
    ['o', '*', 'P'],
)

In [None]:
# Scatter plot of the training split in the 2-D PCA space, one series per class.
fig, ax = plt.subplots(figsize=(8, 8), dpi=300)

for target, color, marker in zip(targets, colors, markers):
    sample_df = train_df[train_df['target'] == target]
    ax.scatter(
        sample_df['principal component 1'],
        sample_df['principal component 2'],
        s=100,
        alpha=0.5,
        c=color,
        marker=marker,
        label=f'{id2label[target]} (Train)',
    )

# Use the Axes API rather than the pyplot state machine for tick styling.
ax.tick_params(axis='both', labelsize=15)
ax.legend(fontsize=14)

# NOTE: the explained-variance ratios below are hard-coded from the values
# printed for the training-split PCA; update them if the data or model changes.
ax.set_xlabel('Principal Component 1 - Explained Variance Ratio = 0.42', size=15)
ax.set_ylabel('Principal Component 2 - Explained Variance Ratio = 0.19', size=15);

save_fig("PCA_train", fig)

In [None]:
# Scatter plot of the test split in the 2-D PCA space, one series per class.
fig, ax = plt.subplots(figsize=(8, 8), dpi=300)

for target, color, marker in zip(targets, colors, markers):
    sample_df = test_df[test_df['target'] == target]
    ax.scatter(
        sample_df['principal component 1'],
        sample_df['principal component 2'],
        s=100,
        alpha=0.5,
        c=color,
        marker=marker,
        label=f'{id2label[target]} (Test)',
    )

# Use the Axes API rather than the pyplot state machine for tick styling.
ax.tick_params(axis='both', labelsize=15)
ax.legend(fontsize=14)

# Read the ratios from the fitted PCA object instead of hard-coding numbers
# copied from another plot, so the axis labels cannot go stale.
pc1_ratio = pca.explained_variance_ratio_[0]
pc2_ratio = pca.explained_variance_ratio_[1]
ax.set_xlabel(f'Principal Component 1 - Explained Variance Ratio = {pc1_ratio:.2f}', size=15)
ax.set_ylabel(f'Principal Component 2 - Explained Variance Ratio = {pc2_ratio:.2f}', size=15);

save_fig("PCA_test", fig)