# Creating a Deep Model to predict the antidepressant effect

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif

import nibabel as nib
import matplotlib.pyplot as plt # For data viz
import pandas as pd
import numpy as np
import sys

print('System Version:', sys.version)
print('PyTorch version', torch.__version__)
print('Numpy version', np.__version__)
print('Pandas version', pd.__version__)

System Version: 3.12.4 (main, Jul  1 2024, 10:14:11) [GCC 12.2.0]
PyTorch version 2.7.1+cu128
Numpy version 2.3.2
Pandas version 2.3.1


In [2]:
# Confirm device setup
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [3]:
# Load the data
X_train = np.load('data/X_SYN.npy')
X_test = np.load('data/X_TEST_RAW.npy')

y_train = np.load('data/y_SYN.npy')
y_test = np.load('data/y_TEST_RAW.npy')

In [4]:
X_train = X_train.reshape(X_train.shape[0], -1)  # Shape: (n, 91*109*91)
X_test = X_test.reshape(X_test.shape[0], -1)  # Shape: (n, 91*109*91)

# Generate the index mapping
# First, create all combinations of indices for the last 3 dims
sub_indices = np.indices((91, 109, 91)).reshape(3, -1).T  # Shape: (91*109*91, 3)

# Now tile this across the first dimension (39)
num_per_outer = sub_indices.shape[0]  # 91*109*91
outer_indices = np.repeat(np.arange(39), num_per_outer).reshape(-1, 1)  # Shape: (39*num_per_outer, 1)

# Repeat sub_indices for each outer index
all_indices = np.hstack([outer_indices, np.tile(sub_indices, (39, 1))])  # Shape: (39*num_per_outer, 4)
locs = np.delete(all_indices, 0, axis=1)

# Select top k voxels with highest mutual information
k = 150  # amount of voxels to keep
selector = SelectKBest(score_func=f_classif, k=k)
selector.fit(X_train, y_train)

# Transform both the training and test data using the fitted selector
X_train = selector.transform(X_train)
X_test = selector.transform(X_test)


In [5]:
# Load reference NIfTI file (to get shape and affine)
ref_file = '/projectnb/fastfmri/sdwilli/scripts/FEAT/grouplevel/atlas/amygdalawhole_thr50_2mm.nii.gz'
ref_img = nib.load(ref_file)
ref_shape = ref_img.shape
ref_affine = ref_img.affine


# Get indices of selected voxels
selected_voxel_indices = selector.get_support(indices=True)

# === Save mask of selected voxels ===
# Create an empty mask with the same shape as reference
mask_data = np.zeros(ref_shape, dtype=np.uint8)

# Matrix of coordinates (assumed in voxel space)
locations_voxel = locs[selected_voxel_indices]

# Populate the mask
for xc, yc, zc in locations_voxel:
    if 0 <= xc < ref_shape[0] and 0 <= yc < ref_shape[1] and 0 <= zc < ref_shape[2]:
        mask_data[int(xc), int(yc), int(zc)] = 1

# Create a new NIfTI image
mask_img = nib.Nifti1Image(mask_data, affine=ref_affine)

# Save to disk
print('Saving mask to disk....')
nib.save(mask_img, 'masks/mask_selected_voxels.nii.gz')
print('DONE')

Saving mask to disk....
DONE


In [6]:
print(X_train.shape)

(420, 150)


In [7]:
# Define the dataset
class COPEDataset(Dataset):
    def __init__(self, data, target):
        self.data = data
        self.target = target

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        cope_data = self.data[index]
        cope_data = (cope_data - np.min(cope_data)) / (np.max(cope_data) - np.min(cope_data))  # normalize

        label = self.target[index]
        volume = torch.tensor(cope_data, dtype=torch.float32).unsqueeze(0)  # (1, 91, 109, 91)
        label = torch.tensor([1.0, 0.0] if label == 0 else [0.0, 1.0], dtype=torch.float32)
        return volume, label

In [8]:
# Initiate the dataset and data loader
train_dataset = COPEDataset(X_train, y_train)
test_dataset = COPEDataset(X_test, y_test)
train_dataloader = DataLoader(train_dataset, batch_size=10)
test_dataloader = DataLoader(test_dataset)

In [9]:
class BrainClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [10]:
# Setup
# Example usage:
input_size = k
hidden_size = 1280
num_classes = 2

# Create an instance of the model
#torch.manual_seed(117)
model = BrainClassifier(input_size, hidden_size, num_classes).cuda()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Train
for epoch in range(15):
    model.train()
    total_loss, correct = 0.0, 0

    for batch in train_dataloader:
        inputs, labels = batch
        inputs, labels = inputs.cuda(), labels.cuda()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == torch.argmax(labels, dim=1)).sum().item()

    acc = correct / len(train_dataloader.dataset)
    print(f"Epoch {epoch+1}, Loss: {total_loss:.2f}, Accuracy: {acc:.4f}")


RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [None]:
# Test the model

def evaluate(model, dataloader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.cuda(), labels.cuda()
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1)
            print(torch.argmax(labels, dim=1))
            correct += (preds == torch.argmax(labels, dim=1)).sum().item()
    return correct / len(dataloader.dataset)

val_acc = evaluate(model, test_dataloader)
print(f"Validation Accuracy: {val_acc:.4f}")


tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
Validation Accuracy: 0.8000
