In [2]:
import transformers
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import os
from nilearn.image import load_img
import pandas as pd
import gc

data_dir = 'data'
pet_dir = "data/ad_pet_huw"

In [2]:
files = {}
for file in os.listdir(pet_dir):
    if file.endswith(".nii"):
        img = load_img(os.path.join(pet_dir, file))
        patient_id = file.split(".")[0].removeprefix('AD_normalised_')
        print(f'Processing patient: {patient_id}')
        # Convert the image to a PyTorch tensor
        torch_img = torch.tensor(img.get_fdata(), dtype=torch.float32)
        files[patient_id] =torch_img

Processing patient: 002_S_5018
Processing patient: 003_S_4136
Processing patient: 003_S_4152
Processing patient: 003_S_4373
Processing patient: 003_S_4892
Processing patient: 003_S_5165
Processing patient: 003_S_5187
Processing patient: 005_S_4707
Processing patient: 005_S_4910
Processing patient: 005_S_5038
Processing patient: 005_S_5119
Processing patient: 006_S_4153
Processing patient: 006_S_4192
Processing patient: 006_S_4546
Processing patient: 006_S_4867
Processing patient: 007_S_4568
Processing patient: 007_S_4637
Processing patient: 007_S_4911
Processing patient: 007_S_5196
Processing patient: 009_S_5027
Processing patient: 009_S_5037
Processing patient: 009_S_5224
Processing patient: 009_S_5252
Processing patient: 011_S_4827
Processing patient: 011_S_4845
Processing patient: 011_S_4906
Processing patient: 011_S_4912
Processing patient: 011_S_4949
Processing patient: 013_S_5071
Processing patient: 014_S_4039
Processing patient: 014_S_4615
Processing patient: 016_S_4009
Processi

In [3]:
df = pd.read_csv(os.path.join(data_dir, 'ADNIMERGE_19Jun2025.csv'))
sex_df = df.filter(['PTID', 'PTGENDER'])
sex_map = {'Male': 0, 'Female': 1}
sex_labels = {0: 'Male', 1: 'Female'}
sex_df['PTGENDER'] = sex_df['PTGENDER'].map(sex_map)

  df = pd.read_csv(os.path.join(data_dir, 'ADNIMERGE_19Jun2025.csv'))


In [10]:
del df
gc.collect()

NameError: name 'df' is not defined

In [4]:
# Compute the number of common patients between the PET files and the sex_df
missing_patients = sex_df[~sex_df['PTID'].isin(files.keys())]
print(f'Missing patients: {len(missing_patients)}')

common_patients = sex_df['PTID'].isin(files.keys())
print(f'Common patients: {common_patients.sum()}')

print(f'Total patients {len(files)}')

Missing patients: 15684
Common patients: 737
Total patients 149


In [5]:
# Update the sex DataFrame to include a new column for the PET image data matched on PTID
for patient in files:
    img = files.get(patient)
    sex_df['PET_IMAGE'] = sex_df['PTID'].map(files) # Insert the img data ino the 'PET_IMAGE' column in sex_df for the corresponding PTID field

In [6]:
# Print the PTID for the columns for which PET_IMAGE is not None
print(f'Number of patients: {len(sex_df)}')
sex_df.dropna(subset=['PET_IMAGE'], inplace=True)
sex_df.drop_duplicates(subset=['PTID'], inplace=True)
print(f'Number of patients with PET images: {len(sex_df)}')

Number of patients: 16421
Number of patients with PET images: 149


In [7]:
# Print the shape of the PET image for the first patient
first_patient = sex_df.iloc[0]
print(f'First patient PTID: {first_patient["PTID"]}')
print(f'PET image shape: {first_patient["PET_IMAGE"].shape} with data type {type(first_patient["PET_IMAGE"])}')
print(f'Sex: {first_patient["PTGENDER"]}')

First patient PTID: 135_S_5275
PET image shape: torch.Size([101, 116, 96]) with data type <class 'torch.Tensor'>
Sex: 1


In [11]:
# Select PET_IMAGE and PTGENDER from sex_df
X = sex_df['PET_IMAGE'].tolist()  # This will be a list of torch.Tensor objects
y = sex_df['PTGENDER'].values     # This will be a numpy array of labels

print(f'X length: {len(X)}, PET image shape: {X[0].shape}, y shape: {y.shape}')

X length: 149, PET image shape: torch.Size([101, 116, 96]), y shape: (149,)


In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)
print(f'Train X length: {len(X_train)}, Test X length: {len(X_test)} with shapes {X_train[0].shape}, {X_test[0].shape}')
print(f'Train y shape: {y_train.shape}, Test y shape: {y_test.shape}')

Train X length: 119, Test X length: 30 with shapes torch.Size([101, 116, 96]), torch.Size([101, 116, 96])
Train y shape: (119,), Test y shape: (30,)


In [20]:
os.makedirs('data/pet_demographic', exist_ok=True)
# Save the training and test data
torch.save(X_train, 'data/pet_demographic/X_train.pt')
torch.save(X_test, 'data/pet_demographic/X_test.pt')
torch.save(y_train, 'data/pet_demographic/y_train.pt')
torch.save(y_test, 'data/pet_demographic/y_test.pt')

In [3]:
X_train = torch.load('data/pet_demographic/X_train.pt', weights_only=False)
X_test = torch.load('data/pet_demographic/X_test.pt', weights_only=False)
y_train = torch.load('data/pet_demographic/y_train.pt', weights_only=False)
y_test = torch.load('data/pet_demographic/y_test.pt', weights_only=False)

In [4]:
class PETDataset(torch.utils.data.Dataset):
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        return image.unsqueeze(0), label  # Add channel dimension for CNN input

In [5]:
# Define a 3D CNN to deal with images of shape (101, 116, 96)
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = torch.nn.Conv3d(1, 16, kernel_size=5, padding='valid')
        self.conv2 = torch.nn.Conv3d(16, 32, kernel_size=5, padding='valid')
        self.pool = torch.nn.MaxPool3d(kernel_size=2, stride=2)
        self.fc1 = torch.nn.Linear(32 * 46 * 54 * 44, 128)
        self.fc2 = torch.nn.Linear(128, 1)  # Output layer for binary classification
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.conv1(x)
        x = torch.nn.functional.relu(x)
        x = self.conv2(x)
        x = torch.nn.functional.relu(x)
        x = self.pool(x)  # Apply max pooling
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc1(x)
        x = torch.nn.functional.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x

In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.cuda.empty_cache()
print(f'Using device: {device}')

inputs = torch.stack([torch.unsqueeze(img, 0) for img in X_train], dim=0)  # Add channel dimension
labels = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)  # Convert labels to float and add a channel dimension
model = CNN().to(device)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 100
train_losses = []

batch_size = 2  # Adjust based on your GPU memory
train_dataset = PETDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
total_batches = len(train_loader)

for epoch in range(1, epochs + 1):
    model.train()
    batch_counter = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        batch_counter += 1
        print(f'Processing batch {batch_counter}/{total_batches} of epoch {epoch}')
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
        # Save the best model based on loss
        if loss.item() < min(train_losses, default=float('inf')):
            torch.save(model.state_dict(), 'data/pet_demographic/best_pet_cnn_model.pth')
    del inputs, labels, outputs  # Clear variables to free memory
    torch.cuda.empty_cache()  # Clear GPU memory after each epoch
    gc.collect()  # Collect garbage to free up memory

    print(f'Epoch [{epoch}/{epochs}], Loss: {loss.item():.4f}')

Using device: cuda
Processing batch 1/30 of epoch 1
Processing batch 2/30 of epoch 1
Processing batch 3/30 of epoch 1
Processing batch 4/30 of epoch 1
Processing batch 5/30 of epoch 1
Processing batch 6/30 of epoch 1
Processing batch 7/30 of epoch 1
Processing batch 8/30 of epoch 1
Processing batch 9/30 of epoch 1
Processing batch 10/30 of epoch 1
Processing batch 11/30 of epoch 1
Processing batch 12/30 of epoch 1
Processing batch 13/30 of epoch 1
Processing batch 14/30 of epoch 1
Processing batch 15/30 of epoch 1
Processing batch 16/30 of epoch 1
Processing batch 17/30 of epoch 1
Processing batch 18/30 of epoch 1
Processing batch 19/30 of epoch 1
Processing batch 20/30 of epoch 1
Processing batch 21/30 of epoch 1
Processing batch 22/30 of epoch 1
Processing batch 23/30 of epoch 1
Processing batch 24/30 of epoch 1
Processing batch 25/30 of epoch 1
Processing batch 26/30 of epoch 1
Processing batch 27/30 of epoch 1
Processing batch 28/30 of epoch 1
Processing batch 29/30 of epoch 1
Proc

KeyboardInterrupt: 

In [None]:
torch.save(model.state_dict(), 'data/pet_demographic/pet_cnn_model.pth')

In [9]:
# Compute the accuracy on the train set
model.eval()
with torch.no_grad():
    train_dataset = PETDataset(X_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    y_pred_train = []
    y_true_train = []

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)
        y_pred_train.extend(outputs.cpu().numpy())
        y_true_train.extend(labels.cpu().numpy())
    y_pred_train = (torch.tensor(y_pred_train) > 0.5).float().numpy()  # Convert probabilities to binary predictions
    y_true_train = torch.tensor(y_true_train).numpy()
    train_accuracy = accuracy_score(y_true_train, y_pred_train)
    train_f1 = f1_score(y_true_train, y_pred_train)
    train_roc_auc = roc_auc_score(y_true_train, y_pred_train)
    print(f'Train Accuracy: {train_accuracy:.4f}, Train F1 Score: {train_f1:.4f}, Train ROC AUC: {train_roc_auc:.4f}')

Train Accuracy: 0.4202, Train F1 Score: 0.5917, Train ROC AUC: 0.5000


In [8]:
# Compute the accuracy on the test set
with torch.no_grad():
    test_dataset = PETDataset(X_test, y_test)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    y_pred_test = []
    y_true_test = []

    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)
        y_pred_test.extend(outputs.cpu().numpy())
        y_true_test.extend(labels.cpu().numpy())
    y_pred_test = (torch.tensor(y_pred_test) > 0.5).float().numpy()  # Convert probabilities to binary predictions
    y_true_test = torch.tensor(y_true_test).numpy()
    test_accuracy = accuracy_score(y_true_test, y_pred_test)
    test_f1 = f1_score(y_true_test, y_pred_test)
    test_roc_auc = roc_auc_score(y_true_test, y_pred_test)
    print(f'Test Accuracy: {test_accuracy:.4f}, Test F1 Score: {test_f1:.4f}, Test ROC AUC: {test_roc_auc:.4f}')

  y_pred_test = (torch.tensor(y_pred_test) > 0.5).float().numpy()  # Convert probabilities to binary predictions


Test Accuracy: 0.4333, Test F1 Score: 0.6047, Test ROC AUC: 0.5000
