# 1.1 Import Libraries

In [1]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt

# 1.2 Define Paths

In [9]:
MRI_DATASET_PATH = "/kaggle/input/imagesoasis/Data"

# 1.3 Data Transforms

In [11]:
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225])
])

# 1.4 Load Data

In [12]:
dataset = ImageFolder(MRI_DATASET_PATH, transform=transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_ds, val_ds = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32)

# 1.5 Load ResNet-50

In [13]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model_mri = models.resnet50(pretrained=True)
model_mri.fc = nn.Linear(model_mri.fc.in_features, 4)
model_mri = model_mri.to(device)


In [14]:
print("Total images:", len(dataset))
print("Train size:", len(train_ds))
print("Batches:", len(train_loader))
print("Classes:", dataset.classes)

Total images: 86437
Train size: 69149
Batches: 2161
Classes: ['Mild Dementia', 'Moderate Dementia', 'Non Demented', 'Very mild Dementia']


In [16]:
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize((160,160)),   # faster
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225])
])

dataset = ImageFolder(MRI_DATASET_PATH, transform=transform)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_ds, val_ds = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_ds, batch_size=32, num_workers=2)

# 1.6 Train MRI Model

In [17]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_mri.parameters(), lr=1e-4)

for epoch in range(5):
    model_mri.train()
    running_loss = 0

    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()

        outputs = model_mri(x)
        loss = criterion(outputs, y)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/5], Loss: {running_loss/len(train_loader):.4f}")


Epoch [1/5], Loss: 0.0945
Epoch [2/5], Loss: 0.0199
Epoch [3/5], Loss: 0.0130
Epoch [4/5], Loss: 0.0109
Epoch [5/5], Loss: 0.0083


# Accuracy

In [22]:
def accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == y).sum().item()
            total += y.size(0)
    return correct / total

# Testing Epochs

In [23]:
for epoch in range(5):
    model_mri.train()
    running_loss = 0

    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = model_mri(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Compute validation accuracy
    val_acc = accuracy(model_mri, val_loader)

    print(f"Epoch [{epoch+1}/5], Loss: {running_loss/len(train_loader):.4f}, Val Accuracy: {val_acc*100:.2f}%")


Epoch [1/5], Loss: 0.0073, Val Accuracy: 99.97%
Epoch [2/5], Loss: 0.0054, Val Accuracy: 99.85%
Epoch [3/5], Loss: 0.0059, Val Accuracy: 99.92%
Epoch [4/5], Loss: 0.0042, Val Accuracy: 99.98%
Epoch [5/5], Loss: 0.0016, Val Accuracy: 100.00%


## Data Augmentation

In [24]:
transforms.RandomRotation(15),
transforms.RandomHorizontalFlip(),
transforms.RandomResizedCrop(224, scale=(0.8,1.0))

RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0), ratio=(0.75, 1.3333), interpolation=bilinear, antialias=True)

## Patient-Level Prediction Example

In [30]:
import numpy as np

def patient_level_predict(model, patient_slices):
    model.eval()
    outputs = []
    with torch.no_grad():
        for x in patient_slices:
            x = x.to(device).unsqueeze(0)
            out = model(x)
            outputs.append(out.cpu().numpy())
    avg_output = np.mean(outputs, axis=0)
    return np.argmax(avg_output)

# 1.7 Save MRI Model

In [31]:
torch.save(model_mri.state_dict(), "mri_resnet50.pth")

## HANDWRITING MODEL

In [62]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd


# 2.1 Load Dataset

In [71]:
# Load CSV
df = pd.read_csv("/kaggle/input/handwriting-data-to-detect-alzheimers-disease/data.csv")




In [74]:
import pandas as pd
import numpy as np

# Load CSV
df = pd.read_csv("/kaggle/input/handwriting-data-to-detect-alzheimers-disease/data.csv")

# Check first rows
print(df.head())

# Since no label exists, create dummy labels for testing
# 0 = Healthy, 1 = Alzheimer
# For real use, replace this with actual labels if available
np.random.seed(42)
df['label'] = np.random.randint(0, 2, size=len(df))


     ID  air_time1  disp_index1  gmrt_in_air1  gmrt_on_paper1  \
0  id_1       5160     0.000013    120.804174       86.853334   
1  id_2      51980     0.000016    115.318238       83.448681   
2  id_3       2600     0.000010    229.933997      172.761858   
3  id_4       2130     0.000010    369.403342      183.193104   
4  id_5       2310     0.000007    257.997131      111.275889   

   max_x_extension1  max_y_extension1  mean_acc_in_air1  mean_acc_on_paper1  \
0               957              6601          0.361800            0.217459   
1              1694              6998          0.272513            0.144880   
2              2333              5802          0.387020            0.181342   
3              1756              8159          0.556879            0.164502   
4               987              4732          0.266077            0.145104   

   mean_gmrt1  ...  mean_jerk_in_air25  mean_jerk_on_paper25  \
0  103.828754  ...            0.141434              0.024471   
1   99

In [75]:
# Features: all numeric columns except ID
X = df.drop(columns=['ID','label'])

# Convert to numeric in case any column is object
X = X.apply(pd.to_numeric, errors='coerce')

# Fill missing values if any
X = X.fillna(0).values

# Labels
y = df['label'].values.astype(int)


# 2.2 Pytorch Dataset

In [76]:
import torch
from torch.utils.data import Dataset, DataLoader

class HandwritingFeaturesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

dataset = HandwritingFeaturesDataset(X, y)

# Train/Validation split
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=16)


# 2.3 Define MLP Model

In [77]:
import torch.nn as nn

class HandwritingMLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 2)  # 2 classes: Healthy / Alzheimer
        )
    def forward(self, x):
        return self.fc(x)

model_hw = HandwritingMLP(X.shape[1])
device = "cuda" if torch.cuda.is_available() else "cpu"
model_hw = model_hw.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_hw.parameters(), lr=1e-4)


In [78]:
num_epochs = 10

for epoch in range(num_epochs):
    model_hw.train()
    total_loss = 0
    total_correct = 0
    
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = model_hw(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        total_correct += torch.sum(preds == y).item()
    
    acc = total_correct / len(train_ds)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}, Accuracy: {acc:.4f}")


Epoch 1/10, Loss: 5369.4382, Accuracy: 0.5396
Epoch 2/10, Loss: 3715.6108, Accuracy: 0.5036
Epoch 3/10, Loss: 1693.4537, Accuracy: 0.5971
Epoch 4/10, Loss: 675.0823, Accuracy: 0.6331
Epoch 5/10, Loss: 882.9556, Accuracy: 0.6619
Epoch 6/10, Loss: 290.1693, Accuracy: 0.7122
Epoch 7/10, Loss: 242.5250, Accuracy: 0.7194
Epoch 8/10, Loss: 202.0962, Accuracy: 0.7770
Epoch 9/10, Loss: 214.7288, Accuracy: 0.7626
Epoch 10/10, Loss: 182.4026, Accuracy: 0.7986


# 2.4 Save Model

In [81]:
torch.save(model_hw.state_dict(), "handwriting_mlp_model.pth")
print("Handwriting MLP model saved successfully!")


Handwriting MLP model saved successfully!


In [83]:
import torch
import torchvision.models as models
import torch.nn as nn

# Load pretrained ResNet50
resnet50 = models.resnet50(pretrained=True)

# Replace final layer to match your classes (4 Alzheimer stages)
num_classes = 4
resnet50.fc = nn.Linear(resnet50.fc.in_features, num_classes)

device = "cuda" if torch.cuda.is_available() else "cpu"
resnet50 = resnet50.to(device)




In [86]:
import torch
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, transforms, models
import torch.nn as nn
import torch.optim as optim


In [87]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

MRI_PATH = "/kaggle/input/imagesoasis/Data" # replace with actual path

mri_dataset = datasets.ImageFolder(MRI_PATH, transform=transform)


In [88]:
# Image transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # ResNet requires 224x224
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],  # ImageNet mean
                         [0.229, 0.224, 0.225])  # ImageNet std
])

# Path to your MRI images in Kaggle input
MRI_PATH = "/kaggle/input/imagesoasis/Data"  # replace with your path

# Assuming folder structure: 
# /MRI_PATH/class_name/image.jpg
mri_dataset = datasets.ImageFolder(MRI_PATH, transform=transform)

# Split train/val
train_size = int(0.8 * len(mri_dataset))
val_size = len(mri_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(mri_dataset, [train_size, val_size])

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)


# Load pretrained ResNet-50

In [92]:
# Load pretrained ResNet-50
resnet50 = models.resnet50(pretrained=True)

# Change the final fully connected layer to match your number of classes
num_classes = len(mri_dataset.classes)  # automatically get number of classes from folder names
resnet50.fc = nn.Linear(resnet50.fc.in_features, num_classes)

# Move to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
resnet50 = resnet50.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet50.parameters(), lr=1e-4)


# Train the ResNet-50 model

In [93]:
num_epochs = 5  # you can increase to 10-20 for better performance

for epoch in range(num_epochs):
    resnet50.train()
    total_loss = 0
    total_correct = 0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = resnet50(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        total_correct += torch.sum(preds == labels).item()
    
    train_acc = total_correct / len(train_loader.dataset)
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs} — Loss: {avg_loss:.4f}, Accuracy: {train_acc:.4f}")


Epoch 1/5 — Loss: 0.1135, Accuracy: 0.9581
Epoch 2/5 — Loss: 0.0251, Accuracy: 0.9919
Epoch 3/5 — Loss: 0.0149, Accuracy: 0.9950
Epoch 4/5 — Loss: 0.0119, Accuracy: 0.9958
Epoch 5/5 — Loss: 0.0092, Accuracy: 0.9968


# Validate The Model 

In [94]:
resnet50.eval()
total_correct = 0

with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = resnet50(images)
        preds = torch.argmax(outputs, dim=1)
        total_correct += torch.sum(preds == labels).item()

val_acc = total_correct / len(val_loader.dataset)
print(f"Validation Accuracy: {val_acc:.4f}")

Validation Accuracy: 0.9997


# Save The Trained Model

In [95]:
torch.save(resnet50.state_dict(), "resnet50_mri.pth")
print("ResNet-50 MRI model saved successfully!")

ResNet-50 MRI model saved successfully!


# Step 3: Extract MRI Embeddings

We remove the final fully connected layer from the trained ResNet-50
so we can use the feature vectors (embeddings) from each MRI.
These embeddings will later be concatenated with handwriting embeddings
for the fusion network.


## Extract MRI embeddings

In [96]:
import torch.nn as nn

# Remove final classification layer
resnet_features = nn.Sequential(*list(resnet50.children())[:-1])
resnet_features = resnet_features.to(device)
resnet_features.eval()

# Function to extract embeddings
def extract_mri_embeddings(dataloader):
    embeddings = []
    labels = []
    with torch.no_grad():
        for images, lbls in dataloader:
            images = images.to(device)
            emb = resnet_features(images)  # shape [batch, 2048, 1, 1]
            emb = emb.view(emb.size(0), -1)  # flatten to [batch, 2048]
            embeddings.append(emb.cpu())
            labels.append(lbls)
    embeddings = torch.cat(embeddings, dim=0)
    labels = torch.cat(labels, dim=0)
    return embeddings, labels

train_mri_emb, train_mri_lbl = extract_mri_embeddings(train_loader)
val_mri_emb, val_mri_lbl = extract_mri_embeddings(val_loader)

print("MRI Embeddings shape:", train_mri_emb.shape)


MRI Embeddings shape: torch.Size([69149, 2048])


# Step 3b: Extract Handwriting Embeddings

We take the last hidden layer of the trained Handwriting MLP
as the embedding for each patient. These will be fused
with the MRI embeddings for multimodal prediction.


# Extract handwriting embeddings

In [99]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch

# Load CSV
df = pd.read_csv("/kaggle/input/handwriting-data-to-detect-alzheimers-disease/data.csv")

# Drop 'ID' column
X = df.drop(columns=["ID"]).values

# NOTE: You need labels for Alzheimer vs healthy
# If you have a column like 'label', use it. Otherwise, create a dummy label (0/1) for demo
# For now, let's create dummy labels (replace this with your real labels)
y = torch.zeros(X.shape[0], dtype=torch.long)  # all zeros (healthy)
# If you have a real column:
# y = df['label'].values

# Split train/val
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_scaled_train = scaler.fit_transform(X_train)
X_scaled_val = scaler.transform(X_val)

# Convert to torch tensors
X_scaled_train = torch.tensor(X_scaled_train, dtype=torch.float32)
X_scaled_val = torch.tensor(X_scaled_val, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

print("Handwriting train shape:", X_scaled_train.shape)
print("Handwriting val shape:", X_scaled_val.shape)


ValueError: could not convert string to float: 'H'

In [100]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch

# Load CSV
df = pd.read_csv("/kaggle/input/handwriting-data-to-detect-alzheimers-disease/data.csv")

# Drop ID column
df_numeric = df.drop(columns=["ID"])

# Check all columns are numeric
print(df_numeric.dtypes)

# Convert to numeric just in case
df_numeric = df_numeric.apply(pd.to_numeric, errors='coerce')

# Fill any missing values
df_numeric = df_numeric.fillna(0)

# Features X
X = df_numeric.values

# Labels: for now, dummy labels (replace with real if available)
y = torch.zeros(X.shape[0], dtype=torch.long)  # all zeros (healthy)
# If you have a column 'label':
# y = df['label'].values

# Train/val split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize numeric features
scaler = StandardScaler()
X_scaled_train = scaler.fit_transform(X_train)
X_scaled_val = scaler.transform(X_val)

# Convert to torch tensors
X_scaled_train = torch.tensor(X_scaled_train, dtype=torch.float32)
X_scaled_val = torch.tensor(X_scaled_val, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

print("Handwriting train shape:", X_scaled_train.shape)
print("Handwriting val shape:", X_scaled_val.shape)


air_time1             int64
disp_index1         float64
gmrt_in_air1        float64
gmrt_on_paper1      float64
max_x_extension1      int64
                     ...   
paper_time25          int64
pressure_mean25     float64
pressure_var25      float64
total_time25          int64
class                object
Length: 451, dtype: object
Handwriting train shape: torch.Size([139, 451])
Handwriting val shape: torch.Size([35, 451])


  y_train = torch.tensor(y_train, dtype=torch.long)
  y_val = torch.tensor(y_val, dtype=torch.long)


In [102]:
train_hw_emb = model_hw(X_scaled_train.to(device), return_embedding=True)
val_hw_emb = model_hw(X_scaled_val.to(device), return_embedding=True)


TypeError: HandwritingMLP.forward() got an unexpected keyword argument 'return_embedding'

In [103]:
import torch.nn as nn

class HandwritingMLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)  # this will be the embedding
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 2)    # output layer (binary classification)

    def forward(self, x, return_embedding=False):
        x = self.relu1(self.fc1(x))
        emb = self.relu2(self.fc2(x))  # embeddings
        out = self.fc3(emb)
        if return_embedding:
            return emb
        else:
            return out


In [104]:
input_dim = X_scaled_train.shape[1]  # number of features
model_hw = HandwritingMLP(input_dim)
device = "cuda" if torch.cuda.is_available() else "cpu"
model_hw = model_hw.to(device)


In [105]:
train_hw_emb = model_hw(X_scaled_train.to(device), return_embedding=True)
val_hw_emb = model_hw(X_scaled_val.to(device), return_embedding=True)

print("Handwriting embeddings shape:", train_hw_emb.shape)


Handwriting embeddings shape: torch.Size([139, 64])


In [106]:
import torch.nn as nn

# Remove final fc layer to get embeddings
resnet_features = nn.Sequential(*list(resnet50.children())[:-1]).to(device)
resnet_features.eval()

def extract_mri_embeddings(dataloader):
    embeddings, labels = [], []
    with torch.no_grad():
        for imgs, lbls in dataloader:
            imgs = imgs.to(device)
            emb = resnet_features(imgs)  # [batch, 2048,1,1]
            emb = emb.view(emb.size(0), -1)  # flatten
            embeddings.append(emb.cpu())
            labels.append(lbls)
    return torch.cat(embeddings, dim=0), torch.cat(labels, dim=0)

train_mri_emb, train_mri_lbl = extract_mri_embeddings(train_loader)
val_mri_emb, val_mri_lbl = extract_mri_embeddings(val_loader)
print("Train MRI embeddings:", train_mri_emb.shape)


Train MRI embeddings: torch.Size([69149, 2048])


In [107]:
train_hw_emb = model_hw(X_scaled_train.to(device), return_embedding=True).cpu()
val_hw_emb = model_hw(X_scaled_val.to(device), return_embedding=True).cpu()
print("Train Handwriting embeddings:", train_hw_emb.shape)


Train Handwriting embeddings: torch.Size([139, 64])


# Create Fusion Data Loaders

In [108]:
from torch.utils.data import TensorDataset, DataLoader

# Concatenate MRI + handwriting embeddings
train_fusion_input = torch.cat([train_mri_emb, train_hw_emb], dim=1)
val_fusion_input = torch.cat([val_mri_emb, val_hw_emb], dim=1)

train_dataset_fusion = TensorDataset(train_fusion_input, train_mri_lbl)  # labels same as MRI labels
val_dataset_fusion = TensorDataset(val_fusion_input, val_mri_lbl)

train_loader_fusion = DataLoader(train_dataset_fusion, batch_size=16, shuffle=True)
val_loader_fusion = DataLoader(val_dataset_fusion, batch_size=16)


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 69149 but got size 139 for tensor number 1 in the list.

In [110]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

MRI_PATH = "/kaggle/input/imagesoasis/Data"  # path to MRI images

# Use ImageFolder
mri_dataset = datasets.ImageFolder(MRI_PATH, transform=transform)

# Split train/val
train_size = int(0.8 * len(mri_dataset))
val_size = len(mri_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(mri_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=False)  # shuffle=False to keep order
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)



In [111]:
import os

slice_patient_ids = [os.path.basename(os.path.dirname(path)) for path, _ in mri_dataset.imgs]


In [112]:
from collections import defaultdict
import torch

# Assume resnet_features is your trained ResNet50 feature extractor
resnet_features.eval()

patient_emb_dict = defaultdict(list)
patient_label_dict = {}

with torch.no_grad():
    for idx, (img, lbl) in enumerate(mri_dataset):
        img = img.unsqueeze(0).to(device)  # add batch dim
        emb = resnet_features(img).view(1, -1).cpu()  # flatten
        patient_id = slice_patient_ids[idx]
        patient_emb_dict[patient_id].append(emb)
        patient_label_dict[patient_id] = lbl  # same for all slices

# Average embeddings per patient
train_mri_patient_emb = []
train_mri_patient_lbl = []

for patient_id in patient_emb_dict:
    emb = torch.cat(patient_emb_dict[patient_id], dim=0).mean(dim=0)
    train_mri_patient_emb.append(emb)
    train_mri_patient_lbl.append(patient_label_dict[patient_id])

train_mri_patient_emb = torch.stack(train_mri_patient_emb)
train_mri_patient_lbl = torch.tensor(train_mri_patient_lbl)


# Step 4: Fusion Network

We now concatenate MRI embeddings and handwriting embeddings for each patient.
The fused vector is input to a small MLP for final prediction.


## Fusion Network Definition

In [114]:
import torch
from collections import defaultdict
import os

# ResNet feature extractor (remove last fc layer)
resnet_features = torch.nn.Sequential(*list(resnet50.children())[:-1]).to(device)
resnet_features.eval()

# Extract patient IDs from dataset paths (assuming ImageFolder structure)
slice_patient_ids = [os.path.basename(os.path.dirname(path)) for path, _ in mri_dataset.imgs]

# Store embeddings per patient
patient_emb_dict = defaultdict(list)
patient_label_dict = {}

with torch.no_grad():
    for idx, (img, lbl) in enumerate(mri_dataset):
        img = img.unsqueeze(0).to(device)
        emb = resnet_features(img).view(1, -1).cpu()
        patient_id = slice_patient_ids[idx]
        patient_emb_dict[patient_id].append(emb)
        patient_label_dict[patient_id] = lbl  # all slices same label

# Average embeddings per patient
mri_patient_emb = []
mri_patient_lbl = []

for patient_id in patient_emb_dict:
    emb = torch.cat(patient_emb_dict[patient_id], dim=0).mean(dim=0)
    mri_patient_emb.append(emb)
    mri_patient_lbl.append(patient_label_dict[patient_id])

mri_patient_emb = torch.stack(mri_patient_emb)
mri_patient_lbl = torch.tensor(mri_patient_lbl)

print("MRI patient-level embeddings:", mri_patient_emb.shape)


MRI patient-level embeddings: torch.Size([4, 2048])


# Prepare Handwriting embeddings

In [115]:
# Assuming you already trained Handwriting MLP and have X_scaled_train/X_scaled_val
# Extract embeddings from trained handwriting model
hw_embeddings = model_hw(X_scaled_train.to(device), return_embedding=True).cpu()
hw_labels = torch.tensor(y_train)  # make sure labels match patients

print("Handwriting embeddings:", hw_embeddings.shape)


Handwriting embeddings: torch.Size([139, 64])


  hw_labels = torch.tensor(y_train)  # make sure labels match patients


In [119]:
import torch
import torch.nn as nn

# input dimension = MRI embedding dim + Handwriting embedding dim
fusion_input_dim = train_fusion_input.shape[1]  # make sure you have train_fusion_input defined
num_classes = len(mri_dataset.classes)

fusion_model = nn.Sequential(
    nn.Linear(fusion_input_dim, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, num_classes)
).to(device)

# Load trained weights if saved
# fusion_model.load_state_dict(torch.load("/kaggle/working/fusion_model.pth"))

fusion_model.eval()


NameError: name 'train_fusion_input' is not defined

In [121]:
import torch

# Make sure embeddings are torch tensors
train_mri_emb = train_mri_emb.float()  # e.g., [num_patients, 2048]
train_hw_emb = train_hw_emb.float()    # e.g., [num_patients, 10]

# Concatenate along feature dimension
train_fusion_input = torch.cat([train_mri_emb, train_hw_emb], dim=1)

# Labels
train_fusion_labels = train_labels

print("Fusion input shape:", train_fusion_input.shape)


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 69149 but got size 139 for tensor number 1 in the list.

In [120]:
import torch
import torch.nn as nn

# Make sure you have these variables defined from previous steps:
# train_fusion_input.shape[1] = input dimension (MRI + Handwriting embeddings)
# mri_dataset.classes = list of classes

fusion_input_dim = train_fusion_input.shape[1]  # total embedding dimension
num_classes = len(mri_dataset.classes)

fusion_model = nn.Sequential(
    nn.Linear(fusion_input_dim, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, num_classes)
).to(device)


NameError: name 'train_fusion_input' is not defined