In [1]:
import pandas as pd

In [5]:
# Load the per-subject metadata spreadsheet into df; later cells read its
# 'Research Group', 'Subject', 'dataset_split' and 'File_Path_desktop' columns.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only runs
# where this exact file location exists — consider a configurable data directory.
df = pd.read_excel(r"C:\Users\Micha\OneDrive - Høyskolen Kristiania\MsC Business Analytics\Master Thesis\Python\Subject_info.xlsx")

In [6]:
def preprocess_labels(df):
    """
    Collapses fine-grained diagnostic labels into coarser research groups.

    'EMCI' and 'LMCI' are both relabelled as 'MCI' and 'SMC' is relabelled as 'CN';
    every other value is left unchanged.

    :param df: DataFrame with a 'Research Group' column. The column is overwritten on
               this very object (no copy is made).
    :return: The same DataFrame object, now carrying the merged labels.
    """
    # Old label -> merged label. Drop the 'SMC' entry if SMC subjects should not be
    # counted as CN.
    merged_groups = dict(EMCI='MCI', LMCI='MCI', SMC='CN')
    relabelled = df['Research Group'].replace(merged_groups)
    df['Research Group'] = relabelled
    return df

# Merge the labels directly in df (the function overwrites the column on the frame it
# is given); the returned frame is that same object and is displayed as the cell output.
preprocess_labels(df)

Unnamed: 0,Subject,Sex,Weight,Research Group,APOE A1,APOE A2,Age,dataset_split,File_Path,File_Path_desktop
0,002_S_0295,M,73.0,CN,3.0,4.0,90.0,train,C:\Users\Micha\OneDrive - Høyskolen Kristiania...,D:\Data\Preprocessed\Fused Images\002_S_0295_f...
1,002_S_0413,F,57.6,CN,3.0,3.0,81.5,test,C:\Users\Micha\OneDrive - Høyskolen Kristiania...,D:\Data\Preprocessed\Fused Images\002_S_0413_f...
2,002_S_0685,F,68.9,CN,3.0,3.0,95.8,test,C:\Users\Micha\OneDrive - Høyskolen Kristiania...,D:\Data\Preprocessed\Fused Images\002_S_0685_f...
3,002_S_0729,F,65.8,MCI,3.0,4.0,71.3,validation,C:\Users\Micha\OneDrive - Høyskolen Kristiania...,D:\Data\Preprocessed\Fused Images\002_S_0729_f...
4,002_S_1155,M,64.9,MCI,3.0,3.0,64.0,train,C:\Users\Micha\OneDrive - Høyskolen Kristiania...,D:\Data\Preprocessed\Fused Images\002_S_1155_f...
...,...,...,...,...,...,...,...,...,...,...
173,941_S_4377,F,121.6,MCI,3.0,4.0,69.5,test,C:\Users\Micha\OneDrive - Høyskolen Kristiania...,D:\Data\Preprocessed\Fused Images\941_S_4377_f...
174,941_S_4420,M,79.4,MCI,3.0,3.0,81.5,train,C:\Users\Micha\OneDrive - Høyskolen Kristiania...,D:\Data\Preprocessed\Fused Images\941_S_4420_f...
175,941_S_4764,F,77.6,MCI,3.0,3.0,82.8,train,C:\Users\Micha\OneDrive - Høyskolen Kristiania...,D:\Data\Preprocessed\Fused Images\941_S_4764_f...
176,941_S_5124,F,78.9,CN,3.0,3.0,76.8,test,C:\Users\Micha\OneDrive - Høyskolen Kristiania...,D:\Data\Preprocessed\Fused Images\941_S_5124_f...


In [8]:
import pandas as pd

# Keep every subject whose research group is anything other than 'AD'
df = df.loc[df['Research Group'].ne('AD')]


In [13]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import numpy as np
import nibabel as nib
import pandas as pd
from torchvision import transforms


In [14]:
class NiiDataset(Dataset):
    """
    Map-style dataset that serves NIfTI volumes together with a class label and a subject ID.
    """

    def __init__(self, df, transform=None, label_mapping=None):
        """
        Initializes the dataset object.
        :param df: DataFrame with the columns 'File_Path_desktop' (path to a .nii file),
                   'Research Group' (class label) and 'Subject' (subject ID).
        :param transform: Optional callable applied to each image after the channel
                          dimension has been added.
        :param label_mapping: Optional dict {label: integer code} whose codes are exactly
                              0..n-1. Pass the same mapping to every split so a class always
                              gets the same code. When omitted, the codes are derived from
                              the labels present in df, so two splits that do not contain
                              the same set of classes would be encoded differently.
        :raises ValueError: If label_mapping is given and its codes are not exactly 0..n-1.
        """
        self.paths = df['File_Path_desktop'].tolist()  # Paths to .nii files
        groups = df['Research Group']

        if label_mapping is None:
            # Default: one code per category found in df, in pandas' category order
            categories = pd.Categorical(groups).categories
            label_mapping = {category: code for code, category in enumerate(categories)}
        else:
            label_mapping = dict(label_mapping)  # Do not keep a reference to the caller's dict

        # Order the labels by code so that the categorical codes equal the mapped values
        ordered_labels = sorted(label_mapping, key=label_mapping.get)
        if [label_mapping[name] for name in ordered_labels] != list(range(len(ordered_labels))):
            raise ValueError("label_mapping codes must be exactly 0..n-1 without gaps or repeats")

        self.label_mapping = label_mapping
        # Labels missing from the mapping (including NaN) receive the code -1
        self.labels = pd.Categorical(groups, categories=ordered_labels).codes
        self.subjects = df['Subject'].tolist()  # Subject identifiers
        self.transform = transform  # Transformation function(s)

    def __len__(self):
        """
        Returns the total number of samples in the dataset.
        """
        return len(self.paths)

    def _load_volume(self, path):
        """
        Reads the voxel data of the NIfTI file at the given path as a float32 array.
        :param path: Path to the .nii file.
        :return: Array with the image data.
        """
        return nib.load(path).get_fdata(dtype=np.float32)

    def __getitem__(self, idx):
        """
        Retrieves the nii image at the specified index, applies transformations, and returns it along with its label and subject ID.
        :param idx: Index of the image to retrieve.
        :return: tuple containing the (transformed) image with a leading channel axis, its
                 numeric label as a torch.long tensor, and the subject ID.
        """
        path = self.paths[idx]
        # get_fdata() defaults to float64, which the DataLoader would collate into a
        # DoubleTensor; layers created with PyTorch's default dtype expect float32.
        image = np.asarray(self._load_volume(path), dtype=np.float32)
        image = np.expand_dims(image, axis=0)  # Add a channel dimension

        if self.transform:
            image = self.transform(image)  # Apply transformation

        label = torch.tensor(self.labels[idx], dtype=torch.long)
        subject = self.subjects[idx]  # Retrieve the subject ID

        return image, label, subject


In [15]:
# Load datasets
def load_datasets(df):
    """
    Splits the subject table by its 'dataset_split' column and wraps each part in a NiiDataset.

    NiiDataset derives its integer label codes from the categories of the
    'Research Group' column it receives. To give every split the same codes, the column
    is converted to a categorical on the whole table first: the full category list then
    travels with each split, even if a split happens not to contain one of the classes.

    :param df: DataFrame with 'dataset_split' (values 'train', 'validation', 'test'),
               'Research Group', and the other columns NiiDataset reads. It is not modified.
    :return: tuple (train_dataset, val_dataset, test_dataset).
    """
    # assign() returns a new frame, so the caller's df keeps its original column dtype
    encoded = df.assign(**{'Research Group': pd.Categorical(df['Research Group'])})

    split_names = ('train', 'validation', 'test')
    train_dataset, val_dataset, test_dataset = (
        NiiDataset(encoded[encoded['dataset_split'] == split_name])
        for split_name in split_names
    )

    return train_dataset, val_dataset, test_dataset


In [16]:
def create_dataloaders(train_dataset, val_dataset, test_dataset, batch_size=4, num_workers=0):
    """
    Wraps the three datasets in DataLoaders; only the training loader shuffles.

    :param train_dataset: Dataset used for training (shuffled every epoch).
    :param val_dataset: Dataset used for validation (fixed order).
    :param test_dataset: Dataset used for testing (fixed order).
    :param batch_size: Number of samples per batch for all three loaders.
    :param num_workers: Number of loader worker processes. The default of 0 loads data in
                        the main process: worker processes started with the "spawn"
                        method (the default on Windows) have to re-import the dataset
                        class, which fails or hangs when that class is defined inside a
                        notebook. Raise it only when the dataset class lives in an
                        importable module.
    :return: tuple (train_loader, val_loader, test_loader).
    """
    shared = dict(batch_size=batch_size, num_workers=num_workers)
    train_loader = DataLoader(train_dataset, shuffle=True, **shared)
    val_loader = DataLoader(val_dataset, shuffle=False, **shared)
    test_loader = DataLoader(test_dataset, shuffle=False, **shared)

    return train_loader, val_loader, test_loader


In [17]:
# Model definition
class Baseline3DCNN(nn.Module):
    """
    Three Conv3d -> ReLU -> MaxPool3d(2) stages followed by two fully connected layers.

    The first fully connected layer is sized for one specific input volume shape, so
    forward() only accepts single-channel inputs of exactly that spatial shape.
    """

    def __init__(self, num_classes=3, init_filters=32, kernel_size=3, stride=2, num_fc_units=128, input_dim=193):
        """
        :param num_classes: Number of output scores produced by the last layer.
        :param init_filters: Output channels of the first convolution; doubled by each
                             of the next two convolutions.
        :param kernel_size: Kernel size shared by the three convolutions.
        :param stride: Stride shared by the three convolutions.
        :param num_fc_units: Width of the hidden fully connected layer.
        :param input_dim: Spatial size of the input volumes: an int for a cube, or a
                          sequence of three ints (depth, height, width).
                          NOTE(review): the default assumes 193 x 193 x 193 volumes —
                          confirm against the shape of the actual images.
        :raises ValueError: If the input is too small to survive the three stages.
        """
        super(Baseline3DCNN, self).__init__()
        # One padding value, shared by the layers and by the size computation below
        padding = 1
        self.conv1 = nn.Conv3d(1, init_filters, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv2 = nn.Conv3d(init_filters, init_filters*2, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv3 = nn.Conv3d(init_filters*2, init_filters*4, kernel_size=kernel_size, stride=stride, padding=padding)
        self.pool = nn.MaxPool3d(2)
        self.relu = nn.ReLU()

        # Compute the flattened size after all convolutions and pooling
        self.final_dim = self._get_conv_output_dim(input_dim, 3, stride, kernel_size, init_filters*4, padding=padding)
        if self.final_dim <= 0:
            raise ValueError(
                f"input_dim={input_dim!r} is too small for three conv+pool stages "
                f"with kernel_size={kernel_size} and stride={stride}"
            )
        self.fc1 = nn.Linear(self.final_dim, num_fc_units)
        self.fc2 = nn.Linear(num_fc_units, num_classes)

    def _get_conv_output_dim(self, input_dim, num_convs, stride, kernel_size, num_filters, padding=1):
        """
        Computes the number of features left after num_convs conv+pool stages.

        :param input_dim: Int (cubic input) or sequence of three ints (one per spatial axis).
        :param num_convs: Number of conv+pool stages.
        :param stride: Convolution stride.
        :param kernel_size: Convolution kernel size.
        :param num_filters: Channels produced by the last convolution.
        :param padding: Convolution padding; must match the padding of the layers.
        :return: Flattened feature count, or 0 if any axis shrinks to nothing.
        :raises ValueError: If input_dim is a sequence that does not have three entries.
        """
        try:
            spatial = tuple(input_dim)
        except TypeError:
            spatial = (input_dim,) * 3  # A single number describes a cube
        if len(spatial) != 3:
            raise ValueError(f"input_dim must be an int or three ints, got {input_dim!r}")

        flat_features = num_filters
        for size in spatial:
            for _ in range(num_convs):
                size = (size + 2 * padding - kernel_size) // stride + 1  # Convolution
                size = max(size, 0) // 2  # Pooling divides size by 2
            flat_features *= size
        return flat_features

    def forward(self, x):
        """
        :param x: Batch of single-channel volumes, shaped (batch, 1, depth, height, width)
                  with the spatial shape the model was built for.
        :return: Raw class scores of shape (batch, num_classes).
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [18]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


Using device: cpu


In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Baseline3DCNN must already be defined by an earlier cell.
# Use the GPU when CUDA is usable, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the network and place it on the chosen device
model = Baseline3DCNN(num_classes=3, init_filters=32, kernel_size=3, stride=2, num_fc_units=128).to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training function
def train(model, train_loader, criterion, optimizer, device, num_epochs=10):
    """
    Trains the model for a number of epochs and prints the mean batch loss of each epoch.

    :param model: Network to train; it is updated in place and left in training mode.
    :param train_loader: Sized iterable of (images, labels, subjects) batches.
    :param criterion: Loss called as criterion(outputs, labels).
    :param optimizer: Optimizer holding the model's parameters.
    :param device: Device the batches are moved to before the forward pass.
    :param num_epochs: Number of passes over train_loader.
    :return: List with the mean batch loss of every epoch, in order.
    :raises ValueError: If train_loader contains no batches.
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        raise ValueError("train_loader is empty: there is nothing to train on")

    # Images are cast to the dtype of the model's parameters: volumes loaded as float64
    # would otherwise fail inside layers that hold float32 weights.
    first_param = next(model.parameters(), None)
    param_dtype = None if first_param is None else first_param.dtype

    epoch_losses = []
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels, _subjects in train_loader:
            images, labels = images.to(device), labels.to(device)
            if param_dtype is not None:
                images = images.to(param_dtype)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        epoch_loss = running_loss / num_batches
        epoch_losses.append(epoch_loss)
        print(f'Epoch {epoch + 1}, Loss: {epoch_loss}')
    return epoch_losses

# Evaluation function
def evaluate(model, loader, device):
    """
    Computes and prints the classification accuracy of the model over a loader.

    :param model: Network to evaluate; it is switched to evaluation mode and left there.
    :param loader: Iterable of (images, labels, subjects) batches.
    :param device: Device the batches are moved to before the forward pass.
    :return: Accuracy in percent (0-100).
    :raises ValueError: If the loader yields no samples.
    """
    model.eval()

    # Images are cast to the dtype of the model's parameters: volumes loaded as float64
    # would otherwise fail inside layers that hold float32 weights.
    first_param = next(model.parameters(), None)
    param_dtype = None if first_param is None else first_param.dtype

    total = correct = 0
    with torch.no_grad():
        for images, labels, _subjects in loader:
            images, labels = images.to(device), labels.to(device)
            if param_dtype is not None:
                images = images.to(param_dtype)
            predicted = model(images).argmax(dim=1)  # Index of the highest class score
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("loader yielded no samples: accuracy is undefined")

    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy}%')
    return accuracy


In [None]:
# Build one NiiDataset per value of df's 'dataset_split' column (train / validation / test)
train_dataset, val_dataset, test_dataset = load_datasets(df)  # Create datasets
# Wrap each dataset in a DataLoader with batches of 4; only the training loader shuffles
train_loader, val_loader, test_loader = create_dataloaders(train_dataset, val_dataset, test_dataset, batch_size=4)  # Create dataloaders