In [147]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import numpy as np
import nibabel as nib
import pandas as pd
from torchvision import transforms
from tqdm import tqdm
import os
import datetime
import pandas as pd
import numpy as np
import nibabel as nib
import torch
from torch.utils.data import Dataset

In [148]:
df = pd.read_excel("references\Subject_info_balanced.xlsx")

In [149]:
import torch
from torch.utils.data import Dataset
import pandas as pd
import numpy as np
import nibabel as nib

class NiiDataset(Dataset):
    def __init__(self, df, image_type='MRI_PET', transform=None):
        """
        Initializes the dataset object.
        :param df: DataFrame containing file paths, labels, and subject IDs.
        :param image_type: Type of images to load ('MRI_PET', 'MRI', or 'PET').
        :param transform: A function or a series of transforms to apply to the images.
        """
        self.image_type = image_type
        if image_type == 'MRI_PET':
            self.paths = df['PATH_MRI_PET'].tolist()
        elif image_type == 'MRI':
            self.paths = df['PATH_MRI'].tolist()
        elif image_type == 'PET':
            self.paths = df['PATH_PET'].tolist()
        self.labels = pd.Categorical(df['Research Group']).codes
        self.subjects = df['Subject'].tolist()
        self.transform = transform

    def __len__(self):
        """
        Returns the total number of samples in the dataset.
        """
        return len(self.paths)

    def __getitem__(self, idx):
        """
        Retrieve the nth sample from the dataset.
        """
        path = self.paths[idx]
        image = self.load_nii(path)
        if self.transform:
            image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        subject = self.subjects[idx]
        return image, label, path, subject

    def load_nii(self, path):
        """
        Load a NIfTI file and normalize its intensity.
        """
        image = nib.load(path).get_fdata(dtype=np.float32)
        image = self.normalize_intensity(image)
        image = np.expand_dims(image, axis=0)  # Add a channel dimension
        return image

    @staticmethod
    def normalize_intensity(image):
        """
        Normalize the image data to zero mean and unit variance.
        """
        mean_intensity = np.mean(image)
        std_intensity = np.std(image)
        normalized_image = (image - mean_intensity) / std_intensity
        return normalized_image


In [150]:
# Load datasets
def load_datasets(df, image_type):
    train_df = df[df['dataset_split'] == 'train']
    val_df = df[df['dataset_split'] == 'validation']
    test_df = df[df['dataset_split'] == 'test']
    
    train_dataset = NiiDataset(train_df)
    val_dataset = NiiDataset(val_df)
    test_dataset = NiiDataset(test_df)
    
    return train_dataset, val_dataset, test_dataset


In [151]:
def create_dataloaders(train_dataset, val_dataset, test_dataset, batch_size=4):
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    
    return train_loader, val_loader, test_loader


In [152]:
# Model definition
class Baseline3DCNN(nn.Module):
    def __init__(self, num_classes=2, init_filters=32, kernel_size=3, stride=2, num_fc_units=128):
        super(Baseline3DCNN, self).__init__()
        self.conv1 = nn.Conv3d(1, init_filters, kernel_size=kernel_size, stride=stride, padding=1)
        self.conv2 = nn.Conv3d(init_filters, init_filters*2, kernel_size=kernel_size, stride=stride, padding=1)
        self.conv3 = nn.Conv3d(init_filters*2, init_filters*4, kernel_size=kernel_size, stride=stride, padding=1)
        self.pool = nn.MaxPool3d(2)
        self.relu = nn.ReLU()

        # Compute the flattened size after all convolutions and pooling
        self.final_dim = self._get_conv_output_dim(193, 3, stride, kernel_size, init_filters*4)
        self.fc1 = nn.Linear(self.final_dim, num_fc_units)
        self.fc2 = nn.Linear(num_fc_units, num_classes)

    def _get_conv_output_dim(self, input_dim, num_convs, stride, kernel_size, num_filters):
        output_dim = input_dim
        for _ in range(num_convs):
            output_dim = ((output_dim - kernel_size + 2 * (kernel_size // 2)) // stride + 1) // 2  # Pooling divides size by 2
        return output_dim * output_dim * output_dim * num_filters

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [153]:
# Assuming 'df' is your DataFrame loaded with the 'Research Group' column available
label_categories = pd.Categorical(df['Research Group'])
label_mapping = {code: category for code, category in enumerate(label_categories.categories)}

In [154]:
def train_model(model, train_loader, criterion, optimizer, label_mapping, num_epochs=10, device='cuda'):
    model.to(device)
    model.train()
    train_results = []
    train_accuracies = []
    for epoch in range(num_epochs):
        correct = 0
        total = 0
        epoch_losses = []
        for images, labels, paths, subjects in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())

            _, predicted_indices = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted_indices == labels).sum().item()
            predicted_labels = [label_mapping[code] for code in predicted_indices.cpu().numpy()]

            for label, pred, path, subject in zip(labels.cpu().numpy(), predicted_labels, paths, subjects):
                train_results.append({
                    'Subject': subject,
                    'Path': path,
                    'Actual Label': label_mapping[label.item()],
                    'Prediction': pred,
                    'Type': 'Train'
                })
        
        avg_loss = sum(epoch_losses) / len(epoch_losses)
        epoch_accuracy = 100 * correct / total
        train_accuracies.append(epoch_accuracy)
        print(f"Average loss for Epoch {epoch+1}: {avg_loss:.4f} - Accuracy: {epoch_accuracy:.2f}%")

    return train_results, train_accuracies



def validate_model(model, val_loader, criterion, label_mapping, device='cpu'):
    model.to(device)
    model.eval()
    validation_results = []
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels, paths, subjects in tqdm(val_loader, desc='Validation'):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted_indices = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted_indices == labels).sum().item()
            predicted_labels = [label_mapping[code] for code in predicted_indices.cpu().numpy()]

            for label, pred, path, subject in zip(labels.cpu().numpy(), predicted_labels, paths, subjects):
                validation_results.append({
                    'Subject': subject,
                    'Path': path,
                    'Actual Label': label_mapping[label.item()],
                    'Prediction': pred,
                    'Type': 'Validation'
                })
    accuracy = 100 * correct / total
    return validation_results, accuracy

def test_model(model, test_loader, label_mapping, device='cpu'):
    model.to(device)
    model.eval()
    test_results = []
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels, paths, subjects in tqdm(test_loader, desc='Testing'):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted_indices = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted_indices == labels).sum().item()
            predicted_labels = [label_mapping[code] for code in predicted_indices.cpu().numpy()]

            for label, pred, path, subject in zip(labels.cpu().numpy(), predicted_labels, paths, subjects):
                test_results.append({
                    'Subject': subject,
                    'Path': path,
                    'Actual Label': label_mapping[label.item()],
                    'Prediction': pred,
                    'Type': 'Test'
                })
    accuracy = 100 * correct / total
    return test_results, accuracy



In [171]:

from openpyxl import load_workbook
import pandas as pd
import os
import datetime

def run_experiment(df, config):
    """Run the experiment with the given configuration on the preprocessed DataFrame."""
    train_dataset, val_dataset, test_dataset = load_datasets(df, config['image_type'])
    train_loader, val_loader, test_loader = create_dataloaders(train_dataset, val_dataset, test_dataset, batch_size=config['batch_size'])
    
    # Add the image type to the configuration output in Excel
    config['Image Type'] = config['image_type'] 
    
    # Initialize model and training components
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Baseline3DCNN(num_classes=config['num_classes'], init_filters=config['init_filters'],
                          kernel_size=config['kernel_size'], stride=config['stride'], num_fc_units=config['num_fc_units']).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())

    # Training and validation
    train_results, train_accuracies = train_model(model, train_loader, criterion, optimizer, label_mapping, config['num_epochs'], device)
    validate_results, val_accuracy = validate_model(model, val_loader, criterion, label_mapping, device)
    test_results, test_accuracy = test_model(model, test_loader, label_mapping, device)
    
    # Save detailed results to Excel
    current_time = datetime.datetime.now()
    formatted_time = current_time.strftime('%Y-%m-%d_%H-%M-%S')
    filename = os.path.join('reports', f'{formatted_time}_Experiment.xlsx')
    
    summary_data = {
        'Phase': ['Training', 'Validation', 'Testing'],
        'Accuracy': [train_accuracies[-1], val_accuracy, test_accuracy]
    }
    summary_df = pd.DataFrame(summary_data)
    all_results = pd.DataFrame(train_results + validate_results + test_results)
    config_df = pd.DataFrame([config])
    
    with pd.ExcelWriter(filename) as writer:
        config_df.to_excel(writer, sheet_name='Configuration')
        all_results.to_excel(writer, sheet_name='Results')
        summary_df.to_excel(writer, sheet_name='Summary')

    # Append a summary of this experiment to the cumulative RESULTS.xlsx file
    results_file = os.path.join('reports', 'RESULTS.xlsx')
    experiment_summary = {**config, **{'Training Accuracy': train_accuracies[-1], 'Validation Accuracy': val_accuracy, 'Test Accuracy': test_accuracy, 'DATETIME': formatted_time}}
    summary_row = pd.DataFrame([experiment_summary])

    if os.path.exists(results_file):
        with pd.ExcelWriter(results_file, mode='a', engine='openpyxl', if_sheet_exists='overlay') as writer:
            summary_row.to_excel(writer, startrow=writer.sheets['Sheet1'].max_row, index=False, header=False)
    else:
        summary_row.to_excel(results_file, index=False)

    return filename, train_accuracies[-1], val_accuracy, test_accuracy

# Example of how to call run_experiment
# Assuming 'df' has been preprocessed already
config = {
    'num_classes': 2,
    'init_filters': 128,
    'kernel_size': 3,
    'stride': 2,
    'num_fc_units': 128,
    'optimizer': 'Adam',
    'loss_criterion': 'BCEWithLogitsLoss',
    'num_epochs': 2,
    'batch_size': 1
}



In [172]:
image_types = ['MRI_PET', 'MRI', 'PET']
results = []
for image_type in image_types:
    config['image_type'] = image_type
    result = run_experiment(df, config)
    results.append(result)


Epoch 1/2: 100%|██████████| 76/76 [00:10<00:00,  7.50it/s]


Average loss for Epoch 1: 1.1821 - Accuracy: 48.68%


Epoch 2/2: 100%|██████████| 76/76 [00:09<00:00,  7.61it/s]


Average loss for Epoch 2: 0.6942 - Accuracy: 50.00%


Validation: 100%|██████████| 26/26 [00:03<00:00,  8.46it/s]
Testing: 100%|██████████| 26/26 [00:03<00:00,  7.73it/s]


KeyError: "There is no item named '[Content_Types].xml' in the archive"

In [155]:
from openpyxl import load_workbook
import pandas as pd
import os

def append_to_excel(results_file, new_data):
    """Appends a DataFrame to an existing Excel file or creates a new one if it doesn't exist."""
    if os.path.exists(results_file):
        book = load_workbook(results_file)
        with pd.ExcelWriter(results_file, engine='openpyxl') as writer:
            writer.book = book
            writer.sheets = {ws.title: ws for ws in book.worksheets}
            existing_data = pd.read_excel(results_file)
            combined_data = pd.concat([existing_data, new_data], ignore_index=True, sort=False)
            combined_data.to_excel(writer, index=False, sheet_name='Sheet1')
    else:
        new_data.to_excel(results_file, index=False)


In [162]:
import pandas as pd
import os
import datetime
from openpyxl import load_workbook


def run_experiment(df, config):
    """Run the experiment with the given configuration on the preprocessed DataFrame."""
    train_dataset, val_dataset, test_dataset = load_datasets(df, config['image_type'])
    train_loader, val_loader, test_loader = create_dataloaders(train_dataset, val_dataset, test_dataset, batch_size=config['batch_size'])
    
    # Add the image type to the configuration output in Excel
    config['Image Type'] = config['image_type'] 
    
    # Initialize model and training components
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Baseline3DCNN(num_classes=config['num_classes'], init_filters=config['init_filters'],
                          kernel_size=config['kernel_size'], stride=config['stride'], num_fc_units=config['num_fc_units']).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())

    # Training and validation
    train_results, train_accuracies = train_model(model, train_loader, criterion, optimizer, label_mapping, config['num_epochs'], device)
    validate_results, val_accuracy = validate_model(model, val_loader, criterion, label_mapping, device)
    test_results, test_accuracy = test_model(model, test_loader, label_mapping, device)
    
    # Save detailed results to Excel
    current_time = datetime.datetime.now()
    formatted_time = current_time.strftime('%Y-%m-%d_%H-%M-%S')
    filename = os.path.join('reports', f'{formatted_time}_Experiment.xlsx')
    
    summary_data = {
        'Phase': ['Training', 'Validation', 'Testing'],
        'Accuracy': [train_accuracies[-1], val_accuracy, test_accuracy]
    }
    summary_df = pd.DataFrame(summary_data)
    all_results = pd.DataFrame(train_results + validate_results + test_results)
    config_df = pd.DataFrame([config])
    
    with pd.ExcelWriter(filename) as writer:
        config_df.to_excel(writer, sheet_name='Configuration')
        all_results.to_excel(writer, sheet_name='Results')
        summary_df.to_excel(writer, sheet_name='Summary')

# Append a summary of this experiment to the cumulative RESULTS.xlsx file
    results_file = os.path.join('reports', 'RESULTS.xlsx')
    experiment_summary = {**config, **{
        'Training Accuracy': train_accuracies[-1],
        'Validation Accuracy': val_accuracy,
        'Test Accuracy': test_accuracy,
        'DATETIME': formatted_time
    }}
    new_row = pd.DataFrame([experiment_summary])

    if os.path.exists(results_file):
        # Load the existing Excel file and sheet
        book = load_workbook(results_file)
        with pd.ExcelWriter(results_file, engine='openpyxl') as writer:
            writer.book = book
            writer.sheets = {ws.title: ws for ws in book.worksheets}

            # Read existing data
            existing_data = pd.read_excel(results_file)
            
            # Combine new row with existing DataFrame, aligning on columns
            combined_data = pd.concat([existing_data, new_row], ignore_index=True, sort=False)

            # Write updated DataFrame to Excel, replacing old data
            combined_data.to_excel(writer, index=False, sheet_name='Sheet1')
            writer.save()
    else:
        # If the file does not exist, simply write the new DataFrame
        new_row.to_excel(results_file, index=False)

    return filename, train_accuracies[-1], val_accuracy, test_accuracy

# Example of how to call run_experiment
# Assuming 'df' has been preprocessed already
config = {
    'num_classes': 2,
    'init_filters': 128,
    'kernel_size': 3,
    'stride': 2,
    'num_fc_units': 128,
    'optimizer': 'Adam',
    'loss_criterion': 'BCEWithLogitsLoss',
    'num_epochs': 1,
    'batch_size': 1
}


In [164]:
import pandas as pd
import os
import datetime
from openpyxl import load_workbook

def save_experiment_results(config, train_results, train_accuracies, validate_results, val_accuracy, test_results, test_accuracy):
    """Saves detailed results of an experiment to an Excel file and appends a summary to a cumulative results file."""
    # Setup file paths and names
    current_time = datetime.datetime.now()
    formatted_time = current_time.strftime('%Y-%m-%d_%H-%M-%S')
    detailed_filename = os.path.join('reports', f'{formatted_time}_Experiment.xlsx')
    summary_results_file = os.path.join('reports', 'RESULTS.xlsx')

    # Prepare dataframes for detailed results
    summary_data = {'Phase': ['Training', 'Validation', 'Testing'], 'Accuracy': [train_accuracies[-1], val_accuracy, test_accuracy]}
    summary_df = pd.DataFrame(summary_data)
    all_results = pd.DataFrame(train_results + validate_results + test_results)
    config_df = pd.DataFrame([config])
    
    # Write detailed results to Excel
    with pd.ExcelWriter(detailed_filename) as writer:
        config_df.to_excel(writer, sheet_name='Configuration')
        all_results.to_excel(writer, sheet_name='Results')
        summary_df.to_excel(writer, sheet_name='Summary')

    # Prepare summary row for cumulative results
    experiment_summary = {**config, **{
        'Training Accuracy': train_accuracies[-1],
        'Validation Accuracy': val_accuracy,
        'Test Accuracy': test_accuracy,
        'DATETIME': formatted_time
    }}
    summary_row = pd.DataFrame([experiment_summary])

    # Append summary row to the cumulative RESULTS.xlsx file
    append_to_excel(summary_results_file, summary_row)

    return detailed_filename

def append_to_excel(results_file, new_data):
    """Appends a DataFrame to an existing Excel file or creates a new one if it doesn't exist."""
    if os.path.exists(results_file):
        book = load_workbook(results_file)
        with pd.ExcelWriter(results_file, engine='openpyxl') as writer:
            writer.book = book
            writer.sheets = {ws.title: ws for ws in book.worksheets}
            existing_data = pd.read_excel(results_file)
            combined_data = pd.concat([existing_data, new_data], ignore_index=True, sort=False)
            combined_data.to_excel(writer, index=False, sheet_name='Sheet1')
    else:
        new_data.to_excel(results_file, index=False)


In [165]:
def run_experiment(df, config):
    """Run the experiment with the given configuration on the preprocessed DataFrame."""
    train_dataset, val_dataset, test_dataset = load_datasets(df, config['image_type'])
    train_loader, val_loader, test_loader = create_dataloaders(train_dataset, val_dataset, test_dataset, batch_size=config['batch_size'])
    
    # Initialize model and training components
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    model = Baseline3DCNN(num_classes=config['num_classes'], init_filters=config['init_filters'],
                          kernel_size=config['kernel_size'], stride=config['stride'], num_fc_units=config['num_fc_units']).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())

    # Training and validation
    train_results, train_accuracies = train_model(model, train_loader, criterion, optimizer, config['num_epochs'], device=device)
    validate_results, val_accuracy = validate_model(model, val_loader, criterion, device=device)
    test_results, test_accuracy = test_model(model, test_loader, device=device)
    
    # Save results to Excel
    filename = save_experiment_results(config, train_results, train_accuracies, validate_results, val_accuracy, test_results, test_accuracy)

    return filename, train_accuracies[-1], val_accuracy, test_accuracy

config = {
    'num_classes': 2,
    'init_filters': 128,
    'kernel_size': 3,
    'stride': 2,
    'num_fc_units': 128,
    'optimizer': 'Adam',
    'loss_criterion': 'BCEWithLogitsLoss',
    'num_epochs': 1,
    'batch_size': 1
}
