In [20]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import numpy as np
import nibabel as nib
import pandas as pd
from torchvision import transforms
from tqdm import tqdm
import os
import datetime
import pandas as pd
import numpy as np
import nibabel as nib
import torch
from torch.utils.data import Dataset

In [21]:
import importlib.util
import sys

# Specify the full path to the module file
module_path = 'D:\\Github Folder\\MasterThesis\\notebooks\\FUNCTIONS.py'

# Load the module
spec = importlib.util.spec_from_file_location("FUNCTIONS", module_path)
functions = importlib.util.module_from_spec(spec)
spec.loader.exec_module(functions)

# Now you can use the functions as if you had imported them
load_datasets = functions.load_datasets
create_dataloaders = functions.create_dataloaders
train_and_validate = functions.train_and_validate
test_model = functions.test_model


In [22]:
df = pd.read_excel("references\Subject_info_balanced.xlsx")

In [23]:
# Assuming 'df' is your DataFrame loaded with the 'Research Group' column available
label_categories = pd.Categorical(df['Research Group'])
label_mapping = {code: category for code, category in enumerate(label_categories.categories)}

In [24]:
def train_and_validate(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, patience=5, device='cuda'):
    model.to(device)
    best_val_loss = float('inf')
    epochs_no_improve_loss = 0

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        train_epoch_losses = []

        for images, _, _, _ in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} - Train'):
            images = images.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, images)  # Note the target here is 'images' itself
            loss.backward()
            optimizer.step()
            train_epoch_losses.append(loss.item())

        print(f'Epoch {epoch+1}: Train Loss: {sum(train_epoch_losses) / len(train_epoch_losses):.4f}')
        
        # Validation phase at the end of each epoch
        model.eval()  # Set model to evaluation mode
        val_epoch_losses = []
        with torch.no_grad():
            for images, _, _, _ in tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} - Validate'):
                images = images.to(device)
                outputs = model(images)
                val_loss = criterion(outputs, images)
                val_epoch_losses.append(val_loss.item())

        val_avg_loss = sum(val_epoch_losses) / len(val_epoch_losses)
        print(f'Epoch {epoch+1}: Validation Loss: {val_avg_loss:.4f}')
        
        # Early stopping logic based on loss
        if val_avg_loss < best_val_loss:
            best_val_loss = val_avg_loss
            epochs_no_improve_loss = 0
        else:
            epochs_no_improve_loss += 1
        
        if epochs_no_improve_loss >= patience:
            print(f'Early stopping triggered after {epoch + 1} epochs due to no improvement in validation loss.')
            break



In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SparseAutoencoder3D(nn.Module):
    def __init__(self):
        super(SparseAutoencoder3D, self).__init__()
        # Encoder
        self.enc_conv1 = nn.Conv3d(1, 32, kernel_size=3, stride=1, padding=1)
        self.enc_conv2 = nn.Conv3d(32, 64, kernel_size=3, stride=2, padding=1)
        self.enc_conv3 = nn.Conv3d(64, 128, kernel_size=3, stride=2, padding=1)
        self.bottleneck = nn.Conv3d(128, 256, kernel_size=3, stride=1, padding=1)

        # Decoder
        self.dec_conv1 = nn.ConvTranspose3d(256, 128, kernel_size=3, stride=1, padding=1)
        self.dec_conv2 = nn.ConvTranspose3d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.dec_conv3 = nn.ConvTranspose3d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.dec_conv4 = nn.Conv3d(32, 1, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        # Encoder
        x = F.relu(self.enc_conv1(x))
        x = F.relu(self.enc_conv2(x))
        x = F.relu(self.enc_conv3(x))
        x = self.bottleneck(x)
        x = F.dropout(x, p=0.1, training=self.training)  # Applying dropout for inducing sparsity

        # Decoder
        x = F.relu(self.dec_conv1(x))
        x = F.relu(self.dec_conv2(x))
        x = F.relu(self.dec_conv3(x))
        x = torch.sigmoid(self.dec_conv4(x))  # Sigmoid activation for output
        return x

# Model instantiation
model = SparseAutoencoder3D()
print(model)

# Example tensor for checking
example_input = torch.rand((1, 1, 193, 229, 193))  # Batch size of 1
output = model(example_input)
print(output.shape)  # Should output torch.Size([1, 1, 193, 229, 193])


SparseAutoencoder3D(
  (enc_conv1): Conv3d(1, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (enc_conv2): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
  (enc_conv3): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
  (bottleneck): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (dec_conv1): ConvTranspose3d(256, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (dec_conv2): ConvTranspose3d(128, 64, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1), output_padding=(1, 1, 1))
  (dec_conv3): ConvTranspose3d(64, 32, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1), output_padding=(1, 1, 1))
  (dec_conv4): Conv3d(32, 1, kernel_size=(1, 1, 1), stride=(1, 1, 1))
)
torch.Size([1, 1, 196, 232, 196])


In [26]:
from openpyxl import load_workbook
import pandas as pd
import os
import datetime
import torch
import torch.nn as nn
import torch.optim as optim


def run_experiment(df, config):
    """Run the experiment with the given configuration on the preprocessed DataFrame."""
    train_dataset, val_dataset, test_dataset = load_datasets(df, config['image_type'], sample_size=config['sample_size'])
    train_loader, val_loader, test_loader = create_dataloaders(train_dataset, val_dataset, test_dataset, batch_size=config['batch_size'])
    
    # Initialize model and training components
    device = torch.device("cuda")
    model = SparseAutoencoder3D().to(device)
    criterion = nn.MSELoss()  # Change this for the autoencoder
    optimizer = optim.Adam(model.parameters(), lr=config['lr'])

    # Training and validation    
    train_accuracies, val_accuracies, val_losses = train_and_validate(model, train_loader, val_loader, criterion, optimizer, config['num_epochs'], config['patience'], device)
    torch.save(model.encoder.state_dict(), 'encoder_only.pth')
    test_results, test_accuracy = test_model(model, test_loader, label_mapping, device)
    
    
    
    
  # Save detailed results to Excel
    current_time = datetime.datetime.now()
    formatted_time = current_time.strftime('%Y-%m-%d_%H-%M-%S')
    filename = os.path.join('reports', f'{formatted_time}_Experiment.xlsx')
    
    summary_data = {
        'Phase': ['Training', 'Validation', 'Testing'],
        'Accuracy': [train_accuracies[-1], val_accuracies[-1], test_accuracy]
    }
    summary_df = pd.DataFrame(summary_data)
    all_results = pd.DataFrame(test_results)
    config_df = pd.DataFrame([config])
    
    with pd.ExcelWriter(filename) as writer:
        config_df.to_excel(writer, sheet_name='Configuration')
        all_results.to_excel(writer, sheet_name='Results')
        summary_df.to_excel(writer, sheet_name='Summary')

    # Append a summary of this experiment to the cumulative RESULTS.xlsx file
    results_file = os.path.join('reports', 'RESULTS.xlsx')
    experiment_summary = {**config, **{'Training Accuracy': train_accuracies[-1], 'Validation Accuracy': val_accuracies[-1], 'Test Accuracy': test_accuracy, 'DATETIME': formatted_time}}
    summary_row = pd.DataFrame([experiment_summary])

    if os.path.exists(results_file):
        with pd.ExcelWriter(results_file, mode='a', engine='openpyxl', if_sheet_exists='overlay') as writer:
            existing_df = pd.read_excel(results_file)
            # Reindex the existing DataFrame to ensure all columns are aligned and add new columns if necessary
            combined_df = pd.concat([existing_df, summary_row], ignore_index=True)
            combined_df = combined_df.reindex(columns=(existing_df.columns.tolist() + [col for col in summary_row.columns if col not in existing_df.columns]))
            combined_df.to_excel(writer, index=False, sheet_name='Sheet1')
    else:
        summary_row.to_excel(results_file, index=False)

    return filename, train_accuracies[-1], val_accuracies[-1], test_accuracy

# Example configuration and use case
config = {
    'loss_criterion': 'MSELoss',  # Use mean squared error loss
    'num_epochs': 50,
    'batch_size': 2,
    'patience': 10,
    'lr': 0.001,
    'Description': 'Autoencoder Phase 1',
    'sample_size': 4
}

In [27]:
image_types = ['MRI_PET', 'MRI', 'PET']
results = []
for image_type in image_types:
    config['image_type'] = image_type
    print(f"Working on image type: {image_type}")
    result = run_experiment(df, config)
    results.append(result)


Working on image type: MRI_PET


Epoch 1/50 - Train:   0%|          | 0/2 [00:47<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.04 GiB. GPU 