## Setup and Imports

In [1]:
import os
import sys
import json
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset

# Make the project's local `src/` directory importable.
# NOTE: the path is resolved against the current working directory, so the
# notebook has to be started from the directory that contains `src/`.
src_path = os.path.join(os.getcwd(), 'src')
if src_path not in sys.path:
    sys.path.append(src_path)

# Import Custom Modules (found through the `src/` entry added above)
import pre_processing as pp
from ResNet18 import ResNet18_Grayscale
from engine import train_model, validate, train_one_epoch

# Seed the NumPy and PyTorch global RNGs so that anything drawing from them
# (weight initialisation, DataLoader shuffling, ...) is repeatable across
# "Restart Kernel -> Run All" executions.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Check GPU availability: use CUDA when present, otherwise fall back to CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"‚úÖ Running on device: {device}")

# Create results directory (also creates the parent `results/` folder);
# exist_ok=True makes re-running this cell safe.
os.makedirs('results/models', exist_ok=True)
print("üìÅ Results directory created.")

‚úÖ Running on device: cpu
üìÅ Results directory created.


## Data Loading and Preprocessing

In [2]:
print("üìä Loading Data...")

# Fetch the full dataset from the project's preprocessing module.
# Only X, Y and Z are used below; `proba` is kept for later inspection.
X, Y, Z, proba = pp.get_data()

# Partition into train / validation / test subsets. split_data hands back
# nine arrays, grouped here as one (X, Y, Z) triple per subset.
(
    X_train, Y_train, Z_train,
    X_val, Y_val, Z_val,
    X_test, Y_test, Z_test,
) = pp.split_data(X, Y, Z)

# Report the shape of each feature subset as a quick sanity check.
print(f"Train Data: {X_train.shape}")
print(f"Val Data:   {X_val.shape}")
print(f"Test Data:  {X_test.shape}")

# Helper to create DataLoaders
def get_dataloader(X, y, batch_size=32, shuffle=True):
    """Wrap feature/label arrays in a ``DataLoader`` of float32 tensors.

    Args:
        X: NumPy array of samples. A 3-D array gets a singleton axis inserted
            at position 1 (e.g. ``(N, H, W)`` -> ``(N, 1, H, W)``); arrays of
            any other rank are used as they are.
        y: Array-like of labels whose first dimension matches ``X``.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples on each pass.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` that yields ``(x, y)``
        batches, both as ``torch.float32`` tensors.
    """
    if X.ndim == 3:
        X = np.expand_dims(X, axis=1)
    # torch.as_tensor with an explicit dtype replaces the legacy
    # torch.Tensor(...) constructor: the dtype is stated rather than implied,
    # an integer argument can never be mistaken for a size, and data that is
    # already float32 is wrapped without an extra copy.
    tensor_x = torch.as_tensor(X, dtype=torch.float32)
    tensor_y = torch.as_tensor(y, dtype=torch.float32)
    dataset = TensorDataset(tensor_x, tensor_y)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

print("‚úÖ Data loaded and helper functions defined.")

üìä Loading Data...
Train Data: (2099, 128, 128)
Val Data:   (459, 128, 128)
Test Data:  (66, 128, 128)
‚úÖ Data loaded and helper functions defined.


## Hyperparameter Tuning

In [3]:
print("\nüîç STARTING HYPERPARAMETER TUNING (on Baseline Data)...")

# Search space: every (learning rate, batch size) combination is tried.
learning_rates = [1e-3, 1e-4, 1e-5]
batch_sizes = [16, 32]
tune_epochs = 5  # Short runs to check convergence

# Running best. The default parameters survive only if no run achieves a
# validation F1 strictly above 0.
best_val_f1 = 0.0
best_params = {'lr': 1e-4, 'batch_size': 32}  # Default

# Flatten the grid; learning rate varies slowest, batch size fastest.
search_grid = [(lr, bs) for lr in learning_rates for bs in batch_sizes]

for lr, bs in search_grid:
    print(f"  Testing LR={lr}, Batch={bs}...", end="")

    # Loaders are rebuilt per run because the batch size is being tuned.
    tune_train_loader = get_dataloader(X_train, Y_train, batch_size=bs)
    tune_val_loader = get_dataloader(X_val, Y_val, batch_size=bs, shuffle=False)

    # Start every run from a newly constructed model so runs stay independent.
    model = ResNet18_Grayscale(num_classes=1).to(device)
    config = {'lr': lr, 'epochs': tune_epochs}

    # Short training run; train_model prints its own per-epoch log and the
    # returned model is discarded — only the history is needed here.
    _, hist = train_model(model, tune_train_loader, tune_val_loader, config, device)

    # Score the run by its best validation F1 over all epochs.
    max_f1 = max(hist['val_f1'])
    print(f" Best F1: {max_f1:.4f}")

    # Strict comparison: on a tie the earlier configuration is kept.
    if max_f1 > best_val_f1:
        best_val_f1, best_params = max_f1, {'lr': lr, 'batch_size': bs}

print(f"‚úÖ Tuning Complete. Best Params: {best_params}")


üîç STARTING HYPERPARAMETER TUNING (on Baseline Data)...
  Testing LR=0.001, Batch=16...Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/kiansorosh/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 44.7M/44.7M [00:08<00:00, 5.77MB/s]


Training on cpu for 5 epochs...
Ep 1: TrLoss 0.5602 | Val F1 0.6245 | ValLoss 0.6473 | LR 0.001000
Ep 2: TrLoss 0.4370 | Val F1 0.6788 | ValLoss 0.4028 | LR 0.001000
Ep 3: TrLoss 0.4443 | Val F1 0.5316 | ValLoss 0.5884 | LR 0.001000
Ep 4: TrLoss 0.4161 | Val F1 0.6131 | ValLoss 0.3712 | LR 0.001000
Ep 5: TrLoss 0.3891 | Val F1 0.6977 | ValLoss 0.4326 | LR 0.001000
 Best F1: 0.6977
  Testing LR=0.001, Batch=32...Training on cpu for 5 epochs...
Ep 1: TrLoss 0.5217 | Val F1 0.4554 | ValLoss 0.5359 | LR 0.001000
Ep 2: TrLoss 0.4377 | Val F1 0.6381 | ValLoss 0.4207 | LR 0.001000
Ep 3: TrLoss 0.3902 | Val F1 0.6791 | ValLoss 0.3401 | LR 0.001000
Ep 4: TrLoss 0.3403 | Val F1 0.6891 | ValLoss 0.3830 | LR 0.001000
Ep 5: TrLoss 0.3174 | Val F1 0.7376 | ValLoss 0.3448 | LR 0.001000
 Best F1: 0.7376
  Testing LR=0.0001, Batch=16...Training on cpu for 5 epochs...
Ep 1: TrLoss 0.4630 | Val F1 0.5897 | ValLoss 0.4644 | LR 0.000100
Ep 2: TrLoss 0.3275 | Val F1 0.6244 | ValLoss 0.4200 | LR 0.000100
Ep 

KeyboardInterrupt: 

## Baseline Model Training

In [None]:
print("\nüöÄ STARTING BASELINE TRAINING...")

# Full-length run with the hyperparameters chosen during tuning.
final_config = {
    'lr': best_params['lr'],
    'epochs': 20  # Full training duration
}

# Both loaders use the tuned batch size; only the training loader shuffles.
baseline_batch_size = best_params['batch_size']
train_loader = get_dataloader(X_train, Y_train, batch_size=baseline_batch_size)
val_loader = get_dataloader(X_val, Y_val, batch_size=baseline_batch_size, shuffle=False)

# Build a new model instance and train it; train_model returns the model
# together with its training history.
model_base = ResNet18_Grayscale(num_classes=1).to(device)
model_base, hist_base = train_model(
    model_base, train_loader, val_loader, final_config, device
)

# Persist the parameters of the model object returned by train_model.
torch.save(model_base.state_dict(), 'results/models/baseline_best.pth')
print("üíæ Baseline weights saved.")

## Save Training History

In [None]:
# Define the file path for the training history
history_filepath = 'results/baseline_training_history.json'

# Save the hist_base dictionary to a JSON file.
# default=float is a fallback for scalar values the json module cannot encode
# on its own (for example NumPy float32 values or 0-d torch tensors); values
# that are already plain Python numbers are written exactly as before.
with open(history_filepath, 'w') as f:
    json.dump(hist_base, f, default=float)

print(f"üíæ Training history saved to {history_filepath}")

## Visualize Training Metrics

In [None]:
# x-axis: 1-based epoch numbers, one per recorded training-loss value.
epochs = range(1, len(hist_base['train_loss']) + 1)

# Output locations for the two figures.
training_loss_filepath = 'results/baseline_training_loss.png'
validation_f1_filepath = 'results/baseline_validation_f1.png'

# One entry per figure:
# (history key, legend label, line colour, title, y-axis label, file, message)
baseline_plot_specs = [
    (
        'train_loss',
        'Training Loss',
        'blue',
        'Baseline Model Training Loss over Epochs',
        'Loss',
        training_loss_filepath,
        f"üìä Training loss plot saved to {training_loss_filepath}",
    ),
    (
        'val_f1',
        'Validation F1-score',
        'green',
        'Baseline Model Validation F1-score over Epochs',
        'F1-score',
        validation_f1_filepath,
        f"üìä Validation F1-score plot saved to {validation_f1_filepath}",
    ),
]

# Draw, save and show each curve in its own figure.
for metric_key, curve_label, curve_color, plot_title, y_label, out_path, saved_msg in baseline_plot_specs:
    plt.figure(figsize=(10, 5))
    plt.plot(epochs, hist_base[metric_key], label=curve_label, color=curve_color)
    plt.title(plot_title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.grid(True)
    plt.legend()
    # Save before show(): the figure is written to disk while still current.
    plt.savefig(out_path)
    plt.show()
    print(saved_msg)

## Improved Training Configuration

### Define Updated Configuration
Define an updated configuration dictionary that includes parameters for a learning rate scheduler and early stopping.

In [None]:
# Optimiser and duration settings. The epoch count is deliberately generous
# because early stopping is meant to decide the actual training length.
_base_settings = {
    'lr': best_params['lr'],
    'epochs': 50,
}

# Learning-rate scheduler settings (described as ReduceLROnPlateau in the
# notebook): epochs to wait, and the factor the learning rate is scaled by.
_scheduler_settings = {
    'scheduler_patience': 5,
    'scheduler_factor': 0.5,
}

# Early-stopping settings: epochs without improvement before stopping, and
# the minimum change that counts as an improvement.
_early_stopping_settings = {
    'early_stopping_patience': 5,
    'early_stopping_min_delta': 0.001,
}

# Merge the groups; key order matches the order the groups are listed in.
improved_config = {
    **_base_settings,
    **_scheduler_settings,
    **_early_stopping_settings,
}

print("Improved training configuration defined:")
for key, value in improved_config.items():
    print(f"  {key}: {value}")

## Enhanced Engine Module

The engine.py module has been updated to include:
- EarlyStopping class for preventing overfitting
- Learning rate scheduler integration
- Enhanced training loop with validation loss tracking

In [None]:
# Display the current engine.py content (a preview of at most 1000 characters).
# The encoding is given explicitly: Python source files are UTF-8 by default,
# while open() would otherwise fall back to the platform's locale encoding and
# could fail to decode non-ASCII characters.
with open('src/engine.py', 'r', encoding='utf-8') as f:
    engine_content = f.read()

print("Current engine.py content:")
print("=" * 50)
# Long files are cut to their first 1000 characters and marked with "...".
print((engine_content[:1000] + "...") if len(engine_content) > 1000 else engine_content)

## Training with Improvements

Train the model with learning rate scheduling and early stopping.

In [None]:
print("\nüöÄ STARTING TRAINING WITH LR SCHEDULER AND EARLY STOPPING...")

# Reload the engine module so edits made to it since the first import take
# effect, then rebind train_model to the reloaded implementation.
import importlib
import engine
importlib.reload(engine)
from engine import train_model

# A new model instance, separate from the baseline model.
model_improved = ResNet18_Grayscale(num_classes=1).to(device)

# Loaders use the batch size selected during tuning; validation is unshuffled.
improved_batch_size = best_params['batch_size']
train_loader_improved = get_dataloader(X_train, Y_train, batch_size=improved_batch_size)
val_loader_improved = get_dataloader(X_val, Y_val, batch_size=improved_batch_size, shuffle=False)

# Train with the scheduler / early-stopping configuration.
model_improved, hist_improved = train_model(
    model_improved,
    train_loader_improved,
    val_loader_improved,
    improved_config,
    device,
)

# Persist the parameters of the model object returned by train_model.
torch.save(model_improved.state_dict(), 'results/models/improved_best.pth')
print("üíæ Improved model weights saved to results/models/improved_best.pth")

## Save Improved Training History

In [None]:
# Define the file path for the improved model's training history
history_filepath_improved = 'results/improved_training_history.json'

# Save the hist_improved dictionary to a JSON file.
# default=float is a fallback for scalar values the json module cannot encode
# on its own (for example NumPy float32 values or 0-d torch tensors); values
# that are already plain Python numbers are written exactly as before.
with open(history_filepath_improved, 'w') as f:
    json.dump(hist_improved, f, default=float)

print(f"üíæ Improved training history saved to {history_filepath_improved}")

## Visualize Improved Training Metrics

In [None]:
# x-axis: 1-based epoch numbers, one per recorded training-loss value.
epochs_improved = range(1, len(hist_improved['train_loss']) + 1)

# Output locations for the two figures that are always produced.
training_loss_filepath_improved = 'results/improved_training_loss.png'
validation_f1_filepath_improved = 'results/improved_validation_f1.png'

# One entry per figure:
# (history key, legend label, line colour, title, y-axis label, file, message)
improved_plot_specs = [
    (
        'train_loss',
        'Training Loss',
        'purple',
        'Improved Model Training Loss over Epochs',
        'Loss',
        training_loss_filepath_improved,
        f"üìä Improved training loss plot saved to {training_loss_filepath_improved}",
    ),
    (
        'val_f1',
        'Validation F1-score',
        'orange',
        'Improved Model Validation F1-score over Epochs',
        'F1-score',
        validation_f1_filepath_improved,
        f"üìä Improved validation F1-score plot saved to {validation_f1_filepath_improved}",
    ),
]

# If validation loss is available, plot it too
if 'val_loss' in hist_improved:
    validation_loss_filepath_improved = 'results/improved_validation_loss.png'
    improved_plot_specs.append(
        (
            'val_loss',
            'Validation Loss',
            'red',
            'Improved Model Validation Loss over Epochs',
            'Loss',
            validation_loss_filepath_improved,
            f"üìä Improved validation loss plot saved to {validation_loss_filepath_improved}",
        )
    )

# Draw, save and show each curve in its own figure.
for metric_key, curve_label, curve_color, plot_title, y_label, out_path, saved_msg in improved_plot_specs:
    plt.figure(figsize=(10, 5))
    plt.plot(epochs_improved, hist_improved[metric_key], label=curve_label, color=curve_color)
    plt.title(plot_title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.grid(True)
    plt.legend()
    # Save before show(): the figure is written to disk while still current.
    plt.savefig(out_path)
    plt.show()
    print(saved_msg)

## Model Evaluation on Test Set

In [None]:
# Evaluate both models on test set.
# The loader is unshuffled and uses the batch size selected during tuning.
test_loader = get_dataloader(X_test, Y_test, batch_size=best_params['batch_size'], shuffle=False)

print("\nüìä EVALUATING MODELS ON TEST SET...")

# Evaluate baseline model
from engine import validate
# Loss passed to validate for both models.
criterion = torch.nn.BCEWithLogitsLoss()

# NOTE: the loops below rely on validate returning a mapping of
# metric name -> numeric value (they call .items() and format with :.4f).
print("\nBaseline Model Test Results:")
baseline_test_metrics = validate(model_base, test_loader, criterion, device)
for metric, value in baseline_test_metrics.items():
    print(f"  {metric}: {value:.4f}")

print("\nImproved Model Test Results:")
improved_test_metrics = validate(model_improved, test_loader, criterion, device)
for metric, value in improved_test_metrics.items():
    print(f"  {metric}: {value:.4f}")

# Save test results together with the hyperparameters that produced them
test_results = {
    'baseline': baseline_test_metrics,
    'improved': improved_test_metrics,
    'best_params': best_params
}

# default=float is a fallback for scalar metric values the json module cannot
# encode on its own (for example NumPy float32 values or 0-d torch tensors);
# plain Python numbers are written exactly as before.
with open('results/test_results.json', 'w') as f:
    json.dump(test_results, f, indent=2, default=float)

print("\nüíæ Test results saved to results/test_results.json")

## Summary

### Training Complete!

When run end to end, this notebook:

1. **Hyperparameter Tuning**: Selects the best learning rate and batch size from a small grid search
2. **Baseline Training**: Trains a baseline ResNet18 model with the selected hyperparameters
3. **Enhanced Training**: Retrains the model with learning rate scheduling and early stopping
4. **Model Evaluation**: Compares both models on the test set
5. **Results Saved**: Saves all models, histories, and plots to the `results/` directory

### Files Generated:
- `results/models/baseline_best.pth` - Baseline model weights
- `results/models/improved_best.pth` - Improved model weights
- `results/baseline_training_history.json` - Baseline training history
- `results/improved_training_history.json` - Improved training history
- `results/test_results.json` - Test set evaluation results
- Various plots in `results/` directory

The improved model with learning rate scheduling and early stopping should show better convergence and potentially better performance than the baseline model.