<a href="https://colab.research.google.com/github/PChaudhary0403/Final_Year_Project/blob/main/notebooks/training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Mount Google Drive so that files under /content/drive are reachable from
# this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Location of the CBIS-DDSM dataset on the mounted Drive.
# NOTE(review): this constant is not referenced anywhere else in the visible
# code, and the --data_path default used by main() points at a different
# location — confirm which path is intended.
DATASET_PATH = "/content/drive/MyDrive/dataset/CBIS-DDSM"
import sys
# Make Python modules stored in the Drive "dataset" folder importable.
# NOTE(review): the recorded output of this cell is
# "ModuleNotFoundError: No module named 'bc_utils'", so bc_utils.py is
# presumably not in this folder — verify where it lives and point this
# entry at that directory.
sys.path.append("/content/drive/MyDrive/dataset")
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import timm
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import argparse
import warnings
warnings.filterwarnings('ignore')

# Import project-local helpers from bc_utils.py, which you need to create and place in a directory listed on sys.path.
from bc_utils import (
    CBISDDSMDataset,
    BreastCancerClassifier,
    create_data_splits,
    EarlyStopping,
    save_checkpoint,
    load_checkpoint,
    train_epoch,
    validate
)

def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it is missing."""
    parent = os.path.dirname(path)
    if parent:  # a bare filename has no parent component to create
        os.makedirs(parent, exist_ok=True)


def main():
    """Train the breast cancer classifier, checkpoint it, and evaluate it.

    Command-line options (all optional): ``--epochs``, ``--batch_size``,
    ``--lr``, ``--data_path``, ``--checkpoint_path``, ``--model_path`` and
    ``--resume``.  Arguments that the parser does not know are ignored so
    the function can be called from inside a notebook kernel.

    Workflow:
        1. Build train/val/test datasets and loaders via the ``bc_utils``
           helpers.
        2. Train with a class-weighted cross-entropy loss and AdamW, using a
           10x smaller learning rate for ``model.backbone`` than for
           ``model.classifier``.
        3. After every epoch: step the plateau scheduler on validation
           accuracy, write a checkpoint, keep the weights with the best
           validation accuracy, and check early stopping on validation loss.
        4. Evaluate the best saved weights on the test split and print
           weighted precision/recall/F1 and ROC AUC.
    """
    parser = argparse.ArgumentParser(description='Train Breast Cancer Detection Model')
    parser.add_argument('--epochs', type=int, default=20, help='Number of training epochs')
    parser.add_argument('--batch_size', type=int, default=8, help='Batch size for training')
    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    parser.add_argument('--data_path', type=str, default='/content/dataset/CBIS-DDSM',
                        help='Path to CBIS-DDSM dataset')
    parser.add_argument('--checkpoint_path', type=str, default='/content/checkpoints/model.pth',
                        help='Path to save checkpoints')
    parser.add_argument('--model_path', type=str, default='/content/models/best_model.pth',
                        help='Path to save best model')
    parser.add_argument('--resume', action='store_true', help='Resume training from checkpoint')

    # Notebook kernels put their own flags (e.g. "-f <connection file>") into
    # sys.argv; parse_args() would abort on them, so tolerate unknown options.
    args, unknown_args = parser.parse_known_args()
    if unknown_args:
        print(f"Ignoring unrecognized arguments: {unknown_args}")

    # The default output locations do not exist on a fresh runtime.
    _ensure_parent_dir(args.checkpoint_path)
    _ensure_parent_dir(args.model_path)

    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Create data splits
    # NOTE(review): args.data_path is parsed but never passed on; whether
    # create_data_splits() accepts a path is not visible here — confirm.
    print("Loading dataset...")
    train_data, val_data, test_data = create_data_splits()

    # Data transforms (simplified). The mean/std values are the standard
    # ImageNet statistics; this assumes 3-channel input — TODO confirm
    # against CBISDDSMDataset.
    transform = transforms.Compose([
        transforms.Resize((512, 512)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Create datasets
    train_dataset = CBISDDSMDataset(*train_data, transform=transform, is_training=True)
    val_dataset = CBISDDSMDataset(*val_data, transform=transform, is_training=False)
    test_dataset = CBISDDSMDataset(*test_data, transform=transform, is_training=False)

    # Create data loaders (only the training loader is shuffled)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                              num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False,
                            num_workers=2, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False,
                             num_workers=2, pin_memory=True)

    # Create model
    num_classes = 2
    model = BreastCancerClassifier(num_classes=num_classes).to(device)

    # Loss function: inverse-frequency class weights, rescaled so that they
    # sum to the number of classes. train_data[1] is used as the integer
    # label sequence (presumably 0/1 — verify against create_data_splits).
    # minlength keeps one weight per class even when a class is absent, and
    # the clamp avoids a division by zero for such a class.
    class_counts = np.bincount(train_data[1], minlength=num_classes)
    class_counts = np.maximum(class_counts, 1)
    class_weights = 1.0 / class_counts
    class_weights = class_weights / class_weights.sum() * len(class_weights)
    class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)

    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Optimizer: the backbone is updated with a 10x smaller learning rate
    # than the classifier head.
    backbone_params = list(model.backbone.parameters())
    classifier_params = list(model.classifier.parameters())

    optimizer = optim.AdamW([
        {'params': backbone_params, 'lr': args.lr * 0.1},
        {'params': classifier_params, 'lr': args.lr}
    ], weight_decay=1e-4)

    # mode='max' because the scheduler is stepped on validation accuracy.
    # The deprecated `verbose` flag is not passed; learning-rate changes are
    # reported explicitly in the training loop instead.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=3
    )

    # Load checkpoint if resuming
    start_epoch, best_loss, best_accuracy = 0, float('inf'), 0.0
    if args.resume:
        start_epoch, best_loss, best_accuracy = load_checkpoint(
            model, optimizer, scheduler, args.checkpoint_path
        )

    # Training loop
    early_stopping = EarlyStopping(patience=5)
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    print("Starting training...")
    for epoch in range(start_epoch, args.epochs):
        print(f"\nEpoch {epoch+1}/{args.epochs}")

        # Train
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        train_losses.append(train_loss)
        train_accuracies.append(train_acc)

        # Validate (the last three returned values are not needed here)
        val_loss, val_acc, _, _, _ = validate(model, val_loader, criterion, device)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f} | "
              f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}")

        # Update learning rate and report any reduction
        previous_lrs = [group['lr'] for group in optimizer.param_groups]
        scheduler.step(val_acc)
        current_lrs = [group['lr'] for group in optimizer.param_groups]
        if current_lrs != previous_lrs:
            print(f"Learning rate changed: {previous_lrs} -> {current_lrs}")

        # Save checkpoint
        save_checkpoint(model, optimizer, scheduler, epoch+1, val_loss, val_acc, args.checkpoint_path)

        # Save best model
        if val_acc > best_accuracy:
            best_accuracy = val_acc
            torch.save(model.state_dict(), args.model_path)
            print(f"New best model saved (validation accuracy: {val_acc:.2f})")

        # Early stopping (monitors validation loss)
        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping triggered!")
            break

    # Final evaluation
    print("\nEvaluating best model...")
    if os.path.isfile(args.model_path):
        model.load_state_dict(torch.load(args.model_path, map_location=device))
    else:
        # No epoch improved on best_accuracy (or no epoch ran), so no best
        # model file was written; fall back to the weights currently in memory.
        print(f"No saved model found at {args.model_path}; evaluating current weights.")
    test_loss, test_acc, test_preds, test_labels, test_probs = validate(model, test_loader, criterion, device)

    print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}")

    # Calculate metrics
    from sklearn.metrics import precision_score, recall_score, f1_score
    precision = precision_score(test_labels, test_preds, average='weighted')
    recall = recall_score(test_labels, test_preds, average='weighted')
    f1 = f1_score(test_labels, test_preds, average='weighted')
    # Column 1 of test_probs is used as the positive-class score.
    auc = roc_auc_score(test_labels, np.array(test_probs)[:, 1])

    print(f"Precision: {precision:.4f} | Recall: {recall:.4f} | "
          f"F1: {f1:.4f} | AUC: {auc:.4f}")
    print("Training completed successfully!")


if __name__ == "__main__":
    main()






Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


ModuleNotFoundError: No module named 'bc_utils'

In [4]:
import os

# Sanity check: confirm the project script is visible on the mounted Drive,
# then show everything stored next to it.
print(f'Exists? {os.path.exists("/content/drive/MyDrive/BreastCancerProject/Breast_Cancer_Detection_Colab.py")}')

print("Files in folder:")
# repr() of a list is the same text print() would emit for it.
print(repr(os.listdir("/content/drive/MyDrive/BreastCancerProject")))


Exists? True
Files in folder:
['Breast_Cancer_Detection_Colab.ipynb', 'Imagefolder', 'CBIS-DDSM', 'Breast_Cancer_Detection_Colab.py', '__pycache__']
