In [1]:
# ## 1. Preparation: Load Data and Define Dataset Class

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# --- 1a. Redefine the Dataset Class (copied from the EDA Notebook) ---
class CancerDataset(Dataset):
    """Map-style dataset pairing histopathology image files with their labels.

    Each row of ``df`` supplies an ``id`` (the image file stem) and a ``label``.
    Images are read from ``<data_path>/<image_dir>/<id>.tif`` and converted to RGB.

    Args:
        df: DataFrame with at least the columns ``id`` and ``label``.
        data_path: Root directory of the dataset.
        transform: Optional callable applied to the PIL image before it is returned.
        image_dir: Sub-directory of ``data_path`` that holds the ``.tif`` files.
            Defaults to ``'train'``, the location that was previously hard-coded.
    """
    def __init__(self, df, data_path, transform=None, image_dir='train'):
        self.df = df
        self.data_path = data_path
        self.transform = transform
        self.image_dir = image_dir

    def __len__(self):
        """Return the number of samples, i.e. the number of rows in ``df``."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the RGB image, or the
            result of ``transform`` when one was supplied.
        """
        # Fetch the row once instead of one positional lookup per column.
        row = self.df.iloc[idx]
        image_path = os.path.join(self.data_path, self.image_dir, f"{row['id']}.tif")
        # The context manager releases the file handle as soon as the pixels are
        # decoded; convert() hands back an image that no longer needs the file.
        with Image.open(image_path) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, row['label']

# --- 1b. Load data and create train/validation splits ---
# Raw string literal: the previous 'E:\data\histopathologic' only worked because
# '\d' and '\h' are not recognised escapes, which newer Python versions warn about.
DATA_PATH = r'E:\data\histopathologic' # ‼️ Make sure this path is correct

# One seed for both the split and PyTorch, so shuffling order and weight
# initialisation are repeatable after a kernel restart.
SEED = 42
torch.manual_seed(SEED)

df_labels = pd.read_csv(os.path.join(DATA_PATH, 'train_labels.csv'))

# To evaluate the model, we need to split our data into a training and a validation set.
# stratify=df_labels['label'] ensures that the class distribution is the same in both splits.
train_df, val_df = train_test_split(df_labels, test_size=0.2, random_state=SEED, stratify=df_labels['label'])

# Sanity check: the split must neither drop nor duplicate rows.
assert len(train_df) + len(val_df) == len(df_labels)

print(f"Original dataset size: {len(df_labels)}")
print(f"Training set size: {len(train_df)}")
print(f"Validation set size: {len(val_df)}")

# --- 1c. Define Image Transforms ---
# ToTensor converts the PIL image to a tensor; Normalize then standardises each
# channel with the given mean/std (presumably the usual ImageNet statistics,
# matching the ImageNet-pretrained ResNet18 used later in this notebook).
data_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# --- 1d. Create Dataset instances for training and validation ---
# Both splits share the same deterministic transform pipeline (no augmentation).
train_dataset = CancerDataset(df=train_df, data_path=DATA_PATH, transform=data_transforms)
val_dataset = CancerDataset(df=val_df, data_path=DATA_PATH, transform=data_transforms)


# ## 2. Create DataLoaders

# --- 2a. Define hyperparameters ---
BATCH_SIZE = 64 # Number of images to process in a batch

# --- 2b. Create DataLoader instances ---
# Training loader: reshuffled every epoch so batches differ between epochs.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
# Validation loader: fixed order, since shuffling has no effect on the metrics.
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

print(f"\n✅ DataLoaders created successfully!")
print(f"Each batch will contain {BATCH_SIZE} images.")


# --- 2c. Test the DataLoader ---
print("\n--- Testing the DataLoader ---")
# A DataLoader is iterable; next(iter(...)) pulls a single batch as a smoke test.
images_batch, labels_batch = next(iter(train_loader))

print(f"Shape of the retrieved images batch: {images_batch.shape}")
print(f"Shape of the retrieved labels batch: {labels_batch.shape}")
# Report the shape that was actually observed instead of a hard-coded
# "[64, 3, 96, 96]", so the message stays correct if BATCH_SIZE or the image size changes.
print(f"(Image shape {list(images_batch.shape)} means [Batch Size, Channels, Height, Width])")

Original dataset size: 220025
Training set size: 176020
Validation set size: 44005

✅ DataLoaders created successfully!
Each batch will contain 64 images.

--- Testing the DataLoader ---
Shape of the retrieved images batch: torch.Size([64, 3, 96, 96])
Shape of the retrieved labels batch: torch.Size([64])
(Image shape [64, 3, 96, 96] means [Batch Size, Channels, Height, Width])


In [2]:
# ## 3. Model Definition

import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """Small three-stage convolutional classifier emitting one logit per image.

    Every stage is Conv(3x3, padding 1) -> ReLU -> BatchNorm -> MaxPool(2), so the
    spatial size halves at each stage while the channel count grows 3 -> 32 -> 64 -> 128.
    The classifier head is sized for 96x96 inputs (128 * 12 * 12 flattened features).
    """

    def __init__(self):
        super().__init__()
        # Expected input: [Batch Size, 3, 96, 96]
        self.conv_block1 = self._stage(3, 32)    # -> [Batch Size, 32, 48, 48]
        self.conv_block2 = self._stage(32, 64)   # -> [Batch Size, 64, 24, 24]
        self.conv_block3 = self._stage(64, 128)  # -> [Batch Size, 128, 12, 12]

        # 128 channels on a 12x12 grid once the three pooling steps are done
        flattened_features = 128 * 12 * 12
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flattened_features, 256),
            nn.ReLU(),
            nn.Dropout(0.5),   # regularisation against overfitting
            nn.Linear(256, 1), # single logit for the binary decision
        )

    @staticmethod
    def _stage(in_channels, out_channels):
        """Build one Conv -> ReLU -> BatchNorm -> MaxPool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Run the three convolutional stages followed by the classifier head."""
        for layer in (self.conv_block1, self.conv_block2, self.conv_block3, self.classifier):
            x = layer(x)
        return x

# Smoke test: build the network once, then show the confirmation message
# followed by the layer summary.
model = SimpleCNN()
print("✅ SimpleCNN model created successfully!", model, sep="\n")

✅ SimpleCNN model created successfully!
SimpleCNN(
  (conv_block1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1,

In [3]:
# ## 4. Define Loss Function and Optimizer

import torch
import torch.nn as nn
import torch.optim as optim

# --- 4a. Setup the device (GPU or CPU) ---
# Prefer CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Build a fresh SimpleCNN and move its parameters to the selected device
model = SimpleCNN().to(device)


# --- 4b. Define the Loss Function ---
# The model emits a single raw logit, so BCEWithLogitsLoss is used: it applies the
# sigmoid internally, which is numerically more stable than Sigmoid + BCELoss.
criterion = nn.BCEWithLogitsLoss()
print("\nLoss function defined: BCEWithLogitsLoss")


# --- 4c. Define the Optimizer ---
# The learning rate lives in one constant, so the optimizer and the log message
# below cannot drift apart when the value is tuned.
LEARNING_RATE = 0.001
# Adam receives the model's parameters so it knows what to update.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
print(f"Optimizer defined: Adam with learning rate {LEARNING_RATE}")


print("\n--- Notebook V2 Part 4 Complete ---")

Using device: cuda

Loss function defined: BCEWithLogitsLoss
Optimizer defined: Adam with learning rate 0.001

--- Notebook V2 Part 4 Complete ---


In [5]:
# ## 5. Training and Validation Loop

import torch
from tqdm.auto import tqdm # A library to create smart progress bars
import torch.nn.functional as F
import numpy as np

# --- 5a. Define the training function for one epoch ---

def train_one_epoch(model, train_loader, criterion, optimizer, device):
    """Trains the model for one epoch.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the update.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple: ``(epoch_loss, epoch_acc)`` — the sample-weighted mean loss and the
        fraction of samples whose thresholded prediction equals the label.

    Raises:
        ValueError: If ``train_loader`` yields no samples, because the epoch
            metrics would otherwise be a division by zero.
    """
    model.train() # Set the model to training mode

    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    # Use tqdm for a nice progress bar
    progress_bar = tqdm(train_loader, desc="Training", leave=False)

    for images, labels in progress_bar:
        # Move data to the selected device (GPU/CPU)
        images = images.to(device)
        # Labels become float with a trailing dimension added by unsqueeze(1)
        # (assumes a 1-D label batch, giving [batch_size, 1] like the model output).
        labels = labels.to(device).float().unsqueeze(1)

        # 1. Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # 2. Backward pass and optimization
        optimizer.zero_grad() # Clear gradients left over from the previous step
        loss.backward()
        optimizer.step()

        # --- Calculate metrics ---
        batch_size = labels.size(0)
        # The loss is weighted by batch size here and divided by the sample count
        # below, so a smaller final batch is weighted correctly.
        running_loss += loss.item() * batch_size

        # Threshold the predicted probability at 0.5; detach() keeps this
        # bookkeeping out of the autograd graph.
        preds = torch.sigmoid(outputs.detach()) > 0.5
        correct_predictions += (preds == labels).sum().item()
        total_samples += batch_size

        # Update progress bar with the running mean loss
        progress_bar.set_postfix(loss=(running_loss / total_samples))

    if total_samples == 0:
        raise ValueError("train_loader produced no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    return epoch_loss, epoch_acc


# --- 5b. Define the validation function ---

def validate(model, val_loader, criterion, device):
    """Evaluates the model on the validation set.

    Args:
        model: Network to evaluate; it is switched to evaluation mode and left there.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple: ``(epoch_loss, epoch_acc)`` — the sample-weighted mean loss and the
        fraction of samples whose thresholded prediction equals the label.

    Raises:
        ValueError: If ``val_loader`` yields no samples, because the metrics
            would otherwise be a division by zero.
    """
    model.eval() # Set the model to evaluation mode

    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    # No gradients are needed for evaluation
    with torch.no_grad():
        progress_bar = tqdm(val_loader, desc="Validation", leave=False)
        for images, labels in progress_bar:
            images = images.to(device)
            # Float labels with a trailing dimension added by unsqueeze(1)
            # (assumes a 1-D label batch, giving [batch_size, 1]).
            labels = labels.to(device).float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Weight by batch size so a smaller final batch is averaged correctly
            running_loss += loss.item() * batch_size
            # Threshold the predicted probability at 0.5
            preds = torch.sigmoid(outputs) > 0.5
            correct_predictions += (preds == labels).sum().item()
            total_samples += batch_size

            progress_bar.set_postfix(loss=(running_loss / total_samples))

    if total_samples == 0:
        raise ValueError("val_loader produced no samples; cannot compute validation metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    return epoch_loss, epoch_acc


# --- 5c. The Main Training Loop ---

NUM_EPOCHS = 5 # Initial budget: five passes over the training set

# One list per tracked metric; each list gains one entry per epoch
history = {metric: [] for metric in ('train_loss', 'train_acc', 'val_loss', 'val_acc')}

print("🚀 Starting training...")
for epoch in range(NUM_EPOCHS):
    # One optimisation pass over the training data, then one evaluation pass
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    # Record this epoch's metrics
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    # Per-epoch report, emitted as one multi-line print
    print(
        f"\nEpoch {epoch+1}/{NUM_EPOCHS}",
        f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}",
        f"  Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}",
        "-" * 30,
        sep="\n",
    )

print("✅ Training complete!")

  from .autonotebook import tqdm as notebook_tqdm


🚀 Starting training...


                                                                         


Epoch 1/5
  Train Loss: 0.3409 | Train Acc: 0.8648
  Val Loss:   0.2515 | Val Acc:   0.9005
------------------------------


                                                                         


Epoch 2/5
  Train Loss: 0.2348 | Train Acc: 0.9117
  Val Loss:   0.4418 | Val Acc:   0.8770
------------------------------


                                                                         


Epoch 3/5
  Train Loss: 0.1992 | Train Acc: 0.9253
  Val Loss:   0.1948 | Val Acc:   0.9312
------------------------------


                                                                         


Epoch 4/5
  Train Loss: 0.1759 | Train Acc: 0.9349
  Val Loss:   0.1873 | Val Acc:   0.9363
------------------------------


                                                                         


Epoch 5/5
  Train Loss: 0.1571 | Train Acc: 0.9424
  Val Loss:   0.1687 | Val Acc:   0.9397
------------------------------
✅ Training complete!




In [6]:
# ## 6. V2 Model: Transfer Learning with ResNet18

import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm
import numpy as np

# --- 6a. Load a pre-trained ResNet18 model ---
# The 'weights' enum selects the ImageNet (IMAGENET1K_V1) checkpoint.
model_v2 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

print("✅ Pre-trained ResNet18 model loaded.")


# --- 6b. Freeze the convolutional base ---
# Turn off gradient tracking for every pre-trained parameter so the optimizer
# cannot change them.
# NOTE(review): train_one_epoch still calls model.train(), so any BatchNorm layers
# in the frozen backbone presumably keep updating their running statistics —
# confirm whether that is intended.
for param in model_v2.parameters():
    param.requires_grad_(False)

print("All layers of the pre-trained model have been frozen.")


# --- 6c. Replace the final layer (the "head") ---
# Swap the existing 'fc' head for a fresh linear layer with the same input width
# and a single output logit for the binary task. The new layer is created after
# the freeze, so its parameters are trainable.
num_ftrs = model_v2.fc.in_features
model_v2.fc = nn.Linear(num_ftrs, 1)

print("The final layer has been replaced with a new one for binary classification.")


# --- 6d. Move model to device and create a new optimizer ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_v2 = model_v2.to(device)

# Only the new head is handed to the optimizer; everything else is frozen.
optimizer_v2 = optim.Adam(model_v2.fc.parameters(), lr=0.001)

print(f"\nNew model and optimizer are ready and moved to {device}.")


# --- 6e. Run the training and validation loop ---
# The train_one_epoch / validate helpers are reused unchanged for this model.
NUM_EPOCHS_V2 = 10 # Longer schedule than the first model
history_v2 = {metric: [] for metric in ('train_loss', 'train_acc', 'val_loss', 'val_acc')}

print(f"\n🚀 Starting transfer learning training for {NUM_EPOCHS_V2} epochs...")
for epoch in range(NUM_EPOCHS_V2):
    # Optimise the head for one epoch, then measure on the validation split
    train_loss, train_acc = train_one_epoch(model_v2, train_loader, criterion, optimizer_v2, device)
    val_loss, val_acc = validate(model_v2, val_loader, criterion, device)

    # Record this epoch's metrics
    history_v2['train_loss'].append(train_loss)
    history_v2['train_acc'].append(train_acc)
    history_v2['val_loss'].append(val_loss)
    history_v2['val_acc'].append(val_acc)

    # Per-epoch report, emitted as one multi-line print
    print(
        f"\nEpoch {epoch+1}/{NUM_EPOCHS_V2}",
        f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}",
        f"  Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}",
        "-" * 30,
        sep="\n",
    )

print("✅ V2 model training complete!")

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\luo/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100.0%


✅ Pre-trained ResNet18 model loaded.
All layers of the pre-trained model have been frozen.
The final layer has been replaced with a new one for binary classification.

New model and optimizer are ready and moved to cuda.

🚀 Starting transfer learning training for 10 epochs...


                                                                         


Epoch 1/10
  Train Loss: 0.4084 | Train Acc: 0.8209
  Val Loss:   0.3794 | Val Acc:   0.8358
------------------------------


                                                                         


Epoch 2/10
  Train Loss: 0.3951 | Train Acc: 0.8278
  Val Loss:   0.3798 | Val Acc:   0.8350
------------------------------


                                                                         


Epoch 3/10
  Train Loss: 0.3917 | Train Acc: 0.8295
  Val Loss:   0.3792 | Val Acc:   0.8351
------------------------------


                                                                         


Epoch 4/10
  Train Loss: 0.3906 | Train Acc: 0.8295
  Val Loss:   0.4035 | Val Acc:   0.8266
------------------------------


                                                                         


Epoch 5/10
  Train Loss: 0.3922 | Train Acc: 0.8282
  Val Loss:   0.3783 | Val Acc:   0.8369
------------------------------


                                                                         


Epoch 6/10
  Train Loss: 0.3895 | Train Acc: 0.8304
  Val Loss:   0.3728 | Val Acc:   0.8398
------------------------------


                                                                         


Epoch 7/10
  Train Loss: 0.3900 | Train Acc: 0.8301
  Val Loss:   0.3748 | Val Acc:   0.8383
------------------------------


                                                                         


Epoch 8/10
  Train Loss: 0.3903 | Train Acc: 0.8301
  Val Loss:   0.3732 | Val Acc:   0.8378
------------------------------


                                                                         


Epoch 9/10
  Train Loss: 0.3910 | Train Acc: 0.8294
  Val Loss:   0.3730 | Val Acc:   0.8382
------------------------------


                                                                         


Epoch 10/10
  Train Loss: 0.3897 | Train Acc: 0.8300
  Val Loss:   0.3707 | Val Acc:   0.8395
------------------------------
✅ V2 model training complete!




In [7]:
# ## 7. V3 Model: Full Fine-tuning with ResNet18

import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm
import numpy as np

# --- 7a. Load the pre-trained ResNet18 model again ---
# A fresh copy, so nothing from the V2 experiment carries over.
model_v3 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# --- 7b. Replace the final layer (same as before) ---
# New single-logit head with the same input width as the existing 'fc' layer.
num_ftrs = model_v3.fc.in_features
model_v3.fc = nn.Linear(num_ftrs, 1)

# --- 7c. 【KEY CHANGE】Unfreeze all layers ---
# Explicitly mark every parameter as trainable so the whole network is updated.
for param in model_v3.parameters():
    param.requires_grad_(True)

print("All layers of the model have been unfrozen.")

# --- 7d. Move model to device and create a new optimizer ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_v3 = model_v3.to(device)

# All parameters are optimised, but at lr=1e-4 (0.0001) rather than the 0.001 used
# earlier: small steps adapt the pre-trained weights without destroying them.
optimizer_v3 = optim.Adam(model_v3.parameters(), lr=1e-4)

print(f"\nNew model for fine-tuning and new optimizer are ready.")


# --- 7e. Run the training and validation loop ---
NUM_EPOCHS_V3 = 10 
history_v3 = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

print(f"\n🚀 Starting full fine-tuning for {NUM_EPOCHS_V3} epochs...")
# We can reuse our train_one_epoch and validate functions
for epoch in range(NUM_EPOCHS_V3):
    train_loss, train_acc = train_one_epoch(model_v3, train_loader, criterion, optimizer_v3, device)
    val_loss, val_acc = validate(model_v3, val_loader, criterion, device)
    
    print(f"\nEpoch {epoch+1}/{NUM_EPOCHS_V3}")
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"  Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}")
    print("-" * 30)
    
    # Record all four metrics. Previously only train_loss was stored, which left
    # the other three history_v3 lists empty.
    history_v3['train_loss'].append(train_loss)
    history_v3['train_acc'].append(train_acc)
    history_v3['val_loss'].append(val_loss)
    history_v3['val_acc'].append(val_acc)

print("✅ V3 model fine-tuning complete!")

All layers of the model have been unfrozen.

New model for fine-tuning and new optimizer are ready.

🚀 Starting full fine-tuning for 10 epochs...


                                                                         


Epoch 1/10
  Train Loss: 0.1860 | Train Acc: 0.9292
  Val Loss:   0.1240 | Val Acc:   0.9550
------------------------------


                                                                         


Epoch 2/10
  Train Loss: 0.1033 | Train Acc: 0.9631
  Val Loss:   0.1043 | Val Acc:   0.9625
------------------------------


                                                                          


Epoch 3/10
  Train Loss: 0.0660 | Train Acc: 0.9767
  Val Loss:   0.1148 | Val Acc:   0.9598
------------------------------


                                                                          


Epoch 4/10
  Train Loss: 0.0433 | Train Acc: 0.9845
  Val Loss:   0.1404 | Val Acc:   0.9582
------------------------------


                                                                          


Epoch 5/10
  Train Loss: 0.0323 | Train Acc: 0.9886
  Val Loss:   0.1448 | Val Acc:   0.9574
------------------------------


                                                                          


Epoch 6/10
  Train Loss: 0.0267 | Train Acc: 0.9905
  Val Loss:   0.1245 | Val Acc:   0.9647
------------------------------


                                                                          


Epoch 7/10
  Train Loss: 0.0224 | Train Acc: 0.9924
  Val Loss:   0.1256 | Val Acc:   0.9660
------------------------------


                                                                          


Epoch 8/10
  Train Loss: 0.0191 | Train Acc: 0.9933
  Val Loss:   0.1492 | Val Acc:   0.9659
------------------------------


                                                                          


Epoch 9/10
  Train Loss: 0.0169 | Train Acc: 0.9940
  Val Loss:   0.1432 | Val Acc:   0.9663
------------------------------


                                                                          


Epoch 10/10
  Train Loss: 0.0163 | Train Acc: 0.9942
  Val Loss:   0.1444 | Val Acc:   0.9660
------------------------------
✅ V3 model fine-tuning complete!


