In [1]:
#import libraries
import os
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader,Dataset
from torchvision import transforms, models
from PIL import Image

In [49]:
#load csv file & define image directory
data = pd.read_csv('balanced_dataset.csv')
image_dir = r'D:\Self Study\3 Cancer Image Classification\train_images'

In [50]:
class CancerImageDataset(Dataset):
    def __init__(self, csv_data, image_dir, transform=None):
        self.csv_data = csv_data
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.csv_data)

    def __getitem__(self, idx):
        # get the image name and label
        image_id = self.csv_data.iloc[idx]['isic_id']
        label = self.csv_data.iloc[idx]['target']
        
        # load the image
        image_path = os.path.join(self.image_dir, f"{image_id}.jpg")
        image = Image.open(image_path).convert("RGB")
        
        # apply transformations
        if self.transform:
            image = self.transform(image)
        
        return image, torch.tensor(label, dtype=torch.float32)


In [51]:
# Define transformations (resize, normalize, and augment if needed)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to 224x224
    transforms.ToTensor(),         # Convert to tensor
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

In [52]:
# Create the dataset
dataset = CancerImageDataset(data, image_dir, transform=transform)

# Split into training and validation datasets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])


In [None]:
# Create DataLoaders
batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [54]:
# Define the device
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [59]:
# Define the model
class MelanomaModel(nn.Module):
    def __init__(self):
        super(MelanomaModel, self).__init__()
        self.base_model = models.efficientnet_b3(pretrained=True)
        self.base_model.classifier = nn.Sequential(
            nn.Linear(self.base_model.classifier[1].in_features, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.base_model(x)

model = MelanomaModel().to(DEVICE)

In [60]:
# Loss and optimizer
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.0001)


In [61]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs):
    for epoch in range(epochs):
        print(f"\nStarting epoch {epoch+1}/{epochs}")
        model.train()
        train_loss = 0.0
        
        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(DEVICE), labels.to(DEVICE).float()

            # Forward pass
            optimizer.zero_grad()
            outputs = model(images).squeeze()

            # Loss computation
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Validation loop
        val_loss = 0.0
        model.eval()
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(DEVICE), labels.to(DEVICE).float()
                outputs = model(images).squeeze()
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")


In [None]:
# Train the model
EPOCHS = 4
train_model(model, train_loader, val_loader, criterion, optimizer, EPOCHS)


Starting epoch 1/4
Epoch 1/4, Train Loss: 0.6783, Val Loss: 0.6889

Starting epoch 2/4
Epoch 2/4, Train Loss: 0.6234, Val Loss: 0.6198

Starting epoch 3/4
Epoch 3/4, Train Loss: 0.5644, Val Loss: 0.5409

Starting epoch 4/4
Epoch 4/4, Train Loss: 0.4603, Val Loss: 0.4745


In [63]:
import random
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Function to evaluate the model on 10 random images
def evaluate_random_images(model, dataset, num_images=10):
    # Set the model to evaluation mode
    model.eval()
    
    # Randomly select 10 indices from the dataset
    random_indices = random.sample(range(len(dataset)), num_images)
    
    # Initialize lists to store predictions and true labels
    true_labels = []
    predictions = []
    
    with torch.no_grad():
        for idx in random_indices:
            # Get the image and label
            image, label = dataset[idx]
            
            # Add batch dimension and move to DEVICE
            image = image.unsqueeze(0).to(DEVICE)
            label = label.to(DEVICE).float()
            
            # Forward pass
            output = model(image).squeeze()
            predicted_label = (output > 0.5).item()  # Convert sigmoid output to binary prediction
            
            # Store the true label and prediction
            true_labels.append(label.item())
            predictions.append(predicted_label)
    
    # Calculate metrics
    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions, zero_division=1)
    recall = recall_score(true_labels, predictions, zero_division=1)
    f1 = f1_score(true_labels, predictions, zero_division=1)
    
    # Print the results
    print(f"Random Evaluation on {num_images} Images:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"True Labels: {true_labels}")
    print(f"Predictions: {predictions}")

# Evaluate the model on 10 random images
evaluate_random_images(model, dataset, num_images=20)

Random Evaluation on 20 Images:
Accuracy: 0.9500
Precision: 0.9167
Recall: 1.0000
F1 Score: 0.9565
True Labels: [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0]
Predictions: [True, True, False, True, False, True, True, False, False, False, False, True, True, True, False, False, True, True, True, True]


In [66]:
# Save the model
model_path = "models/melanoma_model.pt"
torch.save(model.state_dict(), model_path)  
