In [None]:
# Mount Google Drive at /content/drive so the dataset archive and the
# extracted files under MyDrive are reachable from this runtime.
from google.colab import drive
# NOTE(review): zipfile is not referenced in the visible cells (extraction uses
# the `unzip` shell command) — confirm whether this import is still needed.
import zipfile
import os
# Colab-specific call; NOTE(review): presumably prompts for authorization on
# the first run of a session — confirm.
drive.mount('/content/drive')

In [None]:
# Unzip dataset
!unzip -q /content/drive/MyDrive/HAM10000.zip -d /content/drive/MyDrive/

In [None]:
# Locations of the extracted dataset on the mounted Drive: the metadata CSV
# and the two image directories, all directly under base_path.
base_path = "/content/drive/MyDrive/HAM10000"
csv_path, img_folder1, img_folder2 = (
    f"{base_path}/{entry}"
    for entry in (
        "HAM10000_metadata.csv",
        "HAM10000_images_part_1",
        "HAM10000_images_part_2",
    )
)

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms, models
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the metadata CSV into a DataFrame; later cells add 'path' and 'label'
# columns to it.
df = pd.read_csv(filepath_or_buffer=csv_path)

def get_image_path(img_id):
    """Return the on-disk path of the JPEG for ``img_id``.

    The file ``<img_id>.jpg`` is looked up in ``img_folder1`` first and then
    in ``img_folder2``.

    Args:
        img_id: Image identifier; the file name is ``f"{img_id}.jpg"``.

    Returns:
        The first candidate path that exists.

    Raises:
        FileNotFoundError: If the file is in neither folder. Failing here
            reports a missing or incomplete extraction while the metadata is
            being prepared, rather than later when the image is first opened.
    """
    filename = f"{img_id}.jpg"
    candidates = [os.path.join(folder, filename) for folder in (img_folder1, img_folder2)]
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    raise FileNotFoundError(f"Image {img_id!r} not found; looked in: {candidates}")

# Resolve every image id to its file path and store it in a new 'path' column
df['path'] = df['image_id'].map(get_image_path)

In [None]:
# Encode the string values of the 'dx' column as integer class ids
le = LabelEncoder()
le.fit(df['dx'])
df['label'] = le.transform(df['dx'])
# class_names[i] is the 'dx' value that was encoded as id i
class_names = le.classes_
print("Classes:", class_names)

In [None]:
# Hold out 20% of the rows for validation. Stratifying on the encoded label
# keeps the class proportions similar in both parts; the fixed random_state
# makes the split repeatable.
# NOTE(review): the split is per row (i.e. per image). If the metadata contains
# several images of the same lesion, they can end up on both sides of the
# split — confirm whether a grouped split is needed.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

In [None]:
# Image preprocessing pipeline: random flip / rotation augmentation, resize to
# 224x224, conversion to a tensor, then per-channel normalization.
# NOTE(review): the mean/std values are presumably the ImageNet statistics
# matching the pretrained backbone — confirm.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [None]:
# Dataset class
class SkinDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a DataFrame.

    The frame must provide a ``'path'`` column (image file path) and a
    ``'label'`` column (class id).
    """

    def __init__(self, df, transform=None):
        """Store the metadata and the optional per-image transform.

        Args:
            df: DataFrame with ``'path'`` and ``'label'`` columns. Its index is
                reset so rows can be addressed by position ``0..len(df)-1``.
            transform: Optional callable applied to the RGB PIL image.
        """
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Args:
            idx: Row position in the reset-index frame.

        Returns:
            ``(image, label)``: the RGB image (transformed when a transform
            was given) and the value of the ``'label'`` column for that row.
        """
        # Open inside a context manager so the underlying file is closed
        # deterministically (also when decoding fails); convert() returns an
        # independent in-memory image that outlives the `with` block.
        with Image.open(self.df.loc[idx, 'path']) as img:
            image = img.convert("RGB")
        label = self.df.loc[idx, 'label']
        if self.transform:
            image = self.transform(image)
        return image, label

# Validation must be deterministic: apply the same resize / tensor conversion /
# normalization as the training `transform`, but without the random flip and
# rotation, so validation metrics do not change from run to run because of
# augmentation.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

train_ds = SkinDataset(train_df, transform=transform)
val_ds = SkinDataset(val_df, transform=val_transform)

In [None]:
# Device, inverse-frequency class weights, class-balancing sampler, loaders
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of training rows per class id, ordered by class id
class_counts = train_df.groupby('label').size()
# Rarer classes get proportionally larger weights
weights = 1. / class_counts
class_weights = torch.FloatTensor(weights.to_numpy()).to(device)

# Each training row is drawn with probability proportional to its class weight
sample_weights = train_df['label'].map(weights).to_numpy()
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True,
)

train_loader = DataLoader(
    train_ds,
    batch_size=32,
    sampler=sampler,
    num_workers=2,
)
val_loader = DataLoader(
    val_ds,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Model setup: ImageNet-pretrained ResNet-50 with a new classification head.
# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` enum; IMAGENET1K_V1 is the checkpoint that `pretrained=True`
# selected. Older torchvision releases without the enum keep the legacy flag.
if hasattr(models, "ResNet50_Weights"):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    model = models.resnet50(pretrained=True)
# Replace the final fully connected layer so it emits one logit per class
model.fc = nn.Linear(model.fc.in_features, len(class_names))
model = model.to(device)
# NOTE(review): train_loader already rebalances classes through its
# WeightedRandomSampler; weighting the loss as well compensates for the
# imbalance twice — confirm this is intended.
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Training loop: a fixed number of passes over train_loader, printing the mean
# per-batch loss after each epoch
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        images = batch[0].to(device)
        labels = batch[1].to(device)

        # Clear gradients accumulated by the previous step
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")

In [None]:
# Evaluation: accuracy over val_loader. True labels and predictions are also
# collected in all_labels / all_preds for later reporting.
model.eval()
correct, total = 0, 0
all_labels, all_preds = [], []
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        # Predicted class = index of the largest output along dim 1
        preds = model(images).max(dim=1).indices
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())
print(f"Validation Accuracy: {100 * correct / total:.2f}%")

In [None]:
# Classification report and confusion matrix.
# Pin the label set to every encoded class id (0..len(class_names)-1, as
# produced by the LabelEncoder) so the report rows and the matrix axes always
# line up with class_names, even if a class is absent from both the validation
# labels and the predictions.
label_ids = np.arange(len(class_names))

print("\nClassification Report:")
print(classification_report(all_labels, all_preds, labels=label_ids, target_names=class_names))

cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
# Rows are true classes, columns are predicted classes
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names, cmap='Blues', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()