In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/isic-2017/ISIC-2017_Training_Part3_GroundTruth.csv
/kaggle/input/isic-2017/ISIC-2017_Validation_Part3_GroundTruth.csv
/kaggle/input/isic-2017/ISIC-2017_Test_v2_Part3_GroundTruth.csv
/kaggle/input/isic-2017/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC_0015129_segmentation.png
/kaggle/input/isic-2017/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC_0014743_segmentation.png
/kaggle/input/isic-2017/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC_0014233_segmentation.png
/kaggle/input/isic-2017/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC_0016001_segmentation.png
/kaggle/input/isic-2017/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC_0014647_segmentation.png
/kaggle/input/isic-2017/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC-2017_Test_v2_Part1_GroundTruth/ISIC_0013948_segmentation.png
/kaggle/input/isic-2017/ISIC-2017_Test_v2_Part1_

In [3]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

from sklearn.metrics import classification_report, confusion_matrix

# Basic config
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)


Device: cpu


In [4]:
# Paths
train_img_dir = '/kaggle/input/isic-2017/ISIC-2017_Training_Data/ISIC-2017_Training_Data'
train_csv = '/kaggle/input/isic-2017/ISIC-2017_Training_Part3_GroundTruth.csv'

val_img_dir = '/kaggle/input/isic-2017/ISIC-2017_Validation_Data/ISIC-2017_Validation_Data'
val_csv = '/kaggle/input/isic-2017/ISIC-2017_Validation_Part3_GroundTruth.csv'

# Hyperparameters
IMG_SIZE = 224
BATCH_SIZE = 16
NUM_EPOCHS = 3
LR = 1e-4
NUM_CLASSES = 3


In [5]:
# Check a few entries in the CSV
train_df = pd.read_csv(train_csv)
val_df = pd.read_csv(val_csv)

print("Train CSV head:")
print(train_df.head())

print("\nValidation CSV head:")
print(val_df.head())

# Check how many jpg files actually exist
train_jpg_count = 0
for img_id in train_df['image_id']:
    if os.path.exists(os.path.join(train_img_dir, img_id + '.jpg')):
        train_jpg_count += 1

val_jpg_count = 0
for img_id in val_df['image_id']:
    if os.path.exists(os.path.join(val_img_dir, img_id + '.jpg')):
        val_jpg_count += 1

print(f"\nTotal Train rows in CSV: {len(train_df)}, JPG found: {train_jpg_count}")
print(f"Total Val rows in CSV: {len(val_df)}, JPG found: {val_jpg_count}")


Train CSV head:
       image_id  melanoma  seborrheic_keratosis
0  ISIC_0000000       0.0                   0.0
1  ISIC_0000001       0.0                   0.0
2  ISIC_0000002       1.0                   0.0
3  ISIC_0000003       0.0                   0.0
4  ISIC_0000004       1.0                   0.0

Validation CSV head:
       image_id  melanoma  seborrheic_keratosis
0  ISIC_0001769       0.0                   0.0
1  ISIC_0001852       0.0                   0.0
2  ISIC_0001871       0.0                   0.0
3  ISIC_0003462       0.0                   0.0
4  ISIC_0003539       0.0                   0.0

Total Train rows in CSV: 2000, JPG found: 2000
Total Val rows in CSV: 150, JPG found: 150


In [6]:
class ISICDataset(Dataset):
    def __init__(self, img_dir, csv_file, transform=None):
        self.img_dir = img_dir
        self.df = pd.read_csv(csv_file)
        self.transform = transform

        # Keep only jpg images
        self.df = self.df[self.df['image_id'].apply(lambda x: os.path.exists(
            os.path.join(self.img_dir, x + '.jpg')))].reset_index(drop=True)

        # Make single label column: 0 = nevus, 1 = melanoma, 2 = seborrheic keratosis
        self.df['label'] = self.df[['melanoma', 'seborrheic_keratosis']].idxmax(axis=1)
        self.df['label'] = self.df['label'].map({
            'melanoma': 1,
            'seborrheic_keratosis': 2
        }).fillna(0).astype(int)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_id = self.df.iloc[idx]['image_id']
        img_path = os.path.join(self.img_dir, img_id + '.jpg')
        image = Image.open(img_path).convert('RGB')
        label = self.df.iloc[idx]['label']

        if self.transform:
            image = self.transform(image)

        return image, label


In [7]:
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

train_dataset = ISICDataset(train_img_dir, train_csv, transform=train_transform)
val_dataset = ISICDataset(val_img_dir, val_csv, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

print(f"Train: {len(train_dataset)}, Val: {len(val_dataset)}")


Train: 2000, Val: 150


In [8]:
model = models.resnet50(pretrained=True)

# Replace FC layer for 3 classes
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, NUM_CLASSES)

model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 153MB/s]


In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)


In [10]:
best_acc = 0

for epoch in range(NUM_EPOCHS):
    model.train()
    train_loss = 0
    correct = 0

    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * imgs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += torch.sum(preds == labels)

    train_loss /= len(train_loader.dataset)
    train_acc = correct.double() / len(train_loader.dataset)

    # Validation
    model.eval()
    val_loss = 0
    correct = 0

    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)

            outputs = model(imgs)
            loss = criterion(outputs, labels)

            val_loss += loss.item() * imgs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels)

    val_loss /= len(val_loader.dataset)
    val_acc = correct.double() / len(val_loader.dataset)

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
        print("✅ Saved Best Model")


Epoch 1/3: 100%|██████████| 125/125 [14:25<00:00,  6.93s/it]


Train Loss: 0.3241, Train Acc: 0.8675 | Val Loss: 0.3204, Val Acc: 0.8400
✅ Saved Best Model


Epoch 2/3: 100%|██████████| 125/125 [14:08<00:00,  6.79s/it]


Train Loss: 0.2141, Train Acc: 0.9135 | Val Loss: 0.3055, Val Acc: 0.8667
✅ Saved Best Model


Epoch 3/3: 100%|██████████| 125/125 [14:15<00:00,  6.84s/it]


Train Loss: 0.1801, Train Acc: 0.9275 | Val Loss: 0.2938, Val Acc: 0.8800
✅ Saved Best Model
