# Skin lesion classification on HAM10000

- Dataset: HAM10000 ("Skin Cancer MNIST") on Kaggle — https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000

In [8]:
%pip install --upgrade pip setuptools wheel
# PyTorch (CPU); if you already have torch with CUDA, you can skip this CPU line
%pip install torch torchvision torchaudio --upgrade
# Vision + metrics deps
%pip install timm scikit-learn matplotlib pandas opencv-python-headless tqdm seaborn
# Try pytorch-grad-cam first; if not available for your Python, we'll fallback to torchcam
try:
    %pip install pytorch-grad-cam
    print("Installed pytorch-grad-cam.")
except Exception as e:
    print("Could not install pytorch-grad-cam:", e)
    %pip install torchcam
    print("Installed torchcam as fallback.")


Collecting setuptools
  Using cached setuptools-80.9.0-py3-none-any.whl.metadata (6.6 kB)
Collecting wheel
  Using cached wheel-0.45.1-py3-none-any.whl.metadata (2.3 kB)
Using cached setuptools-80.9.0-py3-none-any.whl (1.2 MB)
Using cached wheel-0.45.1-py3-none-any.whl (72 kB)
Installing collected packages: wheel, setuptools

  Attempting uninstall: wheel

    Found existing installation: wheel 0.41.2

    Uninstalling wheel-0.41.2:

      Successfully uninstalled wheel-0.41.2

   ---------------------------------------- 0/2 [wheel]
   ---------------------------------------- 0/2 [wheel]
  Attempting uninstall: setuptools
   ---------------------------------------- 0/2 [wheel]
   -------------------- ------------------- 1/2 [setuptools]
    Found existing installation: setuptools 68.2.2
   -------------------- ------------------- 1/2 [setuptools]
    Uninstalling setuptools-68.2.2:
   -------------------- ------------------- 1/2 [setuptools]
   -------------------- ------------------- 

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
conda-repo-cli 1.0.75 requires requests_mock, which is not installed.
conda-repo-cli 1.0.75 requires clyent==1.2.1, but you have clyent 1.2.2 which is incompatible.


Collecting torch
  Downloading torch-2.9.0-cp311-cp311-win_amd64.whl.metadata (30 kB)
Collecting torchvision
  Downloading torchvision-0.24.0-cp311-cp311-win_amd64.whl.metadata (5.9 kB)
Collecting torchaudio
  Downloading torchaudio-2.9.0-cp311-cp311-win_amd64.whl.metadata (6.9 kB)
Downloading torch-2.9.0-cp311-cp311-win_amd64.whl (109.3 MB)
   ---------------------------------------- 0.0/109.3 MB ? eta -:--:--
   - -------------------------------------- 3.4/109.3 MB 18.4 MB/s eta 0:00:06
   -- ------------------------------------- 6.8/109.3 MB 20.0 MB/s eta 0:00:06
   --- ------------------------------------ 9.2/109.3 MB 14.6 MB/s eta 0:00:07
   --- ------------------------------------ 10.2/109.3 MB 12.0 MB/s eta 0:00:09
   --- ------------------------------------ 10.7/109.3 MB 10.7 MB/s eta 0:00:10
   ---- ----------------------------------- 11.5/109.3 MB 9.0 MB/s eta 0:00:11
   ---- ----------------------------------- 12.6/109.3 MB 8.5 MB/s eta 0:00:12
   ---- ----------------------

  You can safely remove it manually.
  You can safely remove it manually.


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (20 kB)
INFO: pip is looking at multiple versions of opencv-python-headless to determine which version is compatible with other requirements. This could take a while.
  Downloading opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl (39.4 MB)
   ---------------------------------------- 0.0/39.4 MB ? eta -:--:--
   - -------------------------------------- 1.8/39.4 MB 8.4 MB/s eta 0:00:05
   -- ------------------------------------- 2.1/39.4 MB 6.5 MB/s eta 0:00:06
   -- ------------------------------------- 2.6/39.4 MB 4.0 MB/s eta 0:00:10
   --- ------------------------------------ 3.9/39.4 MB 4.4 MB/s eta 0:00:09
   ---- ----------------------------------- 4.7/39.4 MB 4.3 MB/s eta 0:00:09
   ------ --------------------------------- 6.3/39.4 MB 4.9 MB/s eta 0:00:07
   -------- --------

ERROR: Could not find a version that satisfies the requirement pytorch-grad-cam (from versions: none)
ERROR: No matching distribution found for pytorch-grad-cam


In [9]:
import os, glob, random, shutil, pathlib
from collections import Counter
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch, torch.nn as nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import datasets, transforms
import timm

from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Pick a CAM backend: prefer pytorch-grad-cam, otherwise torchcam, otherwise none.
# Exactly one of the two flags ends up True, or both stay False when neither imports.
HAS_TORCHCAM = False
try:
    from pytorch_grad_cam import GradCAM
    from pytorch_grad_cam.utils.image import show_cam_on_image
except Exception:
    HAS_PTCAM = False
else:
    HAS_PTCAM = True
    print("Using pytorch-grad-cam.")

if not HAS_PTCAM:
    # Primary backend unavailable; try the fallback library.
    try:
        from torchcam.methods import GradCAM as TorchGradCAM
    except Exception:
        print("No CAM library available; CAM cell will be skipped.")
    else:
        HAS_TORCHCAM = True
        print("Using torchcam fallback.")

# Seed Python's, NumPy's and PyTorch's random number generators with the same value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Training configuration (tunable knobs for the rest of the notebook).
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 20
LR = 3e-4
CONF_THRESH = 0.6

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Device:", DEVICE)


No CAM library available; CAM cell will be skipped.
Device: cpu


In [None]:
# Root folder of the downloaded dataset (it must contain HAM10000_metadata.csv).
# Set the HAM10000_ROOT environment variable to use a different location without
# editing this cell; when it is unset, the original hard-coded path below is used.
# Example: r"C:\Users\Neha - Personal\Desktop\skin-dermotology\dataset"
LOCAL_ROOT = os.environ.get(
    "HAM10000_ROOT",
    r"C:\Users\Nitro\OneDrive\Desktop\skin-dermotology\dataset",
)

# Metadata table shipped with the dataset.
META_CSV = os.path.join(LOCAL_ROOT, "HAM10000_metadata.csv")

def find_dir_case_insensitive(root, name):
    """Find a sub-directory of ``root`` whose name equals ``name`` ignoring case.

    Args:
        root: Directory to search (only its immediate children are examined).
        name: Directory name to look for; compared case-insensitively.

    Returns:
        The joined path ``root/<entry>`` of the matching directory, or ``None``
        when ``root`` is not an existing directory or has no matching
        sub-directory. Plain files with a matching name are ignored.
    """
    # A missing root means "not found" rather than a FileNotFoundError from
    # os.listdir, so callers can report the problem with their own message.
    if not os.path.isdir(root):
        return None

    wanted = name.lower()
    # Sorted so the result is deterministic if several entries differ only by case.
    for entry in sorted(os.listdir(root)):
        candidate = os.path.join(root, entry)
        if entry.lower() == wanted and os.path.isdir(candidate):
            return candidate
    return None

# Locate the two image folders under LOCAL_ROOT, tolerating differences in letter case.
IMG_DIRS = [
    find_dir_case_insensitive(LOCAL_ROOT, folder_name)
    for folder_name in ("HAM10000_images_part_1", "HAM10000_images_part_2")
]

# Fail fast, with a readable message, if the metadata file or an image folder is absent.
assert os.path.exists(META_CSV), f"Missing: {META_CSV}"
for d in IMG_DIRS:
    assert d and os.path.isdir(d), f"Missing images folder: {d}"

# Echo the resolved locations so the reader can confirm them.
print("Meta:", META_CSV)
print("Part1:", IMG_DIRS[0])
print("Part2:", IMG_DIRS[1])


Meta: C:\Users\Nitro\OneDrive\Desktop\skin-dermotology\dataset\HAM10000_metadata.csv
Part1: C:\Users\Nitro\OneDrive\Desktop\skin-dermotology\dataset\HAM10000_images_part_1
Part2: C:\Users\Nitro\OneDrive\Desktop\skin-dermotology\dataset\HAM10000_images_part_2


In [12]:
# Root of the train/val/test folder tree (relative to the working directory; created below).
OUT_SPLIT = "data/split"
# Diagnosis labels; each one gets its own sub-folder inside every split.
CLASSES = ["akiec", "bcc", "bkl", "df", "mel", "nv", "vasc"]

# Create <OUT_SPLIT>/<split>/<class>/ for every combination; existing folders are left alone.
for split in ("train", "val", "test"):
    for c in CLASSES:
        os.makedirs(os.path.join(OUT_SPLIT, split, c), exist_ok=True)

# Read the metadata table and keep an independent copy of the rows whose `dx` label is in CLASSES.
meta = (
    pd.read_csv(META_CSV)
    .loc[lambda frame: frame["dx"].isin(CLASSES)]
    .copy()
)

def path_for(img_id: str):
    """Return the path of ``<img_id>.jpg`` in the first IMG_DIRS folder containing it, else None."""
    filename = img_id + ".jpg"
    # Lazily build one candidate path per image folder, in IMG_DIRS order.
    candidates = (os.path.join(folder, filename) for folder in IMG_DIRS)
    return next((path for path in candidates if os.path.exists(path)), None)

# Resolve each image id to a file on disk; ids with no file come back as None.
meta["img_path"] = meta["image_id"].map(path_for)

# Count unresolved rows and drop them so later steps only see images that exist.
missing = int(meta["img_path"].isnull().sum())
if missing > 0:
    print(f"Warning: {missing} missing images; dropping.")
    meta = meta.dropna(subset=["img_path"])

# Lesion-wise split: all images that share a lesion_id land in the same split, so the
# same lesion is never seen in both training and evaluation data.
lesions = meta["lesion_id"].dropna().unique().tolist()

# Shuffle with a dedicated generator seeded from SEED instead of the global `random`
# stream. The global stream advances every time it is used, so re-running this cell
# used to produce a different split; combined with the "skip if the destination exists"
# copy step, that left the same image in several split folders (train/test leakage).
# A fresh Random(SEED) yields the same permutation on every run. It should also match
# what a fresh-kernel run of the old code produced, provided nothing else consumed the
# global stream between seeding and this cell.
random.Random(SEED).shuffle(lesions)

n = len(lesions)
train_end, val_end = int(0.70 * n), int(0.85 * n)  # 70% / 15% / 15% of lesions
train_ids = set(lesions[:train_end])
val_ids   = set(lesions[train_end:val_end])
test_ids  = set(lesions[val_end:])

def which_split(lid):
    """Map a lesion id to "train", "val" or "test".

    Any id found in neither train_ids nor val_ids (including a missing lesion id)
    falls through to "test".
    """
    if lid in train_ids:
        return "train"
    if lid in val_ids:
        return "val"
    return "test"

meta["split"] = meta["lesion_id"].apply(which_split)

# Copy every image to <OUT_SPLIT>/<split>/<dx>/<file name>; files already there are skipped.
copy_plan = zip(meta["split"], meta["dx"], meta["img_path"])
for split_name, dx_label, src in tqdm(copy_plan, total=len(meta), desc="Copying to data/split"):
    dst = os.path.join(OUT_SPLIT, split_name, dx_label, os.path.basename(src))
    if os.path.exists(dst):
        continue
    shutil.copy2(src, dst)

# Summaries: number of images per split and per diagnosis class.
print(meta["split"].value_counts())
print("Class counts:\n", meta["dx"].value_counts())


Copying to data/split: 100%|██████████| 10015/10015 [46:21<00:00,  3.60it/s]   

split
train    7018
val      1512
test     1485
Name: count, dtype: int64
Class counts:
 dx
nv       6705
mel      1113
bkl      1099
bcc       514
akiec     327
vasc      142
df        115
Name: count, dtype: int64



