In [5]:
import os
import shutil
import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm
from sklearn.utils.class_weight import compute_class_weight
import opendatasets as od

# ==========================================
# 1. CONFIGURATION (RTX 3050 Optimized)
# ==========================================
class Config:
    # Hardware
    DEVICE = 'cuda'  # Force dGPU usage
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA GPU not available. Install NVIDIA drivers and a CUDA-enabled PyTorch build.")
    NUM_WORKERS = 0  # Set to 0 for Windows/VS Code to avoid multiprocessing errors
    PIN_MEMORY = True
    
    # Training Hyperparameters
    BATCH_SIZE = 16        # Conservative for 4GB/6GB VRAM
    EPOCHS = 15            # Increased for "Finest Accuracy"
    LEARNING_RATE = 1e-4   # Lower LR for better convergence
    WEIGHT_DECAY = 1e-4    # Regularization
    
    # Paths
    BASE_DIR = os.getcwd()
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    HAM_DIR = os.path.join(DATA_DIR, 'HAM10000')
    PAD_DIR = os.path.join(DATA_DIR, 'PAD_UFES')
    MODEL_SAVE_PATH = os.path.join(BASE_DIR, 'best_model.pth')

cfg = Config()
print(f"üöÄ Device: {cfg.DEVICE} ({torch.cuda.get_device_name(0)})")

# ==========================================
# 2. DATA DOWNLOAD & SETUP
# ==========================================
def setup_datasets():
    print("\n[1/4] Setting up Datasets...")
    
    # Check for kaggle.json
    if not os.path.exists(os.path.join(cfg.BASE_DIR, '../../kaggle.json')):
        print("‚ö†Ô∏è  WARNING: 'kaggle.json' not found in current directory.")
        print("   If the download fails, please create it with your API key.")

    # --- DOWNLOAD HAM10000 ---
    if not os.path.exists(cfg.HAM_DIR):
        print("‚¨áÔ∏è  Downloading HAM10000...")
        od.download('https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000', data_dir=cfg.DATA_DIR)
        
        # Organize HAM10000
        print("üì¶ Organizing HAM10000...")
        os.makedirs(os.path.join(cfg.HAM_DIR, 'images'), exist_ok=True)
        source = os.path.join(cfg.DATA_DIR, 'skin-cancer-mnist-ham10000')
        
        # Merge parts
        for part in ['HAM10000_images_part_1', 'HAM10000_images_part_2']:
            part_path = os.path.join(source, part)
            if os.path.exists(part_path):
                for f in os.listdir(part_path):
                    shutil.move(os.path.join(part_path, f), os.path.join(cfg.HAM_DIR, 'images'))
        
        shutil.move(os.path.join(source, 'HAM10000_metadata.csv'), os.path.join(cfg.HAM_DIR, 'metadata.csv'))
        # Cleanup
        # shutil.rmtree(source) # Optional: Keep raw if needed
    else:
        print("‚úÖ HAM10000 already ready.")

    # --- DOWNLOAD PAD-UFES-20 ---
    if not os.path.exists(cfg.PAD_DIR):
        print("‚¨áÔ∏è  Downloading PAD-UFES-20...")
        od.download('https://www.kaggle.com/datasets/mahdavi1202/skin-cancer', data_dir=cfg.DATA_DIR)
        
        # Rename folder for consistency
        src = os.path.join(cfg.DATA_DIR, 'skin-cancer')
        if os.path.exists(src):
            os.rename(src, cfg.PAD_DIR)
    else:
        print("‚úÖ PAD-UFES-20 already ready.")

# ==========================================
# 3. DATASET & AUGMENTATION
# ==========================================
class HAMDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.img_dir = os.path.join(root_dir, 'images')
        self.df = pd.read_csv(os.path.join(root_dir, 'metadata.csv'))
        self.transform = transform
        
        # Encode Labels
        self.classes = sorted(self.df['dx'].unique())
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row['image_id'] + '.jpg')
        image = Image.open(img_path).convert('RGB')
        label = self.class_to_idx[row['dx']]
        
        if self.transform:
            image = self.transform(image)
        return image, label

def get_transforms():
    # Advanced Augmentation for "Finest Accuracy"
    train_transform = transforms.Compose([
        transforms.Resize((240, 240)), # Slightly larger for EfficientNet
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(20),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    
    return train_transform

# ==========================================
# 4. TRAINING ENGINE
# ==========================================
def train_model():
    print("\n[2/4] Initializing Training...")
    
    # 1. Prepare Data
    dataset = HAMDataset(cfg.HAM_DIR, transform=get_transforms())
    loader = DataLoader(dataset, batch_size=cfg.BATCH_SIZE, shuffle=True, 
                        num_workers=cfg.NUM_WORKERS, pin_memory=cfg.PIN_MEMORY)
    
    # 2. Calculate Class Weights (Crucial for Imbalanced Medical Data)
    print("‚öñÔ∏è  Calculating Class Weights to handle imbalance...")
    labels = [dataset.class_to_idx[x] for x in dataset.df['dx']]
    class_weights = compute_class_weight('balanced', classes=np.unique(labels), y=labels)
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(cfg.DEVICE)
    print(f"   Weights: {class_weights.cpu().numpy()}")

    # 3. Setup Model (EfficientNet-B1 is slightly better than B0, still fits 3050)
    print("üß† Loading EfficientNet-B0...")
    model = models.efficientnet_b0(weights='DEFAULT')
    
    # Replace Head
    in_features = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(in_features, 7) # 7 Classes
    model = model.to(cfg.DEVICE)
    
    # 4. Optimization
    optimizer = optim.AdamW(model.parameters(), lr=cfg.LEARNING_RATE, weight_decay=cfg.WEIGHT_DECAY)
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2)

    # 5. Training Loop
    print(f"\n[3/4] Starting Training for {cfg.EPOCHS} Epochs...")
    best_loss = float('inf')
    
    for epoch in range(cfg.EPOCHS):
        model.train()
        loop = tqdm(loader, desc=f"Epoch {epoch+1}/{cfg.EPOCHS}")
        running_loss = 0.0
        correct = 0
        total = 0
        
        for images, labels in loop:
            images, labels = images.to(cfg.DEVICE), labels.to(cfg.DEVICE)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            loop.set_postfix(loss=loss.item(), acc=100 * correct / total)
        
        epoch_loss = running_loss / len(loader)
        epoch_acc = 100 * correct / total
        
        # Step Scheduler
        scheduler.step(epoch_loss)
        
        # Save Best Model
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), cfg.MODEL_SAVE_PATH)
            tqdm.write(f"   ‚≠ê New Best Model Saved! (Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.2f}%)")

    print(f"\n[4/4] Setup & Pretraining Complete.")
    print(f"‚úÖ Best weights saved to: {cfg.MODEL_SAVE_PATH}")
    print("   You are now ready to Fine-Tune on PAD-UFES-20.")

if __name__ == "__main__":
    setup_datasets()
    train_model()

RuntimeError: CUDA GPU not available. Install NVIDIA drivers and a CUDA-enabled PyTorch build.

In [6]:
import torch
print(torch.cuda.is_available())  # True if ready
print(torch.cuda.get_device_name(0))


False


AssertionError: Torch not compiled with CUDA enabled

In [7]:
!pip show torch

Name: torch
Version: 2.9.1+cpu
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org
Author: 
Author-email: PyTorch Team <packages@pytorch.org>
License: BSD-3-Clause
Location: C:\Users\Lenovo\AppData\Local\Programs\Python\Python312\Lib\site-packages
Requires: filelock, fsspec, jinja2, networkx, setuptools, sympy, typing-extensions
Required-by: sentence-transformers, torchaudio, torchvision
