Loading data from Kaggle

In [None]:
# Notebook setup cell: lines starting with "!" are IPython shell escapes and
# only run inside a Jupyter/Colab kernel, not under a plain Python interpreter.

# Install the Kaggle library
!pip install kaggle
# timm is imported by the training cell further down.
!pip install timm
# Move the kaggle.json file to the correct location
# (expects kaggle.json to be present in the current working directory)
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Set permissions for the file
# (600 = readable/writable by the owner only)
!chmod 600 ~/.kaggle/kaggle.json

# Search listing is informational only; the download below addresses the
# dataset directly by its owner/slug.
!kaggle datasets list -s "Skin Cancer MNIST: HAM10000"
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000
import zipfile

# Unzip the dataset
# Everything is extracted into a "data" directory relative to the current
# working directory; later cells read from that directory.
with zipfile.ZipFile("skin-cancer-mnist-ham10000.zip", "r") as zip_ref:
    zip_ref.extractall("data")


Loading data

In [None]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Consolidate images into a single directory.
# All paths are relative to the working directory the archive was extracted
# into ("data"), matching the metadata path used below.
images_dir = "./data/HAM10000_images_all"
os.makedirs(images_dir, exist_ok=True)

for part in ["./data/HAM10000_images_part_1", "./data/HAM10000_images_part_2"]:
    for file_name in os.listdir(part):
        src = os.path.join(part, file_name)
        # Skip sub-directories, and move to an explicit file path so that a
        # re-run does not fail on files already present in the target.
        if os.path.isfile(src):
            shutil.move(src, os.path.join(images_dir, file_name))

# Load metadata
metadata_path = "./data/HAM10000_metadata.csv"
metadata = pd.read_csv(metadata_path)

# Add image paths and handle missing data
metadata["image_path"] = metadata["image_id"].apply(lambda x: os.path.join(images_dir, f"{x}.jpg"))
# Assign the filled columns back instead of calling fillna(inplace=True) on a
# column selection: the chained in-place form is deprecated in recent pandas
# and is not guaranteed to modify `metadata` itself.
metadata["age"] = metadata["age"].fillna(metadata["age"].median())
metadata["sex"] = metadata["sex"].fillna("unknown")

# Encode labels (the string diagnosis in "dx" becomes an integer class id)
label_encoder = LabelEncoder()
metadata["label"] = label_encoder.fit_transform(metadata["dx"])

# Split into training and validation sets (80/20, stratified by class,
# fixed seed so the split is reproducible)
train_df, val_df = train_test_split(metadata, test_size=0.2, stratify=metadata["label"], random_state=42)

print(f"Train Samples: {len(train_df)}, Validation Samples: {len(val_df)}")


In [None]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# --------------------------
# 📂 Step 1: Consolidate Images into a Single Directory
# --------------------------

# Paths to image parts and the consolidated directory
part_dirs = ["./data/HAM10000_images_part_1", "./data/HAM10000_images_part_2"]
images_dir = "./data/HAM10000_images_all"
os.makedirs(images_dir, exist_ok=True)

# Move images from both parts into one directory
for part in part_dirs:
    for file_name in os.listdir(part):
        src = os.path.join(part, file_name)
        dest = os.path.join(images_dir, file_name)
        # Only regular files are moved; anything else in the part directory
        # is left where it is.
        if os.path.isfile(src):
            shutil.move(src, dest)

print("✅ Images consolidated into one directory.")

# --------------------------
# 📊 Step 2: Load and Process Metadata
# --------------------------

# Path to metadata CSV
metadata_path = "./data/HAM10000_metadata.csv"
metadata = pd.read_csv(metadata_path)

# Add image paths to metadata
metadata["image_path"] = metadata["image_id"].apply(lambda x: os.path.join(images_dir, f"{x}.jpg"))

# Handle missing data.
# The filled columns are assigned back rather than using
# fillna(inplace=True) on a column selection: that chained in-place form is
# deprecated in recent pandas and is not guaranteed to update `metadata`.
metadata["age"] = metadata["age"].fillna(metadata["age"].median())
metadata["sex"] = metadata["sex"].fillna("unknown")

# Encode diagnosis labels (string "dx" -> integer "label")
label_encoder = LabelEncoder()
metadata["label"] = label_encoder.fit_transform(metadata["dx"])

print("✅ Metadata loaded and processed.")

# --------------------------
# 📚 Step 3: Split Dataset into Training and Validation
# --------------------------

# Stratified train-validation split (80% train, 20% validation)
train_df, val_df = train_test_split(
    metadata,
    test_size=0.2,
    stratify=metadata["label"],
    random_state=42
)

# Save splits to CSV files for easy loading
# (the training cell reads exactly these two paths)
train_df.to_csv("./data/HAM10000_train.csv", index=False)
val_df.to_csv("./data/HAM10000_val.csv", index=False)

print(f"✅ Train Samples: {len(train_df)}, Validation Samples: {len(val_df)}")
print("✅ Train and Validation CSV files saved.")


Training

In [None]:
import torch
import torch.optim as optim
import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.xla_multiprocessing as xmp
import timm
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
import pandas as pd
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn.functional as F
import torch.nn as nn

# --------------------------
# 🛠️ HybridMHSA Definition
# --------------------------
class HybridMHSA(nn.Module):
    """Multi-head self-attention combined with a parallel grouped 1-D convolution.

    The input is passed through ``nn.MultiheadAttention`` (as query, key and
    value at once) and, independently, through a ``Conv1d`` with kernel size 3.
    The two branches are summed and layer-normalised.

    Args:
        dim: Embedding size; also the channel count of the convolution.
        num_heads: Number of attention heads, reused as the convolution's
            ``groups`` value.
        dropout: Dropout probability, used inside the attention layer and
            again on its output.
        pretrained_mhsa: Optional attention module whose input/output
            projection weights and biases are copied into this layer.
    """

    def __init__(self, dim, num_heads, dropout=0.1, pretrained_mhsa=None):
        super().__init__()
        self.mhsa = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads, dropout=dropout)
        self.conv = nn.Conv1d(dim, dim, kernel_size=3, padding=1, groups=num_heads)
        self.norm = nn.LayerNorm(dim)
        self.dropout = nn.Dropout(dropout)

        if not pretrained_mhsa:
            return

        # Pair each local projection tensor with the tensor it is seeded from.
        transfers = (
            (self.mhsa.in_proj_weight, pretrained_mhsa.in_proj_weight),
            (self.mhsa.in_proj_bias, pretrained_mhsa.in_proj_bias),
            (self.mhsa.out_proj.weight, pretrained_mhsa.out_proj.weight),
            (self.mhsa.out_proj.bias, pretrained_mhsa.out_proj.bias),
        )
        with torch.no_grad():
            for target, source in transfers:
                target.copy_(source)

    def forward(self, x):
        """Return ``LayerNorm(dropout(attention(x)) + conv(x))``.

        ``x`` is laid out with the embedding on the last axis; the first two
        axes are swapped to the back/front around the convolution so that the
        embedding becomes the ``Conv1d`` channel axis.
        """
        # Attention branch: self-attention, attention weights discarded.
        attended = self.dropout(self.mhsa(x, x, x)[0])

        # Convolution branch: move axis 0 to the end for Conv1d, then restore
        # the original axis order.
        channels_first = x.permute(1, 2, 0)
        convolved = self.conv(channels_first).permute(2, 0, 1)

        return self.norm(attended + convolved)


# --------------------------
# 🖥️ Modified ViT Model
# --------------------------
class ModifiedViT(nn.Module):
    """timm backbone with its attention layers swapped and a new classifier head.

    Args:
        pretrained_model_name: Model name passed to ``timm.create_model``
            (created with ``pretrained=True``).
        num_classes: Output size of the replacement classification head.
        dropout: Used as the backbone's ``drop_path_rate`` and as the dropout
            of every ``HybridMHSA`` that is inserted.
    """

    def __init__(self, pretrained_model_name, num_classes, dropout=0.1):
        super(ModifiedViT, self).__init__()
        self.base_model = timm.create_model(pretrained_model_name, pretrained=True, drop_path_rate=dropout)
        self.num_classes = num_classes

        # Replace MHSA with HybridMHSA.
        # The matches are collected first: swapping children while the
        # named_modules() generator is still running would mutate the module
        # tree under the iterator.
        attention_layers = [
            (name, module)
            for name, module in self.base_model.named_modules()
            if name and isinstance(module, nn.MultiheadAttention)
        ]
        # NOTE(review): if the backbone implements attention with its own class
        # rather than nn.MultiheadAttention, this list is empty and nothing is
        # replaced — confirm against the timm model actually used.
        # NOTE(review): HybridMHSA.forward takes a single tensor and returns a
        # single tensor, whereas nn.MultiheadAttention is called with
        # (query, key, value) and returns a tuple; a parent that calls the
        # replaced layer the MultiheadAttention way would need adapting.
        for name, module in attention_layers:
            # named_modules() yields dotted paths ("blocks.0.attn"). setattr on
            # the root with such a path would register a stray top-level child
            # instead of replacing the nested layer, so walk to the direct
            # parent and assign the leaf attribute there.
            parent_path, _, leaf = name.rpartition(".")
            parent = self.base_model
            if parent_path:
                for part in parent_path.split("."):
                    parent = getattr(parent, part)
            setattr(
                parent,
                leaf,
                HybridMHSA(
                    dim=module.embed_dim,
                    num_heads=module.num_heads,
                    dropout=dropout,
                    pretrained_mhsa=module
                )
            )

        # Update classification head (same input width, num_classes outputs)
        self.base_model.head = nn.Linear(self.base_model.head.in_features, num_classes)

    def forward(self, x):
        """Run the wrapped backbone on ``x`` and return its output unchanged."""
        return self.base_model(x)


# --------------------------
# 📊 TPU Training Loop with Early Stopping
# --------------------------

def get_transform(epoch):
    """Build the image pipeline used for the given epoch.

    Every pipeline starts with a 224x224 resize and ends with ``ToTensor``
    followed by ``Normalize(mean=0.5, std=0.5)`` on all three channels; only
    the augmentations in between depend on ``epoch``:

    * even epoch: horizontal flip, rotation up to 30 degrees;
    * odd epoch divisible by both 3 and 5: vertical flip, affine, colour jitter;
    * any other epoch: horizontal flip, vertical flip, rotation, affine.
    """
    if epoch % 2 == 0:
        augmentations = [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(30),
        ]
    elif epoch % 3 == 0 and epoch % 5 == 0:
        augmentations = [
            transforms.RandomVerticalFlip(),
            transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.8, 1.2)),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        ]
    else:
        augmentations = [
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(30),
            transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.8, 1.2)),
        ]

    # Shared prefix/suffix wrapped around the epoch-specific augmentations.
    return transforms.Compose([
        transforms.Resize((224, 224)),
        *augmentations,
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5]*3, std=[0.5]*3),
    ])


def get_batch_size(epoch):
    """Return the batch size for ``epoch``.

    * even epoch -> 32
    * odd epoch divisible by both 3 and 5 -> 16
    * odd epoch divisible by 5 but not by 3 -> 8
    * every other epoch -> 64
    """
    if epoch % 2 == 0:
        return 32
    if epoch % 5 != 0:
        return 64
    # Odd multiple of 5 from here on.
    return 16 if epoch % 3 == 0 else 8


def freeze_layers(model):
    """Turn off gradients for every parameter whose name contains ``'mhsa'``."""
    for param_name, parameter in model.named_parameters():
        if 'mhsa' not in param_name:
            continue
        parameter.requires_grad = False
    xm.master_print("Layers frozen.")


def unfreeze_layers(model):
    """Turn gradients back on for every parameter whose name contains ``'mhsa'``."""
    for param_name, parameter in model.named_parameters():
        if 'mhsa' not in param_name:
            continue
        parameter.requires_grad = True
    xm.master_print("Layers unfrozen.")


def train_tpu(rank, world_size, num_epochs, train_loader, val_loader, checkpoint=None):
    """Train ``ModifiedViT`` on an XLA device with early stopping and checkpointing.

    Args:
        rank: First positional argument supplied by the spawner; not used in
            the body.
        world_size: Not used in the body.
        num_epochs: Exclusive upper bound of the epoch index.
        train_loader: Ignored. Kept for interface compatibility; loaders are
            rebuilt every epoch from ``./data/HAM10000_train.csv`` because the
            augmentation and batch size change per epoch.
        val_loader: Ignored, for the same reason
            (``./data/HAM10000_val.csv`` is used).
        checkpoint: Optional dict with keys ``'model_state_dict'``,
            ``'optimizer_state_dict'``, ``'epoch'`` and ``'best_val_loss'`` to
            resume from.

    Side effects:
        Writes ``best_model.pth`` (only if some epoch improved the validation
        loss) and ``checkpoint.pth`` to the working directory.
    """
    device = xm.xla_device()
    model = ModifiedViT('vit_base_patch16_224_in21k', num_classes=7, dropout=0.1).to(device)

    # Optimizer setup: three groups selected by parameter-name substring.
    optimizer = optim.AdamW([
        {'params': [p for n, p in model.named_parameters() if 'mhsa' in n], 'lr': 1e-4},
        {'params': [p for n, p in model.named_parameters() if 'head' in n], 'lr': 1e-4},
        {'params': [p for n, p in model.named_parameters() if 'mhsa' not in n and 'head' not in n], 'lr': 1e-6}
    ], weight_decay=1e-4)

    # Load checkpoint if available
    best_model_state = None
    epoch_start = 0
    best_val_loss = float('inf')
    prev_val_acc = 0.79  # initial reference for the "stagnant accuracy" check
    if checkpoint:
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch_start = checkpoint['epoch'] + 1
        best_val_loss = checkpoint['best_val_loss']

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

    early_stopping_counter = 0
    early_stopping_patience = 10

    # Validation must be deterministic, otherwise the loss that drives early
    # stopping and the LR scheduler fluctuates with the random augmentations.
    eval_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
    ])

    # The CSVs do not change between epochs, so read them once; only the
    # training transform is swapped per epoch.
    train_dataset = HAM10000Dataset('./data/HAM10000_train.csv', transform=None, grayscale=False)
    val_dataset = HAM10000Dataset('./data/HAM10000_val.csv', transform=eval_transform, grayscale=False)

    # Tracks the last epoch actually run so the final checkpoint is valid even
    # when the loop body never executes (epoch_start >= num_epochs).
    last_epoch = epoch_start - 1

    for epoch in range(epoch_start, num_epochs):
        last_epoch = epoch

        # Dynamically adjust transformations and batch size for the current epoch
        train_dataset.transform = get_transform(epoch)
        batch_size = get_batch_size(epoch)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        model.train()
        para_loader = pl.ParallelLoader(train_loader, [device])
        train_loss, train_correct = 0.0, 0

        for inputs, labels in para_loader.per_device_loader(device):
            optimizer.zero_grad()
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            xm.optimizer_step(optimizer)

            # Accumulate the per-sample sum so the epoch mean is exact even
            # when the last batch is smaller.
            train_loss += loss.item() * inputs.size(0)
            train_correct += (outputs.argmax(dim=1) == labels).sum().item()

        train_acc = train_correct / len(train_loader.dataset)

        # Validation loop
        model.eval()
        val_loss, val_correct = 0.0, 0
        para_loader = pl.ParallelLoader(val_loader, [device])

        with torch.no_grad():
            for inputs, labels in para_loader.per_device_loader(device):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                val_correct += (outputs.argmax(dim=1) == labels).sum().item()

        val_acc = val_correct / len(val_loader.dataset)

        # Per-sample means: the two datasets differ in size, so the raw sums
        # are not comparable with each other.
        avg_train_loss = train_loss / len(train_loader.dataset)
        avg_val_loss = val_loss / len(val_loader.dataset)

        # Report before the early-stopping check so the final epoch's metrics
        # are not lost when training stops.
        xm.master_print(f"Epoch {epoch+1}/{num_epochs} | "
                        f"Train Loss: {avg_train_loss:.4f}, "
                        f"Train Acc: {train_acc:.4f}, "
                        f"Val Loss: {avg_val_loss:.4f}, "
                        f"Val Acc: {val_acc:.4f}")

        # Early Stopping. best_val_loss stays a summed loss so that values
        # stored in existing checkpoints remain comparable.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # state_dict() hands back references to the live tensors; take a
            # detached CPU copy so later updates cannot overwrite the snapshot.
            best_model_state = {
                key: value.detach().cpu().clone()
                for key, value in model.state_dict().items()
            }
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1

        if early_stopping_counter >= early_stopping_patience:
            xm.master_print("Early stopping triggered.")
            break

        # Freezing/Unfreezing logic based on loss trends (mean vs. mean)
        if avg_train_loss < avg_val_loss * 0.9:
            xm.master_print("Overfitting detected. Freezing some layers.")
            freeze_layers(model)
        elif avg_train_loss > avg_val_loss * 1.2 and val_acc < prev_val_acc:
            xm.master_print("Not converging with stagnant validation accuracy. Unfreezing some layers.")
            unfreeze_layers(model)

        prev_val_acc = val_acc
        scheduler.step(val_loss)

    if best_model_state is not None:
        torch.save(best_model_state, 'best_model.pth')
        xm.master_print("Best model saved!")

    # NOTE(review): these state dicts still hold device tensors; consider
    # moving them to CPU (or using the XLA save helper) before serialising.
    checkpoint = {
        'epoch': last_epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'best_val_loss': best_val_loss
    }
    torch.save(checkpoint, 'checkpoint.pth')
    xm.master_print("Checkpoint saved!")


# 🚀 DataLoader Definition for HAM10000
class HAM10000Dataset(Dataset):
    """Image dataset backed by a split CSV, yielding ``(image, label)`` pairs.

    The CSV is read with pandas; ``__getitem__`` uses its ``image_path`` and
    ``label`` columns.

    Args:
        csv_path: CSV file handed to ``pd.read_csv``.
        transform: Optional callable applied to the loaded image.
        grayscale: When truthy, the image is converted to 3-channel grayscale
            before ``transform`` runs.
    """

    def __init__(self, csv_path, transform=None, grayscale=None):
        self.metadata = pd.read_csv(csv_path)
        self.transform = transform
        self.grayscale = grayscale

    def __len__(self):
        """Number of rows in the metadata table."""
        return self.metadata.shape[0]

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return it with that row's label."""
        record = self.metadata.iloc[idx]
        picture = Image.open(record['image_path']).convert('RGB')

        # Optional grayscale step happens ahead of every other transformation;
        # three output channels keep the image shape compatible with RGB input.
        if self.grayscale:
            to_gray = transforms.Grayscale(num_output_channels=3)
            picture = to_gray(picture)

        if self.transform:
            picture = self.transform(picture)

        return picture, record['label']


# 🚀 Main TPU Training Entry
def main():
    """Build the HAM10000 loaders and launch training through ``xmp.spawn``."""
    # Training augmentations. RandomErasing works on tensors only, so it has
    # to come after ToTensor; placed before it, it would be handed a PIL image.
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(30),
        transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.8, 1.2)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.RandomCrop(224),
        transforms.RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5]*3, std=[0.5]*3),
        transforms.RandomErasing(p=0.5)
    ])

    # Validation gets a deterministic pipeline so its metrics are comparable
    # from one evaluation to the next.
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
    ])

    # Initialize data loaders
    train_dataset = HAM10000Dataset('./data/HAM10000_train.csv', transform=train_transform)
    val_dataset = HAM10000Dataset('./data/HAM10000_val.csv', transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    num_epochs = 300  # Train for 300 epochs or more
    checkpoint = None

    # Start training. The spawner supplies the first positional argument of
    # train_tpu itself; the tuple below fills the remaining ones, so 8 is
    # received as world_size even though a single process is started.
    xmp.spawn(train_tpu, args=(8, num_epochs, train_loader, val_loader, checkpoint), nprocs=1)


if __name__ == "__main__":
    main()
