In [None]:
import os 
import numpy as np
from PIL import Image
from torchvision import transforms
import torchvision.datasets as datasets
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import ViTForImageClassification
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
# Locations of the train / test image folders inside the Kaggle input dataset.
_DATA_ROOT = '/kaggle/input/skin-cancer-img-augmented/skincancerimg'
train_dir = f'{_DATA_ROOT}/train'
test_dir = f'{_DATA_ROOT}/test'

In [None]:
# Custom dataset class
class CustomDataset(datasets.ImageFolder):
    """ImageFolder variant that always decodes samples as 3-channel RGB images."""

    def __getitem__(self, index):
        """Return ``(image, target)`` for the sample at ``index``.

        The image is loaded from the path stored in ``self.samples``, forced
        to RGB, and passed through ``self.transform`` when one is set.  The
        integer class index is passed through ``self.target_transform`` when
        one is set.
        """
        path, target = self.samples[index]

        # Open through an explicit handle so the file is closed as soon as the
        # pixels are decoded; convert() reads the data before the handle closes.
        with open(path, 'rb') as image_file:
            image = Image.open(image_file).convert('RGB')

        # No PIL -> numpy -> PIL round-trip: the transforms consume the PIL
        # image directly, so the extra copies were pure overhead.
        if self.transform is not None:
            image = self.transform(image)

        # Honour the target_transform accepted by ImageFolder's constructor.
        if self.target_transform is not None:
            target = self.target_transform(target)

        return image, target

In [None]:
from torchvision import transforms

# Per-channel normalisation statistics (described as precomputed by the
# notebook's author; the same value is used for every channel).
_NORM_MEAN = [0.179, 0.179, 0.179]
_NORM_STD = [0.175, 0.175, 0.175]

# Random augmentations operating on the PIL image, applied in list order.
_augmentation_steps = [
    transforms.Resize((384, 384)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=15, shear=10, scale=(0.8, 1.2)),
    transforms.RandomResizedCrop(384, scale=(0.8, 1.0)),
]

# Conversion to a normalised tensor happens last.
_tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]

transform = transforms.Compose(_augmentation_steps + _tensor_steps)

In [None]:
# Load datasets
# The training split uses the random augmentation pipeline defined above.
train_dataset = CustomDataset(root=train_dir, transform=transform)

# Evaluation must be deterministic and measured on undistorted images, so the
# test split gets only the resize + tensor conversion + normalisation steps
# (same size and statistics as the training pipeline, no random augmentation).
eval_transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.179, 0.179, 0.179], std=[0.175, 0.175, 0.175]),
])
test_dataset = CustomDataset(root=test_dir, transform=eval_transform)

# DataLoader
batch_size = 64
# Shuffle only the training data; a fixed test order keeps evaluation repeatable.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Pretrained ViT checkpoint, loaded with a 9-way classification head.
_VIT_CHECKPOINT = 'google/vit-base-patch32-384'
_vit_load_options = {
    'num_labels': 9,
    # Per the original author's note, this avoids a shape-mismatch error
    # when the checkpoint weights are loaded.
    'ignore_mismatched_sizes': True,
}
model = ViTForImageClassification.from_pretrained(_VIT_CHECKPOINT, **_vit_load_options)
model.to(device)

In [None]:
# Number of passes over the training data and the per-epoch history buffers.
epochs = 5
losses = []
accuracies = []

# AdamW over every model parameter, optimising a cross-entropy objective.
optimizer = AdamW(model.parameters(), lr=5e-5)
criterion = nn.CrossEntropyLoss()

In [None]:
for epoch in range(epochs):
    # ---- optimisation pass over the training loader ----
    model.train()
    running_loss = 0
    progress = tqdm(train_loader, leave=True)

    for images, targets in progress:
        images = images.to(device)
        targets = targets.to(device)

        # Forward pass: the model output exposes the class scores as `.logits`.
        logits = model(images).logits
        batch_loss = criterion(logits, targets)

        # Clear stale gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        batch_loss_value = batch_loss.item()
        running_loss += batch_loss_value
        progress.set_description(f"Epoch [{epoch+1}/{epochs}]")
        progress.set_postfix(loss=batch_loss_value)

    # Record the average of the per-batch losses for this epoch.
    losses.append(running_loss / len(train_loader))

    # ---- accuracy on the test loader, gradients disabled ----
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for images, targets in test_loader:
            images = images.to(device)
            targets = targets.to(device)

            # Predicted class = index of the largest logit in each row.
            predicted = torch.max(model(images).logits, 1).indices
            n_seen += targets.size(0)
            n_correct += (predicted == targets).sum().item()

    accuracy = n_correct / n_seen
    accuracies.append(accuracy)
    print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Second training stage.
# NOTE: `model` is not re-created in this cell, so this stage keeps training
# whatever weights `model` currently holds, using a fresh AdamW optimizer.
# `losses` / `accuracies` are reset, so they describe this stage only.


def _train_one_epoch(model, loader, criterion, optimizer, device, description):
    """Run one optimisation pass over ``loader`` and return the mean per-batch loss.

    ``description`` is shown as the progress-bar prefix for the whole pass.
    """
    model.train()
    epoch_loss = 0
    # The description is constant within an epoch, so set it once up front
    # instead of on every batch.
    loop = tqdm(loader, leave=True, desc=description)

    for inputs, labels in loop:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs.logits, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        epoch_loss += batch_loss
        loop.set_postfix(loss=batch_loss)

    return epoch_loss / len(loader)


def _evaluate_accuracy(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly."""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs.logits, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total


# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=5e-5)

# Training loop
epochs = 10
losses, accuracies = [], []

for epoch in range(epochs):
    epoch_description = f"Epoch [{epoch+1}/{epochs}]"
    losses.append(
        _train_one_epoch(model, train_loader, criterion, optimizer, device, epoch_description)
    )

    # Evaluation
    accuracy = _evaluate_accuracy(model, test_loader, device)
    accuracies.append(accuracy)
    print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
def _plot_history(values, ylabel, title, color):
    """Draw one per-epoch metric as a line plot in its own figure.

    ``values`` holds one number per epoch.  The x-axis uses 1-based epoch
    numbers so it lines up with the "Epoch [k/N]" labels shown during training.
    """
    epoch_numbers = range(1, len(values) + 1)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(epoch_numbers, values, label=ylabel, color=color)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)
    plt.show()


# Plot Accuracy
_plot_history(accuracies, 'Accuracy', 'Accuracy Plot', 'blue')

# Plot Loss
_plot_history(losses, 'Loss', 'Loss Plot', 'red')

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Ensure model is in evaluation mode
model.eval()

# Initialize variables
correct, total, test_loss = 0, 0, 0
all_preds, all_labels = [], []
criterion = torch.nn.CrossEntropyLoss()

# Disable gradient calculation for evaluation
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        batch_count = labels.size(0)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs.logits, labels)
        # The criterion returns the mean over the batch; weight it by the batch
        # size so a smaller final batch does not skew the overall average.
        test_loss += loss.item() * batch_count

        # Get predictions
        _, predicted = torch.max(outputs.logits, 1)
        correct += (predicted == labels).sum().item()
        total += batch_count

        # Store predictions & actual labels for confusion matrix
        all_preds.extend(predicted.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

# Compute final accuracy & per-sample mean loss
test_loss /= total
accuracy = correct / total

# Print results
print(f"Test Accuracy: {accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

# Generate Confusion Matrix
# Pin the label set explicitly so the matrix always has one row/column per
# class name, even if some class never occurs in the labels or predictions.
class_names = train_dataset.classes
conf_matrix = confusion_matrix(
    all_labels, all_preds, labels=list(range(len(class_names)))
)

# Plot Confusion Matrix
# Pass our own axes to the display: without `ax`, plot() opens a new figure,
# which would leave this one empty and ignore the requested size.
fig, ax = plt.subplots(figsize=(8, 6))
disp = ConfusionMatrixDisplay(conf_matrix, display_labels=class_names)
disp.plot(ax=ax, cmap="Blues", values_format="d")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
# Write the model's state_dict (its parameters and buffers) to the Kaggle working directory.
checkpoint_path = '/kaggle/working/VIT_multiclass_V2.pth'
trained_weights = model.state_dict()
torch.save(trained_weights, checkpoint_path)
print("Model saved!")