In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from transformers import ViTForImageClassification, ViTFeatureExtractor
from tqdm import tqdm
import pandas as pd
import os


In [2]:
# Define the paths to your training and testing datasets
train_data_dir = "C:/Users/Amir/Desktop/skin cancer paper/skin cancer isic/Skin Cancer ISIC/Skin cancer ISIC The International Skin Imaging Collaboration/train"  # Replace with the path to your training dataset
test_data_dir = "C:/Users/Amir/Desktop/skin cancer paper/skin cancer isic/Skin Cancer ISIC/Skin cancer ISIC The International Skin Imaging Collaboration/test"  # Replace with the path to your testing dataset

In [3]:
# Load the ViT feature extractor
model_name = "google/vit-base-patch16-224"
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)

# Define transformations for the images
def preprocess_image(image):
    return feature_extractor(image, return_tensors="pt")["pixel_values"].squeeze(0)



preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [6]:
# Load the dataset
train_dataset = datasets.ImageFolder(train_data_dir, transform=preprocess_image)
test_dataset = datasets.ImageFolder(test_data_dir, transform=preprocess_image)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Load the pre-trained ViT model
num_classes = len(train_dataset.classes)  # Number of classes in your dataset
model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=num_classes,
    ignore_mismatched_sizes=True
)

config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([9]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([9, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=7

In [8]:
# Define the optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Initialize a DataFrame to store metrics
metrics_df = pd.DataFrame(columns=["Epoch", "Train Loss", "Train Accuracy", "Val Loss", "Val Accuracy"])

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # Wrap train_loader with tqdm for a progress bar
    train_loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}] Training")
    for images, labels in train_loop:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images).logits
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track metrics
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Update the progress bar description
        train_loop.set_postfix(loss=loss.item(), accuracy=(correct / total) * 100)

    # Compute training metrics
    train_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct / total

    # Validation phase
    model.eval()
    val_running_loss = 0.0
    val_correct = 0
    val_total = 0

    # Wrap val_loader with tqdm for a progress bar
    val_loop = tqdm(test_loader, desc=f"Epoch [{epoch+1}/{num_epochs}] Validation")
    with torch.no_grad():
        for images, labels in val_loop:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images).logits
            loss = criterion(outputs, labels)

            # Track metrics
            val_running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

            # Update the progress bar description
            val_loop.set_postfix(loss=loss.item(), accuracy=(val_correct / val_total) * 100)

    # Compute validation metrics
    val_loss = val_running_loss / len(test_loader)
    val_accuracy = 100 * val_correct / val_total

    # Print epoch metrics
    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

    # Save metrics to DataFrame
    metrics_df = metrics_df.append({
        "Epoch": epoch + 1,
        "Train Loss": train_loss,
        "Train Accuracy": train_accuracy,
        "Val Loss": val_loss,
        "Val Accuracy": val_accuracy
    }, ignore_index=True)

# Save metrics to a CSV file
metrics_df.to_csv("training_metrics.csv", index=False)
print("Training metrics saved to training_metrics.csv")

Epoch [1/10] Training: 100%|██████████| 70/70 [01:34<00:00,  1.35s/it, accuracy=58.2, loss=0.765]
Epoch [1/10] Validation: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it, accuracy=56.8, loss=2.35] 
  metrics_df = metrics_df.append({


Epoch [1/10], Train Loss: 1.2060, Train Accuracy: 58.15%, Val Loss: 1.4930, Val Accuracy: 56.78%


Epoch [2/10] Training: 100%|██████████| 70/70 [01:36<00:00,  1.38s/it, accuracy=77.1, loss=0.77] 
Epoch [2/10] Validation: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, accuracy=63.6, loss=1.54] 
  metrics_df = metrics_df.append({


Epoch [2/10], Train Loss: 0.6203, Train Accuracy: 77.13%, Val Loss: 1.2814, Val Accuracy: 63.56%


Epoch [3/10] Training: 100%|██████████| 70/70 [01:36<00:00,  1.37s/it, accuracy=85.9, loss=0.207]
Epoch [3/10] Validation: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, accuracy=59.3, loss=1.03] 
  metrics_df = metrics_df.append({


Epoch [3/10], Train Loss: 0.3417, Train Accuracy: 85.93%, Val Loss: 1.2436, Val Accuracy: 59.32%


Epoch [4/10] Training: 100%|██████████| 70/70 [01:36<00:00,  1.37s/it, accuracy=90.2, loss=0.444] 
Epoch [4/10] Validation: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, accuracy=57.6, loss=1.91] 
  metrics_df = metrics_df.append({


Epoch [4/10], Train Loss: 0.2307, Train Accuracy: 90.22%, Val Loss: 1.8088, Val Accuracy: 57.63%


Epoch [5/10] Training: 100%|██████████| 70/70 [01:35<00:00,  1.37s/it, accuracy=90.5, loss=0.412] 
Epoch [5/10] Validation: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, accuracy=59.3, loss=2.3]  
  metrics_df = metrics_df.append({


Epoch [5/10], Train Loss: 0.2178, Train Accuracy: 90.53%, Val Loss: 1.6287, Val Accuracy: 59.32%


Epoch [6/10] Training: 100%|██████████| 70/70 [01:37<00:00,  1.39s/it, accuracy=92.9, loss=0.206] 
Epoch [6/10] Validation: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, accuracy=61, loss=2.01]    
  metrics_df = metrics_df.append({


Epoch [6/10], Train Loss: 0.1494, Train Accuracy: 92.85%, Val Loss: 1.6550, Val Accuracy: 61.02%


Epoch [7/10] Training: 100%|██████████| 70/70 [01:37<00:00,  1.39s/it, accuracy=92.1, loss=0.104]  
Epoch [7/10] Validation: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, accuracy=63.6, loss=1.87]  
  metrics_df = metrics_df.append({


Epoch [7/10], Train Loss: 0.1283, Train Accuracy: 92.09%, Val Loss: 1.7791, Val Accuracy: 63.56%


Epoch [8/10] Training: 100%|██████████| 70/70 [01:37<00:00,  1.39s/it, accuracy=92.1, loss=0.172] 
Epoch [8/10] Validation: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, accuracy=61, loss=1.7]    
  metrics_df = metrics_df.append({


Epoch [8/10], Train Loss: 0.1173, Train Accuracy: 92.05%, Val Loss: 1.7811, Val Accuracy: 61.02%


Epoch [9/10] Training: 100%|██████████| 70/70 [01:37<00:00,  1.40s/it, accuracy=93.1, loss=0.265]  
Epoch [9/10] Validation: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, accuracy=64.4, loss=1.51] 
  metrics_df = metrics_df.append({


Epoch [9/10], Train Loss: 0.1205, Train Accuracy: 93.08%, Val Loss: 1.8923, Val Accuracy: 64.41%


Epoch [10/10] Training: 100%|██████████| 70/70 [01:37<00:00,  1.39s/it, accuracy=89.5, loss=0.368] 
Epoch [10/10] Validation: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, accuracy=59.3, loss=1.68] 

Epoch [10/10], Train Loss: 0.2199, Train Accuracy: 89.55%, Val Loss: 1.8227, Val Accuracy: 59.32%
Training metrics saved to training_metrics.csv



  metrics_df = metrics_df.append({
