In [1]:
# Load the Drive helper and mount
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000
! mkdir data
! unzip -q skin-cancer-mnist-ham10000.zip -d data

Dataset URL: https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000
License(s): CC-BY-NC-SA-4.0
Downloading skin-cancer-mnist-ham10000.zip to /content
100% 5.20G/5.20G [04:10<00:00, 23.5MB/s]
100% 5.20G/5.20G [04:10<00:00, 22.3MB/s]


In [1]:
import os
import numpy as np
import torch
from torch.utils.data import DataLoader
from transformers import ViTForImageClassification, ViTFeatureExtractor
from torchvision import transforms
from torch import nn, optim
from collections import defaultdict
from sklearn.metrics import accuracy_score
from PIL import Image
import itertools
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from itertools import product
from tqdm import tqdm  # For progress bar

# Constants
DATA_DIR = 'data/'
METADATA_FILE = os.path.join(DATA_DIR, 'HAM10000_metadata.csv')
IMAGE_DIRS = [os.path.join(DATA_DIR, 'HAM10000_images_part_1'),
              os.path.join(DATA_DIR, 'HAM10000_images_part_2')]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Configurable hyperparameters
EPOCHS = 10   # Set the number of epochs here
BATCH_SIZE = 256   # Set the batch size here
PATIENCE = 5  # Early stopping patience (number of epochs with no improvement)

# Load Metadata
metadata = pd.read_csv(METADATA_FILE)

# Encode the string labels into numeric values
label_encoder = LabelEncoder()
metadata['dx_encoded'] = label_encoder.fit_transform(metadata['dx'])

# Initialize Feature Extractor
MODEL_NAME = 'google/vit-base-patch32-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(MODEL_NAME)

# Augmentation Transforms
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
])

val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
])

# Custom Dataset (Using Encoded Labels)
class SkinCancerDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, image_dirs, feature_extractor, transform=None):
        self.dataframe = dataframe
        self.image_dirs = image_dirs
        self.feature_extractor = feature_extractor
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        image_id = row['image_id']
        # Search for the image in the provided directories
        for directory in self.image_dirs:
            image_path = os.path.join(directory, f"{image_id}.jpg")
            if os.path.exists(image_path):
                break
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        # Using 'dx_encoded' as the label column
        label = torch.tensor(row['dx_encoded'])  # Use the encoded labels
        return image, label

# Your provided stratifiedKFold class
class stratifiedKFold:
    def __init__(self, n_split, shuffle, random_state):
        self.n_split = n_split
        self.shuffle = shuffle
        self.random_state = random_state

    def split(self, X, y):
        if (self.shuffle and self.random_state) is not None:
            np.random.seed(self.random_state)

        idx = np.arange(len(y))
        if self.shuffle:
            np.random.shuffle(idx)

        y_idx = defaultdict(list)
        for i, label in zip(idx, y):
            y_idx[label].append(i)

        splits = [[] for _ in range(self.n_split)]

        for label, i in y_idx.items():
            np.random.shuffle(i)
            split_portions = [len(i) // self.n_split] * self.n_split
            for j in range(len(i) % self.n_split):
                split_portions[j] += 1

            first = 0
            for k in range(self.n_split):
                last = first + split_portions[k]
                splits[k].extend(i[first:last])
                first = last

        for i in range(self.n_split):
            test_idx = splits[i]
            train_idx = np.concatenate([splits[j] for j in range(self.n_split) if j != i])
            yield train_idx, test_idx

    def get_n_splits(self):
        return self.n_split

# Stratified sampling to get 30% of the data
def stratified_sample(dataframe, labels, sample_size=0.3, random_state=42):
    n_samples = int(sample_size * len(dataframe))
    skf = stratifiedKFold(n_split=int(1 / sample_size), shuffle=True, random_state=random_state)

    # We only need the first split for 30% sampling
    for train_idx, test_idx in skf.split(dataframe, labels):
        return dataframe.iloc[test_idx[:n_samples]]

# Sample 30% of the data
metadata_sample = stratified_sample(metadata, metadata['dx_encoded'], sample_size=0.3)

# Reset index for the sampled data
metadata_sample = metadata_sample.reset_index(drop=True)

# Define hyperparameters to test (learning rate and attention heads)
hyperparameters = [
    {'lr': [1e-3, 1e-4]},
    {'attention_heads': [8, 16]}  # Example values
]

# Training and evaluation loops with progress bar
def train_epoch(model, train_loader, optimizer, criterion, device, epoch):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # Create a progress bar for batch processing
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch + 1} Training", leave=False)

    for batch_idx, (images, labels) in enumerate(progress_bar):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images).logits  # logits from ViTForImageClassification
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Update progress bar with batch loss and accuracy
        progress_bar.set_postfix({'Batch Loss': loss.item(), 'Batch Acc': 100 * correct / total})

    # Return average loss and accuracy for the epoch
    return running_loss / total, correct / total

def eval_epoch(model, val_loader, criterion, device, epoch):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    # Create a progress bar for batch processing during evaluation
    progress_bar = tqdm(val_loader, desc=f"Epoch {epoch + 1} Validation", leave=False)

    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(progress_bar):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images).logits  # logits from ViTForImageClassification
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Update progress bar with batch loss and accuracy
            progress_bar.set_postfix({'Batch Loss': loss.item(), 'Batch Acc': 100 * correct / total})

    # Return average loss and accuracy for the epoch
    return running_loss / total, correct / total

# Cross-validation function for hyperparameter tuning
def cross_validate_hyperparameter_tuning(X, y, hyperparameters, device, total_folds=5):
    outer_fold = stratifiedKFold(n_split=total_folds, shuffle=True, random_state=42)  # 5-fold CV for hyperparameter tuning
    outer_results = []

   # Get the product of all hyperparameter combinations
    all_combinations = list(itertools.product(*hyperparameters.values()))
    # Total number of combinations
    total_combinations = len(all_combinations)

    for fold, (train_idx, test_idx) in enumerate(outer_fold.split(X, y), 1):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        # Set up hyperparameter combinations
        param_keys = [list(d.keys())[0] for d in hyperparameters]
        param_values = [list(d.values())[0] for d in hyperparameters]
        combinations = product(*param_values)
        hyper_combo = [{param_keys[i]: combo[i] for i in range(len(param_keys))} for combo in combinations]

        print(f"\n=== Fold {fold}/{total_folds} ===")

        for combo_num, param in enumerate(hyper_combo, 1):
            LEARNING_RATE = param['lr']
            ATTENTION_HEADS = param['attention_heads']

            # Initialize the model with current hyperparameters
            model = ViTForImageClassification.from_pretrained(MODEL_NAME, num_labels=len(label_encoder.classes_), num_attention_heads=ATTENTION_HEADS)
            model.to(device)
            optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
            criterion = nn.CrossEntropyLoss()

            best_val_loss = np.inf
            epochs_no_improve = 0  # Early stopping counter

            print(f"Starting hyperparameter combination {combo_num}/{total_combinations}: lr={LEARNING_RATE}, attention_heads={ATTENTION_HEADS}")

            # Train on inner folds with early stopping
            for epoch in range(EPOCHS):
                # Create datasets
                train_dataset = SkinCancerDataset(X_train, IMAGE_DIRS, feature_extractor, transform=train_transform)
                val_dataset = SkinCancerDataset(X_test, IMAGE_DIRS, feature_extractor, transform=val_test_transform)

                train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
                val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

                train_loss, train_acc = train_epoch(model, train_loader, optimizer, criterion, device, epoch)
                val_loss, val_acc = eval_epoch(model, val_loader, criterion, device, epoch)

                # Print progress for each epoch
                print(f'Epoch {epoch + 1}/{EPOCHS} (Fold {fold}, Combo {combo_num}): Train Loss: {train_loss:.4f}, Train Acc: {train_acc*100:.2f}%, '
                      f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc*100:.2f}%')

                # Check if validation loss improved
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    epochs_no_improve = 0  # Reset counter if validation improves
                else:
                    epochs_no_improve += 1

                # Early stopping if no improvement for PATIENCE epochs
                if epochs_no_improve == PATIENCE:
                    print(f"Early stopping at epoch {epoch + 1} (Fold {fold}, Combo {combo_num})")
                    break

            # Store results for the fold and combination
            outer_results.append((param, val_acc))

            # Print progress for hyperparameter tuning
            print(f"Completed {combo_num}/{total_combinations} hyperparameter combinations for Fold {fold}")

    return outer_results

# Hyperparameters to test (learning rate and attention heads)
hyperparameters = [
    {'lr': [1e-3, 1e-4]},
    {'attention_heads': [8, 16]}  # Example values
]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [1]:


# Running hyperparameter tuning with progress tracking
results_tuning = cross_validate_hyperparameter_tuning(metadata, metadata['dx_encoded'], hyperparameters, device)

# Output the best hyperparameters and accuracy
best_result = max(results_tuning, key=lambda x: x[1])
print("Best Hyperparameters:", best_result[0])
print("Best Accuracy:", best_result[1])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.



=== Fold 1/5 ===


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 1/2: lr=0.001, attention_heads=8




Epoch 1/10 (Fold 1, Combo 1): Train Loss: 1.0969, Train Acc: 64.84%, Val Loss: 0.9243, Val Acc: 65.94%




Epoch 2/10 (Fold 1, Combo 1): Train Loss: 0.9096, Train Acc: 67.64%, Val Loss: 0.9243, Val Acc: 63.44%




Epoch 3/10 (Fold 1, Combo 1): Train Loss: 0.8834, Train Acc: 67.89%, Val Loss: 0.8436, Val Acc: 68.28%




Epoch 4/10 (Fold 1, Combo 1): Train Loss: 0.8425, Train Acc: 68.81%, Val Loss: 0.8167, Val Acc: 69.18%




Epoch 5/10 (Fold 1, Combo 1): Train Loss: 0.7818, Train Acc: 71.02%, Val Loss: 0.7402, Val Acc: 71.02%




Epoch 6/10 (Fold 1, Combo 1): Train Loss: 0.7648, Train Acc: 71.45%, Val Loss: 0.7554, Val Acc: 70.72%




Epoch 7/10 (Fold 1, Combo 1): Train Loss: 0.7196, Train Acc: 72.61%, Val Loss: 0.7009, Val Acc: 73.67%




Epoch 8/10 (Fold 1, Combo 1): Train Loss: 0.7071, Train Acc: 73.13%, Val Loss: 0.7203, Val Acc: 72.17%




Epoch 9/10 (Fold 1, Combo 1): Train Loss: 0.6921, Train Acc: 73.88%, Val Loss: 0.6849, Val Acc: 74.06%




Epoch 10/10 (Fold 1, Combo 1): Train Loss: 0.6658, Train Acc: 75.44%, Val Loss: 0.6924, Val Acc: 73.97%
Completed 1/2 hyperparameter combinations for Fold 1


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 2/2: lr=0.001, attention_heads=16




Epoch 1/10 (Fold 1, Combo 2): Train Loss: 1.0986, Train Acc: 64.97%, Val Loss: 0.9579, Val Acc: 66.98%




Epoch 2/10 (Fold 1, Combo 2): Train Loss: 0.9239, Train Acc: 67.57%, Val Loss: 0.9656, Val Acc: 68.08%




Epoch 3/10 (Fold 1, Combo 2): Train Loss: 0.8905, Train Acc: 67.93%, Val Loss: 0.8446, Val Acc: 68.68%




Epoch 4/10 (Fold 1, Combo 2): Train Loss: 0.8105, Train Acc: 70.12%, Val Loss: 0.9062, Val Acc: 69.68%




Epoch 5/10 (Fold 1, Combo 2): Train Loss: 0.7722, Train Acc: 71.74%, Val Loss: 0.7595, Val Acc: 71.12%




Epoch 6/10 (Fold 1, Combo 2): Train Loss: 0.7258, Train Acc: 73.11%, Val Loss: 0.7073, Val Acc: 72.82%




Epoch 7/10 (Fold 1, Combo 2): Train Loss: 0.6935, Train Acc: 74.00%, Val Loss: 0.7000, Val Acc: 73.17%




Epoch 8/10 (Fold 1, Combo 2): Train Loss: 0.6821, Train Acc: 73.90%, Val Loss: 0.7109, Val Acc: 73.32%




Epoch 9/10 (Fold 1, Combo 2): Train Loss: 0.6544, Train Acc: 75.48%, Val Loss: 0.7061, Val Acc: 73.57%




Epoch 10/10 (Fold 1, Combo 2): Train Loss: 0.6459, Train Acc: 75.94%, Val Loss: 0.6730, Val Acc: 76.51%
Completed 2/2 hyperparameter combinations for Fold 1


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 3/2: lr=0.0001, attention_heads=8




Epoch 1/10 (Fold 1, Combo 3): Train Loss: 1.0708, Train Acc: 67.64%, Val Loss: 0.9009, Val Acc: 69.53%




Epoch 2/10 (Fold 1, Combo 3): Train Loss: 0.7639, Train Acc: 74.71%, Val Loss: 0.7300, Val Acc: 75.16%




Epoch 3/10 (Fold 1, Combo 3): Train Loss: 0.6344, Train Acc: 79.03%, Val Loss: 0.6260, Val Acc: 79.40%




Epoch 4/10 (Fold 1, Combo 3): Train Loss: 0.5362, Train Acc: 82.57%, Val Loss: 0.5690, Val Acc: 81.25%




Epoch 5/10 (Fold 1, Combo 3): Train Loss: 0.4619, Train Acc: 85.23%, Val Loss: 0.5004, Val Acc: 82.19%




Epoch 6/10 (Fold 1, Combo 3): Train Loss: 0.3936, Train Acc: 87.39%, Val Loss: 0.5066, Val Acc: 82.59%




Epoch 7/10 (Fold 1, Combo 3): Train Loss: 0.3361, Train Acc: 89.10%, Val Loss: 0.5344, Val Acc: 82.44%




Epoch 8/10 (Fold 1, Combo 3): Train Loss: 0.2934, Train Acc: 90.80%, Val Loss: 0.4833, Val Acc: 83.69%




Epoch 9/10 (Fold 1, Combo 3): Train Loss: 0.2293, Train Acc: 92.62%, Val Loss: 0.4725, Val Acc: 85.14%




Epoch 10/10 (Fold 1, Combo 3): Train Loss: 0.2132, Train Acc: 93.36%, Val Loss: 0.4924, Val Acc: 82.44%
Completed 3/2 hyperparameter combinations for Fold 1


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 4/2: lr=0.0001, attention_heads=16




Epoch 1/10 (Fold 1, Combo 4): Train Loss: 1.0629, Train Acc: 66.42%, Val Loss: 0.8846, Val Acc: 69.98%




Epoch 2/10 (Fold 1, Combo 4): Train Loss: 0.7324, Train Acc: 76.48%, Val Loss: 0.7319, Val Acc: 75.01%




Epoch 3/10 (Fold 1, Combo 4): Train Loss: 0.6077, Train Acc: 80.07%, Val Loss: 0.5860, Val Acc: 80.45%




Epoch 4/10 (Fold 1, Combo 4): Train Loss: 0.5176, Train Acc: 83.38%, Val Loss: 0.5895, Val Acc: 80.20%




Epoch 5/10 (Fold 1, Combo 4): Train Loss: 0.4227, Train Acc: 86.55%, Val Loss: 0.5069, Val Acc: 82.94%




Epoch 6/10 (Fold 1, Combo 4): Train Loss: 0.3670, Train Acc: 88.78%, Val Loss: 0.5216, Val Acc: 82.74%




Epoch 7/10 (Fold 1, Combo 4): Train Loss: 0.2963, Train Acc: 90.99%, Val Loss: 0.4909, Val Acc: 83.54%




Epoch 8/10 (Fold 1, Combo 4): Train Loss: 0.2710, Train Acc: 91.60%, Val Loss: 0.4465, Val Acc: 84.94%




Epoch 9/10 (Fold 1, Combo 4): Train Loss: 0.2017, Train Acc: 94.19%, Val Loss: 0.4498, Val Acc: 84.94%




Epoch 10/10 (Fold 1, Combo 4): Train Loss: 0.1704, Train Acc: 95.13%, Val Loss: 0.4804, Val Acc: 85.44%
Completed 4/2 hyperparameter combinations for Fold 1

=== Fold 2/5 ===


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 1/2: lr=0.001, attention_heads=8




Epoch 1/10 (Fold 2, Combo 1): Train Loss: 1.1347, Train Acc: 65.06%, Val Loss: 1.0290, Val Acc: 67.08%




Epoch 2/10 (Fold 2, Combo 1): Train Loss: 0.9746, Train Acc: 67.13%, Val Loss: 0.9296, Val Acc: 67.28%




Epoch 3/10 (Fold 2, Combo 1): Train Loss: 0.9005, Train Acc: 67.55%, Val Loss: 0.9011, Val Acc: 68.23%




Epoch 4/10 (Fold 2, Combo 1): Train Loss: 0.8569, Train Acc: 68.38%, Val Loss: 0.8751, Val Acc: 69.28%




Epoch 5/10 (Fold 2, Combo 1): Train Loss: 0.8262, Train Acc: 69.08%, Val Loss: 0.8643, Val Acc: 67.63%




Epoch 6/10 (Fold 2, Combo 1): Train Loss: 0.8056, Train Acc: 69.74%, Val Loss: 0.8420, Val Acc: 70.27%




Epoch 7/10 (Fold 2, Combo 1): Train Loss: 0.7768, Train Acc: 70.87%, Val Loss: 0.7753, Val Acc: 70.67%




Epoch 8/10 (Fold 2, Combo 1): Train Loss: 0.7509, Train Acc: 71.34%, Val Loss: 0.7616, Val Acc: 71.02%




Epoch 9/10 (Fold 2, Combo 1): Train Loss: 0.7177, Train Acc: 72.70%, Val Loss: 0.7351, Val Acc: 71.77%




Epoch 10/10 (Fold 2, Combo 1): Train Loss: 0.7003, Train Acc: 73.21%, Val Loss: 0.7630, Val Acc: 71.62%
Completed 1/2 hyperparameter combinations for Fold 2


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 2/2: lr=0.001, attention_heads=16




Epoch 1/10 (Fold 2, Combo 2): Train Loss: 1.0940, Train Acc: 65.08%, Val Loss: 0.9707, Val Acc: 67.03%




Epoch 2/10 (Fold 2, Combo 2): Train Loss: 0.8995, Train Acc: 67.74%, Val Loss: 0.8844, Val Acc: 67.78%




Epoch 3/10 (Fold 2, Combo 2): Train Loss: 0.8339, Train Acc: 69.23%, Val Loss: 0.8298, Val Acc: 69.53%




Epoch 4/10 (Fold 2, Combo 2): Train Loss: 0.7965, Train Acc: 69.75%, Val Loss: 0.7915, Val Acc: 71.02%




Epoch 5/10 (Fold 2, Combo 2): Train Loss: 0.7469, Train Acc: 71.95%, Val Loss: 0.7610, Val Acc: 72.02%




Epoch 6/10 (Fold 2, Combo 2): Train Loss: 0.7121, Train Acc: 72.93%, Val Loss: 0.7502, Val Acc: 71.37%




Epoch 7/10 (Fold 2, Combo 2): Train Loss: 0.6684, Train Acc: 74.82%, Val Loss: 0.7229, Val Acc: 73.67%




Epoch 8/10 (Fold 2, Combo 2): Train Loss: 0.6875, Train Acc: 73.97%, Val Loss: 0.6969, Val Acc: 73.97%




Epoch 9/10 (Fold 2, Combo 2): Train Loss: 0.6580, Train Acc: 75.14%, Val Loss: 0.7306, Val Acc: 73.37%




Epoch 10/10 (Fold 2, Combo 2): Train Loss: 0.6357, Train Acc: 75.94%, Val Loss: 0.6897, Val Acc: 75.16%
Completed 2/2 hyperparameter combinations for Fold 2


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 3/2: lr=0.0001, attention_heads=8




Epoch 1/10 (Fold 2, Combo 3): Train Loss: 1.0795, Train Acc: 66.50%, Val Loss: 0.8557, Val Acc: 71.57%




Epoch 2/10 (Fold 2, Combo 3): Train Loss: 0.7668, Train Acc: 74.73%, Val Loss: 0.7163, Val Acc: 76.06%




Epoch 3/10 (Fold 2, Combo 3): Train Loss: 0.6342, Train Acc: 78.93%, Val Loss: 0.6439, Val Acc: 78.20%




Epoch 4/10 (Fold 2, Combo 3): Train Loss: 0.5227, Train Acc: 82.87%, Val Loss: 0.6065, Val Acc: 79.00%




Epoch 5/10 (Fold 2, Combo 3): Train Loss: 0.4687, Train Acc: 84.51%, Val Loss: 0.5524, Val Acc: 80.95%




Epoch 6/10 (Fold 2, Combo 3): Train Loss: 0.4003, Train Acc: 87.03%, Val Loss: 0.5141, Val Acc: 82.59%




Epoch 7/10 (Fold 2, Combo 3): Train Loss: 0.3473, Train Acc: 88.81%, Val Loss: 0.5192, Val Acc: 82.54%




Epoch 8/10 (Fold 2, Combo 3): Train Loss: 0.2888, Train Acc: 91.06%, Val Loss: 0.4862, Val Acc: 83.59%




Epoch 9/10 (Fold 2, Combo 3): Train Loss: 0.2545, Train Acc: 92.13%, Val Loss: 0.4833, Val Acc: 83.34%




Epoch 10/10 (Fold 2, Combo 3): Train Loss: 0.2164, Train Acc: 93.75%, Val Loss: 0.4883, Val Acc: 83.69%
Completed 3/2 hyperparameter combinations for Fold 2


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 4/2: lr=0.0001, attention_heads=16




Epoch 1/10 (Fold 2, Combo 4): Train Loss: 1.0817, Train Acc: 66.50%, Val Loss: 0.8614, Val Acc: 72.02%




Epoch 2/10 (Fold 2, Combo 4): Train Loss: 0.7749, Train Acc: 73.91%, Val Loss: 0.8261, Val Acc: 68.98%




Epoch 3/10 (Fold 2, Combo 4): Train Loss: 0.6340, Train Acc: 79.01%, Val Loss: 0.6147, Val Acc: 78.95%




Epoch 4/10 (Fold 2, Combo 4): Train Loss: 0.5173, Train Acc: 83.62%, Val Loss: 0.5875, Val Acc: 79.95%




Epoch 5/10 (Fold 2, Combo 4): Train Loss: 0.4447, Train Acc: 85.77%, Val Loss: 0.5858, Val Acc: 78.85%




Epoch 6/10 (Fold 2, Combo 4): Train Loss: 0.3767, Train Acc: 88.05%, Val Loss: 0.4713, Val Acc: 83.94%




Epoch 7/10 (Fold 2, Combo 4): Train Loss: 0.3003, Train Acc: 90.91%, Val Loss: 0.4453, Val Acc: 85.24%




Epoch 8/10 (Fold 2, Combo 4): Train Loss: 0.2533, Train Acc: 92.22%, Val Loss: 0.4399, Val Acc: 85.24%




Epoch 9/10 (Fold 2, Combo 4): Train Loss: 0.2028, Train Acc: 94.28%, Val Loss: 0.5142, Val Acc: 82.04%




Epoch 10/10 (Fold 2, Combo 4): Train Loss: 0.1927, Train Acc: 94.26%, Val Loss: 0.4347, Val Acc: 86.18%
Completed 4/2 hyperparameter combinations for Fold 2

=== Fold 3/5 ===


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 1/2: lr=0.001, attention_heads=8




Epoch 1/10 (Fold 3, Combo 1): Train Loss: 1.0937, Train Acc: 65.78%, Val Loss: 0.9615, Val Acc: 68.10%




Epoch 2/10 (Fold 3, Combo 1): Train Loss: 0.9445, Train Acc: 67.11%, Val Loss: 0.9148, Val Acc: 68.30%




Epoch 3/10 (Fold 3, Combo 1): Train Loss: 0.8916, Train Acc: 68.43%, Val Loss: 0.8563, Val Acc: 69.70%




Epoch 4/10 (Fold 3, Combo 1): Train Loss: 0.8249, Train Acc: 69.17%, Val Loss: 0.7841, Val Acc: 71.34%




Epoch 5/10 (Fold 3, Combo 1): Train Loss: 0.7839, Train Acc: 70.76%, Val Loss: 0.7440, Val Acc: 71.99%




Epoch 6/10 (Fold 3, Combo 1): Train Loss: 0.7473, Train Acc: 72.04%, Val Loss: 0.7272, Val Acc: 72.99%




Epoch 7/10 (Fold 3, Combo 1): Train Loss: 0.7850, Train Acc: 71.09%, Val Loss: 0.7239, Val Acc: 72.19%




Epoch 8/10 (Fold 3, Combo 1): Train Loss: 0.7439, Train Acc: 71.34%, Val Loss: 0.6950, Val Acc: 74.49%




Epoch 9/10 (Fold 3, Combo 1): Train Loss: 0.7002, Train Acc: 73.84%, Val Loss: 0.6862, Val Acc: 73.84%




Epoch 10/10 (Fold 3, Combo 1): Train Loss: 0.6915, Train Acc: 74.08%, Val Loss: 0.6997, Val Acc: 72.94%
Completed 1/2 hyperparameter combinations for Fold 3


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 2/2: lr=0.001, attention_heads=16




Epoch 1/10 (Fold 3, Combo 2): Train Loss: 1.1072, Train Acc: 64.73%, Val Loss: 0.9457, Val Acc: 67.75%




Epoch 2/10 (Fold 3, Combo 2): Train Loss: 0.9226, Train Acc: 67.36%, Val Loss: 0.8749, Val Acc: 68.70%




Epoch 3/10 (Fold 3, Combo 2): Train Loss: 0.8636, Train Acc: 69.06%, Val Loss: 0.8791, Val Acc: 68.30%




Epoch 4/10 (Fold 3, Combo 2): Train Loss: 0.8199, Train Acc: 69.33%, Val Loss: 0.8484, Val Acc: 69.35%




Epoch 5/10 (Fold 3, Combo 2): Train Loss: 0.7743, Train Acc: 70.77%, Val Loss: 0.7425, Val Acc: 72.59%




Epoch 6/10 (Fold 3, Combo 2): Train Loss: 0.7568, Train Acc: 72.00%, Val Loss: 0.7164, Val Acc: 73.74%




Epoch 7/10 (Fold 3, Combo 2): Train Loss: 0.7252, Train Acc: 73.04%, Val Loss: 0.7063, Val Acc: 73.29%




Epoch 8/10 (Fold 3, Combo 2): Train Loss: 0.6993, Train Acc: 73.49%, Val Loss: 0.6629, Val Acc: 75.29%




Epoch 9/10 (Fold 3, Combo 2): Train Loss: 0.6709, Train Acc: 74.25%, Val Loss: 0.7040, Val Acc: 73.64%




Epoch 10/10 (Fold 3, Combo 2): Train Loss: 0.6475, Train Acc: 75.15%, Val Loss: 0.7158, Val Acc: 73.69%
Completed 2/2 hyperparameter combinations for Fold 3


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 3/2: lr=0.0001, attention_heads=8




Epoch 1/10 (Fold 3, Combo 3): Train Loss: 1.0559, Train Acc: 65.80%, Val Loss: 0.8569, Val Acc: 72.39%




Epoch 2/10 (Fold 3, Combo 3): Train Loss: 0.7783, Train Acc: 73.76%, Val Loss: 0.6925, Val Acc: 77.53%




Epoch 3/10 (Fold 3, Combo 3): Train Loss: 0.6454, Train Acc: 78.62%, Val Loss: 0.6176, Val Acc: 79.73%




Epoch 4/10 (Fold 3, Combo 3): Train Loss: 0.5497, Train Acc: 81.85%, Val Loss: 0.5473, Val Acc: 81.68%




Epoch 5/10 (Fold 3, Combo 3): Train Loss: 0.4734, Train Acc: 84.61%, Val Loss: 0.5281, Val Acc: 81.73%




Epoch 6/10 (Fold 3, Combo 3): Train Loss: 0.3925, Train Acc: 87.48%, Val Loss: 0.4816, Val Acc: 83.08%




Epoch 7/10 (Fold 3, Combo 3): Train Loss: 0.3367, Train Acc: 89.14%, Val Loss: 0.4810, Val Acc: 83.18%




Epoch 8/10 (Fold 3, Combo 3): Train Loss: 0.2948, Train Acc: 90.95%, Val Loss: 0.4717, Val Acc: 84.37%




Epoch 9/10 (Fold 3, Combo 3): Train Loss: 0.2453, Train Acc: 92.46%, Val Loss: 0.4844, Val Acc: 84.57%




Epoch 10/10 (Fold 3, Combo 3): Train Loss: 0.1949, Train Acc: 94.32%, Val Loss: 0.5316, Val Acc: 82.63%
Completed 3/2 hyperparameter combinations for Fold 3


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 4/2: lr=0.0001, attention_heads=16




Epoch 1/10 (Fold 3, Combo 4): Train Loss: 1.0638, Train Acc: 66.51%, Val Loss: 0.8407, Val Acc: 74.14%




Epoch 2/10 (Fold 3, Combo 4): Train Loss: 0.7538, Train Acc: 75.62%, Val Loss: 0.6534, Val Acc: 80.08%




Epoch 3/10 (Fold 3, Combo 4): Train Loss: 0.6017, Train Acc: 80.73%, Val Loss: 0.5749, Val Acc: 81.18%




Epoch 4/10 (Fold 3, Combo 4): Train Loss: 0.4850, Train Acc: 84.64%, Val Loss: 0.5274, Val Acc: 82.88%




Epoch 5/10 (Fold 3, Combo 4): Train Loss: 0.4180, Train Acc: 86.86%, Val Loss: 0.4882, Val Acc: 83.13%




Epoch 6/10 (Fold 3, Combo 4): Train Loss: 0.3363, Train Acc: 89.90%, Val Loss: 0.4652, Val Acc: 83.82%




Epoch 7/10 (Fold 3, Combo 4): Train Loss: 0.2659, Train Acc: 92.21%, Val Loss: 0.4786, Val Acc: 84.42%




Epoch 8/10 (Fold 3, Combo 4): Train Loss: 0.2277, Train Acc: 93.10%, Val Loss: 0.4485, Val Acc: 83.77%




Epoch 9/10 (Fold 3, Combo 4): Train Loss: 0.1872, Train Acc: 94.43%, Val Loss: 0.5289, Val Acc: 81.18%




Epoch 10/10 (Fold 3, Combo 4): Train Loss: 0.1582, Train Acc: 95.33%, Val Loss: 0.4980, Val Acc: 85.87%
Completed 4/2 hyperparameter combinations for Fold 3

=== Fold 4/5 ===


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 1/2: lr=0.001, attention_heads=8




Epoch 1/10 (Fold 4, Combo 1): Train Loss: 1.1071, Train Acc: 66.79%, Val Loss: 0.9995, Val Acc: 65.58%




Epoch 2/10 (Fold 4, Combo 1): Train Loss: 0.9097, Train Acc: 67.93%, Val Loss: 0.9677, Val Acc: 63.44%




Epoch 3/10 (Fold 4, Combo 1): Train Loss: 0.8689, Train Acc: 68.84%, Val Loss: 0.8578, Val Acc: 67.83%




Epoch 4/10 (Fold 4, Combo 1): Train Loss: 0.7997, Train Acc: 69.99%, Val Loss: 0.8149, Val Acc: 69.78%




Epoch 5/10 (Fold 4, Combo 1): Train Loss: 0.8010, Train Acc: 70.25%, Val Loss: 0.7809, Val Acc: 70.48%




Epoch 6/10 (Fold 4, Combo 1): Train Loss: 0.7368, Train Acc: 71.96%, Val Loss: 0.7534, Val Acc: 70.73%




Epoch 7/10 (Fold 4, Combo 1): Train Loss: 0.7070, Train Acc: 73.21%, Val Loss: 0.7732, Val Acc: 71.38%




Epoch 8/10 (Fold 4, Combo 1): Train Loss: 0.7064, Train Acc: 73.26%, Val Loss: 0.7283, Val Acc: 72.33%




Epoch 9/10 (Fold 4, Combo 1): Train Loss: 0.6750, Train Acc: 74.09%, Val Loss: 0.7145, Val Acc: 73.83%




Epoch 10/10 (Fold 4, Combo 1): Train Loss: 0.6438, Train Acc: 75.46%, Val Loss: 0.6932, Val Acc: 73.93%
Completed 1/2 hyperparameter combinations for Fold 4


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 2/2: lr=0.001, attention_heads=16




Epoch 1/10 (Fold 4, Combo 2): Train Loss: 1.1156, Train Acc: 64.06%, Val Loss: 1.0353, Val Acc: 65.98%




Epoch 2/10 (Fold 4, Combo 2): Train Loss: 0.9204, Train Acc: 67.69%, Val Loss: 0.8978, Val Acc: 66.38%




Epoch 3/10 (Fold 4, Combo 2): Train Loss: 0.8406, Train Acc: 69.50%, Val Loss: 0.8869, Val Acc: 68.93%




Epoch 4/10 (Fold 4, Combo 2): Train Loss: 0.8025, Train Acc: 70.34%, Val Loss: 0.9742, Val Acc: 67.98%




Epoch 5/10 (Fold 4, Combo 2): Train Loss: 0.7671, Train Acc: 71.32%, Val Loss: 0.7509, Val Acc: 73.08%




Epoch 6/10 (Fold 4, Combo 2): Train Loss: 0.7410, Train Acc: 72.34%, Val Loss: 0.7344, Val Acc: 72.28%




Epoch 7/10 (Fold 4, Combo 2): Train Loss: 0.6910, Train Acc: 73.99%, Val Loss: 0.7388, Val Acc: 72.73%




Epoch 8/10 (Fold 4, Combo 2): Train Loss: 0.6680, Train Acc: 74.53%, Val Loss: 0.9016, Val Acc: 65.43%




Epoch 9/10 (Fold 4, Combo 2): Train Loss: 0.7324, Train Acc: 72.37%, Val Loss: 0.7640, Val Acc: 72.18%




Epoch 10/10 (Fold 4, Combo 2): Train Loss: 0.6461, Train Acc: 74.99%, Val Loss: 0.7045, Val Acc: 73.93%
Completed 2/2 hyperparameter combinations for Fold 4


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 3/2: lr=0.0001, attention_heads=8




Epoch 1/10 (Fold 4, Combo 3): Train Loss: 1.0798, Train Acc: 66.04%, Val Loss: 0.9366, Val Acc: 69.53%




Epoch 2/10 (Fold 4, Combo 3): Train Loss: 0.7533, Train Acc: 74.97%, Val Loss: 0.8219, Val Acc: 71.48%




Epoch 3/10 (Fold 4, Combo 3): Train Loss: 0.6320, Train Acc: 79.41%, Val Loss: 0.6569, Val Acc: 78.92%




Epoch 4/10 (Fold 4, Combo 3): Train Loss: 0.5311, Train Acc: 82.62%, Val Loss: 0.5877, Val Acc: 80.17%




Epoch 5/10 (Fold 4, Combo 3): Train Loss: 0.4478, Train Acc: 85.27%, Val Loss: 0.5261, Val Acc: 82.32%




Epoch 6/10 (Fold 4, Combo 3): Train Loss: 0.3772, Train Acc: 87.69%, Val Loss: 0.5119, Val Acc: 83.62%




Epoch 7/10 (Fold 4, Combo 3): Train Loss: 0.3220, Train Acc: 89.90%, Val Loss: 0.5048, Val Acc: 82.42%




Epoch 8/10 (Fold 4, Combo 3): Train Loss: 0.2597, Train Acc: 91.89%, Val Loss: 0.4653, Val Acc: 83.92%




Epoch 9/10 (Fold 4, Combo 3): Train Loss: 0.2234, Train Acc: 92.95%, Val Loss: 0.4851, Val Acc: 84.17%




Epoch 10/10 (Fold 4, Combo 3): Train Loss: 0.1875, Train Acc: 94.22%, Val Loss: 0.4685, Val Acc: 84.07%
Completed 3/2 hyperparameter combinations for Fold 4


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 4/2: lr=0.0001, attention_heads=16




Epoch 1/10 (Fold 4, Combo 4): Train Loss: 1.0577, Train Acc: 66.44%, Val Loss: 0.9078, Val Acc: 70.08%




Epoch 2/10 (Fold 4, Combo 4): Train Loss: 0.7617, Train Acc: 74.57%, Val Loss: 0.7470, Val Acc: 75.52%




Epoch 3/10 (Fold 4, Combo 4): Train Loss: 0.6165, Train Acc: 79.83%, Val Loss: 0.7296, Val Acc: 75.52%




Epoch 4/10 (Fold 4, Combo 4): Train Loss: 0.5099, Train Acc: 83.75%, Val Loss: 0.5854, Val Acc: 81.37%




Epoch 5/10 (Fold 4, Combo 4): Train Loss: 0.4273, Train Acc: 86.14%, Val Loss: 0.5112, Val Acc: 82.87%




Epoch 6/10 (Fold 4, Combo 4): Train Loss: 0.3578, Train Acc: 88.37%, Val Loss: 0.5691, Val Acc: 81.07%




Epoch 7/10 (Fold 4, Combo 4): Train Loss: 0.3040, Train Acc: 90.80%, Val Loss: 0.4974, Val Acc: 82.07%




Epoch 8/10 (Fold 4, Combo 4): Train Loss: 0.2391, Train Acc: 92.69%, Val Loss: 0.4689, Val Acc: 84.27%




Epoch 9/10 (Fold 4, Combo 4): Train Loss: 0.2050, Train Acc: 93.84%, Val Loss: 0.4885, Val Acc: 83.87%




Epoch 10/10 (Fold 4, Combo 4): Train Loss: 0.1647, Train Acc: 94.91%, Val Loss: 0.4590, Val Acc: 86.16%
Completed 4/2 hyperparameter combinations for Fold 4

=== Fold 5/5 ===


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 1/2: lr=0.001, attention_heads=8




Epoch 1/10 (Fold 5, Combo 1): Train Loss: 1.1631, Train Acc: 64.87%, Val Loss: 1.0623, Val Acc: 67.50%




Epoch 2/10 (Fold 5, Combo 1): Train Loss: 0.9718, Train Acc: 66.65%, Val Loss: 0.9174, Val Acc: 68.00%




Epoch 3/10 (Fold 5, Combo 1): Train Loss: 0.8978, Train Acc: 67.81%, Val Loss: 0.8609, Val Acc: 68.10%




Epoch 4/10 (Fold 5, Combo 1): Train Loss: 0.8372, Train Acc: 69.34%, Val Loss: 0.8186, Val Acc: 69.35%




Epoch 5/10 (Fold 5, Combo 1): Train Loss: 0.8270, Train Acc: 69.52%, Val Loss: 0.7873, Val Acc: 70.20%




Epoch 6/10 (Fold 5, Combo 1): Train Loss: 0.7614, Train Acc: 71.39%, Val Loss: 0.7388, Val Acc: 72.00%




Epoch 7/10 (Fold 5, Combo 1): Train Loss: 0.7301, Train Acc: 72.81%, Val Loss: 0.7387, Val Acc: 73.30%




Epoch 8/10 (Fold 5, Combo 1): Train Loss: 0.7057, Train Acc: 73.01%, Val Loss: 0.6876, Val Acc: 74.15%




Epoch 9/10 (Fold 5, Combo 1): Train Loss: 0.6999, Train Acc: 73.13%, Val Loss: 0.6931, Val Acc: 74.40%




Epoch 10/10 (Fold 5, Combo 1): Train Loss: 0.6829, Train Acc: 73.76%, Val Loss: 0.7060, Val Acc: 73.35%
Completed 1/2 hyperparameter combinations for Fold 5


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 2/2: lr=0.001, attention_heads=16




Epoch 1/10 (Fold 5, Combo 2): Train Loss: 1.1122, Train Acc: 65.64%, Val Loss: 0.9372, Val Acc: 66.40%




Epoch 2/10 (Fold 5, Combo 2): Train Loss: 0.9128, Train Acc: 67.62%, Val Loss: 0.8601, Val Acc: 69.45%




Epoch 3/10 (Fold 5, Combo 2): Train Loss: 0.8440, Train Acc: 69.33%, Val Loss: 0.8307, Val Acc: 69.50%




Epoch 4/10 (Fold 5, Combo 2): Train Loss: 0.8013, Train Acc: 70.07%, Val Loss: 0.7791, Val Acc: 70.80%




Epoch 5/10 (Fold 5, Combo 2): Train Loss: 0.7451, Train Acc: 72.35%, Val Loss: 0.7400, Val Acc: 71.95%




Epoch 6/10 (Fold 5, Combo 2): Train Loss: 0.7171, Train Acc: 73.66%, Val Loss: 0.7487, Val Acc: 71.50%




Epoch 7/10 (Fold 5, Combo 2): Train Loss: 0.6895, Train Acc: 73.60%, Val Loss: 0.7070, Val Acc: 73.80%




Epoch 8/10 (Fold 5, Combo 2): Train Loss: 0.6998, Train Acc: 73.45%, Val Loss: 0.7075, Val Acc: 73.50%




Epoch 9/10 (Fold 5, Combo 2): Train Loss: 0.6447, Train Acc: 75.65%, Val Loss: 0.6755, Val Acc: 75.05%




Epoch 10/10 (Fold 5, Combo 2): Train Loss: 0.6161, Train Acc: 76.46%, Val Loss: 0.7164, Val Acc: 71.00%
Completed 2/2 hyperparameter combinations for Fold 5


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 3/2: lr=0.0001, attention_heads=8




Epoch 1/10 (Fold 5, Combo 3): Train Loss: 1.0615, Train Acc: 66.11%, Val Loss: 0.8351, Val Acc: 71.55%




Epoch 2/10 (Fold 5, Combo 3): Train Loss: 0.7362, Train Acc: 75.75%, Val Loss: 0.6812, Val Acc: 78.35%




Epoch 3/10 (Fold 5, Combo 3): Train Loss: 0.6003, Train Acc: 80.11%, Val Loss: 0.5812, Val Acc: 81.40%




Epoch 4/10 (Fold 5, Combo 3): Train Loss: 0.5175, Train Acc: 83.21%, Val Loss: 0.5831, Val Acc: 80.90%




Epoch 5/10 (Fold 5, Combo 3): Train Loss: 0.4644, Train Acc: 84.68%, Val Loss: 0.4826, Val Acc: 83.80%




Epoch 6/10 (Fold 5, Combo 3): Train Loss: 0.3792, Train Acc: 87.82%, Val Loss: 0.4880, Val Acc: 83.70%




Epoch 7/10 (Fold 5, Combo 3): Train Loss: 0.3419, Train Acc: 89.23%, Val Loss: 0.5034, Val Acc: 83.50%




Epoch 8/10 (Fold 5, Combo 3): Train Loss: 0.2854, Train Acc: 90.85%, Val Loss: 0.4387, Val Acc: 85.10%




Epoch 9/10 (Fold 5, Combo 3): Train Loss: 0.2423, Train Acc: 92.23%, Val Loss: 0.5033, Val Acc: 83.15%




Epoch 10/10 (Fold 5, Combo 3): Train Loss: 0.2016, Train Acc: 93.76%, Val Loss: 0.4744, Val Acc: 84.80%
Completed 3/2 hyperparameter combinations for Fold 5


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting hyperparameter combination 4/2: lr=0.0001, attention_heads=16




Epoch 1/10 (Fold 5, Combo 4): Train Loss: 1.0892, Train Acc: 66.51%, Val Loss: 0.8462, Val Acc: 72.40%




Epoch 2/10 (Fold 5, Combo 4): Train Loss: 0.7513, Train Acc: 75.30%, Val Loss: 0.7225, Val Acc: 75.90%




Epoch 3/10 (Fold 5, Combo 4): Train Loss: 0.6170, Train Acc: 79.56%, Val Loss: 0.6024, Val Acc: 80.45%




Epoch 4/10 (Fold 5, Combo 4): Train Loss: 0.5262, Train Acc: 83.14%, Val Loss: 0.5213, Val Acc: 83.15%




Epoch 5/10 (Fold 5, Combo 4): Train Loss: 0.4313, Train Acc: 86.30%, Val Loss: 0.5114, Val Acc: 82.70%




Epoch 6/10 (Fold 5, Combo 4): Train Loss: 0.3704, Train Acc: 88.40%, Val Loss: 0.4812, Val Acc: 83.85%




Epoch 7/10 (Fold 5, Combo 4): Train Loss: 0.2980, Train Acc: 90.99%, Val Loss: 0.4650, Val Acc: 84.75%




Epoch 8/10 (Fold 5, Combo 4): Train Loss: 0.2712, Train Acc: 91.50%, Val Loss: 0.4766, Val Acc: 84.70%




Epoch 9/10 (Fold 5, Combo 4): Train Loss: 0.1997, Train Acc: 94.24%, Val Loss: 0.4213, Val Acc: 86.25%


                                                                                                    

Epoch 10/10 (Fold 5, Combo 4): Train Loss: 0.1618, Train Acc: 95.11%, Val Loss: 0.4682, Val Acc: 84.25%
Completed 4/2 hyperparameter combinations for Fold 5
Best Hyperparameters: {'lr': 0.0001, 'attention_heads': 16}
Best Accuracy: 0.8618453865336658




In [3]:
# Loop through all the hyperparameter test results
for i, (params, accuracy) in enumerate(results_tuning):
    print(f"Test {i+1}: Hyperparameters: {params}, Accuracy: {accuracy:.4f}")

# Output the best hyperparameters and accuracy
best_result = max(results_tuning, key=lambda x: x[1])
print("\nBest Hyperparameters:", best_result[0])
print("Best Accuracy:", best_result[1])

Test 1: Hyperparameters: {'lr': 0.001, 'attention_heads': 8}, Accuracy: 0.7397
Test 2: Hyperparameters: {'lr': 0.001, 'attention_heads': 16}, Accuracy: 0.7651
Test 3: Hyperparameters: {'lr': 0.0001, 'attention_heads': 8}, Accuracy: 0.8244
Test 4: Hyperparameters: {'lr': 0.0001, 'attention_heads': 16}, Accuracy: 0.8544
Test 5: Hyperparameters: {'lr': 0.001, 'attention_heads': 8}, Accuracy: 0.7162
Test 6: Hyperparameters: {'lr': 0.001, 'attention_heads': 16}, Accuracy: 0.7516
Test 7: Hyperparameters: {'lr': 0.0001, 'attention_heads': 8}, Accuracy: 0.8369
Test 8: Hyperparameters: {'lr': 0.0001, 'attention_heads': 16}, Accuracy: 0.8618
Test 9: Hyperparameters: {'lr': 0.001, 'attention_heads': 8}, Accuracy: 0.7294
Test 10: Hyperparameters: {'lr': 0.001, 'attention_heads': 16}, Accuracy: 0.7369
Test 11: Hyperparameters: {'lr': 0.0001, 'attention_heads': 8}, Accuracy: 0.8263
Test 12: Hyperparameters: {'lr': 0.0001, 'attention_heads': 16}, Accuracy: 0.8587
Test 13: Hyperparameters: {'lr': 0.00

In [None]:
# Constants for saving models
RESULT_SAVE_DIR = 'drive/MyDrive/ViT_Result'  # Directory to save results
MODEL_SAVE_DIR = 'drive/MyDrive/ViT_Model'  # Directory to save models

best_params = {'lr': 0.0001, 'attention_heads': 16}

# Configurable hyperparameters
EPOCHS = 100   # Set the number of epochs here
BATCH_SIZE = 512  # Set the batch size here
PATIENCE = 10 # Early stopping patience (number of epochs with no improvement)

# Create a training function for each fold
def train_model(X_train, y_train, X_test, y_test, device, params, fold):
    model = ViTForImageClassification.from_pretrained(
        MODEL_NAME,
        num_labels=len(set(y_train)),
        num_attention_heads=params['attention_heads']
    )
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=params['lr'])
    criterion = nn.CrossEntropyLoss()

    train_dataset = SkinCancerDataset(X_train, IMAGE_DIRS, feature_extractor, transform=train_transform)
    test_dataset = SkinCancerDataset(X_test, IMAGE_DIRS, feature_extractor, transform=val_test_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    best_val_acc = 0
    patience_counter = 0

    # To store the epoch-wise results
    epoch_results = {
        'epoch': [],
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }

    # Training loop
    for epoch in range(EPOCHS):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images).logits
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_acc = correct / total

        # Validation step
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images).logits
                loss = criterion(outputs, labels)

                val_loss += loss.item() * images.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_acc = correct / total

        # Append the results for the current epoch
        epoch_results['epoch'].append(epoch + 1)
        epoch_results['train_loss'].append(train_loss / total)
        epoch_results['train_acc'].append(train_acc * 100)
        epoch_results['val_loss'].append(val_loss / total)
        epoch_results['val_acc'].append(val_acc * 100)

        # Print epoch progress
        print(f"Fold {fold+1}, Epoch {epoch+1}/{EPOCHS} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc*100:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc*100:.2f}%")

        # Early stopping logic
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= PATIENCE:
            print("Early stopping triggered.")
            break

    # Save epoch-wise results to CSV file
    epoch_df = pd.DataFrame(epoch_results)
    accuracy_file_name = f"{RESULT_SAVE_DIR}/fold{fold+1}_accuracy.csv"
    epoch_df.to_csv(accuracy_file_name, index=False)
    print(f"Epoch accuracy and loss saved to {accuracy_file_name}")

    # After training, evaluate on the test fold and save predictions
    model.eval()
    predictions = []
    actuals = []
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs = model(images).logits
            _, predicted = torch.max(outputs, 1)
            predictions.extend(predicted.cpu().numpy())
            actuals.extend(labels.cpu().numpy())

    # Save results to a CSV file
    result_df = pd.DataFrame({
        'Prediction': predictions,
        'Actual': actuals
    })
    file_name = f"{RESULT_SAVE_DIR}/fold{fold+1}.csv"
    result_df.to_csv(file_name, index=False)
    print(f"Fold {fold+1} predictions saved to {file_name}")

    return model, best_val_acc

# Main 20-fold Cross-Validation with stratifiedKFold
X = metadata
y = metadata['dx_encoded']

# Initialize stratified KFold with 20 splits
skf = stratifiedKFold(n_split=20, shuffle=True, random_state=42)

best_model_state_dict = None  # To store the best model's state_dict
best_model_acc = 0  # To track the best validation accuracy across folds

# Iterate through stratified KFold splits
for fold_idx, (train_idx, test_idx) in enumerate(skf.split(X, y)):
    print(f"\n=== Starting Fold {fold_idx+1}/20 ===")
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Train the model and test
    model, val_acc = train_model(X_train, y_train, X_test, y_test, device, best_params, fold_idx)

    # Check if this fold's model has the best validation accuracy so far
    if val_acc > best_model_acc:
        best_model_acc = val_acc
        best_model_state_dict = model.state_dict()  # Save the best model state
        # Save the best model's state_dict to a file
        best_model_path = os.path.join(MODEL_SAVE_DIR, 'best_model.pth')
        torch.save(best_model_state_dict, best_model_path)
        print(f"New best model saved from Fold {fold_idx+1} with validation accuracy: {val_acc:.4f}")



=== Starting Fold 1/20 ===


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1, Epoch 1/100 - Train Loss: 11259.0754, Train Acc: 63.81%, Val Loss: 487.1171, Val Acc: 67.66%
Fold 1, Epoch 2/100 - Train Loss: 8187.9549, Train Acc: 71.05%, Val Loss: 415.6670, Val Acc: 74.01%
Fold 1, Epoch 3/100 - Train Loss: 6885.5593, Train Acc: 76.82%, Val Loss: 344.3634, Val Acc: 79.37%
Fold 1, Epoch 4/100 - Train Loss: 5721.7124, Train Acc: 81.04%, Val Loss: 321.1661, Val Acc: 78.17%
Fold 1, Epoch 5/100 - Train Loss: 4969.4738, Train Acc: 83.61%, Val Loss: 278.1323, Val Acc: 81.15%
Fold 1, Epoch 6/100 - Train Loss: 4538.0464, Train Acc: 84.78%, Val Loss: 261.3093, Val Acc: 83.13%
Fold 1, Epoch 7/100 - Train Loss: 3734.1205, Train Acc: 88.21%, Val Loss: 265.8607, Val Acc: 81.94%
Fold 1, Epoch 8/100 - Train Loss: 3252.6198, Train Acc: 89.69%, Val Loss: 273.0413, Val Acc: 81.15%
Fold 1, Epoch 9/100 - Train Loss: 2852.2734, Train Acc: 91.05%, Val Loss: 236.3373, Val Acc: 83.53%
Fold 1, Epoch 10/100 - Train Loss: 2336.1528, Train Acc: 92.99%, Val Loss: 272.0247, Val Acc: 83.13

In [None]:
# Precision-recall curves