# Vision Transformers

In [None]:
import os
import torch
import pandas as pd
import numpy as np
import torchvision.transforms as transforms
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader
from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image


class SkinToneDataset(Dataset):
    """Map-style dataset yielding ``(pixel_values, label)`` pairs for ViT training.

    Every row of the CSV must provide an ``image_path`` column (location of
    the image file) and a ``label`` column. ``label`` has to be numeric by the
    time an item is fetched, because it is converted to a ``torch.long``
    tensor. Other columns are kept in ``self.data`` but are not read here.
    """

    def __init__(self, csv_path, transform=None):
        """Read the sample table and load the ViT feature extractor.

        Args:
            csv_path: Path of the CSV file describing the samples.
            transform: Optional callable applied to the RGB PIL image before
                feature extraction.
        """
        self.data = pd.read_csv(csv_path)
        self.transform = transform
        self.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")

    def __len__(self):
        """Return the number of rows in the sample table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, preprocess and return the sample at position ``idx``.

        Returns:
            Tuple ``(image, label)``: the feature extractor's ``pixel_values``
            with the leading axis squeezed out, and the label as a
            ``torch.long`` tensor.
        """
        row = self.data.iloc[idx]

        # Open under a context manager so the file handle is released as soon
        # as the pixels have been copied into the converted RGB image.
        with Image.open(row["image_path"]) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        # Feature extraction for ViT; squeeze(0) removes the leading axis of
        # the returned tensor (presumably the batch axis of a single image).
        image = self.feature_extractor(image, return_tensors="pt")["pixel_values"].squeeze(0)

        return image, torch.tensor(row["label"], dtype=torch.long)


In [None]:
# File Paths
csv_path = "data/augmented_dataset_plus_unused1.csv"

# Load dataset WITHOUT transformations
dataset = SkinToneDataset(csv_path)

# Split into Training and Validation Sets (80% / 20%)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so every run trains and validates on the same rows;
# without a fixed generator the partition changes on each execution,
# which makes scores from different runs incomparable.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)


# Define DataLoaders: training batches are reshuffled each epoch, validation
# batches keep the order of val_dataset.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)




In [None]:
# Pick the compute device first: CUDA when it is available, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pre-trained Vision Transformer with one output per distinct label in the
# dataset. ignore_mismatched_sizes=True permits loading the checkpoint even
# when the size of its classification head differs from num_labels.
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=len(pd.unique(dataset.data["label"])),
    ignore_mismatched_sizes=True,
)

# Place the model on the selected device (also the cell's displayed value).
model.to(device)


In [None]:
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss

# AdamW over all model parameters (the usual choice for Transformer-based
# models), with a small learning rate and weight decay.
optimizer = AdamW(params=model.parameters(), lr=2e-5, weight_decay=0.01)

# Cross-entropy between the model's logits and the integer class labels.
criterion = CrossEntropyLoss()


In [None]:
# Assign each distinct label its position in sorted order, then replace the
# label column in place with those integer codes.
label_mapping = {
    name: code
    for code, name in enumerate(sorted(dataset.data["label"].unique()))
}
dataset.data["label"] = dataset.data["label"].map(label_mapping)

# Show the mapping and the codes now present in the column
print("Label mapping:", label_mapping)
print("Updated labels:", dataset.data["label"].unique())


Label mapping: {'acne': 0, 'acne-vulgaris': 1, 'actinic-keratosis': 2, 'basal-cell-carcinoma': 3, 'basal-cell-carcinoma-morpheiform': 4, 'dermatofibroma': 5, 'dermatomyositis': 6, 'dyshidrotic-eczema': 7, 'eczema': 8, 'epidermal-nevus': 9, 'folliculitis': 10, 'kaposi-sarcoma': 11, 'keloid': 12, 'malignant-melanoma': 13, 'melanoma': 14, 'mycosis-fungoides': 15, 'prurigo-nodularis': 16, 'pyogenic-granuloma': 17, 'seborrheic-keratosis': 18, 'squamous-cell-carcinoma': 19, 'superficial-spreading-melanoma-ssm': 20}
Updated labels: [ 4 16 14 17  9  0 19 12  6 18  3 20 15 13  1  8  2 10 11  5  7]


In [None]:
from sklearn.metrics import f1_score
import pandas as pd

def train_model(model, train_loader, val_loader, epochs=5):
    """Fine-tune ``model`` and validate it after every epoch.

    Uses the module-level ``device``, ``criterion`` and ``optimizer``, and
    calls ``validate_model`` at the end of each epoch.

    Args:
        model: Model whose forward output exposes ``.logits``.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Loader handed to ``validate_model``.
        epochs: Number of passes over ``train_loader``.
    """
    for epoch in range(epochs):
        # validate_model() switches the model to eval mode, so training mode
        # must be re-enabled at the start of every epoch, not just once.
        model.train()

        total_loss = 0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward Pass
            outputs = model(images).logits
            loss = criterion(outputs, labels)

            # Backward Pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Track running loss (sum over batches) and accuracy
            total_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        # An empty loader would otherwise raise ZeroDivisionError
        train_acc = correct / total if total else 0.0
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {train_acc:.4f}")

        # Validate after each epoch
        validate_model(model, val_loader)

def validate_model(model, val_loader):
    """Evaluate ``model`` on ``val_loader`` and save per-sample predictions.

    Prints accuracy and weighted F1, writes the per-sample table to
    ``validation_results.csv`` (overwritten on every call) and prints its
    first rows. Uses the module-level ``device``.

    Args:
        model: Model whose forward output exposes ``.logits``.
        val_loader: Iterable of ``(images, labels)`` or
            ``(images, labels, paths)`` batches.

    Returns:
        DataFrame with columns ``image_path``, ``true_label`` and
        ``predicted_label``, one row per validation sample.
    """
    # Remember the current mode so a surrounding training loop is not left
    # in eval mode once validation has finished.
    was_training = model.training
    model.eval()

    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    image_paths = []

    try:
        with torch.no_grad():
            for batch in val_loader:
                if len(batch) == 3:
                    images, labels, paths = batch  # Loader also yields paths
                else:
                    images, labels = batch
                    paths = ["unknown_path"] * len(labels)  # Placeholder paths if not provided

                images, labels = images.to(device), labels.to(device)
                outputs = model(images).logits
                preds = outputs.argmax(dim=1)

                correct += (preds == labels).sum().item()
                total += labels.size(0)

                all_preds.extend(preds.cpu().tolist())  # Store predictions
                all_labels.extend(labels.cpu().tolist())  # Store true labels
                image_paths.extend(paths)  # Store image paths
    finally:
        model.train(was_training)

    if total:
        val_acc = correct / total
        f1 = f1_score(all_labels, all_preds, average='weighted')  # Weighted F1-score
    else:
        # Nothing was evaluated: report zeros instead of dividing by zero
        val_acc = 0.0
        f1 = 0.0

    print(f"Validation Accuracy: {val_acc:.4f}, F1 Score: {f1:.4f}")

    # Create DataFrame for validation results
    val_df = pd.DataFrame({
        "image_path": image_paths,
        "true_label": all_labels,
        "predicted_label": all_preds
    })

    # Save to CSV and print the first few rows for review
    val_df.to_csv("validation_results.csv", index=False)
    print(val_df.head())  # Show a preview of the results

    return val_df  # Return DataFrame for further analysis if needed

# Run the fine-tuning loop for five epochs, validating after each one
train_model(model=model, train_loader=train_loader, val_loader=val_loader, epochs=5)



Epoch [1/5], Loss: 418.1836, Accuracy: 0.4652
Validation Accuracy: 0.6546, F1 Score: 0.6315
     image_path  true_label  predicted_label
0  unknown_path          17               17
1  unknown_path          17               19
2  unknown_path           6               10
3  unknown_path           1                1
4  unknown_path          10               10
Epoch [2/5], Loss: 184.2396, Accuracy: 0.7901
Validation Accuracy: 0.7447, F1 Score: 0.7406
     image_path  true_label  predicted_label
0  unknown_path          17               17
1  unknown_path          17               19
2  unknown_path           6               15
3  unknown_path           1                1
4  unknown_path          10               10
Epoch [3/5], Loss: 83.6419, Accuracy: 0.9318
Validation Accuracy: 0.7799, F1 Score: 0.7781
     image_path  true_label  predicted_label
0  unknown_path          17               17
1  unknown_path          17               19
2  unknown_path           6                6
3  un

# Below is the original code run for the best-performing model; **Kaggle F1: 0.68839** (89% sure)
## Kept to compare performance with other models
### **QUESTION:** Why does performance change drastically between runs with the same architecture?
  - **TODO: INVESTIGATE MORE.** Things to check first: the seeding of `random_split`, of the shuffled training `DataLoader`, and of the re-initialised classification head (`ignore_mismatched_sizes=True`).

In [None]:
# def train_model(model, train_loader, val_loader, epochs=5):
#     model.train()

#     for epoch in range(epochs):
#         total_loss = 0
#         correct = 0
#         total = 0

#         for images, labels in train_loader:
#             images, labels = images.to(device), labels.to(device)

#             # Forward Pass
#             outputs = model(images).logits
#             loss = criterion(outputs, labels)

#             # Backward Pass
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             # Track Accuracy
#             total_loss += loss.item()
#             preds = outputs.argmax(dim=1)
#             correct += (preds == labels).sum().item()
#             total += labels.size(0)

#         train_acc = correct / total
#         print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {train_acc:.4f}")

#         # Validate after each epoch
#         validate_model(model, val_loader)

# def validate_model(model, val_loader):
#     model.eval()
#     correct = 0
#     total = 0

#     with torch.no_grad():
#         for images, labels in val_loader:
#             images, labels = images.to(device), labels.to(device)
#             outputs = model(images).logits
#             preds = outputs.argmax(dim=1)
#             correct += (preds == labels).sum().item()
#             total += labels.size(0)

#     val_acc = correct / total
#     print(f"Validation Accuracy: {val_acc:.4f}")

# # Train for 5 epochs
# train_model(model, train_loader, val_loader, epochs=5)


Epoch [1/5], Loss: 282.8152, Accuracy: 0.5410
Validation Accuracy: 0.7544
Epoch [2/5], Loss: 108.3549, Accuracy: 0.8574
Validation Accuracy: 0.8223
Epoch [3/5], Loss: 47.2240, Accuracy: 0.9560
Validation Accuracy: 0.8401
Epoch [4/5], Loss: 20.4189, Accuracy: 0.9939
Validation Accuracy: 0.8643
Epoch [5/5], Loss: 10.1488, Accuracy: 0.9996
Validation Accuracy: 0.8530


In [None]:
# Validation accuracy restricted to Fitzpatrick type 6.
# val_dataset is a torch Subset, which cannot be filtered like a DataFrame,
# so the table is rebuilt from validate_model's per-sample predictions and
# the matching rows of the source CSV.
val_results = validate_model(model, val_loader)

# val_loader iterates val_dataset in order (shuffle=False), so prediction i
# belongs to the CSV row at val_dataset.indices[i].
val_rows = dataset.data.iloc[list(val_dataset.indices)]
if len(val_rows) != len(val_results) or not (
    val_rows["label"].to_numpy() == val_results["true_label"].to_numpy()
).all():
    raise ValueError("Validation predictions are not aligned with the validation rows")
val_results["fitzpatrick_scale"] = val_rows["fitzpatrick_scale"].to_numpy()

# Filter validation set to include only Fitzpatrick type 6
val_subset = val_results[val_results["fitzpatrick_scale"] == 6]

# Count correct predictions
correct = (val_subset["predicted_label"] == val_subset["true_label"]).sum()

# Count total predictions for Fitzpatrick 6
total = len(val_subset)

# Compute accuracy (0 when the validation split holds no type-6 samples)
fitzpatrick_6_accuracy = correct / total if total > 0 else 0

print(f"Accuracy for Fitzpatrick Type 6: {fitzpatrick_6_accuracy:.4f} ({correct}/{total} correct)")

In [None]:
# CSV describing the test images; read both by TestDataset and by pd.read_csv below.
test_df_filename = "data/combined_test.csv"

In [None]:
def generate_predictions(model, test_loader, test_df):
    """Predict a class index for every test image and attach it to ``test_df``.

    Uses the module-level ``device``.

    Args:
        model: Model whose forward output exposes ``.logits``.
        test_loader: Iterable of ``(images, image_paths)`` batches.
        test_df: DataFrame with an ``image_path`` column whose values match
            the paths yielded by ``test_loader``.

    Returns:
        A copy of ``test_df`` with an added ``label_num`` column; rows whose
        path received no prediction get a missing value (left merge).
    """
    model.eval()
    predictions = []

    with torch.no_grad():
        for images, image_paths in test_loader:  # Loader yields image paths, not ids
            images = images.to(device)

            # Index of the highest logit is the predicted class
            preds = model(images).logits.argmax(dim=1)

            predictions.extend(
                {"image_path": image_path, "label_num": pred}
                for image_path, pred in zip(image_paths, preds.cpu().tolist())
            )

    # Name the columns explicitly so that an empty loader still produces a
    # frame the merge below can join on.
    pred_df = pd.DataFrame(predictions, columns=["image_path", "label_num"])

    # A path listed more than once would otherwise multiply rows in the merge.
    pred_df = pred_df.drop_duplicates(subset="image_path")

    # Merge predictions into the original test DataFrame using "image_path"
    return test_df.merge(pred_df, on="image_path", how="left")

class TestDataset(Dataset):
    """Map-style dataset yielding ``(pixel_values, image_path)`` for inference.

    Every row of the CSV must provide an ``image_path`` column. Paths that do
    not already end in ``.jpg`` get that extension appended before the file is
    opened, and the extended path is what gets returned.
    """

    def __init__(self, csv_path, transform=None):
        """Read the sample table and load the ViT feature extractor.

        Args:
            csv_path: Path of the CSV file listing the test images.
            transform: Optional callable applied to the RGB PIL image before
                feature extraction.
        """
        self.data = pd.read_csv(csv_path)
        self.transform = transform
        self.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")

    def __len__(self):
        """Return the number of rows in the sample table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load and preprocess the image at position ``idx``.

        Returns:
            Tuple ``(image, image_path)``: the feature extractor's
            ``pixel_values`` with the leading axis squeezed out, and the path
            (including the ``.jpg`` extension) the image was read from.
        """
        row = self.data.iloc[idx]
        image_path = row["image_path"]
        if not image_path.endswith(".jpg"):
            image_path += ".jpg"

        # Open under a context manager so the file handle is released as soon
        # as the pixels have been copied into the converted RGB image.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        image = self.feature_extractor(image, return_tensors="pt")["pixel_values"].squeeze(0)

        return image, image_path



In [None]:
# Inference data: keep the CSV order (no shuffling), 16 images per batch
test_dataset = TestDataset(csv_path=test_df_filename)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)



In [None]:
test_df = pd.read_csv(test_df_filename)

# Append ".jpg" only where it is missing - the same rule TestDataset applies -
# so these paths stay identical to the ones the loader yields and the merge
# in generate_predictions can match them.
test_df["image_path"] = test_df["image_path"].astype(str).map(
    lambda path: path if path.endswith(".jpg") else path + ".jpg"
)

# Show full paths instead of truncating long cell values
pd.set_option('display.max_colwidth', None)
test_df.head()



Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,qc,ddi_scale,image_path
0,0844ae634f0e6e7ef1f73c2aeecbae0e,2,2,,12,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/0844ae634f0e6e7ef1f73c2aeecbae0e.jpg
1,3b290d262098f761d719aa07cf36c040,4,3,,34,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/3b290d262098f761d719aa07cf36c040.jpg
2,cf561d08ac46d0fda678bff6621005ee,2,3,,12,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/cf561d08ac46d0fda678bff6621005ee.jpg
3,e6371069be05c6b0a95b4b3f1bacc9a5,4,3,,34,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/e6371069be05c6b0a95b4b3f1bacc9a5.jpg
4,f76cddb37265f97508f159078dcc7e7c,5,5,,56,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/f76cddb37265f97508f159078dcc7e7c.jpg


In [None]:
# Drop any label_num left over from a previous execution of this cell first;
# merging a second copy would produce label_num_x / label_num_y columns.
test_df = generate_predictions(
    model, test_loader, test_df.drop(columns=["label_num"], errors="ignore")
)

In [None]:
# Preview the first rows, now including the predicted label_num column
test_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,qc,ddi_scale,image_path,label_num
0,0844ae634f0e6e7ef1f73c2aeecbae0e,2,2,,12,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/0844ae634f0e6e7ef1f73c2aeecbae0e.jpg,8
1,3b290d262098f761d719aa07cf36c040,4,3,,34,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/3b290d262098f761d719aa07cf36c040.jpg,3
2,cf561d08ac46d0fda678bff6621005ee,2,3,,12,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/cf561d08ac46d0fda678bff6621005ee.jpg,19
3,e6371069be05c6b0a95b4b3f1bacc9a5,4,3,,34,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/e6371069be05c6b0a95b4b3f1bacc9a5.jpg,1
4,f76cddb37265f97508f159078dcc7e7c,5,5,,56,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/f76cddb37265f97508f159078dcc7e7c.jpg,10


In [None]:
# Invert label_mapping (class index -> label name) and translate the
# predicted indices back to label names.
reverse_label_mapping = dict(zip(label_mapping.values(), label_mapping.keys()))
test_df["label"] = test_df["label_num"].map(reverse_label_mapping)
test_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,qc,ddi_scale,image_path,label_num,label
0,0844ae634f0e6e7ef1f73c2aeecbae0e,2,2,,12,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/0844ae634f0e6e7ef1f73c2aeecbae0e.jpg,8,eczema
1,3b290d262098f761d719aa07cf36c040,4,3,,34,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/3b290d262098f761d719aa07cf36c040.jpg,3,basal-cell-carcinoma
2,cf561d08ac46d0fda678bff6621005ee,2,3,,12,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/cf561d08ac46d0fda678bff6621005ee.jpg,19,squamous-cell-carcinoma
3,e6371069be05c6b0a95b4b3f1bacc9a5,4,3,,34,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/e6371069be05c6b0a95b4b3f1bacc9a5.jpg,1,acne-vulgaris
4,f76cddb37265f97508f159078dcc7e7c,5,5,,56,/content/drive/My Drive/UCLA_AJL_Team16/bttai-ajl-2025/test/test/f76cddb37265f97508f159078dcc7e7c.jpg,10,folliculitis


In [None]:
# Select the two submission columns explicitly instead of dropping the others,
# so an extra column in test_df can never leak into the submission file.
submission = test_df[["md5hash", "label"]].copy()
print(submission.shape) #Should ALWAYS be (1227,2)
submission.head()

(1227, 2)


Unnamed: 0,md5hash,label
0,0844ae634f0e6e7ef1f73c2aeecbae0e,eczema
1,3b290d262098f761d719aa07cf36c040,basal-cell-carcinoma
2,cf561d08ac46d0fda678bff6621005ee,squamous-cell-carcinoma
3,e6371069be05c6b0a95b4b3f1bacc9a5,acne-vulgaris
4,f76cddb37265f97508f159078dcc7e7c,folliculitis


In [None]:
# F1 score of 0.68839 (the first submission, without the mismatched data, was 0.50750)
submission.to_csv(path_or_buf="test_predictions2.csv", index=False)