In [37]:
import os
import csv
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split, Dataset
import torch.optim as optim
from tqdm import tqdm
import torchvision
from torchvision import datasets, transforms
import torchvision.models as models
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image
from torch.optim.lr_scheduler import ReduceLROnPlateau
from efficientnet_pytorch import EfficientNet
%matplotlib inline

# Channel-wise mean / std applied by both pipelines (the standard ImageNet
# statistics).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random flip, rotation, colour jitter and a random
# resized crop to 224x224, then tensor conversion and normalisation.
_train_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform_train = transforms.Compose(_train_steps)

# Validation pipeline: deterministic resize to 224x224, then the same tensor
# conversion and normalisation as training.
_val_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform_val = transforms.Compose(_val_steps)

# Load and split the dataset
combined_data_path = './Dataset/train'

# Two views over the same folder: one applies the augmenting training
# transform, the other the deterministic validation transform. Assigning
# `val_subset.dataset.transform` instead would overwrite the transform of the
# single dataset object shared by both subsets and silently disable the
# training augmentation.
full_dataset = datasets.ImageFolder(root=combined_data_path, transform=transform_train)
val_view_dataset = datasets.ImageFolder(root=combined_data_path, transform=transform_val)

# Both views must enumerate the same samples in the same order, otherwise the
# validation indices below would point at different images.
assert full_dataset.samples == val_view_dataset.samples, "dataset folder changed between scans"

num_train = len(full_dataset)  # total number of labelled images (before the split)
num_val = int(0.2 * num_train)

# Seeded generator so the train/validation partition is identical on every
# Restart-and-Run-All.
split_generator = torch.Generator().manual_seed(42)
train_subset, _val_indices_subset = random_split(
    full_dataset, [num_train - num_val, num_val], generator=split_generator
)
# Re-use the held-out indices, but read them through the validation view.
val_subset = torch.utils.data.Subset(val_view_dataset, _val_indices_subset.indices)

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

# Model, Loss, and Optimizer
# The backbone is fetched through torch.hub from the SWAG repository.
# Alternatives previously left commented out here were the torchvision
# builders models.vit_l_32, models.vit_h_14 and models.vit_l_16
# (each with pretrained=True).
_SWAG_HUB_REPO = "facebookresearch/swag"
vit_l16 = torch.hub.load(_SWAG_HUB_REPO, model="vit_l16")

class ModifiedViT(nn.Module):
    """Frozen pretrained backbone followed by a trainable linear classifier.

    Args:
        pretrained_model: Backbone module. Its ``head`` attribute is replaced
            by ``nn.Identity`` and every one of its parameters is frozen.
        num_classes: Number of logits produced by the new head. Defaults to
            100, the value that used to be hard-coded.
        feature_dim: Width of the feature vector the backbone emits. Defaults
            to 1024, the value that used to be hard-coded.
    """

    def __init__(self, pretrained_model, num_classes=100, feature_dim=1024):
        super().__init__()
        self.pretrained = pretrained_model
        self.pretrained.head = nn.Identity()  # Remove the existing linear layer

        # Freeze all pretrained layers; only `new_head` is meant to be trained.
        for param in self.pretrained.parameters():
            param.requires_grad = False
        self.pretrained.eval()

        self.new_head = nn.Sequential(
            nn.Linear(feature_dim, num_classes),
        )

    def train(self, mode=True):
        """Switch the head between train/eval while keeping the backbone in eval.

        The backbone is a fixed feature extractor, so any train-only layers it
        contains (e.g. dropout) must stay disabled even when the surrounding
        training loop calls ``model.train()``.
        """
        super().train(mode)
        self.pretrained.eval()
        return self

    def forward(self, x):
        """Return class logits for a batch ``x`` accepted by the backbone."""
        features = self.pretrained(x)
        return self.new_head(features)


# Pick the device first, then build the wrapped model directly on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ModifiedViT(vit_l16).to(device)

# Only the new head's parameters are handed to the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.new_head.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5)

# Training with dynamic data augmentation
num_epochs = 30
train_losses, train_accuracies, val_accuracies = [], [], []

for epoch in range(num_epochs):
    # ---- training pass -----------------------------------------------------
    model.train()
    running_loss, hits, seen = 0, 0, 0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        hits += (logits.argmax(dim=1) == batch_labels).sum().item()
        seen += batch_labels.size(0)

    # Mean per-batch loss for the epoch; also the quantity the scheduler monitors.
    epoch_loss = running_loss / len(train_loader)
    train_losses.append(epoch_loss)
    train_accuracies.append(100 * hits / seen)

    # ---- validation pass ---------------------------------------------------
    model.eval()
    hits, seen = 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_images)
            hits += (logits.argmax(dim=1) == batch_labels).sum().item()
            seen += batch_labels.size(0)

    val_accuracies.append(100 * hits / seen)
    scheduler.step(epoch_loss)

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_losses[-1]:.4f}, Training Accuracy: {train_accuracies[-1]:.2f}%, Validation Accuracy: {val_accuracies[-1]:.2f}%')



Using cache found in C:\Users\Michael/.cache\torch\hub\facebookresearch_swag_main


In [None]:
# Plotting the results: two side-by-side panels on one 12x6 figure.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: per-epoch training loss.
loss_ax.plot(train_losses, label='Training Loss')
loss_ax.set_title('Training Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

# Right panel: training vs. validation accuracy per epoch.
for series, series_label in (
    (train_accuracies, 'Training Accuracy'),
    (val_accuracies, 'Validation Accuracy'),
):
    acc_ax.plot(series, label=series_label)
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.legend()

plt.show()

In [None]:
# Custom dataset loader to handle unlabeled data.
class UnlabeledDataset(Dataset):
    """Dataset over the image files sitting directly inside one directory.

    Each item is ``(image, file_name)``: the image is opened with PIL,
    converted to RGB and passed through ``transform`` when one is given;
    ``file_name`` is the entry's name inside ``root_dir``.

    Args:
        root_dir: Directory whose regular files are treated as images.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order and includes
        # sub-directories; keep regular files only and sort them so the
        # prediction order is reproducible.
        self.image_list = sorted(
            entry
            for entry in os.listdir(root_dir)
            if os.path.isfile(os.path.join(root_dir, entry))
        )

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        file_name = self.image_list[idx]
        # convert() returns an independent, fully loaded image, so the
        # underlying file can be closed right away instead of leaking a handle
        # per sample.
        with Image.open(os.path.join(self.root_dir, file_name)) as raw:
            image = raw.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, file_name

# NOTE(review): training data is read from './Dataset/train' while this path
# uses a lowercase 'dataset'; on a case-sensitive filesystem only one of the
# two can exist — confirm the real directory name.
test_dataset = UnlabeledDataset(root_dir='./dataset/test', transform=transform_val)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Inference only: evaluation mode and no gradient tracking.
model.eval()

test_predictions = []  # (file name, predicted class name) pairs

with torch.no_grad():
    for images, paths in tqdm(test_loader, desc='Predicting labels'):
        images = images.to(device)

        # Highest-scoring class index per image, mapped back to the class
        # (folder) names of the labelled dataset.
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        predicted_labels = [full_dataset.classes[class_idx] for class_idx in predicted.tolist()]

        test_predictions.extend(
            (Path(path).name, label) for path, label in zip(paths, predicted_labels)
        )

# NOTE(review): the file name mentions efficientNet although the predictions
# above come from `model`; kept unchanged — confirm the intended name.
submission_path = Path('./CSV Files/submission_efficientNet.csv')
# Create the output folder up front so a missing directory cannot discard the
# finished predictions with a FileNotFoundError.
submission_path.parent.mkdir(parents=True, exist_ok=True)

with open(submission_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['ID', 'Label'])
    writer.writerows(test_predictions)  # Writing all predictions at once