In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for current_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(current_dir, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

In [None]:
from torchvision.transforms import InterpolationMode

# Side length, in pixels, of the square crops produced by both pipelines, and the
# per-channel statistics used for normalisation. Defined once so the training and
# validation transforms cannot drift apart.
IMAGE_SIZE = 224
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Train Transform (resize, then flip / rotation / random-crop augmentation)
train_transform = transforms.Compose([
    # A one-element size matches the SHORTER edge to IMAGE_SIZE and keeps the aspect
    # ratio; the output is not necessarily square at this point.
    transforms.Resize([IMAGE_SIZE], interpolation=InterpolationMode.BICUBIC),
    transforms.RandomHorizontalFlip(p=0.5),  # Random horizontal flip with 50% probability
    transforms.RandomVerticalFlip(p=0.5),  # Random vertical flip with 50% probability
    transforms.RandomRotation(degrees=30),  # Random rotation within ±30 degrees
    # Pad 4 px on every border, then take a random IMAGE_SIZE x IMAGE_SIZE crop.
    transforms.RandomCrop([IMAGE_SIZE], padding=4),
    transforms.ToTensor(),  # Convert to tensor and scale to [0.0, 1.0]
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)  # Normalize per channel
])

# Validation (Inference) Transform: deterministic, no augmentation
val_transform = transforms.Compose([
    # Shorter edge -> IMAGE_SIZE, aspect ratio preserved.
    transforms.Resize([IMAGE_SIZE], interpolation=InterpolationMode.BICUBIC),
    transforms.CenterCrop([IMAGE_SIZE]),  # Center crop to IMAGE_SIZE x IMAGE_SIZE
    transforms.ToTensor(),  # Convert to tensor and scale to [0.0, 1.0]
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)  # Normalize per channel
])

In [None]:
# Root folder of the competition data; class sub-folders live under "train".
root_dir = "/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/"
train_dir = os.path.join(root_dir, 'train')

# Every image under train/ is used for training, with the augmenting transform.
train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)

# Disabled 90/10 train/validation split, kept for reference. It re-points each
# Subset at its own ImageFolder so the validation part uses val_transform.
# full_dataset = datasets.ImageFolder(root_dir+'train',transform=train_transform)
# train_size  = int(0.9*len(full_dataset))
# val_size = len(full_dataset) - train_size
# train_dataset,val_dataset = random_split(
#     full_dataset,[train_size,val_size]
# )
# train_dataset.dataset = datasets.ImageFolder(
#     root_dir + 'train',transform=train_transform
# )
# val_dataset.dataset = datasets.ImageFolder(
#     root_dir + 'train', transform=val_transform
# )

In [None]:
# print(
#     {        
#     "train_size": len(train_dataset),
#     "val_size": len(val_dataset),
#     # "test_size": len(test_dataset)
#     }
# )

In [None]:
import os
# Number of worker processes for the DataLoader. os.cpu_count() returns None when
# the count cannot be determined; fall back to 0 (load in the main process) so the
# value is always a valid integer for `num_workers`.
num_cpus = os.cpu_count() or 0
num_cpus

In [None]:
# Shuffled mini-batches of 64 training images, loaded by `num_cpus` worker processes.
# NOTE(review): the classification head contains BatchNorm1d, which rejects a batch
# of a single sample in training mode — if the dataset size leaves such a final
# batch, consider drop_last=True. Confirm against the actual dataset length.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=num_cpus,
)
# val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=num_cpus)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

# Load torchvision's ViT-H/14 with the IMAGENET1K_SWAG_LINEAR_V1 pretrained weights.
pretrained_weights = models.ViT_H_14_Weights.IMAGENET1K_SWAG_LINEAR_V1
model = models.vit_h_14(weights=pretrained_weights)

In [None]:
# Freeze every pretrained weight so that only the new head is trained.
for frozen_param in model.parameters():
    frozen_param.requires_grad = False

# Replacement classification head: 1280 -> 128 -> 10 with batch norm, GELU and dropout.
classifier_head = nn.Sequential(
    nn.Linear(1280, 128, bias=True),
    nn.BatchNorm1d(128),
    nn.GELU(),
    nn.Dropout(p=0.25),
    nn.Linear(128, 10, bias=True),
)
model.heads = classifier_head

# Explicitly mark the head's parameters as trainable.
for head_param in model.heads.parameters():
    head_param.requires_grad = True



In [None]:
# Target device: the first CUDA GPU when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# With more than one GPU visible, wrap the model in DataParallel for multi-GPU support.
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    model = nn.DataParallel(model)

# Move the (possibly wrapped) model to the target device.
model = model.to(device)

In [None]:
# Cross-entropy loss on the model outputs, optimised with NAdam at a learning rate of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.NAdam(params=model.parameters(), lr=1e-3)

In [None]:
import torch
from sklearn.metrics import f1_score
from tqdm import tqdm

def evaluate_model(model, dataloader, device):
    """
    Run a model over a DataLoader and report accuracy and macro-averaged F1.

    Args:
        model (torch.nn.Module): Model to evaluate; it is switched to eval mode.
        dataloader (torch.utils.data.DataLoader): Yields (inputs, labels) batches.
        device (torch.device): Device the batches are moved to before the forward pass.

    Returns:
        accuracy (float): Fraction of samples whose predicted class equals the label.
        f1 (float): Macro-averaged F1 score over the same predictions.
    """
    model.eval()

    pred_chunks = []
    label_chunks = []
    progress = tqdm(dataloader, desc="Validating Model", total=len(dataloader))

    # No gradients are needed while scoring.
    with torch.no_grad():
        for batch_inputs, batch_labels in progress:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)

            # Predicted class = index of the largest output along the class dimension.
            pred_chunks.append(torch.argmax(logits, dim=1).cpu())
            label_chunks.append(batch_labels.cpu())

    # Flatten the per-batch results into one tensor each.
    predictions = torch.cat(pred_chunks)
    targets = torch.cat(label_chunks)

    num_correct = (predictions == targets).sum().item()
    num_samples = targets.size(0)
    accuracy = num_correct / num_samples

    f1 = f1_score(targets.numpy(), predictions.numpy(), average="macro")

    return accuracy, f1

In [None]:
from tqdm import tqdm
import torch
from torch.cuda.amp import GradScaler, autocast
import os

num_epochs = 5

# Mixed precision via torch.cuda.amp is only usable on CUDA; disabling it explicitly
# on CPU-only runs avoids the warnings GradScaler/autocast would otherwise emit.
use_amp = torch.cuda.is_available()

# Initialize GradScaler for mixed precision training
scaler = GradScaler(enabled=use_amp)

# Empty unused variables and clear GPU memory
torch.cuda.empty_cache()

# Directory to save the latest checkpoint
checkpoint_dir = "./checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)  # Create the directory if it doesn't exist

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    train_loss = 0.0
    with tqdm(train_dataloader, desc=f"Epoch [{epoch+1}/{num_epochs}]", unit="batch") as pbar:
        # `batches_seen` counts the batches processed so far in this epoch (1-based).
        for batches_seen, (images, labels) in enumerate(pbar, start=1):
            images, labels = images.to(device), labels.to(device)

            # Forward pass with mixed precision
            with autocast(enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            # Backward pass with scaled gradients
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item()
            # Show the running mean over the batches seen so far. Dividing by the
            # total number of batches would under-report the loss until the last step.
            pbar.set_postfix({"loss": f"{train_loss / batches_seen:.4f}"})

    # Evaluate the model after each epoch
    # accuracy, f1 = evaluate_model(model, val_dataloader, device)
    # print(f"Epoch {epoch+1} - accuracy: {accuracy}, f1_score: {f1}")

    # Save the latest checkpoint
    # checkpoint_path = os.path.join(checkpoint_dir, "latest_checkpoint.pth")
    # torch.save({
    #     'epoch': epoch + 1,
    #     'model_state_dict': model.state_dict(),
    #     'optimizer_state_dict': optimizer.state_dict(),
    #     'scaler_state_dict': scaler.state_dict(),
    #     'loss': train_loss / len(train_dataloader),
    #     'accuracy': accuracy,
    #     'f1_score': f1,
    # }, checkpoint_path)
    # print(f"Checkpoint saved at {checkpoint_path}")

In [None]:
from torchvision.io import read_image
import pandas as pd
from PIL import Image
from tqdm import tqdm
import os
import torch

def classify_images_to_csv(image_folder, model, transform, output_csv):
    """
    Classifies images in a folder using a PyTorch model and saves predictions to a CSV file,
    with progress tracking using tqdm.

    Files are processed in sorted name order so the CSV is reproducible, and each
    image is moved to the device holding the model's parameters before inference.

    Args:
        image_folder (str): Path to the folder containing images. Only files ending in
            .png, .jpg or .jpeg (case-insensitive) are processed.
        model (torch.nn.Module): Trained PyTorch model for classification. It is put in
            evaluation mode.
        transform (torchvision.transforms.Compose): Transformations for preprocessing images.
        output_csv (str): Path for saving the output CSV file. It has the columns
            "Image_ID" (file name without its extension) and "Label" (predicted class index).

    Returns:
        None
    """
    # Ensure the model is in evaluation mode
    model.eval()

    # Run inference on the device the model's parameters live on: passing a CPU tensor
    # to a model that sits on a GPU raises a RuntimeError.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # List all image files; os.listdir order is arbitrary, so sort for a stable CSV.
    image_files = sorted(
        f for f in os.listdir(image_folder)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # Prepare a list to store results
    results = []

    with torch.no_grad():
        # Process each image in the folder with tqdm progress bar
        for image_name in tqdm(image_files, desc="Processing Images", total=len(image_files)):
            image_path = os.path.join(image_folder, image_name)

            # Close the file handle as soon as the pixels are decoded; convert() returns
            # an independent image that stays valid after the file is closed.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")

            batch = transform(image).unsqueeze(0).to(device)  # Add batch dimension

            # argmax of the raw outputs selects the same class as argmax of their
            # softmax, so the softmax step is not needed.
            outputs = model(batch)
            label = torch.argmax(outputs, dim=1).item()

            results.append({
                # splitext strips only the final extension, so names containing
                # extra dots are not truncated.
                "Image_ID": os.path.splitext(image_name)[0],
                "Label": label
            })

    # Save results to a CSV file; explicit columns keep the header even with no images.
    df = pd.DataFrame(results, columns=["Image_ID", "Label"])
    df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

In [None]:
# Predict a label for every test image and write the submission file.
classify_images_to_csv(
    image_folder='/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test',
    model=model,
    transform=val_transform,
    output_csv="/kaggle/working/submission.csv",
)