In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/sample_submission.csv
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_0612.jpg
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_0946.jpg
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_0755.jpg
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_0178.jpg
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_1593.jpg
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_1975.jpg
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_0777.jpg
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_0998.jpg
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_0073.jpg
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_1769.jpg
/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test/Image_0123.jpg
/kaggle/input/

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler
from PIL import Image
import os
from tqdm import tqdm
from sklearn.metrics import f1_score
from transformers import Swinv2ForImageClassification, AutoImageProcessor
import pandas as pd

# --------------------- 1️⃣ Data Transformations ---------------------
transform = transforms.Compose([
    transforms.Resize((192, 192)),  # SwinV2 large uses 192x192
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# --------------------- 2️⃣ Load Train Dataset ---------------------
train_dataset = datasets.ImageFolder(root="/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/train", transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)  # Reduce batch size if OOM

# --------------------- 3️⃣ Load Test Dataset ---------------------
class TestDataset(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.image_paths = [os.path.join(root, img) for img in os.listdir(root)]
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        img = Image.open(img_path).convert("RGB")  # Ensure RGB format
        if self.transform:
            img = self.transform(img)
        filename = os.path.basename(img_path)  # Extract only the filename
        return img, filename

test_dataset = TestDataset(root="/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test", transform=transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)  # Reduce batch size if needed

# --------------------- 4️⃣ Load Pretrained SwinV2 Large ---------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the SwinV2 model and processor
model_name = "microsoft/swinv2-large-patch4-window12-192-22k"
image_processor = AutoImageProcessor.from_pretrained(model_name)
model = Swinv2ForImageClassification.from_pretrained(model_name)

# Get number of classes
num_classes = len(train_dataset.classes)

# Freeze all parameters of the model
for param in model.parameters():
    param.requires_grad = False

# Replace the classifier (head) with a new one
model.classifier = nn.Sequential(
    nn.Linear(model.config.hidden_size, 128),
    nn.BatchNorm1d(128),
    nn.GELU(),
    nn.Dropout(0.25),
    nn.Linear(128, num_classes)  # Final classification layer
)

# Unfreeze the parameters in the classifier
for param in model.classifier.parameters():
    param.requires_grad = True

model.to(device)

# --------------------- 5️⃣ Loss, Optimizer & Scheduler ---------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.classifier.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
scaler = GradScaler()  # Mixed Precision Training

# --------------------- 6️⃣ Training Function ---------------------
def train(model, train_loader, optimizer, criterion, scaler, scheduler, num_epochs=8):
    model.train()
    for epoch in range(num_epochs):
        total_loss, correct, total = 0, 0, 0
        all_preds, all_targets = [], []

        for inputs, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()

            with autocast():  # Mixed Precision Training
                outputs = model(inputs).logits  # Note: access .logits from the output
                loss = criterion(outputs, targets)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

        f1 = f1_score(all_targets, all_preds, average='macro')  # Compute Macro F1-score
        acc = 100. * correct / total
        scheduler.step()
        
        print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}, Accuracy = {acc:.2f}%, F1-score = {f1:.4f}")

# --------------------- 7️⃣ Test Function (Predicts Labels for Unlabeled Test Data) ---------------------
def predict(model, test_loader):
    model.eval()
    predictions = []
    with torch.no_grad():
        for inputs, filenames in tqdm(test_loader, desc="Testing"):
            inputs = inputs.to(device)
            outputs = model(inputs).logits  # Note: access .logits from the output
            _, predicted = outputs.max(1)  # Get class index
            
            # Store results as (filename, predicted_label)
            for i in range(len(filenames)):
                predictions.append((filenames[i], predicted[i].item()))
    
    return predictions

# --------------------- 8️⃣ Run Training & Make Predictions ---------------------
train(model, train_loader, optimizer, criterion, scaler, scheduler, num_epochs=8)

# Get Predictions on Test Data
test_predictions = predict(model, test_loader)

# Print First 5 Predictions
for filename, label in test_predictions[:5]:
    print(f"{filename} -> Predicted Class: {label}")

# --------------------- 9️⃣ Save Predictions to CSV ---------------------
def predict_and_save_csv(model, test_loader, output_file="submission.csv"):
    model.eval()
    predictions = []

    with torch.no_grad():
        for inputs, filenames in test_loader:
            inputs = inputs.to(device)
            outputs = model(inputs).logits  # Note: access .logits
            _, predicted = outputs.max(1)  # Get predicted class index

            # Remove file extensions from filenames
            for i in range(len(filenames)):
                image_id = os.path.splitext(filenames[i])[0]  # Removes ".jpeg", ".png", etc.
                predictions.append([image_id, predicted[i].item()])

    # Convert to DataFrame
    df = pd.DataFrame(predictions, columns=["Image_ID", "Label"])
    
    # Save to CSV (without index column)
    df.to_csv(output_file, index=False)
    print(f"✅ Submission file saved: {output_file}")

# 🔹 Generate CSV from predictions
output_file = "/kaggle/working/submission.csv"
predict_and_save_csv(model, test_loader, output_file=output_file)


preprocessor_config.json:   0%|          | 0.00/240 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.14M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/915M [00:00<?, ?B/s]

  scaler = GradScaler()  # Mixed Precision Training
  with autocast():  # Mixed Precision Training
Epoch 1/8:   4%|▍         | 27/625 [00:05<01:31,  6.54it/s]Error during conversion: ChunkedEncodingError(ProtocolError('Response ended prematurely'))
Epoch 1/8:   5%|▍         | 30/625 [00:06<01:29,  6.63it/s]

model.safetensors:   0%|          | 0.00/915M [00:00<?, ?B/s]

Epoch 1/8: 100%|██████████| 625/625 [01:39<00:00,  6.29it/s]


Epoch 1: Loss = 594.6557, Accuracy = 81.46%, F1-score = 0.8146


  with autocast():  # Mixed Precision Training
Epoch 2/8: 100%|██████████| 625/625 [01:46<00:00,  5.85it/s]


Epoch 2: Loss = 256.8573, Accuracy = 91.55%, F1-score = 0.9155


  with autocast():  # Mixed Precision Training
Epoch 3/8: 100%|██████████| 625/625 [01:46<00:00,  5.85it/s]


Epoch 3: Loss = 178.0503, Accuracy = 93.48%, F1-score = 0.9348


  with autocast():  # Mixed Precision Training
Epoch 4/8: 100%|██████████| 625/625 [01:47<00:00,  5.83it/s]


Epoch 4: Loss = 144.3943, Accuracy = 94.61%, F1-score = 0.9461


  with autocast():  # Mixed Precision Training
Epoch 5/8: 100%|██████████| 625/625 [01:47<00:00,  5.82it/s]


Epoch 5: Loss = 116.0758, Accuracy = 95.45%, F1-score = 0.9545


  with autocast():  # Mixed Precision Training
Epoch 6/8: 100%|██████████| 625/625 [01:46<00:00,  5.85it/s]


Epoch 6: Loss = 103.8757, Accuracy = 95.86%, F1-score = 0.9586


  with autocast():  # Mixed Precision Training
Epoch 7/8: 100%|██████████| 625/625 [01:47<00:00,  5.83it/s]


Epoch 7: Loss = 90.7227, Accuracy = 96.36%, F1-score = 0.9636


  with autocast():  # Mixed Precision Training
Epoch 8/8: 100%|██████████| 625/625 [01:47<00:00,  5.83it/s]


Epoch 8: Loss = 83.5095, Accuracy = 96.87%, F1-score = 0.9687


Testing: 100%|██████████| 125/125 [00:48<00:00,  2.58it/s]

Image_0612.jpg -> Predicted Class: 5
Image_0946.jpg -> Predicted Class: 4
Image_0755.jpg -> Predicted Class: 8
Image_0178.jpg -> Predicted Class: 2
Image_1593.jpg -> Predicted Class: 5





✅ Submission file saved: /kaggle/working/submission.csv
