In [1]:
# Installation and imports
!pip install timm wandb
!nvidia-smi # Check GPU

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import timm
from tqdm.notebook import tqdm
import random
from pathlib import Path
from google.colab import drive
import warnings
warnings.filterwarnings('ignore')

Wed Dec  4 20:35:13 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P8              12W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
from google.colab import drive
import zipfile

drive.mount('/content/drive')

# Unzip Dataset
zip_path = '/content/drive/My Drive/DL_FinalProject.zip'
extract_path = '/content/dataset'
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)



Mounted at /content/drive


In [3]:
!rm -rf '/content/dataset/__MACOSX'
!ls '/content/dataset/DL_FinalProject'


categories.csv	sample_submission.csv  test  train  train_labeled.csv


In [4]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

# Set device and random seed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

Wed Dec  4 20:36:12 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   48C    P8              12W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [5]:
from torch.utils.data import Dataset
import pandas as pd
import os
from PIL import Image
import torch

class PlantDogDataset(Dataset):
    def __init__(self, img_dir, csv_file=None, transform=None, is_labeled=True):
        """
        Args:
            img_dir (str): Directory with all the images.
            csv_file (str): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied on a sample.
            is_labeled (bool): Whether this is a labeled dataset.
        """
        self.img_dir = img_dir
        self.transform = transform
        self.is_labeled = is_labeled

        if is_labeled and csv_file is not None:
            self.data = pd.read_csv(csv_file)
            if self.data.iloc[0, 1].dtype == 'O':  # If labels are strings
                unique_labels = self.data.iloc[:, 1].unique()
                self.label_map = {label: idx for idx, label in enumerate(sorted(unique_labels))}
                self.data.iloc[:, 1] = self.data.iloc[:, 1].map(self.label_map)
        else:
            # For unlabeled data, just get list of image files
            self.data = pd.DataFrame({
                'filename': [f for f in os.listdir(img_dir) if f.endswith(('.jpg', '.jpeg', '.png'))]
            })

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        try:
            if self.is_labeled:
                img_name = os.path.join(self.img_dir, self.data.iloc[idx, 0])
                label = self.data.iloc[idx, 1]
                image = Image.open(img_name).convert('RGB')

                if self.transform:
                    image = self.transform(image)

                return image, torch.tensor(label, dtype=torch.long)
            else:
                # Return both image and filename for unlabeled data
                img_name = os.path.join(self.img_dir, self.data.iloc[idx, 0])
                image = Image.open(img_name).convert('RGB')

                if self.transform:
                    image = self.transform(image)

                # Return both the image tensor and the filename
                return image, self.data.iloc[idx, 0]

        except Exception as e:
            print(f"Error loading image at index {idx}: {str(e)}")
            if self.is_labeled:
                return torch.zeros((3, 224, 224)), torch.tensor(0, dtype=torch.long)
            return torch.zeros((3, 224, 224)), ""

In [6]:
from torch.cuda.amp import autocast, GradScaler
from timm.optim import create_optimizer_v2


class SSLTrainer:
    def __init__(self, train_loader, num_classes=135, num_epochs=10):
        # Initialize Swin-L model
        self.model = timm.create_model(
            'swin_large_patch4_window12_384',
            pretrained=True,
            num_classes=num_classes,
        )

        # Freeze all layers first
        for param in self.model.parameters():
            param.requires_grad = False

        # Unfreeze the last transformer stage
        for param in self.model.layers[-1].parameters():
            param.requires_grad = True

        # Unfreeze the head
        for param in self.model.head.parameters():
            param.requires_grad = True

        self.model = self.model.to(device)

        # Optimizer for unfrozen layers
        param_groups = [
            {'params': self.model.head.parameters(), 'lr': 8e-4},
            {'params': self.model.layers[-1].parameters(), 'lr': 1.5e-4}
        ]

        self.criterion = nn.CrossEntropyLoss(label_smoothing=0.15)
        self.optimizer = create_optimizer_v2(
            param_groups,
            opt='adamw',
            weight_decay=0.01,
            betas=(0.9, 0.999)
        )

        # Scheduler with different max_lr for each parameter group
        self.scheduler = optim.lr_scheduler.OneCycleLR(
            self.optimizer,
            max_lr=[5e-4, 1e-4],  # Different max_lr for each group
            epochs=num_epochs,
            steps_per_epoch=len(train_loader),
            pct_start=0.15,
            anneal_strategy='cos',
            div_factor=15.0
        )

        self.scaler = GradScaler()

    def train_epoch(self, labeled_loader, unlabeled_loader, epoch):
        self.model.train()
        total_loss = 0
        correct = 0
        total = 0

        pbar = tqdm(zip(labeled_loader, unlabeled_loader),
                  total=min(len(labeled_loader), len(unlabeled_loader)))

        for i, (labeled_batch, unlabeled_batch) in enumerate(pbar):
            if i % 10 == 0:
                torch.cuda.empty_cache()

            labeled_images, labels = labeled_batch
            labeled_images = labeled_images.to(device)
            labels = labels.view(-1).long().to(device)

            try:
                if isinstance(unlabeled_batch, torch.Tensor):
                    unlabeled_images = unlabeled_batch.to(device)
                else:
                    unlabeled_images = unlabeled_batch[0].to(device)

                # Zero the gradients
                self.optimizer.zero_grad()

                # Forward pass and loss calculation inside autocast
                with autocast():
                    # Labeled data
                    outputs = self.model(labeled_images)
                    labeled_loss = self.criterion(outputs, labels)

                    # Unlabeled data with higher confidence threshold
                    unlabeled_outputs = self.model(unlabeled_images)
                    pseudo_probs = torch.softmax(unlabeled_outputs, dim=1)
                    max_probs, pseudo_labels = torch.max(pseudo_probs, dim=1)

                    # Higher confidence threshold since we're using Swin-L
                    confidence_threshold = 0.95
                    mask = max_probs > confidence_threshold

                    # Calculate unlabeled loss
                    unlabeled_loss = torch.tensor(0.0, device=device)
                    if torch.any(mask):
                        confident_outputs = unlabeled_outputs[mask]
                        confident_pseudo_labels = pseudo_labels[mask]
                        unlabeled_loss = self.criterion(confident_outputs, confident_pseudo_labels)

                    # Combined loss with reduced weight for unlabeled data
                    loss = labeled_loss + 0.4 * unlabeled_loss

                # Backward and optimize
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                pred = torch.argmax(outputs, dim=1)
                correct += (pred == labels).sum().item()
                total += labels.size(0)
                total_loss += loss.item()

                pbar.set_description(
                    f'Epoch {epoch} - Loss: {loss.item():.4f} - Acc: {100.*correct/total:.2f}%'
                )

            except RuntimeError as e:
                print(f"CUDA error in batch {i}: {e}")
                torch.cuda.empty_cache()
                continue

        avg_loss = total_loss / len(labeled_loader)
        avg_acc = 100. * correct / total
        return avg_loss, avg_acc

    def validate(self, val_loader):
        self.model.eval()
        total_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc='Validating'):
                try:
                    images = images.to(device)
                    labels = labels.view(-1).long().to(device)

                    with autocast():
                        outputs = self.model(images)
                        loss = self.criterion(outputs, labels)

                    total_loss += loss.item()
                    pred = torch.argmax(outputs, dim=1)
                    correct += (pred == labels).sum().item()
                    total += labels.size(0)

                except RuntimeError as e:
                    print(f"CUDA error in validation: {e}")
                    torch.cuda.empty_cache()
                    continue

        avg_loss = total_loss / len(val_loader)
        avg_acc = 100. * correct / total
        return avg_loss, avg_acc

In [7]:
#Data transforms for SWIN model
train_transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.RandomAutocontrast(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [8]:
from tqdm import tqdm
import torch

def predict_test(model, test_loader, device):
    """
    Generate predictions for test data.

    Args:
        model: Trained PyTorch model.
        test_loader: DataLoader for test dataset.
        device: Device to perform computation on (e.g., "cuda" or "cpu").

    Returns:
        predictions: List of predicted category IDs.
        filenames: List of corresponding image filenames.
    """
    model.eval()
    predictions = []
    filenames = []

    with torch.no_grad():
        for images, file_ids in tqdm(test_loader, desc="Predicting test data"):
            images = images.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)  # Get predicted class IDs
            predictions.extend(preds.cpu().numpy())  # Convert to NumPy array
            filenames.extend(file_ids)  # Collect filenames

    return predictions, filenames

import os
import pandas as pd
from torch.utils.data import DataLoader

def create_submission(model, test_dir, categories_file, val_transform, device, output_csv="submission.csv"):
    """
    Create submission file for Kaggle.

    Args:
        model: Trained PyTorch model.
        test_dir: Directory containing test images.
        categories_file: Path to the file mapping category IDs to category names.
        val_transform: Transformations applied to test images.
        device: Device to perform computation on.
        output_csv: Name of the output CSV file.

    Returns:
        None
    """
    # Load categories mapping
    categories_df = pd.read_csv(categories_file)
    id_to_category = dict(zip(range(len(categories_df)), categories_df['category']))

    # Create test dataset and DataLoader
    test_dataset = PlantDogDataset(
        img_dir=test_dir,
        transform=val_transform,
        is_labeled=False
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=2,
        pin_memory=True
    )

    # Generate predictions and filenames
    predictions, filenames = predict_test(model, test_loader, device)

    # Map predictions to category names
    category_predictions = [id_to_category[pred] for pred in predictions]

    # Create submission DataFrame
    submission_df = pd.DataFrame({
        'image': filenames,
        'id': predictions
    })

    # Save to CSV
    submission_file = os.path.join(os.getcwd(), output_csv)
    submission_df.to_csv(submission_file, index=False)
    print(f"Submission file saved to: {submission_file}")


In [9]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

def main():
    # Set random seed for reproducibility
    set_seed(42)

    # Set paths
    base_path = '/content/dataset/DL_FinalProject'
    labeled_dir = os.path.join(base_path, 'train/labeled')
    unlabeled_dir = os.path.join(base_path, 'train/unlabeled')
    test_dir = os.path.join(base_path, 'test')
    train_csv = os.path.join(base_path, 'train_labeled.csv')
    categories_file = os.path.join(base_path, 'categories.csv')  # Ensure this file exists

    # Define transformations for train, validation, and test (384x384 for Swin-L)
    train_transform = transforms.Compose([
        transforms.Resize((384, 384)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
        transforms.RandomAutocontrast(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    val_transform = transforms.Compose([
        transforms.Resize((384, 384)),
        transforms.CenterCrop(384),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Create datasets
    train_dataset = PlantDogDataset(
        img_dir=labeled_dir,
        csv_file=train_csv,
        transform=train_transform,
        is_labeled=True
    )

    # Split labeled dataset into training and validation sets
    train_size = int(0.9 * len(train_dataset))
    val_size = len(train_dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(
        train_dataset, [train_size, val_size]
    )

    # Create unlabeled dataset
    unlabeled_dataset = PlantDogDataset(
        img_dir=unlabeled_dir,
        transform=train_transform,
        is_labeled=False
    )

    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=4,
        shuffle=True,
        num_workers=2,
        pin_memory=True,
        drop_last=True
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=4,
        shuffle=False,
        num_workers=2,
        pin_memory=True
    )

    unlabeled_loader = DataLoader(
        unlabeled_dataset,
        batch_size=4,
        shuffle=True,
        num_workers=2,
        pin_memory=True,
        drop_last=True
    )

    print(f"Number of training samples: {len(train_dataset)}")
    print(f"Number of validation samples: {len(val_dataset)}")
    print(f"Number of unlabeled samples: {len(unlabeled_dataset)}")

    # Initialize SSL trainer (you should have this class in your code)
    trainer = SSLTrainer(train_loader=train_loader, num_classes=135, num_epochs=10)
    num_epochs = 10
    best_val_acc = 0

    # Training loop
    for epoch in range(num_epochs):
        torch.cuda.empty_cache()  # Clear CUDA memory to prevent out-of-memory errors

        train_loss, train_acc = trainer.train_epoch(train_loader, unlabeled_loader, epoch)
        val_loss, val_acc = trainer.validate(val_loader)

        print(f'\nEpoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

        trainer.scheduler.step()

        # Save the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({
                'epoch': epoch,
                'model_state_dict': trainer.model.state_dict(),
                'optimizer_state_dict': trainer.optimizer.state_dict(),
                'best_val_acc': best_val_acc,
            }, 'best_model.pth')
            print(f"Best model saved with validation accuracy: {best_val_acc:.2f}%")

        print('-' * 50)

    # Load the best model for submission
    best_model_path = 'best_model.pth'
    if os.path.exists(best_model_path):
        checkpoint = torch.load(best_model_path)
        trainer.model.load_state_dict(checkpoint['model_state_dict'])
        trainer.model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
        print(f"Loaded best model from epoch {checkpoint['epoch']} with validation accuracy {checkpoint['best_val_acc']:.2f}%")

    # Generate submission file
    create_submission(
        model=trainer.model,
        test_dir=test_dir,
        categories_file=categories_file,
        val_transform=val_transform,
        device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    )
    print("Submission generation complete.")
    print("Current working directory:", os.getcwd())

if __name__ == "__main__":
    main()

Number of training samples: 8868
Number of validation samples: 986
Number of unlabeled samples: 22995


model.safetensors:   0%|          | 0.00/801M [00:00<?, ?B/s]

Epoch 0 - Loss: 1.8708 - Acc: 58.34%: 100%|██████████| 2217/2217 [13:14<00:00,  2.79it/s]
Validating: 100%|██████████| 247/247 [00:37<00:00,  6.52it/s]



Epoch 1/10:
Train Loss: 2.8724, Train Acc: 58.34%
Val Loss: 1.6800, Val Acc: 86.11%
Best model saved with validation accuracy: 86.11%
--------------------------------------------------


Epoch 1 - Loss: 1.4129 - Acc: 87.73%: 100%|██████████| 2217/2217 [13:14<00:00,  2.79it/s]
Validating: 100%|██████████| 247/247 [00:37<00:00,  6.51it/s]



Epoch 2/10:
Train Loss: 1.7045, Train Acc: 87.73%
Val Loss: 1.5083, Val Acc: 89.55%
Best model saved with validation accuracy: 89.55%
--------------------------------------------------


Epoch 2 - Loss: 1.6384 - Acc: 90.87%: 100%|██████████| 2217/2217 [13:14<00:00,  2.79it/s]
Validating: 100%|██████████| 247/247 [00:37<00:00,  6.52it/s]



Epoch 3/10:
Train Loss: 1.5708, Train Acc: 90.87%
Val Loss: 1.4717, Val Acc: 89.96%
Best model saved with validation accuracy: 89.96%
--------------------------------------------------


Epoch 3 - Loss: 2.2760 - Acc: 92.94%: 100%|██████████| 2217/2217 [13:14<00:00,  2.79it/s]
Validating: 100%|██████████| 247/247 [00:37<00:00,  6.52it/s]



Epoch 4/10:
Train Loss: 1.4962, Train Acc: 92.94%
Val Loss: 1.4416, Val Acc: 90.77%
Best model saved with validation accuracy: 90.77%
--------------------------------------------------


Epoch 4 - Loss: 2.2927 - Acc: 94.16%: 100%|██████████| 2217/2217 [13:14<00:00,  2.79it/s]
Validating: 100%|██████████| 247/247 [00:37<00:00,  6.51it/s]



Epoch 5/10:
Train Loss: 1.4508, Train Acc: 94.16%
Val Loss: 1.4282, Val Acc: 91.89%
Best model saved with validation accuracy: 91.89%
--------------------------------------------------


Epoch 5 - Loss: 1.2234 - Acc: 95.33%: 100%|██████████| 2217/2217 [13:12<00:00,  2.80it/s]
Validating: 100%|██████████| 247/247 [00:37<00:00,  6.51it/s]



Epoch 6/10:
Train Loss: 1.4081, Train Acc: 95.33%
Val Loss: 1.4133, Val Acc: 92.19%
Best model saved with validation accuracy: 92.19%
--------------------------------------------------


Epoch 6 - Loss: 1.1839 - Acc: 96.17%: 100%|██████████| 2217/2217 [13:13<00:00,  2.79it/s]
Validating: 100%|██████████| 247/247 [00:37<00:00,  6.54it/s]



Epoch 7/10:
Train Loss: 1.3840, Train Acc: 96.17%
Val Loss: 1.4003, Val Acc: 92.60%
Best model saved with validation accuracy: 92.60%
--------------------------------------------------


Epoch 7 - Loss: 1.2224 - Acc: 96.70%: 100%|██████████| 2217/2217 [13:12<00:00,  2.80it/s]
Validating: 100%|██████████| 247/247 [00:37<00:00,  6.52it/s]



Epoch 8/10:
Train Loss: 1.3568, Train Acc: 96.70%
Val Loss: 1.3814, Val Acc: 92.90%
Best model saved with validation accuracy: 92.90%
--------------------------------------------------


Epoch 8 - Loss: 1.2423 - Acc: 97.30%: 100%|██████████| 2217/2217 [13:12<00:00,  2.80it/s]
Validating: 100%|██████████| 247/247 [00:37<00:00,  6.53it/s]



Epoch 9/10:
Train Loss: 1.3369, Train Acc: 97.30%
Val Loss: 1.3867, Val Acc: 93.31%
Best model saved with validation accuracy: 93.31%
--------------------------------------------------


Epoch 9 - Loss: 1.2250 - Acc: 97.83%: 100%|██████████| 2217/2217 [13:12<00:00,  2.80it/s]
Validating: 100%|██████████| 247/247 [00:37<00:00,  6.54it/s]



Epoch 10/10:
Train Loss: 1.3200, Train Acc: 97.83%
Val Loss: 1.3875, Val Acc: 92.49%
--------------------------------------------------
Loaded best model from epoch 8 with validation accuracy 93.31%


Predicting test data: 100%|██████████| 257/257 [10:50<00:00,  2.53s/it]

Submission file saved to: /content/submission.csv
Submission generation complete.
Current working directory: /content





In [10]:
print("Current working directory:", os.getcwd())


Current working directory: /content


In [87]:
!ls /content


best_model.pth	dataset  drive	sample_data  submission.csv


In [34]:
!mkdir -p /content/drive/MyDrive/DL_Submission


In [11]:
!cp /content/best_model.pth /content/drive/MyDrive/DL_Submission/best_model.pth
!cp /content/submission.csv /content/drive/MyDrive/DL_Submission/submission_10e.csv


In [12]:
import pandas as pd

# Read both the submission and categories files
df = pd.read_csv('/content/submission.csv')
categories_df = pd.read_csv('/content/dataset/DL_FinalProject/categories.csv')

# Create a mapping from category names to category IDs
category_to_id = dict(zip(categories_df['category'], categories_df.index))

# Convert string categories to numeric IDs using the mapping
# Only do this if the id column contains strings
if df['id'].dtype == 'object':  # if the column contains strings
    df['id'] = df['id'].map(category_to_id)

# Ensure we have the right columns in the right order
df = df[['image', 'id']]

# Ensure id column is integer type
df['id'] = df['id'].astype(int)

# Save with the exact format needed, without index
df.to_csv('/content/drive/MyDrive/DL_Submission/formatted_submission_swinL_10e.csv', index=False)

# Verify the format
print("\nFirst few rows of the reformatted submission:")
print(df.head())



First few rows of the reformatted submission:
       image   id
0  40237.jpg  106
1  39976.jpg    9
2  39162.jpg    6
3  40543.jpg   26
4  34094.jpg    2
