In [1]:
# Fetch the repository that provides main.py and predict.py, which the cells below run.
!git clone https://github.com/Swastik166/AI_ML_GC_2025.git

Cloning into 'AI_ML_GC_2025'...
remote: Enumerating objects: 16, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 16 (delta 2), reused 0 (delta 0), pack-reused 0 (from 0)[K
Unpacking objects: 100% (16/16), 20.02 KiB | 5.00 MiB/s, done.


In [1]:
# Switch into the cloned repository so main.py / predict.py can be invoked by relative path.
# NOTE(review): assumes the clone above was made inside /ai_gc/submission_notebook — confirm.
cd /ai_gc/submission_notebook/AI_ML_GC_2025

/ai_gc/submission_notebook/AI_ML_GC_2025


In [2]:
# List the repository contents to confirm the working directory.
ls

[0m[01;34mdata[0m/  main.py  [01;34mmodels[0m/  predict.py  [01;34mutils[0m/


In [8]:
# DATA PROCESSING
# Flattens the class-per-folder training set into <main_folder>/train and
# <main_folder>/val and writes a {filename: class_id} JSON file for each.
# The split is driven by a seeded RNG over sorted listings, so re-running the
# cell reproduces the same split instead of scattering an image across both
# folders (which would leak validation data into training).
import os
from tqdm import tqdm
import random
import shutil
import json

data = '/ai_gc/AI-ML GC 2025 Dataset'
train = os.path.join(data, 'train')
test = os.path.join(data, 'test')

main_folder = '/ai_gc/submission_notebook/data'
train_folder = os.path.join(main_folder, 'train')
val_folder = os.path.join(main_folder, 'val')
os.makedirs(train_folder, exist_ok=True)
os.makedirs(val_folder, exist_ok=True)

# Set validation split percentage
val_split = 0.05
# Seed for the train/val shuffle; a dedicated Random instance keeps the split
# independent of whatever else has touched the global `random` state.
split_seed = 42
image_extensions = ('.jpg', '.jpeg', '.png')

split_rng = random.Random(split_seed)

train_labels = {}
val_labels = {}


def _copy_class_images(images, class_name, class_path, dst_folder, labels, class_id):
    """Copy `images` from `class_path` into `dst_folder` and record their labels.

    Each file is stored as `<class_name><original name>` so names stay unique
    across classes. A file is copied only when the destination is missing or
    its size differs from the source. Returns the number of images handled.
    """
    for img in images:
        new_img = class_name + img
        labels[new_img] = class_id

        src_path = os.path.join(class_path, img)
        dst_path = os.path.join(dst_folder, new_img)

        if not os.path.exists(dst_path) or os.path.getsize(src_path) != os.path.getsize(dst_path):
            shutil.copy(src_path, dst_path)
    return len(images)


class_folders = sorted(os.listdir(train))

total_train_images = 0
total_val_images = 0
processed_classes = 0

for class_name in tqdm(class_folders, desc="Processing class folders"):
    class_path = os.path.join(train, class_name)  # each folder in train
    if not os.path.isdir(class_path):
        print(f"Skipping {class_path} as it is not a directory")
        continue

    # Class folders are expected to be named "<id>.<name>"; anything else
    # (e.g. a hidden tooling directory) is skipped rather than crashing int().
    try:
        class_id = int(class_name.split('.')[0])
    except ValueError:
        print(f"Skipping {class_path} as its name does not start with a numeric class id")
        continue

    # Sort before shuffling: os.listdir order is arbitrary, and a seeded
    # shuffle is only reproducible when its input order is fixed.
    valid_images = sorted(img for img in os.listdir(class_path)
                          if img.lower().endswith(image_extensions))

    split_rng.shuffle(valid_images)

    # At least one validation image per non-empty class; a class with a single
    # image therefore ends up entirely in validation.
    val_count = min(len(valid_images), max(1, int(len(valid_images) * val_split)))
    train_count = len(valid_images) - val_count

    train_images = valid_images[:train_count]
    val_images = valid_images[train_count:]

    total_train_images += _copy_class_images(
        train_images, class_name, class_path, train_folder, train_labels, class_id)
    total_val_images += _copy_class_images(
        val_images, class_name, class_path, val_folder, val_labels, class_id)
    processed_classes += 1

print(f"Processed {total_train_images} training images and {total_val_images} validation images across {processed_classes} classes")
print(f"Labels dictionary contains {len(train_labels)} entries")
print(f"Labels dictionary contains {len(val_labels)} entries")

# Files left behind by an earlier run with a different split are not listed in
# the current label files; surface them so they can be cleaned up.
for split_folder, split_labels in ((train_folder, train_labels), (val_folder, val_labels)):
    stale_files = sorted(set(os.listdir(split_folder)) - set(split_labels))
    if stale_files:
        print(f"WARNING: {len(stale_files)} file(s) in {split_folder} are not part of the current split "
              f"(e.g. {stale_files[0]}); they may be left over from an earlier run")

# save labels to a file
labels_path = os.path.join(main_folder, 'train_labels.json')
with open(labels_path, 'w') as labels_file:
    json.dump(train_labels, labels_file)
labels_path = os.path.join(main_folder, 'val_labels.json')
with open(labels_path, 'w') as labels_file:
    json.dump(val_labels, labels_file)

Processing class folders: 100%|██████████| 200/200 [00:01<00:00, 182.20it/s]

Processed 9019 training images and 395 validation images across 200 classes
Labels dictionary contains 9019 entries
Labels dictionary contains 395 entries





In [3]:
# Launch training through main.py with the SwinV2 base model named below:
# 256px images, 12 epochs, batch size 64, label smoothing 0.1, and a gradual
# unfreeze schedule listing layers.3 + norm at epoch 4 (lr 3e-5) and layers.2
# at epoch 8 (lr 1e-5).
# NOTE(review): --data_dir is under /raid/..., while the data-processing cell
# writes to /ai_gc/submission_notebook/data — confirm both are the same directory.
!python main.py --data_dir /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/data --output_base_dir /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/outputs --model_name 'swinv2_base_window12to16_192to256.ms_in22k_ft_in1k' --image_size 256 --epochs 12 --batch_size 64 --gradual_unfreeze_schedule '[{"epoch": 4, "layers": ["layers.3", "norm"], "lr": 3e-5}, {"epoch": 8, "layers": ["layers.2"], "lr": 1e-5}]' --label_smoothing 0.1 

Using accelerator: gpu, devices: 1
Parsed gradual unfreeze schedule: [{'epoch': 4, 'layers': ['layers.3', 'norm'], 'lr': 3e-05}, {'epoch': 8, 'layers': ['layers.2'], 'lr': 1e-05}]
Seed set to 42
Output directory created: /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/outputs/swinv2_base_window12to16_192to256.ms_in22k_ft_in1k_gradual_unfreeze_256px_lr0.0001_bs64_aug1_ls0.1
Using Training Transforms: ['RandomResizedCrop', 'RandomHorizontalFlip', 'RandomRotation', 'ColorJitter', 'RandomErasing', 'PILToTensor', 'ConvertImageDtype', 'Normalize']
Setting up datamodule to determine number of classes...
Loading data from: /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/data/train
Found 9019 original images.
Created label map with 200 classes.
Loading data from: /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/data/val
Found 395 original images.
Using provided label map with 200 classes.
Setup complete for stage: fit
Number of classes detected:

In [4]:
# Run predict.py on the test images using the best_model checkpoint and the
# int_to_label.pkl label map from the training output directory, writing the
# predictions to a CSV.
# NOTE(review): --test_data_dir points at .../final_data/test, which is not the
# dataset root used by the data-processing cell — confirm this is the intended test set.
# NOTE(review): the checkpoint file name embeds epoch/val metrics, so it has to be
# updated by hand after every retraining run.
!python predict.py --checkpoint_path /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/outputs/swinv2_base_window12to16_192to256.ms_in22k_ft_in1k_gradual_unfreeze_256px_lr0.0001_bs64_aug1_ls0.1/checkpoints/best_model-epoch=11-val_loss=1.274-val_acc=0.9139.ckpt --test_data_dir /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/final_data/test --label_map_path /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/outputs/swinv2_base_window12to16_192to256.ms_in22k_ft_in1k_gradual_unfreeze_256px_lr0.0001_bs64_aug1_ls0.1/int_to_label.pkl --output_csv /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/outputs/swinv2base_aug1_12ep_pred.csv --image_size 256 --batch_size 128

Loading label map from: /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/outputs/swinv2_base_window12to16_192to256.ms_in22k_ft_in1k_gradual_unfreeze_256px_lr0.0001_bs64_aug1_ls0.1/int_to_label.pkl
Loading label map from: /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/outputs/swinv2_base_window12to16_192to256.ms_in22k_ft_in1k_gradual_unfreeze_256px_lr0.0001_bs64_aug1_ls0.1/int_to_label.pkl
Loaded 200 labels.
Loading model from checkpoint: /raid/aiccg/rbsunoj/swastik/SIDE_STUFF/ai_gc/submission_notebook/outputs/swinv2_base_window12to16_192to256.ms_in22k_ft_in1k_gradual_unfreeze_256px_lr0.0001_bs64_aug1_ls0.1/checkpoints/best_model-epoch=11-val_loss=1.274-val_acc=0.9139.ckpt
Creating model: swinv2_base_window12to16_192to256.ms_in22k_ft_in1k (pretrained=True, classes=200)
Loaded model from timm.
Gradual Unfreezing is active for SwinV2. Initializing with frozen backbone.
Initial freeze for gradual unfreezing: Freezing all layers except the head.
Identified 