## Importing Files


In [17]:
import os
BASE_DIR = '/content/drive/MyDrive/data'
TRAIN_DIR = os.path.join(BASE_DIR, 'train_images')
TEST_DIR  = os.path.join(BASE_DIR, 'test_images')
CSV_PATH  = os.path.join(BASE_DIR, 'train.csv')
OUTPUT_SUB = os.path.join(BASE_DIR, 'submission.csv')
os.listdir(BASE_DIR)


['train.csv', 'test_images', 'train_images']

##Installing packages


In [6]:
!pip install torch torchvision timm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [38]:
import pandas as pd
import numpy as np
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models


##Reading Data


In [39]:
df = pd.read_csv(CSV_PATH)
n_null = df['ClassId'].isna().sum()
print(f"Rows with missing ClassId: {n_null}")

if n_null > 0:
    print("Some images have no valid ClassId. Here's a sample of those ImageIds:")
    print(df.loc[df['ClassId'].isna(), 'ImageId'].unique()[:10])
    df = df.dropna(subset=['ClassId'])
    print(f"After dropping nulls, rows = {len(df)}")
df['ClassId'] = df['ClassId'].astype(int)
label_df = (
    df
    .groupby('ImageId')['ClassId']
    .agg(lambda x: x.value_counts().idxmax())
    .reset_index()
)

print("Unique images:", label_df['ImageId'].nunique())
print("Rows in label_df:", len(label_df))
print(label_df['ClassId'].value_counts())


Rows with missing ClassId: 1
Some images have no valid ClassId. Here's a sample of those ImageIds:
['2 367677 34 367793 32 367933 34 368049 32 368189 34 368305 32 368445 34 368561 32 368701 34 368817 32 368957 34 369073 32 369213 34 369329 32 369469 34 369585 32 369725 34 369841 32 369981 34 370097 32 370237 34 370353 32 370493 34 370609 32 370749 34 370865 32 371005 34 371121 32 371261 34 371377 32 371517 34 371632 33 371773 33 371888 33 372029 33 372144 33 372285 33 372400 33 372541 33 372656 33 372797 33 372912 33 373053 33 373168 33 373309 33 373424 33 373565 33 373680 33 373821 33 373936 33 374077 33 374192 33 374333 33 374448 33 374589 33 374704 33 374845 33 374960 33 375101 33 375216 33 375357 33 375472 33 375613 33 375728 33 375869 33 375984 33 376125 33 376240 33 376381 33 376496 33 376637 33 376752 33 376893 33 377008 33 377149 33 377264 33 377405 33 377520 33 377661 33 377776 33 377917 33 378032 33 378173 33 378288 33 378429 33 378544 33 378685 33 378800 33 378941 33 379056 

Train / Validation Split

In [40]:
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(
    label_df,
    test_size=0.2,
    stratify=label_df['ClassId'],
    random_state=42
)
print(f"→ Train: {len(train_df)} images, Val: {len(val_df)} images")


→ Train: 5332 images, Val: 1334 images


##Dataset & DataLoader

In [41]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os

BASE_DIR = '/content/drive/MyDrive/data'
TRAIN_DIR = os.path.join(BASE_DIR, 'train_images')
TEST_DIR  = os.path.join(BASE_DIR, 'test_images')

train_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
val_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

class SteelDefectDataset(Dataset):
    def __init__(self, df, img_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_id = self.df.loc[idx, 'ImageId']
        label  = self.df.loc[idx, 'ClassId'] - 1
        img_path = os.path.join(self.img_dir, img_id)
        img = Image.open(img_path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label
train_ds = SteelDefectDataset(train_df, TRAIN_DIR, train_transforms)
val_ds   = SteelDefectDataset(val_df,   TRAIN_DIR, val_transforms)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=2)
val_loader   = DataLoader(val_ds,   batch_size=32, shuffle=False, num_workers=2)


##Model Creation

In [42]:

import torch.nn as nn
import torch.optim as optim
from torchvision import models

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", device)

model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 4)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


Using device: cpu




##Training Loop

In [43]:
import os

all_files = os.listdir(TRAIN_DIR)
file_map = { os.path.splitext(fname)[0]: fname
             for fname in all_files }


from torch.utils.data import Dataset
from PIL import Image

class SteelDefectDataset(Dataset):
    def __init__(self, df, img_dir, file_map, transform=None):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.file_map = file_map
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_id = self.df.loc[idx, 'ImageId']
        fname = self.file_map.get(img_id)
        if fname is None:
            raise FileNotFoundError(f"No file for ID {img_id} in {self.img_dir}")
        img_path = os.path.join(self.img_dir, fname)

        img = Image.open(img_path).convert('RGB')
        if self.transform:
            img = self.transform(img)

        label = self.df.loc[idx, 'ClassId'] - 1
        return img, label

train_ds = SteelDefectDataset(train_df, TRAIN_DIR, file_map, train_transforms)
val_ds   = SteelDefectDataset(  val_df, TRAIN_DIR, file_map, val_transforms)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=2)
val_loader   = DataLoader(val_ds,   batch_size=32, shuffle=False, num_workers=2)


In [32]:
CHECKPOINT_PATH = os.path.join(BASE_DIR, "best_resnet18.pth")

# inside your training loop, when you get a new best:
torch.save(model.state_dict(), CHECKPOINT_PATH)
print(f"  ↳ Saved new best model to {CHECKPOINT_PATH}")


  ↳ Saved new best model to /content/drive/MyDrive/data/best_resnet18.pth


##Inference & Submission

In [44]:
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from PIL import Image

BASE_DIR        = '/content/drive/MyDrive/data'
TEST_DIR        = os.path.join(BASE_DIR, 'test_images')
CHECKPOINT_PATH = os.path.join(BASE_DIR, 'best_resnet18.pth')
OUT_CSV         = os.path.join(BASE_DIR, 'submission.csv')
test_files = os.listdir(TEST_DIR)
test_file_map = { os.path.splitext(f)[0]: f for f in test_files }
print(f"Found {len(test_files)} files in TEST_DIR, mapping covers {len(test_file_map)} IDs")
class TestDataset(Dataset):
    def __init__(self, img_dir, bare_ids, file_map, transform=None):
        self.img_dir    = img_dir
        self.bare_ids   = bare_ids
        self.file_map   = file_map
        self.transform  = transform

    def __len__(self):
        return len(self.bare_ids)

    def __getitem__(self, idx):
        bare_id = self.bare_ids[idx]
        fname   = self.file_map.get(bare_id, None)
        if fname is None:
            raise FileNotFoundError(f"No file for ID {bare_id} in {self.img_dir}")
        img_path = os.path.join(self.img_dir, fname)
        img = Image.open(img_path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, bare_id

bare_test_ids = sorted(test_file_map.keys())
test_ds       = TestDataset(TEST_DIR, bare_test_ids, test_file_map, val_transforms)
test_loader   = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=2)
assert os.path.exists(CHECKPOINT_PATH), f"Checkpoint not found at {CHECKPOINT_PATH}"
model.load_state_dict(torch.load(CHECKPOINT_PATH))
model.eval()
preds = []
with torch.no_grad():
    for imgs, ids in test_loader:
        imgs = imgs.to(device)
        outputs = model(imgs)
        classes = outputs.argmax(dim=1).cpu().numpy() + 1
        for bare_id, cls in zip(ids, classes):
            preds.append((bare_id, cls))
sub_df = pd.DataFrame(preds, columns=['ImageId','ClassId'])
sub_df.to_csv(OUT_CSV, index=False)
print(f"✅ Saved submission to {OUT_CSV}")


Found 10 files in TEST_DIR, mapping covers 10 IDs
✅ Saved submission to /content/drive/MyDrive/data/submission.csv
