# Import

In [1]:
import os
import random

import pandas as pd
import numpy as np

from PIL import Image
from tqdm import tqdm

from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch import nn, optim

from sklearn.metrics import log_loss
import wandb

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [2]:
!pip install inplace-abn



In [3]:
!git clone https://github.com/Alibaba-MIIL/TResNet
%cd TResNet

fatal: destination path 'TResNet' already exists and is not an empty directory.
/kaggle/working/TResNet


# Hyperparameter Setting

In [None]:
# Single source of truth for the tunable settings used throughout the notebook.
CFG = dict(
    IMG_SIZE=368,        # side length (pixels) images are resized to
    BATCH_SIZE=32,       # batch size for every DataLoader
    EPOCHS=10,           # number of training epochs
    LEARNING_RATE=1e-4,  # learning rate passed to the optimizer
    SEED=42,             # seed handed to seed_everything
)

In [None]:
# Log this session in to Weights & Biases via its CLI before a run is created.
!wandb login

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Start the Weights & Biases run; CFG is attached so the hyperparameters are
# recorded together with the metrics.
wandb_run_settings = {
    "entity": 'Dacon_Car',
    "project": "car-classification",  # project name
    "name": 'TResNet',                # run name
    "config": CFG,                    # logged hyperparameters
}
wandb.init(**wandb_run_settings)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msingiri129[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Fix Random Seed

In [7]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and current CUDA
    device), exports ``PYTHONHASHSEED``, and switches cuDNN to deterministic
    mode with autotuning (``benchmark``) disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seed

# CustomDataset

In [None]:
import os
from PIL import Image
import numpy as np # NumPy ÏûÑÌè¨Ìä∏ Ï∂îÍ∞Ä
from torch.utils.data import Dataset
# albumentations and ToTensorV2 must be imported outside the Dataset class.
# import albumentations as A
# from albumentations.pytorch import ToTensorV2

class CustomImageDataset(Dataset):
    """Image dataset backed by a directory tree.

    Two layouts are supported:

    * ``is_test=False`` (default): ``root_dir/<class_name>/<image>``. Every
      sub-directory of ``root_dir`` is a class and items are ``(image, label)``.
    * ``is_test=True``: ``root_dir/<image>``. Items are just ``image``.

    Files are listed in sorted order, so two instances built from the same
    directory always index the same sample at the same position.

    Args:
        root_dir: Directory to scan.
        transform: Optional callable invoked as ``transform(image=ndarray)``
            that returns a mapping holding the result under ``'image'``
            (the Albumentations calling convention).
        is_test: Select the flat, label-free layout.

    Attributes:
        samples: ``(path,)`` tuples for test data, ``(path, label)`` otherwise.
        classes: Sorted class (sub-directory) names; empty for test data.
        class_to_idx: Mapping from class name to integer label.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif')

    def __init__(self, root_dir, transform=None, is_test=False):
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test
        self.samples = []
        # Defined for both layouts so attribute access never fails.
        self.classes = []
        self.class_to_idx = {}

        if is_test:
            # Test set: store image paths only, there are no labels.
            for fname in self._list_images(root_dir):
                self.samples.append((os.path.join(root_dir, fname),))
        else:
            # Training set: only real sub-directories are classes. Stray files
            # in root_dir must not receive a label index, otherwise the class
            # count (and every following label) would be shifted.
            self.classes = sorted(
                entry for entry in os.listdir(root_dir)
                if os.path.isdir(os.path.join(root_dir, entry))
            )
            self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}

            for cls_name in self.classes:
                cls_folder = os.path.join(root_dir, cls_name)
                label = self.class_to_idx[cls_name]
                for fname in self._list_images(cls_folder):
                    self.samples.append((os.path.join(cls_folder, fname), label))

    @classmethod
    def _list_images(cls, folder):
        """Return the image file names in ``folder`` in sorted order."""
        return sorted(
            fname for fname in os.listdir(folder)
            if fname.lower().endswith(cls.IMAGE_EXTENSIONS)
        )

    def _load_image(self, img_path):
        """Read ``img_path`` as an RGB array and apply the transform, if any."""
        # The context manager closes the underlying file handle promptly.
        with Image.open(img_path) as img:
            image = np.array(img.convert('RGB'))

        if self.transform:
            # The transform returns a dict; the converted image is under 'image'.
            image = self.transform(image=image)['image']
        return image

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        if self.is_test:
            return self._load_image(self.samples[idx][0])

        img_path, label = self.samples[idx]
        return self._load_image(img_path), label

# Data Load

In [9]:
# Locations of the training images and the test images inside the Kaggle
# input dataset.
train_root, test_root = (
    '/kaggle/input/car-classification/train',
    '/kaggle/input/car-classification/test',
)

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2 # PyTorch ÌÖêÏÑúÎ°ú Î≥ÄÌôòÌïòÍ∏∞ ÏúÑÌï®
import numpy as np # AlbumentationsÎäî NumPy Î∞∞Ïó¥ÏùÑ ÏûÖÎ†•ÏúºÎ°ú Î∞õÏäµÎãàÎã§.
from PIL import Image # Ïù¥ÎØ∏ÏßÄ Î°úÎî©ÏùÑ ÏúÑÌïú ÎùºÏù¥Î∏åÎü¨Î¶¨

def build_transform(img_size):
    """Build the Albumentations preprocessing pipeline.

    Steps: resize to ``img_size`` x ``img_size``, normalise with the ImageNet
    mean/std (the same values torchvision's ``Normalize`` is usually given),
    and convert the HWC array to a CHW PyTorch tensor.

    Args:
        img_size: Target height and width in pixels.

    Returns:
        An ``A.Compose`` callable used as ``transform(image=ndarray)['image']``.
    """
    return A.Compose([
        # No explicit `interpolation`: Albumentations expects OpenCV flags, and
        # PIL's Image.BILINEAR (value 2) would be read as cv2.INTER_CUBIC. The
        # library default is bilinear (cv2.INTER_LINEAR), which is the intent.
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                    max_pixel_value=255.0),  # maximum raw pixel value
        ToTensorV2(),
    ])


# Training pipeline. The former LongestMaxSize / PadIfNeeded steps were dropped:
# they ran after the image had already been resized to an exact square of
# IMG_SIZE, so they could never change it.
# NOTE(review): if aspect-ratio-preserving padding is wanted instead, replace
# A.Resize with A.LongestMaxSize followed by A.PadIfNeeded in build_transform.
train_transform = build_transform(CFG['IMG_SIZE'])

# Validation pipeline: deliberately identical to the training pipeline.
val_transform = build_transform(CFG['IMG_SIZE'])

In [13]:
# Index the whole training directory once (no transform) to obtain the labels
# needed for the stratified split.
full_dataset = CustomImageDataset(train_root, transform=None)
print(f"Ï¥ù Ïù¥ÎØ∏ÏßÄ Ïàò: {len(full_dataset)}")

targets = [label for _, label in full_dataset.samples]
class_names = full_dataset.classes

# Stratified 80/20 split. The seed comes from CFG so that the configured seed
# actually controls the split instead of a second hard-coded constant.
train_idx, val_idx = train_test_split(
    range(len(targets)), test_size=0.2, stratify=targets, random_state=CFG['SEED']
)

# Two dataset instances over the same directory, one per transform.
train_source = CustomImageDataset(train_root, transform=train_transform)
val_source = CustomImageDataset(train_root, transform=val_transform)

# The split indices are positions in full_dataset.samples; they are only valid
# for the other instances if all three list the files in the same order.
assert train_source.samples == full_dataset.samples == val_source.samples, \
    "dataset instances disagree on sample order; split indices would be invalid"

train_dataset = Subset(train_source, train_idx)
val_dataset = Subset(val_source, val_idx)
print(f'train Ïù¥ÎØ∏ÏßÄ Ïàò: {len(train_dataset)}, valid Ïù¥ÎØ∏ÏßÄ Ïàò: {len(val_dataset)}')


# DataLoader definitions
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

Ï¥ù Ïù¥ÎØ∏ÏßÄ Ïàò: 33137
train Ïù¥ÎØ∏ÏßÄ Ïàò: 26509, valid Ïù¥ÎØ∏ÏßÄ Ïàò: 6628


# Model Define

In [None]:
from src.models.tresnet_v2.tresnet_v2 import TResnetL_V2 as TResnetL368


class TResNet(nn.Module):
    """TResNet-L-V2 feature extractor with a fresh linear classification head.

    The backbone's own head is replaced by ``nn.Identity`` so it outputs
    features, and a new ``nn.Linear`` maps those features to ``num_classes``.

    Args:
        num_classes: Number of outputs of the new classification head.
        pretrained: When True (default, the original behaviour) the backbone
            is initialised from the checkpoint at ``weights_path``. Pass False
            when a complete state dict is loaded right after construction.
        weights_path: Checkpoint file to read; defaults to
            ``DEFAULT_WEIGHTS_PATH``.
    """

    # Class count the backbone is built with before the checkpoint is loaded;
    # presumably the number of classes the checkpoint was trained on
    # (TODO confirm against the checkpoint).
    PRETRAINED_NUM_CLASSES = 196
    DEFAULT_WEIGHTS_PATH = "/kaggle/input/tresnet-stanford-cars-pretrained/stanford_cars_tresnet-l-v2_96_27.pth"

    def __init__(self, num_classes, pretrained=True, weights_path=None):
        super().__init__()
        model_params = {'num_classes' : self.PRETRAINED_NUM_CLASSES}
        self.backbone = TResnetL368(model_params)

        if pretrained:
            # map_location='cpu' lets the checkpoint load on machines without a
            # GPU; the caller moves the whole model to its device afterwards.
            pretrained_weights = torch.load(
                weights_path or self.DEFAULT_WEIGHTS_PATH, map_location='cpu'
            )
            self.backbone.load_state_dict(pretrained_weights['model'])

        self.feature_dim = self.backbone.num_features
        self.backbone.head = nn.Identity()  # use the backbone as a feature extractor only
        self.head = nn.Linear(self.feature_dim, num_classes)  # classifier

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        x = self.backbone(x)
        x = self.head(x)
        return x

# Train/ Validation

In [None]:
def _train_one_epoch(model, loader, criterion, optimizer, device, desc):
    """Run one optimisation pass over ``loader`` and return the mean batch loss."""
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(loader, desc=desc):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)  # logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(loader)


def _validate(model, loader, criterion, device, num_classes, desc):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Returns:
        Tuple of (mean batch loss, accuracy in percent, multi-class log loss).
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_probs = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc=desc):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            running_loss += criterion(outputs, labels).item()

            # Accuracy
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            # Probabilities for the log loss
            probs = F.softmax(outputs, dim=1)
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_loss = running_loss / len(loader)
    accuracy = 100 * correct / total
    # Passing `labels` keeps the metric defined even if a class is absent here.
    logloss = log_loss(all_labels, all_probs, labels=list(range(num_classes)))
    return avg_loss, accuracy, logloss


model = TResNet(num_classes=len(class_names)).to(device)
best_logloss = float('inf')

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])

# Training and validation loop
for epoch in range(CFG['EPOCHS']):
    epoch_tag = f"[Epoch {epoch+1}/{CFG['EPOCHS']}]"

    avg_train_loss = _train_one_epoch(
        model, train_loader, criterion, optimizer, device,
        desc=f"{epoch_tag} Training",
    )
    avg_val_loss, val_accuracy, val_logloss = _validate(
        model, val_loader, criterion, device, len(class_names),
        desc=f"{epoch_tag} Validation",
    )

    # wandb
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": avg_train_loss,
        "val_loss": avg_val_loss,
        "val_accuracy": val_accuracy,
        "val_logloss": val_logloss
    })

    # Print the results
    print(f"Train Loss : {avg_train_loss:.4f} || Valid Loss : {avg_val_loss:.4f} | Valid Accuracy : {val_accuracy:.4f}%")

    # Save the best model (lowest validation log loss so far)
    if val_logloss < best_logloss:
        best_logloss = val_logloss
        torch.save(model.state_dict(), 'best_model.pth')
        print(f"üì¶ Best model saved at epoch {epoch+1} (logloss: {val_logloss:.4f})")

[Epoch 1/10] Training:   3%|‚ñé         | 28/829 [00:39<19:01,  1.43s/it]

# Inference

In [None]:
# Unlabelled test images, preprocessed exactly like the validation images.
test_dataset = CustomImageDataset(
    root_dir=test_root,
    transform=val_transform,
    is_test=True,
)
# shuffle=False keeps predictions in the dataset's (sorted file name) order.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
)

In [None]:
# Load the saved model
model = TResNet(num_classes=len(class_names))
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.to(device)

# Inference
model.eval()
batch_probs = []

with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        probs = F.softmax(outputs, dim=1)
        # Keep each batch as one array instead of extracting every probability
        # with .item(), which costs num_classes Python calls per image.
        batch_probs.append(probs.cpu().numpy())

if batch_probs:
    all_test_probs = np.concatenate(batch_probs, axis=0)
else:
    # Empty loader: still produce a frame with the expected columns.
    all_test_probs = np.empty((0, len(class_names)))

# One row per test image, one column per class. The cast to float64 matches the
# dtype produced when the frame was built from Python floats.
pred = pd.DataFrame(all_test_probs.astype(np.float64), columns=class_names)

# Submission

In [None]:
# The competition's sample file serves as the template for the submission.
submission = pd.read_csv(
    '/kaggle/input/car-classification/sample_submission.csv',
    encoding='utf-8-sig',
)

# Every column after the first ('ID') is a class column; put the predictions
# into the same column order as the template.
class_columns = submission.columns[1:]
pred = pred.loc[:, class_columns]

# Rows are copied positionally into the template, then the file is written.
submission[class_columns] = pred.to_numpy()
submission.to_csv('baseline_submission.csv', index=False, encoding='utf-8-sig')