# Import

In [1]:
import os
import random

import pandas as pd
import numpy as np

from PIL import Image
from tqdm import tqdm

from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch import nn, optim

from sklearn.metrics import log_loss
import wandb

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [2]:
!pip install inplace-abn



In [3]:
!git clone https://github.com/Alibaba-MIIL/TResNet
%cd TResNet

fatal: destination path 'TResNet' already exists and is not an empty directory.
/kaggle/working/TResNet


# Hyperparameter Setting

In [None]:
# Central place for every tunable value used by the notebook.
CFG = dict(
    IMG_SIZE=368,        # side length (pixels) images are resized to
    BATCH_SIZE=32,       # samples per DataLoader batch
    EPOCHS=10,           # number of passes over the training split
    LEARNING_RATE=1e-4,  # learning rate handed to the optimizer
    SEED=42,             # seed passed to seed_everything
)

In [None]:
# Authenticate the Weights & Biases CLI so the `wandb.init` call below can reach the API.
# The key is taken from the environment/netrc, so no credential is stored in the notebook.
!wandb login

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Open a Weights & Biases run for this experiment.
# Passing CFG as `config` attaches the hyperparameters to the run.
wandb_run_kwargs = {
    'entity': 'Dacon_Car',
    'project': "car-classification",
    'name': 'TResNet',
    'config': CFG,
}
wandb.init(**wandb_run_kwargs)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msingiri129[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Fix Random Seed

In [7]:
def seed_everything(seed):
    """Seed every random number generator the notebook uses.

    Seeds Python's `random`, NumPy's global RNG and PyTorch (CPU and CUDA), exports
    `PYTHONHASHSEED`, and switches cuDNN to deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Python / NumPy level randomness.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU first, then CUDA).
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # Ask cuDNN for deterministic kernels and disable its auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the seed using the value configured in CFG

# CustomDataset

In [None]:
import os
from PIL import Image
import numpy as np # NumPy 임포트 추가
from torch.utils.data import Dataset
# albumentations와 ToTensorV2 임포트는 Dataset 클래스 외부에서 이루어져야 합니다.
# import albumentations as A
# from albumentations.pytorch import ToTensorV2

class CustomImageDataset(Dataset):
    """Image dataset read from a directory tree.

    Two layouts are supported:

    * ``is_test=False``: ``root_dir/<class_name>/<image file>``. Every sub-directory of
      ``root_dir`` is a class; class indices follow the sorted directory names.
    * ``is_test=True``: ``root_dir/<image file>``. No labels are available.

    Only files whose (case-insensitive) extension is in ``IMAGE_EXTENSIONS`` are used.
    Directory listings are sorted, so the order of ``samples`` is identical on every
    scan; index-based splits therefore stay valid across dataset instances.

    Args:
        root_dir: Directory to scan.
        transform: Optional callable invoked as ``transform(image=ndarray)`` that returns
            a mapping with an ``'image'`` entry (the Albumentations calling convention).
        is_test: Whether ``root_dir`` is a flat, unlabelled image folder.

    Attributes set by ``__init__``:
        samples: ``[(path, label), ...]`` for labelled data, ``[(path,), ...]`` for test data.
        classes / class_to_idx: Only for labelled data; class names and their indices.
    """

    # Lower-case file extensions that are treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif')

    def __init__(self, root_dir, transform=None, is_test=False):
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test
        self.samples = []

        if is_test:
            # Test set: store image paths only, no labels.
            self.samples = [(img_path,) for img_path in self._list_images(root_dir)]
        else:
            # Training set: one class per sub-directory. Stray files in the root
            # (e.g. a CSV or .DS_Store) must not receive a class index, otherwise every
            # following label shifts and the number of classes is inflated.
            self.classes = sorted(
                entry for entry in os.listdir(root_dir)
                if os.path.isdir(os.path.join(root_dir, entry))
            )
            self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}

            for cls_name in self.classes:
                label = self.class_to_idx[cls_name]
                cls_folder = os.path.join(root_dir, cls_name)
                self.samples.extend(
                    (img_path, label) for img_path in self._list_images(cls_folder)
                )

    @classmethod
    def _list_images(cls, folder):
        """Return the image file paths directly inside ``folder``, sorted by file name."""
        return [
            os.path.join(folder, fname)
            for fname in sorted(os.listdir(folder))
            if fname.lower().endswith(cls.IMAGE_EXTENSIONS)
        ]

    def _load_image(self, img_path):
        """Load ``img_path`` as an RGB array and apply the transform, if any."""
        # The context manager closes the file handle as soon as the pixels are copied.
        with Image.open(img_path) as img:
            image = np.array(img.convert('RGB'))

        if self.transform is not None:
            # Albumentations returns a dict; the transformed image is under 'image'.
            image = self.transform(image=image)['image']
        return image

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``image`` for test data or ``(image, label)`` for labelled data."""
        sample = self.samples[idx]
        image = self._load_image(sample[0])
        if self.is_test:
            return image
        return image, sample[1]

# Data Load

In [9]:
# Kaggle input folders holding the labelled training images and the unlabelled test images.
train_root, test_root = (
    '/kaggle/input/car-classification/train',
    '/kaggle/input/car-classification/test',
)

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2 # PyTorch 텐서로 변환하기 위함
import numpy as np # Albumentations는 NumPy 배열을 입력으로 받습니다.
from PIL import Image # 이미지 로딩을 위한 라이브러리

def _build_resize_normalize_transform(img_size):
    """Return the Albumentations pipeline Resize -> Normalize -> ToTensorV2.

    Args:
        img_size: Side length (pixels) of the square output image.

    Notes:
        * Albumentations interprets `interpolation` as an OpenCV flag. PIL's
          `Image.BILINEAR` has the integer value 2, which OpenCV reads as *cubic*
          interpolation, so the argument is left out and the Albumentations default
          (bilinear) is used.
        * The image is stretched to a square, so the aspect ratio is not preserved.
          For letterboxing instead, replace `A.Resize` with `A.LongestMaxSize` followed
          by `A.PadIfNeeded`; placing those two *after* a square resize has no effect.
    """
    return A.Compose([
        A.Resize(height=img_size, width=img_size),
        # Same mean/std as the usual torchvision Normalize, applied to 0-255 pixel values.
        A.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                    max_pixel_value=255.0),
        # Converts the HWC array into a CHW PyTorch tensor.
        ToTensorV2(),
    ])


# Training currently uses no augmentation, so both splits share the same preprocessing.
train_transform = _build_resize_normalize_transform(CFG['IMG_SIZE'])
val_transform = _build_resize_normalize_transform(CFG['IMG_SIZE'])

In [13]:
def _with_transform(dataset, transform):
    """Return a shallow copy of `dataset` that shares its sample list but applies `transform`."""
    import copy

    clone = copy.copy(dataset)
    clone.transform = transform
    return clone


# Scan the training directory once. The train and validation views below share this sample
# list, so the split indices are guaranteed to refer to the same files in both.
full_dataset = CustomImageDataset(train_root, transform=None)
print(f"총 이미지 수: {len(full_dataset)}")

targets = [label for _, label in full_dataset.samples]
class_names = full_dataset.classes

# Stratified split, seeded from CFG so the whole notebook is driven by a single seed.
train_idx, val_idx = train_test_split(
    range(len(targets)), test_size=0.2, stratify=targets, random_state=CFG['SEED']
)
assert len(train_idx) + len(val_idx) == len(full_dataset), "split lost or duplicated samples"

# One view per split, each with its own transform.
train_dataset = Subset(_with_transform(full_dataset, train_transform), train_idx)
val_dataset = Subset(_with_transform(full_dataset, val_transform), val_idx)
print(f'train 이미지 수: {len(train_dataset)}, valid 이미지 수: {len(val_dataset)}')


# DataLoaders: shuffle only the training split.
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

총 이미지 수: 33137
train 이미지 수: 26509, valid 이미지 수: 6628


# Model Define

In [None]:
from src.models.tresnet_v2.tresnet_v2 import TResnetL_V2 as TResnetL368


class TResNet(nn.Module):
    """TResNet-L (V2) backbone with a fresh linear classification head.

    The backbone is built with the head size of the pretrained checkpoint so that the
    checkpoint's state dict loads, then its own head is replaced by `nn.Identity` and a
    new `nn.Linear(feature_dim, num_classes)` is attached.

    Args:
        num_classes: Number of output classes of the new head.
        weights_path: Checkpoint file holding the pretrained backbone under the key
            ``'model'``. Defaults to ``DEFAULT_WEIGHTS_PATH``.
        pretrained: When False the checkpoint is not read; use this when a full
            fine-tuned state dict is loaded into the model right after construction.
    """

    # Head size the backbone is constructed with before the checkpoint is loaded.
    # NOTE(review): presumably the class count of the Stanford Cars checkpoint below; confirm.
    PRETRAINED_NUM_CLASSES = 196
    DEFAULT_WEIGHTS_PATH = "/kaggle/input/tresnet-stanford-cars-pretrained/stanford_cars_tresnet-l-v2_96_27.pth"

    def __init__(self, num_classes, weights_path=None, pretrained=True):
        super().__init__()
        self.backbone = TResnetL368({'num_classes': self.PRETRAINED_NUM_CLASSES})

        if pretrained:
            if weights_path is None:
                weights_path = self.DEFAULT_WEIGHTS_PATH
            # Load onto the CPU so the checkpoint can be read on machines without CUDA;
            # the caller moves the assembled model to its device afterwards.
            checkpoint = torch.load(weights_path, map_location="cpu")
            self.backbone.load_state_dict(checkpoint['model'])

        self.feature_dim = self.backbone.num_features
        self.backbone.head = nn.Identity()  # use the backbone as a feature extractor only
        self.head = nn.Linear(self.feature_dim, num_classes)  # new classifier

    def forward(self, x):
        """Return the class logits for the image batch `x`."""
        features = self.backbone(x)
        return self.head(features)

# Train / Validation

In [None]:
def _train_one_epoch(model, loader, criterion, optimizer, device, desc):
    """Run one optimisation pass over `loader` and return the mean loss per sample.

    Assumes `criterion` returns the batch mean (the default for `nn.CrossEntropyLoss`);
    each batch loss is weighted by its size so a smaller final batch is not over-counted.
    """
    model.train()
    loss_sum, seen = 0.0, 0
    for images, labels in tqdm(loader, desc=desc):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        seen += batch_size
    return loss_sum / seen


def _evaluate(model, loader, criterion, device, num_classes, desc):
    """Evaluate `model` on `loader`; return (mean loss per sample, accuracy in %, log loss)."""
    model.eval()
    loss_sum, correct, seen = 0.0, 0, 0
    prob_batches, label_batches = [], []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc=desc):
            images, labels = images.to(device), labels.to(device)
            logits = model(images)

            batch_size = labels.size(0)
            loss_sum += criterion(logits, labels).item() * batch_size
            correct += (logits.argmax(dim=1) == labels).sum().item()
            seen += batch_size

            # Keep the class probabilities for the log-loss computation.
            prob_batches.append(F.softmax(logits, dim=1).cpu().numpy())
            label_batches.append(labels.cpu().numpy())

    logloss = log_loss(
        np.concatenate(label_batches),
        np.concatenate(prob_batches),
        labels=list(range(num_classes)),
    )
    return loss_sum / seen, 100 * correct / seen, logloss


model = TResNet(num_classes=len(class_names)).to(device)
best_logloss = float('inf')

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])

# Training and validation loop
for epoch in range(CFG['EPOCHS']):
    epoch_tag = f"[Epoch {epoch+1}/{CFG['EPOCHS']}]"

    avg_train_loss = _train_one_epoch(
        model, train_loader, criterion, optimizer, device, desc=f"{epoch_tag} Training"
    )
    avg_val_loss, val_accuracy, val_logloss = _evaluate(
        model, val_loader, criterion, device, len(class_names), desc=f"{epoch_tag} Validation"
    )

    # wandb
    wandb.log({
        "train_loss": avg_train_loss,
        "val_loss": avg_val_loss,
        "val_accuracy": val_accuracy,
        "val_logloss": val_logloss
    })

    # Report the epoch results
    print(f"Train Loss : {avg_train_loss:.4f} || Valid Loss : {avg_val_loss:.4f} | Valid Accuracy : {val_accuracy:.4f}%")

    # Keep the checkpoint with the lowest validation log loss
    if val_logloss < best_logloss:
        best_logloss = val_logloss
        torch.save(model.state_dict(), 'best_model.pth')
        print(f"📦 Best model saved at epoch {epoch+1} (logloss: {val_logloss:.4f})")

[Epoch 1/10] Training:   3%|▎         | 28/829 [00:39<19:01,  1.43s/it]

# Inference

In [None]:
# Unlabelled test images go through the validation preprocessing; shuffling stays off so
# the prediction order follows the dataset order.
test_dataset = CustomImageDataset(root_dir=test_root, transform=val_transform, is_test=True)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
)

In [None]:
# Load the best checkpoint saved during training
model = TResNet(num_classes=len(class_names))
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.to(device)

# Inference
model.eval()
prob_batches = []

with torch.no_grad():
    for images in test_loader:
        logits = model(images.to(device))
        prob_batches.append(F.softmax(logits, dim=1).cpu().numpy())

# Stack the per-batch probabilities into one (num_images, num_classes) array and build the
# frame in a single step instead of one Python dict per image. The cast to float64 keeps
# the stored values identical to what per-element `.item()` calls would produce.
if prob_batches:
    all_probs = np.concatenate(prob_batches).astype(np.float64)
else:
    all_probs = np.empty((0, len(class_names)), dtype=np.float64)

pred = pd.DataFrame(all_probs, columns=class_names)

# Submission

In [None]:
submission = pd.read_csv('/kaggle/input/car-classification/sample_submission.csv', encoding='utf-8-sig')

# Every column after the first one (the ID column) is a class column.
class_columns = submission.columns[1:]

# Fail early with a readable message instead of a bare KeyError / shape error.
missing_columns = [col for col in class_columns if col not in pred.columns]
if missing_columns:
    raise ValueError(
        f"{len(missing_columns)} submission columns have no prediction, e.g. {missing_columns[:5]}"
    )
if len(pred) != len(submission):
    raise ValueError(
        f"Row count mismatch: {len(pred)} predictions vs {len(submission)} submission rows"
    )

# Put the prediction columns in the order of the submission file.
pred = pred[class_columns]

# Rows are matched by position.
# NOTE(review): this assumes the sample submission lists the test images in the same
# (sorted file name) order the test dataset was built in; confirm against the ID column.
submission[class_columns] = pred.values
submission.to_csv('baseline_submission.csv', index=False, encoding='utf-8-sig')