<a href="https://colab.research.google.com/github/Hanbin-git/practice/blob/main/feature_color%2BRandAugment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import shutil
import zipfile

# 1. Drive에서 로컬로 복사 (빠름)
shutil.copy("/content/drive/MyDrive/open.zip", "/content/open.zip")

# 2. 압축 풀기
with zipfile.ZipFile("/content/open.zip", "r") as zip_ref:
    zip_ref.extractall("/content/")

# 3. 확인
!ls /content/train


컨티넨탈_10세대_2017_2019	     A_클래스_W177_2020_2025
어코드_10세대_2018_2022		     B_클래스_W246_2013_2018
1시리즈_F20_2013_2015		     뉴_스타일_코란도_C_2017_2019
1시리즈_F20_2016_2019		     프리우스_C_2018_2020
1시리즈_F40_2020_2024		     뉴_CC_2012_2016
그랜드카니발_2006_2010		     CLA_클래스_C117_2014_2019
2008_2015_2017			     CLA_클래스_C118_2020_2025
에쿠스_신형_2010_2015		     CLE_클래스_C236_2024_2025
파나메라_2010_2016		     CLS_클래스_C257_2019_2023
뉴_제타_2011_2016		     CLS_클래스_W218_2012_2017
뉴_카이엔_2011_2018		     아반떼_하이브리드_CN7_2021_2023
엑센트_신형_2011_2019		     아반떼_CN7_2021_2023
스파크_2012_2015		     더_뉴_아반떼_CN7_2023_2025
쿠퍼_컨트리맨_2012_2015		     CT6_2016_2018
올_뉴_모닝_2012_2015		     C_클래스_W204_2008_2015
말리부_2012_2016		     C_클래스_W205_2015_2021
아베오_2012_2016		     C_클래스_W206_2022_2024
뉴_티구안_2012_2016		     제네시스_DH_2014_2016
레이_2012_2017			     쏘나타_DN8_2020_2023
올란도_2012_2018		     쏘나타_디_엣지_DN8_2024_2025
더_뉴_파사트_2012_2019		     e_트론_2020_2023
트랙스_2013_2016		     E_PACE_2018_2020
콰트로포르테_2014_2016		     EQ900_2016_2018
더_뉴_아반떼_2014_2016		     E

In [3]:
import os
import glob
import torch
import timm
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.model_selection import StratifiedKFold


In [4]:
# train 경로
train_dir = '/content/train'

# test 경로
test_dir = '/content/test'

# sample_submission (추론 때 사용)
sample_submission_path = '/content/sample_submission.csv'


In [5]:
import torch
import gc

def show_memory_status():
    allocated = torch.cuda.memory_allocated() / (1024 ** 2)  # MB 단위
    reserved = torch.cuda.memory_reserved() / (1024 ** 2)    # MB 단위
    print(f"📊 현재 GPU 메모리 상태: Allocated = {allocated:.2f} MB | Reserved = {reserved:.2f} MB")

# 현재 CUDA 사용 가능 여부 확인
if torch.cuda.is_available():
    print("🔍 초기화 전 GPU 메모리 상태:")
    show_memory_status()

    # GPU 캐시 및 메모리 초기화
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    gc.collect()

    print("\n🧹 GPU 메모리 초기화 완료")
    print("🔍 초기화 후 GPU 메모리 상태:")
    show_memory_status()
else:
    print("❌ CUDA 사용 불가")


🔍 초기화 전 GPU 메모리 상태:
📊 현재 GPU 메모리 상태: Allocated = 0.00 MB | Reserved = 0.00 MB

🧹 GPU 메모리 초기화 완료
🔍 초기화 후 GPU 메모리 상태:
📊 현재 GPU 메모리 상태: Allocated = 0.00 MB | Reserved = 0.00 MB


In [6]:
import torch
from torch.utils.data import Dataset
from PIL import Image
import cv2
import numpy as np
import os

class CarImageDataset(Dataset):
    def __init__(self, file_list, class_to_idx, transform=None, use_aspect=False, use_color=False):
        self.file_list = file_list
        self.class_to_idx = class_to_idx
        self.transform = transform
        self.use_aspect = use_aspect
        self.use_color = use_color

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        path = self.file_list[idx]

        # 🚗 Step 1: Load Image (RGB 고정)
        image_pil = Image.open(path).convert("RGB")

        # 🚗 Step 2: Calculate Features if needed
        width, height = image_pil.size
        aspect_ratio = np.array([width / height], dtype=np.float32)

        image_cv2 = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
        color_mean = image_cv2.mean(axis=(0, 1))
        color_mean = color_mean[::-1]
        color_mean = np.array(color_mean / 255.0, dtype=np.float32)

        # 🚗 Step 3: Apply Transform
        if self.transform:
            image = self.transform(image_pil)
        else:
            image = transforms.ToTensor()(image_pil)  # fallback

        # 🚗 Step 4: Extract label
        class_name = os.path.basename(os.path.dirname(path))
        label = self.class_to_idx[class_name]

        # 🚗 Step 5: Return according to mode
        if self.use_aspect and self.use_color:
            return image, torch.tensor(aspect_ratio), torch.tensor(color_mean), label
        elif self.use_aspect:
            return image, torch.tensor(aspect_ratio), label
        elif self.use_color:
            return image, torch.tensor(color_mean), label
        else:
            return image, label


In [7]:
import os
import glob
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import RandAugment
from PIL import Image
from sklearn.model_selection import train_test_split

# ✅ 실험 이름
EXPERIMENT = "Baseline_RandAug"

# ✅ 파일 리스트 로딩
file_list = glob.glob('/content/train/*/*.jpg')

# ✅ 클래스명 추출 함수
def extract_class_name_jpg(path):
    return os.path.basename(os.path.dirname(path))

# ✅ 클래스 인덱스 매핑
class_names = sorted(set(extract_class_name_jpg(f) for f in file_list))
class_to_idx = {cls: idx for idx, cls in enumerate(class_names)}

print(f"✅ 클래스 수: {len(class_to_idx)}")  # 396 expected

# ✅ 라벨 리스트
labels = [class_to_idx[extract_class_name_jpg(f)] for f in file_list]

# ✅ Train/Val Split
train_files, val_files = train_test_split(file_list, test_size=0.1, stratify=labels, random_state=42)

# ✅ Step2 기준 Transform (기본 + RandAug 포함)
IMG_SIZE = 456
mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    RandAugment(num_ops=2, magnitude=9),  # ✅ RandAug 추가
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# ✅ Dataset 클래스
class CarImageDataset(Dataset):
    def __init__(self, file_list, class_to_idx, transform=None, use_aspect=False, use_color=False):
        self.file_list = file_list
        self.class_to_idx = class_to_idx
        self.transform = transform
        self.use_aspect = use_aspect
        self.use_color = use_color

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        path = self.file_list[idx]

        # Load image
        image_pil = Image.open(path).convert("RGB")

        # Feature: aspect ratio
        width, height = image_pil.size
        aspect_ratio = torch.tensor([width / height], dtype=torch.float32)

        # Feature: RGB 평균값 (color mean)
        image_np = np.array(image_pil)
        color_mean = image_np.mean(axis=(0, 1)) / 255.0
        color_mean = torch.tensor(color_mean, dtype=torch.float32)

        # Transform
        assert self.transform is not None, "transform은 반드시 지정되어야 합니다"
        image = self.transform(image_pil)

        # Label
        class_name = extract_class_name_jpg(path)
        label = self.class_to_idx[class_name]

        # 반환 조건에 따라 조정
        if self.use_aspect and self.use_color:
            return image, aspect_ratio, color_mean, label
        elif self.use_aspect:
            return image, aspect_ratio, label
        elif self.use_color:
            return image, color_mean, label
        else:
            return image, label

# ✅ 실험 설정 (Step2: RandAugment)
USE_ASPECT = False
USE_COLOR = True

print(f"🚀 실험 설정: {EXPERIMENT} (Aspect={USE_ASPECT}, Color={USE_COLOR})")

# ✅ Dataset 및 DataLoader
train_dataset = CarImageDataset(train_files, class_to_idx, train_transform, use_aspect=USE_ASPECT, use_color=USE_COLOR)
val_dataset   = CarImageDataset(val_files, class_to_idx, val_transform, use_aspect=USE_ASPECT, use_color=USE_COLOR)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True,
                          num_workers=4, pin_memory=True, persistent_workers=True, prefetch_factor=2)

val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False,
                        num_workers=4, pin_memory=True, persistent_workers=True, prefetch_factor=2)


✅ 클래스 수: 396
🚀 실험 설정: Baseline_RandAug (Aspect=False, Color=True)


In [8]:
import torch
import torch.nn as nn
import timm

# ✅ 실험명
EXPERIMENT = "Baseline_RandAug"

class CustomModel(nn.Module):
    def __init__(self, use_aspect, use_color, num_classes):
        super(CustomModel, self).__init__()

        self.use_aspect = use_aspect
        self.use_color = use_color

        # ✅ EfficientNet-B5 (no drop_connect here)
        self.backbone = timm.create_model(
            'tf_efficientnet_b5',
            pretrained=True,
            num_classes=0
        )
        backbone_out_features = self.backbone.num_features

        # ✅ 메타 feature 차원 계산
        meta_features_dim = 0
        if self.use_aspect:
            meta_features_dim += 1
        if self.use_color:
            meta_features_dim += 3

        # ✅ Classifier
        self.classifier = nn.Sequential(
            nn.Linear(backbone_out_features + meta_features_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p=0.4),  # Step1의 Dropout 유지
            nn.Linear(512, num_classes)
        )

    def forward(self, image, aspect_ratio=None, color_mean=None):
        x = self.backbone(image)

        aux_list = []
        if self.use_aspect:
            aux_list.append(aspect_ratio)
        if self.use_color:
            aux_list.append(color_mean)

        if aux_list:
            aux_features = torch.cat(aux_list, dim=1)
            x = torch.cat([x, aux_features], dim=1)

        return self.classifier(x)

# ✅ 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ 실험 설정 (RandAug 실험)
USE_ASPECT = False
USE_COLOR = True
NUM_CLASSES = 396

# ✅ 모델 생성
model = CustomModel(USE_ASPECT, USE_COLOR, NUM_CLASSES).to(device)

# ✅ Step2 전략: backbone은 freeze, FC만 학습
for param in model.backbone.parameters():
    param.requires_grad = False
for param in model.classifier.parameters():
    param.requires_grad = True


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/122M [00:00<?, ?B/s]

In [9]:
criterion = nn.CrossEntropyLoss()

# AdamW + weight_decay 추가 추천 (EffNet 계열에 많이 사용)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)


In [None]:
# EfficientNet-B5 with Color Feature + Dropout
# Strategy: Baseline + Dropout + EarlyStopping + KFold (5)

import os
import glob
import copy
import torch
import numpy as np
from tqdm import tqdm
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
import torch.nn as nn
import timm

# ✅ Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ File paths and labels
file_list = glob.glob('/content/train/*/*.jpg')
def extract_class_name_jpg(path):
    return os.path.basename(os.path.dirname(path))

class_names = sorted(set(extract_class_name_jpg(f) for f in file_list))
class_to_idx = {cls: idx for idx, cls in enumerate(class_names)}
labels = [class_to_idx[extract_class_name_jpg(f)] for f in file_list]

# ✅ Transforms
IMG_SIZE = 456
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# ✅ Feature extractors
def compute_aspect_ratio(path):
    with Image.open(path) as img:
        w, h = img.size
        return w / h

def compute_dominant_color(path):
    with Image.open(path).convert("RGB") as img:
        img = img.resize((16, 16))
        np_img = np.array(img) / 255.0
        return np_img.mean(axis=(0, 1))

# ✅ Dataset
class CarJPGDataset(Dataset):
    def __init__(self, file_list, class_to_idx, transform=None, use_aspect=False, use_color=False):
        self.file_list = file_list
        self.class_to_idx = class_to_idx
        self.transform = transform
        self.use_aspect = use_aspect
        self.use_color = use_color

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        path = self.file_list[idx]
        image = Image.open(path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = self.class_to_idx[extract_class_name_jpg(path)]

        meta_features = []
        if self.use_aspect:
            meta_features.append(compute_aspect_ratio(path))
        if self.use_color:
            meta_features.extend(compute_dominant_color(path).tolist())

        meta_features = torch.tensor(meta_features if meta_features else [0.0, 0.0, 0.0], dtype=torch.float32)
        return image, meta_features, label

# ✅ Model
class CustomModel(nn.Module):
    def __init__(self, use_aspect, use_color, num_classes):
        super().__init__()
        self.use_aspect = use_aspect
        self.use_color = use_color

        self.backbone = timm.create_model('tf_efficientnet_b5', pretrained=True, num_classes=0)
        for param in self.backbone.parameters():
            param.requires_grad = False

        meta_dim = (1 if use_aspect else 0) + (3 if use_color else 0)
        self.classifier = nn.Sequential(
            nn.Linear(self.backbone.num_features + meta_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, num_classes)
        )

    def forward(self, image, aspect_ratio=None, color_mean=None):
        x = self.backbone(image)
        aux = []
        if self.use_aspect:
            aux.append(aspect_ratio)
        if self.use_color:
            aux.append(color_mean)
        if aux:
            x = torch.cat([x] + aux, dim=1)
        return self.classifier(x)

# ✅ Configs
EXPERIMENT = "C"
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
use_aspect = False
use_color = True

# ✅ Training loop
for fold, (train_idx, val_idx) in enumerate(skf.split(file_list, labels)):
    print(f"\n📂 Fold {fold + 1}/5")

    train_files = [file_list[i] for i in train_idx]
    val_files = [file_list[i] for i in val_idx]

    train_dataset = CarJPGDataset(train_files, class_to_idx, train_transform, use_aspect, use_color)
    val_dataset = CarJPGDataset(val_files, class_to_idx, val_transform, use_aspect, use_color)

    train_loader = DataLoader(train_dataset, batch_size=96, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=96, shuffle=False, num_workers=4, pin_memory=True)

    model = CustomModel(use_aspect, use_color, num_classes=396).to(device)
    for param in model.classifier.parameters():
        param.requires_grad = True

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

    best_val_loss = float('inf')
    patience = 3
    patience_counter = 0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(1, 31):
        print(f"\n🔄 Epoch {epoch}")

        model.train()
        train_loss, train_correct = 0.0, 0
        for X, meta, y in tqdm(train_loader, desc="Train", leave=False):
            X, meta, y = X.to(device), meta.to(device), y.to(device)
            outputs = model(X, color_mean=meta)
            loss = criterion(outputs, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * X.size(0)
            train_correct += (outputs.argmax(1) == y).sum().item()

        train_loss /= len(train_loader.dataset)
        train_acc = train_correct / len(train_loader.dataset)

        model.eval()
        val_loss, val_correct = 0.0, 0
        with torch.no_grad():
            for X, meta, y in tqdm(val_loader, desc="Valid", leave=False):
                X, meta, y = X.to(device), meta.to(device), y.to(device)
                outputs = model(X, color_mean=meta)
                loss = criterion(outputs, y)
                val_loss += loss.item() * X.size(0)
                val_correct += (outputs.argmax(1) == y).sum().item()

        val_loss /= len(val_loader.dataset)
        val_acc = val_correct / len(val_loader.dataset)

        print(f"✅ Fold {fold+1} | Epoch {epoch} | Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
        print(f"✅ Fold {fold+1} | Epoch {epoch} | Val   Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            save_path = f"/content/drive/MyDrive/team_models/EffNetB5_{EXPERIMENT}_fold{fold+1}.pth"
            torch.save(model.state_dict(), save_path)
            print(f"📦 Model saved for Fold {fold+1}")
            patience_counter = 0
        else:
            patience_counter += 1
            print(f"⚠️ EarlyStopping patience: {patience_counter}/{patience}")
            if patience_counter >= patience:
                print("⛔ Early stopping")
                break

    model.load_state_dict(best_model_wts)
    print(f"🎯 Best model for Fold {fold+1} loaded.")



📂 Fold 1/5

🔄 Epoch 1


Train:  23%|██▎       | 65/277 [02:33<08:16,  2.34s/it]

In [None]:
import os
import numpy as np
import torch
import torch.nn.functional as F
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import timm
from torchvision import transforms
from PIL import Image

# ✅ 고정 경로
TEST_DIR = "/content/test"
SAMPLE_SUB_PATH = "/content/sample_submission.csv"
NUM_CLASSES = 396

# ✅ 샘플 제출 파일에서 클래스명 추출
sample = pd.read_csv(SAMPLE_SUB_PATH)
column_names = sample.columns.tolist()[1:]  # 'ID' 제외

# ✅ Transform (Step2 기준: 456x456 + RandAugment)
transform = transforms.Compose([
    transforms.Resize((456, 456)),
    transforms.RandAugment(),  # Step2 핵심 추가
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# ✅ Dominant Color 계산 함수
def compute_dominant_color(path):
    with Image.open(path).convert("RGB") as img:
        img = img.resize((16, 16))
        np_img = np.array(img) / 255.0
        mean_color = np_img.mean(axis=(0, 1))
        return mean_color  # (3,)

# ✅ 테스트용 Dataset
class TestJPGDatasetWithColor(Dataset):
    def __init__(self, img_root, transform=None):
        self.file_list = sorted([
            os.path.join(img_root, f) for f in os.listdir(img_root) if f.endswith('.jpg')
        ])
        self.transform = transform

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        path = self.file_list[idx]
        image = Image.open(path).convert('RGB')
        color_mean = compute_dominant_color(path)
        color_mean = torch.tensor(color_mean, dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        fname = os.path.basename(path).replace(".jpg", "")
        return image, color_mean, fname

# ✅ DataLoader 설정
test_dataset = TestJPGDatasetWithColor(TEST_DIR, transform)
test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=6,
    pin_memory=True,
    prefetch_factor=4
)

# ✅ 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ 실험명 및 모델 정의 import
exp_name = "step2"  # 변경됨
from model import CustomModel  # 반드시 학습에 사용한 CustomModel과 동일하게 정의되어 있어야 함

FOLD_MODEL_PATHS = [
    f"/content/drive/MyDrive/team_models/EffNetB5_{exp_name}_fold{fold}.pth"
    for fold in range(1, 6)
]

ensemble_outputs = []

for fold_idx, model_path in enumerate(FOLD_MODEL_PATHS):
    print(f"\n🚀 Inference with Fold {fold_idx + 1} Model: {model_path}")

    model = CustomModel(use_aspect=False, use_color=True, num_classes=NUM_CLASSES)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()

    fold_probs = []
    with torch.no_grad():
        for imgs, color_mean, names in tqdm(test_loader, desc=f"🔍 Fold {fold_idx + 1} Inference"):
            imgs = imgs.to(device)
            color_mean = color_mean.to(device)
            outputs = model(imgs, color_mean=color_mean)
            probs = F.softmax(outputs, dim=1)
            fold_probs.append(probs.cpu().numpy())

    fold_probs = np.concatenate(fold_probs, axis=0)
    ensemble_outputs.append(fold_probs)

# ✅ 앙상블 평균 결과 저장
ensemble_outputs = np.mean(np.stack(ensemble_outputs, axis=0), axis=0)

results = []
for idx, path in enumerate(test_dataset.file_list):
    fname = os.path.basename(path).replace(".jpg", "")
    row = {"ID": fname}
    row.update({class_name: ensemble_outputs[idx, i] for i, class_name in enumerate(column_names)})
    results.append(row)

submission_df = pd.DataFrame(results)
submission_df = submission_df[["ID"] + column_names]

SAVE_SUBMISSION_PATH = f"/content/drive/MyDrive/team_models/submission_effb5_step2.csv"
submission_df.to_csv(SAVE_SUBMISSION_PATH, index=False)
print(f"\n✅ 앙상블 서브미션 저장 완료: {SAVE_SUBMISSION_PATH}")
