### Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import cv2
import timm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

# 이미지 증량 및 처리를 위한 라이브러리 ( albumentations )
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from albumentations.core.transforms_interface import ImageOnlyTransform
import torchvision.models as models


# sklearn 
from sklearn.model_selection import train_test_split # train test Split 
from sklearn import preprocessing
from sklearn.metrics import f1_score # f1 score 
from sklearn.metrics import classification_report
from tqdm.auto import tqdm 

import warnings
warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda:1') if torch.cuda.is_available() else torch.device('cpu')
# device = "cpu"

In [3]:
CFG = {
    'IMG_SIZE':448,
    'EPOCHS':50,
    'LEARNING_RATE':1e-5,
    'BATCH_SIZE':8,
    'SEED':1042
}

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

### Data Load

In [5]:
all_img_list = glob.glob('../data/train/*/*')

In [6]:
df = pd.DataFrame(columns=['img_path', 'rock_type'])
df['img_path'] = all_img_list
df['rock_type'] = df['img_path'].apply(lambda x : str(x).split('/')[3])

In [7]:
df["rock_type"].value_counts()

rock_type
Granite           92923
Mud_Sandstone     89467
Gneiss            73914
Andesite          43802
Weathered_Rock    37169
Basalt            26810
Etc               15935
Name: count, dtype: int64

In [8]:
df = df.sample(5000)

In [9]:
df

Unnamed: 0,img_path,rock_type
320509,../data/train/Gneiss/TRAIN_11986.jpg,Gneiss
250677,../data/train/Mud_Sandstone/TRAIN_71853.jpg,Mud_Sandstone
328030,../data/train/Gneiss/TRAIN_15959.jpg,Gneiss
9472,../data/train/Andesite/TRAIN_42719.jpg,Andesite
7622,../data/train/Andesite/TRAIN_26090.jpg,Andesite
...,...,...
22602,../data/train/Andesite/TRAIN_02037.jpg,Andesite
174719,../data/train/Weathered_Rock/TRAIN_02804.jpg,Weathered_Rock
357512,../data/train/Gneiss/TRAIN_32934.jpg,Gneiss
263260,../data/train/Mud_Sandstone/TRAIN_51603.jpg,Mud_Sandstone


In [10]:
# 
train, val, _, _ = train_test_split(df, df['rock_type'], test_size=0.3, stratify=df['rock_type'], random_state=CFG['SEED'])

In [11]:
df["rock_type"].value_counts(normalize=True)

rock_type
Granite           0.2354
Mud_Sandstone     0.2332
Gneiss            0.1980
Andesite          0.1240
Weathered_Rock    0.0968
Basalt            0.0694
Etc               0.0432
Name: proportion, dtype: float64

In [12]:
le = preprocessing.LabelEncoder()
train['rock_type'] = le.fit_transform(train['rock_type'])
val['rock_type'] = le.transform(val['rock_type'])

### Data Preprocessing

In [13]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        
    def __getitem__(self, index):
        img_path = self.img_path_list[index]
        
        image = cv2.imread(img_path)
        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
        
        if self.label_list is not None:
            label = self.label_list[index]
            return image, label
        else:
            return image
        
    def __len__(self):
        return len(self.img_path_list)

In [14]:
model_name = 'eva02_large_patch14_448.mim_m38m_ft_in22k_in1k'
model = timm.create_model(model_name, pretrained=True).to(device)

In [15]:
from timm.data import resolve_data_config
config = resolve_data_config({}, model=model)
CFG['mean'] =config['mean']
CFG['std'] = config['std']
# CFG.interpolation = config.interpolation

In [16]:
train_transform = A.Compose([
    # PadSquare(value=(0, 0, 0)),
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE'] ,interpolation=cv2.INTER_CUBIC),
    A.Normalize(mean=CFG['mean'], std=CFG['std']),
    ToTensorV2()
])

test_transform = A.Compose([
    # PadSquare(value=(0, 0, 0)),
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE'] ,interpolation=cv2.INTER_CUBIC),
    A.Normalize(mean=CFG['mean'], std=CFG['std']),
    ToTensorV2()
])

In [17]:
train_dataset = CustomDataset(train['img_path'].values, train['rock_type'].values, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=4,pin_memory=True,prefetch_factor=2)

val_dataset = CustomDataset(val['img_path'].values, val['rock_type'].values, test_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=4,pin_memory=True,prefetch_factor=2)

In [18]:
class BaseModel(nn.Module):
    def __init__(self, model_name , num_classes=len(le.classes_)):
        super(BaseModel, self).__init__()
        self.backbone = timm.create_model(model_name, pretrained=True).to(device)
        self.classifier = nn.Linear(1000, num_classes)
        
    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        return x

### Train

In [19]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    
    best_score = 0
    best_model = None
    save_path = f"best_model_{model_name}.pth"

    for epoch in range(1, CFG['EPOCHS'] + 1):
        model.train()
        train_loss = []

        for imgs, labels in tqdm(iter(train_loader), desc=f"Epoch {epoch}"):
            imgs = imgs.float().to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = model(imgs)
            # print("output")
            # print(output)
            # print("labels")
            # print(labels)

            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)

        print(f'Epoch [{epoch}], Train Loss: {_train_loss:.5f}, Val Loss: {_val_loss:.5f}, Val Macro F1: {_val_score:.5f}')
        torch.save(model.state_dict(), f"{epoch}_{model_name}_{_val_loss:.5f}.pth")
        if scheduler is not None:
            scheduler.step(_val_score)

        if best_score < _val_score:
            best_score = _val_score
            best_model = model

            # 모델 가중치 저장
            torch.save(model.state_dict(), f"best_{epoch}_{model_name}_{_val_loss:.5f}.pth")
            print(f"Best model saved (epoch {epoch}, F1={_val_score:.4f}) → {save_path}")

    return best_model

In [20]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, true_labels = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)
            
            pred = model(imgs)
            
            loss = criterion(pred, labels)
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += labels.detach().cpu().numpy().tolist()
            
            val_loss.append(loss.item())
        
        _val_loss = np.mean(val_loss)
        _val_score = f1_score(true_labels, preds, average='macro')
    
    return _val_loss, _val_score

### Inference

In [21]:
test = pd.read_csv('../data/test.csv')
test["img_path"] = test["img_path"].apply(lambda x : "../data/test/"+x.split("/")[-1])

In [22]:
test_dataset = CustomDataset(test['img_path'].values, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [23]:
model_name

'eva02_large_patch14_448.mim_m38m_ft_in22k_in1k'

In [24]:
#model load
load_model = BaseModel(model_name , 7)

# load_model = timm.create_model(model_name, pretrained=False).to(device)
load_model.load_state_dict(torch.load("/mnt/sdb/sim/dacon/DDP/3_eva02_large_patch14_448.mim_m38m_ft_in22k_in1k.pth"))
load_model.eval()  # 평가 모드로 전환 (옵션)


BaseModel(
  (backbone): Eva(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (rope): RotaryEmbeddingCat()
    (blocks): ModuleList(
      (0-23): 24 x EvaBlock(
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (attn): EvaAttention(
          (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (norm): Identity()
          (proj): Linear(in_features=1024, out_features=1024, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path1): Identity()
        (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (mlp): SwiGLU(
          (fc1_g): Linear(in_fe

In [25]:
load_model.cuda("cuda:1")

BaseModel(
  (backbone): Eva(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (rope): RotaryEmbeddingCat()
    (blocks): ModuleList(
      (0-23): 24 x EvaBlock(
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (attn): EvaAttention(
          (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (norm): Identity()
          (proj): Linear(in_features=1024, out_features=1024, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path1): Identity()
        (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (mlp): SwiGLU(
          (fc1_g): Linear(in_fe

In [26]:
def inference(model, test_loader, device):
    model.eval()
    preds = []
    with torch.no_grad():
        for imgs in tqdm(iter(test_loader)):
            imgs = imgs.float().to(device)
            
            pred = model(imgs)
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
    
    preds = le.inverse_transform(preds)
    return preds

In [None]:
preds = inference(load_model, test_loader, device)
#  2%|▏         | 61/2969 [01:00<47:51,  1.01it/s]

  0%|          | 0/11876 [00:00<?, ?it/s]

  1%|          | 98/11876 [00:24<48:27,  4.05it/s] 

### Submission

In [None]:
submit = pd.read_csv('/mnt/sdb/sim/dacon/data/sample_submission.csv')

In [None]:
submit['rock_type'] = preds

In [None]:
submit.to_csv('./500개_05_submit_eva02_448.csv', index=False)