### Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import cv2
import timm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

# 이미지 증량 및 처리를 위한 라이브러리 ( albumentations )
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from albumentations.core.transforms_interface import ImageOnlyTransform
import torchvision.models as models


# sklearn 
from sklearn.model_selection import train_test_split # train test Split 
from sklearn import preprocessing
from sklearn.metrics import f1_score # f1 score 
from sklearn.metrics import classification_report
from tqdm.auto import tqdm 

import warnings
warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
# device = "cpu"

In [3]:
CFG = {
    'IMG_SIZE':448,
    'EPOCHS':30,
    'LEARNING_RATE':1e-5,
    'BATCH_SIZE':8,
    'SEED':1042
}

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

### Data Load

In [5]:
all_img_list = glob.glob('../data/train/*/*')

In [7]:
df = pd.DataFrame(columns=['img_path', 'rock_type'])
df['img_path'] = all_img_list
df['rock_type'] = df['img_path'].apply(lambda x : str(x).split('/')[3])

In [16]:
# ETC 데이터 증량 
df = pd.concat([df,df[df['rock_type' ] =="Etc"]],axis=0)

In [17]:
df["rock_type"].value_counts()

rock_type
Granite           92923
Mud_Sandstone     89467
Gneiss            73914
Andesite          43802
Weathered_Rock    37169
Etc               31870
Basalt            26810
Name: count, dtype: int64

In [18]:
df = df.sample(50000 , random_state= 42)

In [19]:
train, val, _, _ = train_test_split(df, df['rock_type'], test_size=0.3, stratify=df['rock_type'], random_state=CFG['SEED'])

In [20]:
df["rock_type"].value_counts(normalize=True)

rock_type
Granite           0.23510
Mud_Sandstone     0.22570
Gneiss            0.18912
Andesite          0.10816
Weathered_Rock    0.09172
Etc               0.08170
Basalt            0.06850
Name: proportion, dtype: float64

In [21]:
le = preprocessing.LabelEncoder()
train['rock_type'] = le.fit_transform(train['rock_type'])
val['rock_type'] = le.transform(val['rock_type'])

# 각 라벨에 대한 인코딩 값 확인
label_mapping = dict(zip(le.classes_, range(len(le.classes_))))
print("Label to encoded value mapping:", label_mapping)

Label to encoded value mapping: {'Andesite': 0, 'Basalt': 1, 'Etc': 2, 'Gneiss': 3, 'Granite': 4, 'Mud_Sandstone': 5, 'Weathered_Rock': 6}


### Data Preprocessing

In [22]:
class PadSquare(ImageOnlyTransform):
    def __init__(self, border_mode=0, value=0, always_apply=False, p=1.0):
        super().__init__(always_apply, p)
        self.border_mode = border_mode
        self.value = value

    def apply(self, image, **params):
        h, w, c = image.shape
        max_dim = max(h, w)
        pad_h = max_dim - h
        pad_w = max_dim - w
        top = pad_h // 2
        bottom = pad_h - top
        left = pad_w // 2
        right = pad_w - left
        image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=self.value)
        return image

    def get_transform_init_args_names(self):
        return ("border_mode", "value")

In [23]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        
    def __getitem__(self, index):
        img_path = self.img_path_list[index]
        
        image = cv2.imread(img_path)
        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
        
        if self.label_list is not None:
            label = self.label_list[index]
            return image, label
        else:
            return image
        
    def __len__(self):
        return len(self.img_path_list)

In [24]:
model_name = 'eva02_large_patch14_448.mim_m38m_ft_in22k_in1k'
model = timm.create_model(model_name, pretrained=True).to(device)

In [25]:
from timm.data import resolve_data_config
config = resolve_data_config({}, model=model)
CFG['mean'] =config['mean']
CFG['std'] = config['std']
# CFG.interpolation = config.interpolation

In [26]:
train_transform = A.Compose([
    # PadSquare(value=(0, 0, 0)),
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE'] ,interpolation=cv2.INTER_CUBIC),
    A.Affine(rotate=(-360,360),shear={"x": (-10, 10), "y": (-10, 10)}, border_mode = 1,p = 1 ),
    A.GridDistortion(num_steps=5, distort_limit=0.2, p= 0.5),
    A.Morphological(scale = (1,3), operation="erosion",p = 0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p = 0.5),
    A.RandomRotate90(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.CoarseDropout(num_holes_range=(3, 5) , p = 0.5 ),
    A.RandomResizedCrop( size = (CFG['IMG_SIZE'], CFG['IMG_SIZE']), scale = (0.7,1),ratio=(0.75, 1.33), p=0.5),  # Random zoom effect
    A.Normalize(mean=CFG['mean'], std=CFG['std']),
    ToTensorV2()
])

test_transform = A.Compose([
    # PadSquare(value=(0, 0, 0)),
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE'] ,interpolation=cv2.INTER_CUBIC),
    A.Normalize(mean=CFG['mean'], std=CFG['std']),
    ToTensorV2()
])

In [27]:
train_dataset = CustomDataset(train['img_path'].values, train['rock_type'].values, train_transform)
# train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=4,pin_memory=True,prefetch_factor=2)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val['img_path'].values, val['rock_type'].values, test_transform)
# val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=4,pin_memory=True,prefetch_factor=2)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

### Train

In [28]:
def train(model, optimizer, train_loader, val_loader, scheduler, device , patience = 5 ):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    
    best_score = 0
    early_stop_counter = 0
    best_model = None
    save_path = f"best_model_{model_name}.pth"


    for epoch in range(1, CFG['EPOCHS'] + 1):
        model.train()
        train_loss = []

        for imgs, labels in tqdm(iter(train_loader), desc=f"Epoch {epoch}"):
            imgs = imgs.float().to(device)
            labels = labels.type(torch.LongTensor)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = model(imgs)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device, epoch)
        _train_loss = np.mean(train_loss)

        print(f'Epoch [{epoch}], Train Loss: {_train_loss:.5f}, Val Loss: {_val_loss:.5f}, Val Macro F1: {_val_score:.5f}')
        torch.save(model.state_dict(), f"1_{epoch}_{model_name}_{_val_loss:.5f}.pth")
        if scheduler is not None:
            scheduler.step(_val_score)

        if best_score < _val_score:
            early_stop_counter = 0
            best_score = _val_score
            best_model = model

            # 모델 가중치 저장
            torch.save(model.state_dict(), f"1_best_{epoch}_{model_name}_{_val_loss:.5f}.pth")
            print(f"Best model saved (epoch {epoch}, F1={_val_score:.4f}) → {save_path}")
        else:
            early_stop_counter += 1
            print(f"No improvement for {early_stop_counter} epoch(s)")

            if early_stop_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch}")
                break
            
    return best_model

In [29]:
def validation(model, criterion, val_loader, device , epoch):
    model.eval()
    val_loss = []
    preds, true_labels = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = labels.type(torch.LongTensor)
            labels = labels.to(device)
            
            pred = model(imgs)
            
            loss = criterion(pred, labels)
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += labels.detach().cpu().numpy().tolist()
            
            val_loss.append(loss.item())
        
        _val_loss = np.mean(val_loss)
        _val_score = f1_score(true_labels, preds, average='macro')

        # 예측 결과와 실제 값을 pandas DataFrame으로 생성
        # Calculate F1 score for each label
        f1_per_class = f1_score(true_labels, preds, average=None)

        # Create a DataFrame for label-wise F1 scores
        f1_df = pd.DataFrame({
            'Label': le.classes_,
            'F1 Score': f1_per_class
        })

        # Save the F1 scores to a CSV file
        f1_df.to_csv(f"{epoch}_f1_scores_per_label_1.csv", index=False)
        print(f"F1 scores per label saved to {epoch}_f1_scores_per_label.csv")

        # Save the validation results
        results_df = pd.DataFrame({
            'gt': true_labels,
            'pred': preds
        })
        
        results_df.to_csv(f"validation_results_1_{epoch}.csv", index=False)
        print(f"Validation results saved to validation_results_{epoch}.csv")

        # # CSV 파일로 저장
        # results_df.to_csv(f"validation_results_{epoch}.csv", index=False)
        # print(f"Validation results saved to validation_results_{epoch}.csv")
        
    return _val_loss, _val_score

In [30]:
class BaseModel(nn.Module):
    def __init__(self, model_name ,num_classes=len(le.classes_)):
        super(BaseModel, self).__init__()
        self.backbone = timm.create_model(model_name, pretrained=True).to(device)
        self.classifier = nn.Linear(1000, num_classes)
        
    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        return x

In [31]:

from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR
model = BaseModel(model_name)
optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG["LEARNING_RATE"], weight_decay=0.05)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, threshold_mode='abs', min_lr=1e-8)
# scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=1e-6)
# scheduler = CosineAnnealingLR(optimizer, T_max=CFG['EPOCHS'], eta_min=1e-8)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

Epoch 1: 100%|██████████| 4375/4375 [1:02:22<00:00,  1.17it/s]
100%|██████████| 1875/1875 [10:47<00:00,  2.90it/s]


F1 scores per label saved to 1_f1_scores_per_label.csv
Validation results saved to validation_results_1.csv
Epoch [1], Train Loss: 0.73012, Val Loss: 0.57586, Val Macro F1: 0.76523
Best model saved (epoch 1, F1=0.7652) → best_model_eva02_large_patch14_448.mim_m38m_ft_in22k_in1k.pth


Epoch 2: 100%|██████████| 4375/4375 [51:17<00:00,  1.42it/s]
100%|██████████| 1875/1875 [07:00<00:00,  4.46it/s]


F1 scores per label saved to 2_f1_scores_per_label.csv
Validation results saved to validation_results_2.csv
Epoch [2], Train Loss: 0.54176, Val Loss: 0.51587, Val Macro F1: 0.78426
Best model saved (epoch 2, F1=0.7843) → best_model_eva02_large_patch14_448.mim_m38m_ft_in22k_in1k.pth


Epoch 3: 100%|██████████| 4375/4375 [51:18<00:00,  1.42it/s]
100%|██████████| 1875/1875 [07:00<00:00,  4.46it/s]


F1 scores per label saved to 3_f1_scores_per_label.csv
Validation results saved to validation_results_3.csv
Epoch [3], Train Loss: 0.44931, Val Loss: 0.51448, Val Macro F1: 0.80545
Best model saved (epoch 3, F1=0.8055) → best_model_eva02_large_patch14_448.mim_m38m_ft_in22k_in1k.pth


Epoch 4: 100%|██████████| 4375/4375 [51:19<00:00,  1.42it/s]
100%|██████████| 1875/1875 [07:00<00:00,  4.46it/s]


F1 scores per label saved to 4_f1_scores_per_label.csv
Validation results saved to validation_results_4.csv
Epoch [4], Train Loss: 0.36091, Val Loss: 0.45141, Val Macro F1: 0.81909
Best model saved (epoch 4, F1=0.8191) → best_model_eva02_large_patch14_448.mim_m38m_ft_in22k_in1k.pth


Epoch 5: 100%|██████████| 4375/4375 [51:19<00:00,  1.42it/s]
100%|██████████| 1875/1875 [07:01<00:00,  4.45it/s]


F1 scores per label saved to 5_f1_scores_per_label.csv
Validation results saved to validation_results_5.csv
Epoch [5], Train Loss: 0.29291, Val Loss: 0.41403, Val Macro F1: 0.84303
Best model saved (epoch 5, F1=0.8430) → best_model_eva02_large_patch14_448.mim_m38m_ft_in22k_in1k.pth


Epoch 6: 100%|██████████| 4375/4375 [51:20<00:00,  1.42it/s]
100%|██████████| 1875/1875 [07:00<00:00,  4.46it/s]


F1 scores per label saved to 6_f1_scores_per_label.csv
Validation results saved to validation_results_6.csv
Epoch [6], Train Loss: 0.22514, Val Loss: 0.49438, Val Macro F1: 0.82149
No improvement for 1 epoch(s)


Epoch 7: 100%|██████████| 4375/4375 [51:20<00:00,  1.42it/s]
100%|██████████| 1875/1875 [07:00<00:00,  4.45it/s]


F1 scores per label saved to 7_f1_scores_per_label.csv
Validation results saved to validation_results_7.csv
Epoch [7], Train Loss: 0.17704, Val Loss: 0.46409, Val Macro F1: 0.84124
No improvement for 2 epoch(s)


Epoch 8: 100%|██████████| 4375/4375 [51:20<00:00,  1.42it/s]
100%|██████████| 1875/1875 [07:00<00:00,  4.46it/s]


F1 scores per label saved to 8_f1_scores_per_label.csv
Validation results saved to validation_results_8.csv
Epoch [8], Train Loss: 0.14627, Val Loss: 0.47883, Val Macro F1: 0.84488
Best model saved (epoch 8, F1=0.8449) → best_model_eva02_large_patch14_448.mim_m38m_ft_in22k_in1k.pth


Epoch 9: 100%|██████████| 4375/4375 [51:24<00:00,  1.42it/s]
100%|██████████| 1875/1875 [07:01<00:00,  4.45it/s]


F1 scores per label saved to 9_f1_scores_per_label.csv
Validation results saved to validation_results_9.csv
Epoch [9], Train Loss: 0.12682, Val Loss: 0.52542, Val Macro F1: 0.84109
No improvement for 3 epoch(s)


Epoch 10: 100%|██████████| 4375/4375 [51:41<00:00,  1.41it/s]
100%|██████████| 1875/1875 [07:21<00:00,  4.24it/s]


F1 scores per label saved to 10_f1_scores_per_label.csv
Validation results saved to validation_results_10.csv
Epoch [10], Train Loss: 0.10083, Val Loss: 0.53524, Val Macro F1: 0.83999
No improvement for 4 epoch(s)


Epoch 11: 100%|██████████| 4375/4375 [51:46<00:00,  1.41it/s]
100%|██████████| 1875/1875 [07:09<00:00,  4.37it/s]


F1 scores per label saved to 11_f1_scores_per_label.csv
Validation results saved to validation_results_11.csv
Epoch [11], Train Loss: 0.10034, Val Loss: 0.51605, Val Macro F1: 0.84611
Best model saved (epoch 11, F1=0.8461) → best_model_eva02_large_patch14_448.mim_m38m_ft_in22k_in1k.pth


Epoch 12: 100%|██████████| 4375/4375 [52:11<00:00,  1.40it/s]
100%|██████████| 1875/1875 [07:11<00:00,  4.35it/s]


F1 scores per label saved to 12_f1_scores_per_label.csv
Validation results saved to validation_results_12.csv
Epoch [12], Train Loss: 0.08771, Val Loss: 0.60776, Val Macro F1: 0.83512
No improvement for 5 epoch(s)
Early stopping triggered at epoch 12


### Inference

In [32]:
test = pd.read_csv('../data/test.csv')
test["img_path"] = test["img_path"].apply(lambda x : "../data/test/"+x.split("/")[-1])

In [33]:
test_dataset = CustomDataset(test['img_path'].values, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [34]:
# #model load
# load_model = timm.create_model(model_name, pretrained=False).to(device)
# load_model.load_state_dict(torch.load("/mnt/sdb/sim/dacon/ipynb_files/best_model_eva02_large_patch14_448.mim_m38m_ft_in22k_in1k.pth"))
# load_model.eval()  # 평가 모드로 전환 (옵션)
load_model = infer_model
infer_model.eval()

BaseModel(
  (backbone): Eva(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (rope): RotaryEmbeddingCat()
    (blocks): ModuleList(
      (0-23): 24 x EvaBlock(
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (attn): EvaAttention(
          (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (norm): Identity()
          (proj): Linear(in_features=1024, out_features=1024, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path1): Identity()
        (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (mlp): SwiGLU(
          (fc1_g): Linear(in_fe

In [35]:
def inference(model, test_loader, device):
    model.eval()
    preds = []
    logits = []
    with torch.no_grad():
        for imgs in tqdm(iter(test_loader)):
            imgs = imgs.float().to(device)
            
            pred = model(imgs)
            logits.append(pred)
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
    
    preds = le.inverse_transform(preds)
    return preds , logits

In [36]:
preds , logits = inference(load_model, test_loader, device)

100%|██████████| 11876/11876 [1:06:12<00:00,  2.99it/s]


### Submission

In [38]:
submit = pd.read_csv('/mnt/hdd1/sim/dacon/data/sample_submission.csv')

In [39]:
submit['rock_type'] = preds

In [40]:
submit.to_csv('./1_30epoch_5만_baseline_submit_eva02_448.csv', index=False)

# validation F1 Score Matrix

In [None]:
# import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# f1_scores_per_label.csv 파일을 읽기
epoch = 1 
f1_df = pd.read_csv(f"{epoch}_f1_scores_per_label.csv")

# Plotting F1 scores for each label
plt.figure(figsize=(10, 6))
sns.barplot(x='F1 Score', y='Label', data=f1_df, palette='viridis')
plt.title("F1 Score per Label")
plt.xlabel("F1 Score")
plt.ylabel("Label")
plt.show()