### Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import cv2
import timm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

# 이미지 증량 및 처리를 위한 라이브러리 ( albumentations )
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from albumentations.core.transforms_interface import ImageOnlyTransform
import torchvision.models as models


# sklearn 
from sklearn.model_selection import train_test_split # train test Split 
from sklearn import preprocessing
from sklearn.metrics import f1_score # f1 score 
from sklearn.metrics import classification_report
from tqdm.auto import tqdm 

import warnings
warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Pick the compute device.  The second GPU (index 1) is preferred when it
# exists; on a single-GPU machine 'cuda:1' is an invalid ordinal, so fall back
# to the first GPU, and to the CPU when CUDA is not available at all.
if torch.cuda.is_available():
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')
# To force CPU execution instead: device = torch.device('cpu')

In [3]:
# Run-wide settings, looked up by key from the cells below.
CFG = dict(
    IMG_SIZE=448,        # side length passed to the resize / crop transforms
    EPOCHS=30,           # upper bound of the training loop
    LEARNING_RATE=1e-5,  # learning rate handed to the optimizer
    BATCH_SIZE=4,        # batch size of every DataLoader
    SEED=1042,           # seed for the RNGs and the train/validation split
)

In [4]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Exported for completeness; hash randomisation of the already running
    # interpreter is not changed by setting the variable at this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (safe without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different kernels from run to run, so it
    # has to be off for the deterministic flag above to be meaningful.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seed for the whole run

### Data Load

In [5]:
# Collect every file two levels below the training directory
# (../data/train/<class directory>/<image>).  glob returns entries in an
# arbitrary, filesystem-dependent order, so the list is sorted to make the
# seeded train/validation split reproducible across machines.
all_img_list = sorted(glob.glob('../data/train/*/*'))

In [6]:
# Table of (image path, class name).  The class name is the directory that
# directly contains the image.  It is extracted with os.path so that both
# '/' and '\\' separated paths work; splitting on '\\' alone raises
# IndexError for POSIX-style paths.
df = pd.DataFrame(columns=['img_path', 'rock_type'])
df['img_path'] = all_img_list
df['rock_type'] = df['img_path'].apply(lambda x: os.path.basename(os.path.dirname(str(x))))

In [7]:
df["rock_type"].value_counts()

rock_type
Granite           92923
Mud_Sandstone     89467
Gneiss            73914
Andesite          43802
Weathered_Rock    37169
Basalt            26810
Etc               15935
Name: count, dtype: int64

In [8]:
# df = df.sample(300 , random_state= 42)

In [9]:
# Stratified 70/30 hold-out split keyed on the class column; the two label
# splits returned alongside the frames are discarded.
rock_labels = df['rock_type']
train, val, _, _ = train_test_split(
    df,
    rock_labels,
    test_size=0.3,
    stratify=rock_labels,
    random_state=CFG['SEED'],
)

In [10]:
df["rock_type"].value_counts(normalize=True)

rock_type
Granite           0.244521
Mud_Sandstone     0.235427
Gneiss            0.194500
Andesite          0.115262
Weathered_Rock    0.097808
Basalt            0.070549
Etc               0.041932
Name: proportion, dtype: float64

In [11]:
# Encode the class names as integer ids.  The encoder is fitted on the
# training labels only.  The original strings are always read back from `df`
# (the split frames keep df's index), so re-running this cell gives the same
# result instead of trying to encode already-encoded integers, and the split
# frames are replaced rather than modified in place.
le = preprocessing.LabelEncoder()
le.fit(df.loc[train.index, 'rock_type'])
train = train.assign(rock_type=le.transform(df.loc[train.index, 'rock_type']))
val = val.assign(rock_type=le.transform(df.loc[val.index, 'rock_type']))

# Show the integer id assigned to each label
label_mapping = dict(zip(le.classes_, range(len(le.classes_))))
print("Label to encoded value mapping:", label_mapping)

Label to encoded value mapping: {'Andesite': 0, 'Basalt': 1, 'Etc': 2, 'Gneiss': 3, 'Granite': 4, 'Mud_Sandstone': 5, 'Weathered_Rock': 6}


### Data Preprocessing

In [12]:
class PadSquare(ImageOnlyTransform):
    """Pad an image to a square whose side equals its longer edge.

    The padding is split evenly between the two sides of the shorter axis;
    when the difference is odd the extra pixel goes to the bottom / right.

    Args:
        border_mode: OpenCV border flag passed to ``cv2.copyMakeBorder``.
            The default ``0`` is ``cv2.BORDER_CONSTANT``.
        value: Fill value used for constant borders.
        always_apply: Kept for backward compatibility; ``True`` is treated
            as ``p=1.0``.
        p: Probability of applying the transform.
    """

    def __init__(self, border_mode=0, value=0, always_apply=False, p=1.0):
        # Pass the probability by keyword: the positional order of the base
        # initialiser's arguments is not the same in every albumentations
        # release, and newer releases may not accept ``always_apply`` at all.
        super().__init__(p=1.0 if always_apply else p)
        self.border_mode = border_mode
        self.value = value

    def apply(self, image, **params):
        # Only the first two axes are needed, so 2-D (grayscale) arrays work too.
        h, w = image.shape[:2]
        max_dim = max(h, w)
        pad_h = max_dim - h
        pad_w = max_dim - w
        top = pad_h // 2
        bottom = pad_h - top
        left = pad_w // 2
        right = pad_w - left
        # Honour the configured border mode instead of always padding with a constant.
        image = cv2.copyMakeBorder(image, top, bottom, left, right, self.border_mode, value=self.value)
        return image

    def get_transform_init_args_names(self):
        # Names of the constructor arguments reported for this transform.
        return ("border_mode", "value")

In [13]:
class CustomDataset(Dataset):
    """Dataset that reads images from disk by path.

    Args:
        img_path_list: Indexable collection of image file paths.
        label_list: Indexable collection of labels aligned with
            ``img_path_list``, or ``None`` for unlabeled data.
        transforms: Optional callable invoked as ``transforms(image=...)``
            whose result is indexed with ``['image']``.
    """

    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label)``, or only ``image`` when there are no labels.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        img_path = self.img_path_list[index]

        image = cv2.imread(img_path)
        # cv2.imread signals a missing or unreadable file by returning None.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR; convert to RGB so the channel order matches
        # the per-channel normalisation statistics applied in the transforms.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        if self.label_list is not None:
            label = self.label_list[index]
            return image, label
        else:
            return image

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.img_path_list)

In [14]:
# Backbone identifier used throughout the notebook.
model_name = 'eva02_large_patch14_448.mim_m38m_ft_in22k_in1k'
# This instance is only inspected for its preprocessing configuration in the
# next cell, so it stays on the CPU; the network that is actually trained is
# built later and moved to the device by train().
model = timm.create_model(model_name, pretrained=True)

In [15]:
# Look up the preprocessing configuration of the model created above and copy
# its normalisation statistics into CFG for the transform pipelines.
from timm.data import resolve_data_config
config = resolve_data_config({}, model=model)
CFG.update(mean=config['mean'], std=config['std'])
# CFG.interpolation = config.interpolation

In [16]:
# Training pipeline: resize, geometric and photometric augmentation, then
# normalisation and conversion to a tensor.
train_transform = A.Compose([
    # PadSquare(value=(0, 0, 0)),
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE'], interpolation=cv2.INTER_CUBIC),
    A.Affine(
        rotate=(-360, 360),
        shear={"x": (-10, 10), "y": (-10, 10)},
        border_mode=cv2.BORDER_REPLICATE,  # same flag value as the literal 1
        p=1,
    ),
    A.GridDistortion(num_steps=5, distort_limit=0.2, p=0.5),
    A.Morphological(scale=(1, 3), operation="erosion", p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.CoarseDropout(num_holes_range=(3, 5), p=0.5),
    # Random zoom effect
    A.RandomResizedCrop(
        size=(CFG['IMG_SIZE'], CFG['IMG_SIZE']),
        scale=(0.7, 1),
        ratio=(0.75, 1.33),
        p=0.5,
    ),
    A.Normalize(mean=CFG['mean'], std=CFG['std']),
    ToTensorV2(),
])

# Evaluation pipeline: the same resize and normalisation, no augmentation.
test_transform = A.Compose([
    # PadSquare(value=(0, 0, 0)),
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE'], interpolation=cv2.INTER_CUBIC),
    A.Normalize(mean=CFG['mean'], std=CFG['std']),
    ToTensorV2(),
])

In [17]:
# Training data: augmented samples, reshuffled every epoch.
train_dataset = CustomDataset(
    train['img_path'].values,
    train['rock_type'].values,
    train_transform,
)
# train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=4,pin_memory=True,prefetch_factor=2)
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=True,
    num_workers=0,
)

# Validation data: evaluation transform, fixed order.
val_dataset = CustomDataset(
    val['img_path'].values,
    val['rock_type'].values,
    test_transform,
)
# val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=4,pin_memory=True,prefetch_factor=2)
val_loader = DataLoader(
    val_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

### Train

In [18]:
def train(model, optimizer, train_loader, val_loader, scheduler, device, patience=5):
    """Train ``model`` and return it with the best validation weights loaded.

    Each epoch runs one pass over ``train_loader``, evaluates with
    ``validation`` and writes a per-epoch checkpoint.  Whenever the validation
    macro F1 improves, the weights are snapshotted and written to
    ``best_model_{model_name}.pth``.  Training stops early once ``patience``
    consecutive epochs bring no improvement.

    Args:
        model: Network to train; moved to ``device``.
        optimizer: Optimizer built over ``model``'s parameters.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Validation batches, forwarded to ``validation``.
        scheduler: Optional scheduler stepped with the validation score
            after every epoch, or ``None``.
        device: Device on which the batches and the model are placed.
        patience: Number of consecutive non-improving epochs tolerated.

    Returns:
        ``model`` itself, carrying the weights of the best epoch (or the
        last-epoch weights if no epoch scored above 0).
    """
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    best_score = 0
    best_state = None
    early_stop_counter = 0
    save_path = f"best_model_{model_name}.pth"

    for epoch in range(1, CFG['EPOCHS'] + 1):
        model.train()
        train_loss = []

        for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch}"):
            imgs = imgs.float().to(device)
            labels = labels.long().to(device)

            optimizer.zero_grad()
            output = model(imgs)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device, epoch)
        _train_loss = np.mean(train_loss)

        print(f'Epoch [{epoch}], Train Loss: {_train_loss:.5f}, Val Loss: {_val_loss:.5f}, Val Macro F1: {_val_score:.5f}')
        torch.save(model.state_dict(), f"{epoch}_{model_name}_{_val_loss:.5f}.pth")
        if scheduler is not None:
            scheduler.step(_val_score)

        if best_score < _val_score:
            best_score = _val_score
            # An improvement restarts the patience window.
            early_stop_counter = 0

            # Keep an independent CPU copy of the weights: a plain reference
            # to `model` would keep changing during the following epochs.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

            # Save the model weights to the path named in the log message
            torch.save(best_state, save_path)
            print(f"Best model saved (epoch {epoch}, F1={_val_score:.4f}) → {save_path}")
        else:
            early_stop_counter += 1
            print(f"No improvement for {early_stop_counter} epoch(s)")

            if early_stop_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch}")
                break

    # Restore the best weights so the returned model is really the best one.
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

In [19]:
def validation(model, criterion, val_loader, device, epoch):
    """Evaluate ``model`` on ``val_loader`` and write per-epoch reports.

    Two CSV files are written to the working directory:
    ``{epoch}_f1_scores_per_label.csv`` (one F1 score per class name in
    ``le.classes_``) and ``validation_results_{epoch}.csv`` (ground-truth and
    predicted class ids per sample).

    Args:
        model: Network to evaluate; switched to eval mode.
        criterion: Loss function applied to ``(logits, labels)``.
        val_loader: Iterable of ``(images, labels)`` batches.
        device: Device on which the batches are placed.
        epoch: Epoch number used in the output file names.

    Returns:
        Tuple ``(mean validation loss, macro F1 score)``.
    """
    model.eval()
    val_loss = []
    preds, true_labels = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(val_loader):
            imgs = imgs.float().to(device)
            labels = labels.long().to(device)

            pred = model(imgs)

            loss = criterion(pred, labels)

            preds += pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += labels.detach().cpu().numpy().tolist()

            val_loss.append(loss.item())

    _val_loss = np.mean(val_loss)
    _val_score = f1_score(true_labels, preds, average='macro')

    # Per-class F1.  The label ids are passed explicitly so the result always
    # has one entry per encoder class, in encoder order, even when a class
    # appears in neither the targets nor the predictions; otherwise the array
    # could be shorter than le.classes_ and the DataFrame below would fail.
    class_ids = list(range(len(le.classes_)))
    f1_per_class = f1_score(true_labels, preds, labels=class_ids, average=None)

    # Create a DataFrame for label-wise F1 scores
    f1_df = pd.DataFrame({
        'Label': le.classes_,
        'F1 Score': f1_per_class
    })

    # Save the F1 scores to a CSV file
    f1_df.to_csv(f"{epoch}_f1_scores_per_label.csv", index=False)
    print(f"F1 scores per label saved to {epoch}_f1_scores_per_label.csv")

    # Save the ground truth and the predictions of every validation sample
    results_df = pd.DataFrame({
        'gt': true_labels,
        'pred': preds
    })

    results_df.to_csv(f"validation_results_{epoch}.csv", index=False)
    print(f"Validation results saved to validation_results_{epoch}.csv")

    return _val_loss, _val_score

In [20]:
class BaseModel(nn.Module):
    """Pretrained timm backbone followed by a linear classification layer.

    The backbone keeps its own output head; the extra linear layer maps the
    backbone's outputs to ``num_classes`` logits.

    Args:
        model_name: timm model identifier passed to ``timm.create_model``.
        num_classes: Number of target classes.  The default is evaluated
            once, when the class is defined, from the fitted label encoder.
    """

    def __init__(self, model_name, num_classes=len(le.classes_)):
        super().__init__()
        # The module is built on the CPU as a whole; callers move it with
        # .to(device) (train() does this).  Moving only the backbone here
        # would leave the classifier on a different device.
        self.backbone = timm.create_model(model_name, pretrained=True)
        # Size the classifier from the backbone's reported output width and
        # fall back to 1000 when the backbone does not report one.
        backbone_out = getattr(self.backbone, 'num_classes', None) or 1000
        self.classifier = nn.Linear(backbone_out, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.backbone(x)
        x = self.classifier(x)
        return x

In [None]:

from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR

# Build the classifier, its optimizer and a plateau scheduler that halves the
# learning rate when the monitored score (mode='max') stops improving.
model = BaseModel(model_name)
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=CFG["LEARNING_RATE"],
    weight_decay=0.05,
)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=2,
    threshold_mode='abs',
    min_lr=1e-8,
)
# scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=1e-6)
# scheduler = CosineAnnealingLR(optimizer, T_max=CFG['EPOCHS'], eta_min=1e-8)

# Run training; the returned model is used for inference below.
infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

Epoch 1:   0%|          | 12/66504 [00:21<31:31:31,  1.71s/it]

### Inference

In [21]:
# Load the test index and point every entry at ../data/test/<file name>,
# keeping only the last '/'-separated component of the listed path.
test = pd.read_csv('../data/test.csv')
test["img_path"] = ["../data/test/" + path.split("/")[-1] for path in test["img_path"]]

In [22]:
# Unlabeled test data, evaluation transform, original order preserved.
test_dataset = CustomDataset(
    img_path_list=test['img_path'].values,
    label_list=None,
    transforms=test_transform,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [23]:
# Disabled alternative: load weights from a saved checkpoint file.
# #model load
# load_model = timm.create_model(model_name, pretrained=False).to(device)
# load_model.load_state_dict(torch.load("/mnt/sdb/sim/dacon/ipynb_files/best_model_eva02_large_patch14_448.mim_m38m_ft_in22k_in1k.pth"))
# load_model.eval()  # switch to evaluation mode (optional)
# Use the model returned by train() for inference.
load_model = infer_model
infer_model.eval()

Eva(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (rope): RotaryEmbeddingCat()
  (blocks): ModuleList(
    (0-23): 24 x EvaBlock(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): EvaAttention(
        (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
        (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
        (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (proj): Linear(in_features=1024, out_features=1024, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path1): Identity()
      (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (mlp): SwiGLU(
        (fc1_g): Linear(in_features=1024, out_features=2730, bias=True)
        (fc1_x): Linear(in

In [24]:
def inference(model, test_loader, device):
    """Predict a class name for every image yielded by ``test_loader``.

    Args:
        model: Network returning one row of class scores per image.
        test_loader: Iterable of image batches (no labels).
        device: Device on which each batch is placed before the forward pass.

    Returns:
        The predicted class ids mapped back to names with
        ``le.inverse_transform``.
    """
    model.eval()
    class_ids = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            logits = model(batch.float().to(device))
            # Highest-scoring class per row, collected as plain Python ints.
            class_ids.extend(logits.argmax(1).detach().cpu().numpy().tolist())

    return le.inverse_transform(class_ids)

In [25]:
# Predict a class name for every test image with the model selected for inference.
preds = inference(load_model, test_loader, device)

  0%|          | 0/11876 [00:00<?, ?it/s]

100%|██████████| 11876/11876 [48:42<00:00,  4.06it/s]


### Submission

In [26]:
# Load the submission template from the same relative data directory as the
# other inputs, instead of a machine-specific absolute path.
submit = pd.read_csv('../data/sample_submission.csv')

In [27]:
# Fill the label column of the submission with the predicted class names.
submit['rock_type'] = preds

In [28]:
# Write the submission file without the DataFrame index column.
submit.to_csv('./5만_baseline_submit_eva02_448.csv', index=False)

In [29]:
submit

Unnamed: 0,ID,rock_type
0,TEST_00000,Mud_Sandstone
1,TEST_00001,Mud_Sandstone
2,TEST_00002,Mud_Sandstone
3,TEST_00003,Granite
4,TEST_00004,Granite
...,...,...
95001,TEST_95001,Gneiss
95002,TEST_95002,Gneiss
95003,TEST_95003,Gneiss
95004,TEST_95004,Gneiss


: 

# Validation F1 Score per Label

In [None]:
# import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the per-label F1 scores written during validation of the chosen epoch
epoch = 1
f1_df = pd.read_csv(f"{epoch}_f1_scores_per_label.csv")

# Horizontal bar chart with one bar per label
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='F1 Score', y='Label', data=f1_df, palette='viridis', ax=ax)
ax.set_title("F1 Score per Label")
ax.set_xlabel("F1 Score")
ax.set_ylabel("Label")
plt.show()