In [4]:
import os
import random
import numpy as np
import pandas as pd
from glob import glob
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.efficientnet import preprocess_input


In [5]:
# Config
CFG = {
    'IMG_SIZE': 380,
    'EPOCHS': 20,
    'LR': 3e-4,
    'BATCH_SIZE': 12,
    'SEED': 2025,
    'FOLDS': 5
}

# Seed 고정
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

set_seed(CFG['SEED'])

# 경로 설정
TRAIN_DIR = "D:/데이콘 250519 대회/open/train"
TEST_DIR = "D:/데이콘 250519 대회/open/test"
SAMPLE_SUB = "D:/데이콘 250519 대회/open/sample_submission.csv"


In [6]:
# 클래스 라벨 매핑
label_list = sorted(os.listdir(TRAIN_DIR))
label2id = {v: i for i, v in enumerate(label_list)}
id2label = {i: v for v, i in label2id.items()}

# 이미지 경로 및 라벨
image_paths = glob(os.path.join(TRAIN_DIR, '*', '*.jpg'))
labels = [label2id[os.path.basename(os.path.dirname(p))] for p in image_paths]


In [7]:
# 이미지 로딩 및 증강
def load_and_preprocess(img_path, augment=False):
    img = load_img(img_path, target_size=(CFG['IMG_SIZE'], CFG['IMG_SIZE']))
    img = img_to_array(img)
    img = preprocess_input(img)

    if augment:
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_brightness(img, 0.1)
        img = tf.image.random_contrast(img, 0.9, 1.1)
        img = tf.image.random_saturation(img, 0.9, 1.1)
        img = tf.image.random_hue(img, 0.02)

    return img


In [8]:
def create_dataset(image_paths, labels=None, is_train=True):
    def gen():
        for i, path in enumerate(image_paths):
            img = load_and_preprocess(path, augment=is_train)
            if labels is not None:
                yield img, labels[i]
            else:
                yield img

    if labels is not None:
        ds = tf.data.Dataset.from_generator(
            gen,
            output_types=(tf.float32, tf.int32),
            output_shapes=((CFG['IMG_SIZE'], CFG['IMG_SIZE'], 3), ())
        )
    else:
        ds = tf.data.Dataset.from_generator(
            gen,
            output_types=tf.float32,
            output_shapes=(CFG['IMG_SIZE'], CFG['IMG_SIZE'], 3)
        )

    if is_train:
        ds = ds.shuffle(1024)
    ds = ds.batch(CFG['BATCH_SIZE']).prefetch(tf.data.AUTOTUNE)
    return ds


In [9]:
def build_model(num_classes):
    base = tf.keras.applications.EfficientNetB4(
        include_top=False,
        input_shape=(CFG['IMG_SIZE'], CFG['IMG_SIZE'], 3),
        weights='imagenet',
        pooling='avg'
    )
    x = layers.BatchNormalization()(base.output)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    output = layers.Dense(num_classes, activation='softmax')(x)
    model = models.Model(inputs=base.input, outputs=output)
    return model


In [11]:
from tensorflow.keras.applications import EfficientNetB3

def build_model(num_classes):
    base_model = EfficientNetB3(
        include_top=False,
        input_shape=(CFG['IMG_SIZE'], CFG['IMG_SIZE'], 3),
        weights='imagenet',
        pooling='avg'
    )
    x = base_model.output
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)
    model = tf.keras.Model(inputs=base_model.input, outputs=outputs)
    return model



In [None]:
# 테스트 데이터셋
test_paths = sorted(glob(os.path.join(TEST_DIR, '*.jpg')))
test_ds = create_dataset(test_paths, is_train=False)

# 모델 앙상블 예측
preds_list = []
for fold, path in enumerate(model_paths):
    model = build_model(num_classes=len(label2id))
    model.load_weights(path)
    preds = model.predict(test_ds)
    preds_list.append(preds)

final_preds = np.mean(preds_list, axis=0)


In [None]:
submission = pd.read_csv(SAMPLE_SUB)
for idx, class_name in enumerate(label2id.keys()):
    submission[class_name] = final_preds[:, idx]

submission.to_csv("submission.csv", index=False)
print("submission.csv 저장 완료")


In [1]:
import os
import random
import numpy as np
import pandas as pd
from glob import glob
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.applications import EfficientNetB3

# Config
CFG = {
    'IMG_SIZE': 380,
    'EPOCHS': 20,
    'LR': 3e-4,
    'BATCH_SIZE': 12,
    'SEED': 2025,
    'FOLDS': 5
}

# Seed 고정 함수
def set_seed(seed=CFG['SEED']):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
set_seed()

# 경로 설정
TRAIN_DIR = "D:/데이콘 250519 대회/open/train"
TEST_DIR = "D:/데이콘 250519 대회/open/test"
SAMPLE_SUB = "D:/데이콘 250519 대회/open/sample_submission.csv"
SAVE_DIR = "D:/데이콘 250519 대회/models"
os.makedirs(SAVE_DIR, exist_ok=True)

# 클래스 라벨 매핑
label_list = sorted(os.listdir(TRAIN_DIR))
label2id = {v: i for i, v in enumerate(label_list)}
id2label = {i: v for v, i in label2id.items()}

# 학습 이미지 및 라벨 리스트
image_paths = glob(os.path.join(TRAIN_DIR, '*', '*.jpg'))
labels = [label2id[os.path.basename(os.path.dirname(p))] for p in image_paths]

# 이미지 전처리 및 증강 함수
def load_and_preprocess(img_path, augment=False):
    img = load_img(img_path, target_size=(CFG['IMG_SIZE'], CFG['IMG_SIZE']))
    img = img_to_array(img)
    img = preprocess_input(img)

    if augment:
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_brightness(img, 0.1)
        img = tf.image.random_contrast(img, 0.9, 1.1)
        img = tf.image.random_saturation(img, 0.9, 1.1)
        img = tf.image.random_hue(img, 0.02)

    return img

# tf.data.Dataset 생성 함수
def create_dataset(image_paths, labels=None, is_train=True):
    def gen():
        for i, path in enumerate(image_paths):
            img = load_and_preprocess(path, augment=is_train)
            if labels is not None:
                yield img, labels[i]
            else:
                yield img

    if labels is not None:
        ds = tf.data.Dataset.from_generator(
            gen,
            output_types=(tf.float32, tf.int32),
            output_shapes=((CFG['IMG_SIZE'], CFG['IMG_SIZE'], 3), ())
        )
    else:
        ds = tf.data.Dataset.from_generator(
            gen,
            output_types=tf.float32,
            output_shapes=(CFG['IMG_SIZE'], CFG['IMG_SIZE'], 3)
        )

    if is_train:
        ds = ds.shuffle(1024)
    ds = ds.batch(CFG['BATCH_SIZE']).prefetch(tf.data.AUTOTUNE)
    return ds

# EfficientNetB3 모델 생성 함수
def build_model(num_classes):
    base_model = EfficientNetB3(
        include_top=False,
        input_shape=(CFG['IMG_SIZE'], CFG['IMG_SIZE'], 3),
        weights='imagenet',
        pooling='avg'
    )
    x = layers.BatchNormalization()(base_model.output)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    model = tf.keras.Model(inputs=base_model.input, outputs=outputs)
    return model

# Stratified K-Fold 교차검증 학습
skf = StratifiedKFold(n_splits=CFG['FOLDS'], shuffle=True, random_state=CFG['SEED'])
num_classes = len(label2id)

for fold, (train_idx, val_idx) in enumerate(skf.split(image_paths, labels)):
    print(f"\n===== Fold {fold} Training =====")
    train_paths = [image_paths[i] for i in train_idx]
    train_labels = [labels[i] for i in train_idx]
    val_paths = [image_paths[i] for i in val_idx]
    val_labels = [labels[i] for i in val_idx]

    train_ds = create_dataset(train_paths, train_labels, is_train=True)
    val_ds = create_dataset(val_paths, val_labels, is_train=False)

    model = build_model(num_classes)
    model.compile(
        optimizer=optimizers.Adam(learning_rate=CFG['LR']),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # 콜백 정의
    ckpt_path = os.path.join(SAVE_DIR, f"model_fold{fold}.h5")
    checkpoint = callbacks.ModelCheckpoint(
        ckpt_path,
        monitor='val_accuracy',
        save_best_only=True,
        save_weights_only=True,
        verbose=1
    )
    early_stopping = callbacks.EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True, verbose=1)

    # 학습
    model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=CFG['EPOCHS'],
        callbacks=[checkpoint, early_stopping]
    )

# 테스트 데이터셋 생성
test_paths = sorted(glob(os.path.join(TEST_DIR, '*.jpg')))
test_ds = create_dataset(test_paths, is_train=False)

# 학습한 모델 앙상블 예측
preds_list = []
for fold in range(CFG['FOLDS']):
    model = build_model(num_classes)
    model.load_weights(os.path.join(SAVE_DIR, f"model_fold{fold}.h5"))
    preds = model.predict(test_ds)
    preds_list.append(preds)

final_preds = np.mean(preds_list, axis=0)

# 제출 파일 생성
submission = pd.read_csv(SAMPLE_SUB)
for idx, class_name in enumerate(label2id.keys()):
    submission[class_name] = final_preds[:, idx]

submission.to_csv("submission.csv", index=False)
print("submission.csv 저장 완료")




===== Fold 0 Training =====
Epoch 1/20
   2210/Unknown - 1009s 380ms/step - loss: 2.1066 - accuracy: 0.6218
Epoch 1: val_accuracy improved from -inf to 0.20323, saving model to D:/데이콘 250519 대회/models\model_fold0.h5
Epoch 2/20
Epoch 2: val_accuracy improved from 0.20323 to 0.28847, saving model to D:/데이콘 250519 대회/models\model_fold0.h5
Epoch 3/20
Epoch 3: val_accuracy improved from 0.28847 to 0.32544, saving model to D:/데이콘 250519 대회/models\model_fold0.h5
Epoch 4/20
 466/2210 [=====>........................] - ETA: 34:47 - loss: 0.4411 - accuracy: 0.8879

KeyboardInterrupt: 