In [2]:
import os
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# Configuration
IMG_SIZE = 224    # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 64   # batch size used when building the datasets
EPOCHS = 10       # number of training epochs
SEED = 42         # seed shared by the RNGs and the train/validation split


# Fix the random seeds
def seed_everything(seed=SEED):
    """Seed Python's ``random``, NumPy and TensorFlow with the same value.

    Args:
        seed: Integer seed handed to each generator; defaults to ``SEED``.
    """
    # Same three calls, in the same order, expressed as a loop.
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)


seed_everything()

# Paths
train_dir = r"D:\데이콘 250519 대회\open\train"
test_dir = r"D:\데이콘 250519 대회\open\test"
submission_path = r"D:\데이콘 250519 대회\open\sample_submission.csv"
submission_save_path = r"D:\데이콘 250519 대회\open\tf_submission.csv"

# Class names: one class per sub-directory of train_dir, in sorted order.
# Only directories are kept because image_dataset_from_directory infers its
# classes from sub-directories; a stray file in train_dir (e.g. a thumbnail
# cache) would otherwise inflate num_classes and shift the output columns.
# NOTE(review): some Keras versions may also skip hidden ('.'-prefixed)
# directories -- confirm if train_dir can contain any.
class_names = sorted(
    entry
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
num_classes = len(class_names)

# Build the datasets
def _load_split(subset):
    """Load one subset ("training" or "validation") of the images in train_dir.

    Every call uses the same validation_split (0.2) and SEED; only ``subset``
    differs. Labels are one-hot encoded (label_mode='categorical').
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        train_dir,
        validation_split=0.2,
        subset=subset,
        seed=SEED,
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        label_mode='categorical',
    )


train_ds = _load_split("training")
val_ds = _load_split("validation")

# Preprocessing (normalization): multiply pixel values by 1/255.
normalization_layer = layers.Rescaling(1./255)


def _rescale_images(images, labels):
    """Rescale a batch of images and pass its labels through unchanged."""
    return normalization_layer(images), labels


train_ds = train_ds.map(_rescale_images)
val_ds = val_ds.map(_rescale_images)

# Model (EfficientNetV2)
# include_preprocessing is left at its default (True): the backbone contains
# its own rescaling/normalization and expects raw pixel values in [0, 255].
base_model = tf.keras.applications.EfficientNetV2B0(
    include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), weights="imagenet"
)
base_model.trainable = False  # freeze the backbone for transfer learning

model = keras.Sequential([
    keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
    # The datasets and the prediction loop feed images already scaled to
    # [0, 1]. Passing those straight into the backbone would rescale them a
    # second time and starve the pretrained weights of signal, so map them
    # back to [0, 255] here. The model's input contract stays "[0, 1] images".
    layers.Rescaling(255.0),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax')  # one probability per class
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',  # labels are one-hot (label_mode='categorical')
    metrics=['accuracy']
)

# Training
# Save the model only when the validation loss improves, so that
# 'best_model.h5' really holds the best epoch rather than whatever the last
# epoch happened to produce.
checkpoint_path = 'best_model.h5'
best_checkpoint = keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
)
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=[best_checkpoint],
)

# Restore the best epoch's weights so that inference below uses the same
# model that was written to disk.
model.load_weights(checkpoint_path)

# Collect the test images: every *.jpg (case-insensitive) directly inside
# test_dir, as full paths in sorted order.
jpg_names = [
    name for name in os.listdir(test_dir) if name.lower().endswith(".jpg")
]
# All paths share the same directory prefix, so sorting the bare names gives
# the same order as sorting the joined paths.
jpg_names.sort()
test_image_paths = [os.path.join(test_dir, name) for name in jpg_names]

# Prediction
def _load_test_image(path):
    """Load one image, resize it to IMG_SIZE x IMG_SIZE and scale it by 1/255."""
    img = tf.keras.utils.load_img(path, target_size=(IMG_SIZE, IMG_SIZE))
    return tf.keras.utils.img_to_array(img) / 255.0


# Run the model on BATCH_SIZE images at a time instead of calling
# model.predict once per image: the per-call overhead of predict dominates
# when it is invoked for every single file.
predictions = []  # one probability vector per test image, in path order
for start in range(0, len(test_image_paths), BATCH_SIZE):
    batch_paths = test_image_paths[start:start + BATCH_SIZE]
    batch = np.stack([_load_test_image(path) for path in batch_paths])
    predictions.extend(model.predict_on_batch(batch))

# Write the results
submission = pd.read_csv(submission_path, encoding='utf-8-sig')
pred_df = pd.DataFrame(predictions, columns=class_names)

# The assignment below aligns rows by index, so a row-count mismatch would
# silently leave NaN probabilities in the output file. Fail loudly instead.
if len(pred_df) != len(submission):
    raise ValueError(
        f"Got {len(pred_df)} predictions but the sample submission has "
        f"{len(submission)} rows; check the files in test_dir."
    )

# NOTE(review): rows are matched by position -- this assumes the sample
# submission lists the test images in sorted-filename order; confirm.
submission[class_names] = pred_df[class_names]
submission.to_csv(submission_save_path, index=False, encoding='utf-8-sig')



Found 33137 files belonging to 396 classes.
Using 26510 files for training.
Found 33137 files belonging to 396 classes.
Using 6627 files for validation.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
