In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras import layers, models

# 1. 데이터 불러오기
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")


In [9]:
X = train.drop(columns=["ID", "label"]).values
y = train["label"]
X_test = test.drop(columns=["ID"]).values

# 2. reshape: (1024 → 32x32 이미지로)
X = X.reshape(-1, 32, 32, 1)
X_test = X_test.reshape(-1, 32, 32, 1)

# 3. 라벨 인코딩 (문자 → 숫자)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 4. 학습/검증 분리
X_train, X_val, y_train, y_val = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# 5. CNN 모델 정의
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(le.classes_), activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# 6. 학습
model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), batch_size=32)

# 7. 검증 정확도 및 classification report 출력
val_preds = model.predict(X_val)
val_preds_labels = np.argmax(val_preds, axis=1)

print("Validation Accuracy:", np.mean(val_preds_labels == y_val))
print("\nClassification Report:\n")
print(classification_report(y_val, val_preds_labels, target_names=le.classes_))

# 8. 테스트 데이터 예측 및 저장
test_preds = model.predict(X_test)
test_labels = le.inverse_transform(np.argmax(test_preds, axis=1))

submission = pd.DataFrame({
    "ID": test["ID"],
    "label": test_labels
})
submission.to_csv("submission_cnn.csv", index=False)
print("✅ submission_cnn.csv 저장 완료!")


Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.0998 - loss: 42.6867 - val_accuracy: 0.1753 - val_loss: 2.2577
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.2924 - loss: 1.9864 - val_accuracy: 0.4481 - val_loss: 1.5016
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6377 - loss: 1.1285 - val_accuracy: 0.5584 - val_loss: 1.1720
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8153 - loss: 0.5534 - val_accuracy: 0.7403 - val_loss: 0.7835
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9199 - loss: 0.2839 - val_accuracy: 0.7532 - val_loss: 0.7487
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.9750 - loss: 0.1009 - val_accuracy: 0.8052 - val_loss: 0.6517
Epoch 7/100
[1m20/20[0m 