In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 필요한 라이브러리 불러오기
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score, classification_report

# 1. 데이터 불러오기
train = pd.read_csv('/content/drive/MyDrive/패턴인식/train_processed.csv')
test = pd.read_csv('/content/drive/MyDrive/패턴인식/test_processed.csv')

# 2. Feature, Label 나누기 
X = train.drop(['id', 'y', 'shares'], axis=1)
y = train['y']

# 3. Train/Validation 분리
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. Neural Network 모델 만들기 (BatchNormalization 추가, Dropout 줄이고 learning_rate 조정)
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    layers.BatchNormalization(),
    layers.Dropout(0.1),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.1),
    layers.Dense(1, activation='sigmoid')  # 이진 분류 문제니까 sigmoid 사용
])

# 5. 모델 컴파일
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0004),  # learning rate 살짝 줄임
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# 6. EarlyStopping 콜백 설정
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True
)

# 7. 모델 학습
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stop]
)

# 8. 테스트 데이터 준비 (id는 제거)
X_test = test.drop(['id'], axis=1)

# 9. 예측 (확률 예측)
prob_predictions = model.predict(X_test)

# 10. 확률을 0.48 기준으로 0 또는 1로 변환
threshold = 0.48
y_predictions = (prob_predictions >= threshold).astype(int)

# 11. 결과 저장 (id + y_predict + y_prob)
submission = pd.DataFrame({
    'id': test['id'],
    'y_predict': y_predictions.flatten(),
    'y_prob': prob_predictions.flatten()
})

submission.to_csv('prediction.csv', index=False)

print("Done! 결과는 prediction.csv에 저장됐습니다.")

# 12. Validation 데이터로 F1 Score, AUC, Classification Report 계산
val_prob_predictions = model.predict(X_val)
val_predictions = (val_prob_predictions >= threshold).astype(int)

f1 = f1_score(y_val, val_predictions)
auc = roc_auc_score(y_val, val_prob_predictions)

print(f"Validation F1 Score: {f1:.4f}")
print(f"Validation AUC Score: {auc:.4f}")

# 추가: precision, recall, f1-score, support 모두 출력
report = classification_report(y_val, val_predictions)
print("Classification Report:")
print(report)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - accuracy: 0.5523 - loss: 0.7828 - val_accuracy: 0.6151 - val_loss: 0.6547
Epoch 2/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.6255 - loss: 0.6640 - val_accuracy: 0.6318 - val_loss: 0.6430
Epoch 3/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6443 - loss: 0.6419 - val_accuracy: 0.6403 - val_loss: 0.6364
Epoch 4/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6458 - loss: 0.6342 - val_accuracy: 0.6446 - val_loss: 0.6341
Epoch 5/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6476 - loss: 0.6296 - val_accuracy: 0.6405 - val_loss: 0.6333
Epoch 6/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6592 - loss: 0.6231 - val_accuracy: 0.6426 - val_loss: 0.6295
Epoch 7/100
[1m278/2