In [None]:
# Mount Google Drive at /content/drive; the data-loading cell below reads its
# CSV files from under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 필요한 라이브러리 불러오기
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score

# 1. Load the preprocessed train/test tables from the mounted Drive folder
DATA_DIR = '/content/drive/MyDrive/패턴인식'
SEED = 42  # fixed seed so the train/validation split is reproducible

train = pd.read_csv(f'{DATA_DIR}/train_processed.csv')
test = pd.read_csv(f'{DATA_DIR}/test_processed.csv')

# 2. Separate features from the label. 'shares' is dropped along with 'id' and 'y'
#    (original author's note: 'shares' must be removed as well — presumably because
#    the label is derived from it; confirm against the preprocessing step).
X = train.drop(columns=['id', 'y', 'shares'])
y = train['y']

# 3. Train/validation split (80/20). Stratifying on y keeps the class ratio the
#    same in both parts, so validation metrics are not skewed by an unlucky draw.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

# 4. Build the network: two hidden Dense -> BatchNormalization -> Dropout stages
#    followed by a single sigmoid unit.
#    (Original author's note: BatchNormalization added, Dropout reduced,
#    learning rate adjusted.)
# Seed the Python, NumPy and backend RNGs before any weights are created so that
# weight initialisation, dropout masks and batch shuffling repeat across runs.
# NOTE: some GPU kernels may still be non-deterministic.
keras.utils.set_random_seed(42)

model = keras.Sequential([
    # Declare the input explicitly; passing input_shape= to the first Dense layer
    # is discouraged for Sequential models and emits a UserWarning.
    keras.Input(shape=(X_train.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.1),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.1),
    layers.Dense(1, activation='sigmoid')  # binary classification -> sigmoid output
])

# 5. Compile the model
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0005),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# 6. Early stopping: stop once val_loss has not improved for 7 epochs and
#    roll the model back to the weights of the best epoch.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=7, restore_best_weights=True
)

# 7. Train; 100 epochs is only an upper bound because early stopping may end sooner
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stop]
)

# 8. Prepare the test features: drop the id column, then select the training
#    features in the exact order the model was fitted on. The model consumes
#    columns by position, so a reordered column would otherwise be mis-scored
#    silently and a missing one would only surface as an opaque shape error.
#    Columns present in the test file but not used in training are discarded.
X_test = test.drop(columns=['id'])
missing_features = X_train.columns.difference(X_test.columns)
if len(missing_features) > 0:
    raise ValueError(
        f"test data is missing training features: {list(missing_features)}"
    )
X_test = X_test[X_train.columns]

# 9. Predict: sigmoid output, i.e. probability of the positive class per row
prob_predictions = model.predict(X_test)

# 10. Threshold the probabilities at 0.5 to obtain hard 0/1 labels
y_predictions = (prob_predictions >= 0.5).astype(int)

# 11. Save the result (id + predicted label + predicted probability).
#     The model returns (n, 1) arrays, so they are flattened to 1-D columns.
submission = pd.DataFrame({
    'id': test['id'],
    'y_predict': y_predictions.ravel(),
    'y_prob': prob_predictions.ravel()
})

submission.to_csv('prediction.csv', index=False)

print("Done! 결과는 prediction.csv에 저장됐습니다.")

# 12. Score the model on the validation split: F1 is computed on the labels
#     obtained by thresholding at 0.5, ROC AUC on the raw probabilities.
val_prob_predictions = model.predict(X_val)
val_predictions = np.greater_equal(val_prob_predictions, 0.5).astype(int)

f1 = f1_score(y_true=y_val, y_pred=val_predictions)
auc = roc_auc_score(y_true=y_val, y_score=val_prob_predictions)

# One print call, one metric per line
print(
    f"Validation F1 Score: {f1:.4f}",
    f"Validation AUC Score: {auc:.4f}",
    sep="\n",
)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.5613 - loss: 0.7480 - val_accuracy: 0.6282 - val_loss: 0.6454
Epoch 2/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6219 - loss: 0.6619 - val_accuracy: 0.6360 - val_loss: 0.6394
Epoch 3/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6436 - loss: 0.6387 - val_accuracy: 0.6417 - val_loss: 0.6360
Epoch 4/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.6525 - loss: 0.6265 - val_accuracy: 0.6430 - val_loss: 0.6321
Epoch 5/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6573 - loss: 0.6188 - val_accuracy: 0.6417 - val_loss: 0.6330
Epoch 6/100
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6585 - loss: 0.6176 - val_accuracy: 0.6423 - val_loss: 0.6315
Epoch 7/100
[1m278/278[0m [32m━