In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the preprocessed dataset and discard the 'Unnamed: 0' column
# (presumably an index that was saved into the CSV -- confirm against the
# preprocessing step that produced the file).
df_con = pd.read_csv("./2023빅콘_데이터전처리.csv").drop(columns=['Unnamed: 0'])

# Last expression of the cell: render the cleaned frame.
df_con

Unnamed: 0,play_date,price,ticket_cancel,year,seat_code,genre_code
0,2022-02-04 20:00:00,10000,2,2022,3180010,1.0
1,2022-03-02 19:30:00,180000,0,2022,1021207,4.0
2,2019-03-23 20:00:00,144000,2,2019,1010201,1.0
3,2019-11-09 17:00:00,0,0,2019,3050408,1.0
4,2019-07-23 20:00:00,0,0,2019,2040804,1.0
...,...,...,...,...,...,...
1096963,2021-07-04 15:00:00,90000,2,2021,1010202,1.0
1096964,2023-06-13 17:00:00,10000,0,2023,3010604,1.0
1096965,2020-10-20 19:30:00,0,0,2020,1041612,2.0
1096966,2020-08-18 19:30:00,0,0,2020,1042008,3.0


## Bayesian Optimization

베이지안 최적화는 지금까지 찾은 최적의 해 근처의 하이퍼파라미터를 위주로 탐색하는 작업과
임의의 새로운 하이퍼파라미터를 탐색하는 작업을 반복하여 최적의 해를 탐색하는 기법이다.

In [2]:
pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
     -------------------------------------- 100.3/100.3 kB 6.0 MB/s eta 0:00:00
Collecting pyaml>=16.9
  Downloading pyaml-23.9.6-py3-none-any.whl (22 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-23.9.6 scikit-optimize-0.9.0
Note: you may need to restart the kernel to use updated packages.


In [13]:
import numpy as np
import tensorflow as tf
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from tensorflow import keras
from tensorflow.keras import layers

In [14]:
# Select the predictor columns and the regression target (ticket price).
feature_columns = ["ticket_cancel", "year", "seat_code", "genre_code"]
X = df_con[feature_columns]
y = df_con["price"]

# Hold out 30% of the rows as a test set; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Min-max scale the features into [0, 1]. The scaler learns its ranges from
# the training rows only and is then applied to both partitions.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [19]:
def create_model(learning_rate=None, dropout_rate=0.2, hidden_layers=None,
                 hidden_units=None, input_dim=None):
    """Build and compile a fully connected regression network.

    Called with no arguments this reproduces the original fixed architecture:
    Dense layers of 512, 256, 128, 64, 32 and 16 ReLU units, a Dropout layer
    after the 512- and 128-unit layers, a single linear output unit, and the
    default 'adam' optimizer with mean-squared-error loss.

    Parameters
    ----------
    learning_rate : float or None
        Learning rate for the Adam optimizer. None keeps Keras' default
        'adam' configuration.
    dropout_rate : float
        Rate used by every Dropout layer.
    hidden_layers : int or None
        Number of hidden Dense layers. Must be given together with
        ``hidden_units``; when both are None the fixed architecture is used.
    hidden_units : int or None
        Width of each hidden Dense layer (see ``hidden_layers``).
    input_dim : int or None
        Number of input features. None falls back to the column count of the
        notebook-level ``X_train_scaled``.

    Returns
    -------
    keras.Sequential
        The compiled (untrained) model.

    Raises
    ------
    ValueError
        If only one of ``hidden_layers`` / ``hidden_units`` is provided.
    """
    if (hidden_layers is None) != (hidden_units is None):
        raise ValueError("hidden_layers and hidden_units must be given together")

    if input_dim is None:
        input_dim = X_train_scaled.shape[1]

    if hidden_layers is None:
        # Original hand-picked stack; dropout follows the 1st and 3rd layers.
        widths = [512, 256, 128, 64, 32, 16]
        dropout_after = {0, 2}
    else:
        # Tunable stack: identical layers, each followed by dropout.
        # The search hands over numpy scalars, hence the int() casts.
        widths = [int(hidden_units)] * int(hidden_layers)
        dropout_after = set(range(len(widths)))

    stack = [layers.Input(shape=(int(input_dim),))]
    for position, width in enumerate(widths):
        stack.append(layers.Dense(width, activation='relu'))
        if position in dropout_after:
            stack.append(layers.Dropout(float(dropout_rate)))
    stack.append(layers.Dense(1))
    model = keras.Sequential(stack)

    if learning_rate is None:
        optimizer = 'adam'
    else:
        optimizer = keras.optimizers.Adam(learning_rate=float(learning_rate))

    # Compile the model
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    return model


class KerasPriceRegressor(RegressorMixin, BaseEstimator):
    """Minimal scikit-learn estimator around ``create_model``.

    Every constructor argument is stored unchanged under the same attribute
    name, which is what scikit-learn's ``get_params`` / ``set_params`` /
    ``clone`` rely on, so the search can vary each of them.
    """

    def __init__(self, learning_rate=1e-3, epochs=100, batch_size=32,
                 dropout_rate=0.2, hidden_layers=2, hidden_units=128,
                 verbose=0):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.hidden_layers = hidden_layers
        self.hidden_units = hidden_units
        self.verbose = verbose

    def fit(self, X, y):
        """Build a fresh network from the current parameters and train it."""
        features = np.asarray(X, dtype="float32")
        targets = np.asarray(y, dtype="float32")
        self.model_ = create_model(
            learning_rate=self.learning_rate,
            dropout_rate=self.dropout_rate,
            hidden_layers=self.hidden_layers,
            hidden_units=self.hidden_units,
            input_dim=features.shape[1],
        )
        self.model_.fit(
            features,
            targets,
            epochs=int(self.epochs),
            batch_size=int(self.batch_size),
            verbose=self.verbose,
        )
        return self

    def predict(self, X):
        """Return the network output unchanged, i.e. shape (n_samples, 1).

        The 2-D shape is kept on purpose: the metrics cell of this notebook
        reads each prediction as ``prediction[0]``.
        """
        return self.model_.predict(np.asarray(X, dtype="float32"), verbose=0)

    def score(self, X, y, sample_weight=None):
        """Return the negative mean squared error (higher is better).

        This matches the loss the model is compiled with, and the evaluation
        cell negates the returned value before printing it as a loss.
        """
        expected = np.asarray(y, dtype="float64").reshape(-1)
        produced = np.asarray(self.predict(X), dtype="float64").reshape(-1)
        return -float(np.average((expected - produced) ** 2, weights=sample_weight))


# Baseline: the original fixed architecture, trained once. It is independent
# of the hyperparameter search below and is not used by it.
model = create_model()

# Train the baseline model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=1)

# Search space for Bayesian optimization. The three-element lists are
# treated by skopt as categorical choices.
param_dist = {
    'learning_rate': Real(1e-6, 1e-2, prior='log-uniform'),
    'epochs': [50, 100, 200],
    'batch_size': [32, 64, 128],
    'dropout_rate': [0.2, 0.3, 0.4],
    'hidden_layers': [1, 2, 3],
    'hidden_units': [64, 128, 256]
}

# Estimator handed to the search; its parameters are overwritten per trial.
keras_reg = KerasPriceRegressor()

# Model optimization with Bayesian optimization.
# n_jobs=1: each fit is already parallelised by TensorFlow, and running folds
# in separate worker processes would make every worker start its own
# TensorFlow runtime (and receive a copy of the notebook globals).
opt = BayesSearchCV(
    keras_reg,
    param_dist,
    n_iter=10,
    cv=5,
    n_jobs=1,
    verbose=1,
    scoring='neg_mean_squared_error',
    random_state=42
)

opt.fit(X_train_scaled, y_train)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100

KeyboardInterrupt: 

In [None]:
# Report the best hyperparameter combination found by the search.
print("가장 좋은 파라미터 조합: ", opt.best_params_)
# best_score_ is the mean cross-validated neg-MSE, so negate it to show MSE.
print("가장 좋은 평가 점수: ", -opt.best_score_)

# Evaluate the best model. With the default refit=True the search has already
# retrained the best configuration on the full training set, so fitting
# best_estimator_ again would only repeat that training.
best_model = opt.best_estimator_

# opt.score applies the scorer configured on the search
# (neg_mean_squared_error), so the negated value is the test-set MSE
# regardless of what the estimator's own score() method returns.
test_loss = opt.score(X_test_scaled, y_test)
print("테스트 세트에서의 손실: {:.4f}".format(-test_loss))

# Predictions on the held-out test set
predictions = best_model.predict(X_test_scaled)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Actual and predicted prices for the test set.
actual_prices = y_test.values
# Flatten without indexing each row: this accepts both (n, 1) network output
# and the 1-D arrays that scikit-learn style regressors return, where
# `prediction[0]` on a scalar would raise.
predicted_prices = np.asarray(predictions).reshape(-1)

# Mean squared error
mse = mean_squared_error(actual_prices, predicted_prices)

# Mean absolute error
mae = mean_absolute_error(actual_prices, predicted_prices)

# Coefficient of determination (R squared)
r2 = r2_score(actual_prices, predicted_prices)

print(f"평균 제곱 오차 (MSE): {mse:.2f}")
print(f"평균 절대 오차 (MAE): {mae:.2f}")
print(f"R 제곱 (R-squared): {r2:.2f}")