In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# 데이터 불러오기
df = pd.read_csv('your_data.csv')

# '시점'을 datetime 형식으로 변환하고, 필요한 시계열 피처 추출
df['시점'] = pd.to_datetime(df['시점'].str[:6], format='%Y%m')
df['월'] = df['시점'].dt.month
df['년'] = df['시점'].dt.year

# 필요한 피처만 선택 (여기서는 '평균가(원/kg)' 예측)
features = ['월', '년', '총반입량(kg)', '총거래금액(원)', '최저가(원/kg)', '최고가(원/kg)', '경매 건수', '전순 평균가격(원)', '전달 평균가격(원)', '전년 평균가격(원)']
target = '평균가(원/kg)'

X = df[features]
y = df[target]

# Label Encoding (필요시)
# encoder = LabelEncoder()
# df['등급명'] = encoder.fit_transform(df['등급명'])

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
import lightgbm as lgb
from sklearn.metrics import mean_squared_error

# LGBM 데이터셋 준비
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test)

# LGBM 모델 학습
params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'learning_rate': 0.1,
    'num_leaves': 31,
    'max_depth': -1,
}

lgb_model = lgb.train(params, train_data, valid_sets=[train_data, test_data], early_stopping_rounds=50)

# 예측 및 평가
y_pred_lgb = lgb_model.predict(X_test)
rmse_lgb = np.sqrt(mean_squared_error(y_test, y_pred_lgb))
print(f'LGBM RMSE: {rmse_lgb}')


In [None]:
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.sequence import TimeseriesGenerator

# 스케일링
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df[['평균가(원/kg)']])

# LSTM을 위한 시계열 데이터 생성 (여기서는 12개월 시계열 데이터를 사용)
time_steps = 12
generator = TimeseriesGenerator(scaled_data, scaled_data, length=time_steps, batch_size=1)

# LSTM 모델
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_steps, 1)))
model.add(LSTM(50))
model.add(Dropout(0.2))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

# 학습
model.fit(generator, epochs=10)

# 예측
pred_lstm = model.predict(generator)
pred_lstm_rescaled = scaler.inverse_transform(pred_lstm)


In [None]:
# 두 모델의 결과를 결합 (가중 평균)
hybrid_pred = 0.5 * y_pred_lgb + 0.5 * pred_lstm_rescaled.flatten()

# 평가
nmae_value = np.mean(np.abs((y_test - hybrid_pred) / y_test))
print(f'Hybrid Model NMAE: {nmae_value}')
