In [2]:
import pandas as pd
import numpy as np

from dataloader.data_loader import data_loader
from utils.data_split import data_split
from utils.load_params import load_params
from models.train_model import train_model
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_squared_log_error, mean_absolute_percentage_error, explained_variance_score
from datetime import datetime

In [3]:
# Experiment configuration: these four strings are passed to the project
# helpers in the cells below and are also used to build the output filename.
dataset_name = "heat"
model_name = "XGB"        # options listed by the author: LGBM / XGB
split_type = "time"       # options listed by the author: random / time
model_type = "regressor"  # options listed by the author: classifier / regressor

In [4]:
# Build a timestamped output filename of the form
# "<model>_<dataset>_<split>_<MMDDhhmm>.csv", e.g. "XGB_heat_time_01110115.csv".
now = datetime.now()
# Zero-padded, fixed-width fields keep the code unambiguous and sortable;
# un-padded parts would make e.g. Jan 11 01:15 and Nov 1 01:15 both "111115".
date_code = now.strftime("%m%d%H%M")
# The extension is appended (not "_"-joined) so no stray underscore precedes ".csv".
save_name = "_".join([model_name, dataset_name, split_type, date_code]) + ".csv"

In [5]:
# Load the selected dataset via the project loader, which yields four values:
# a training frame, a test frame, a submission template and the target column
# (roles inferred from the names - verify against dataloader.data_loader).
train_df, test_df, submission_df, target_column = data_loader(dataset_name)

In [6]:
# Work on a copy of the test frame so later column drops leave test_df untouched.
X_test = test_df.copy()
# Split the training frame into train/validation features and targets using
# the configured split strategy (split logic lives in utils.data_split).
x_train, x_valid, y_train, y_valid = data_split(split_type, train_df, target_column)

In [7]:
# Remove the "train_heatbranch_id" column (presumably a branch identifier -
# confirm) from all three feature frames so the model never sees it.
# Author's note, translated from Korean: "originally, this should be deleted".
# NOTE: re-running this cell raises KeyError because the column is already gone.
branch_id_cols = ["train_heatbranch_id"]
x_train = x_train.drop(columns=branch_id_cols)
x_valid = x_valid.drop(columns=branch_id_cols)
X_test = X_test.drop(columns=branch_id_cols)


In [8]:
# Minimum value of the training targets.
# NOTE(review): not referenced by any later cell visible here - confirm it is still needed.
local_min = min(y_train)

In [9]:
# Look up the parameters for the configured model name/type, then fit a model
# on the training split. `params` is kept because the final refit reuses it.
# NOTE(review): load_params / train_model are project helpers; their exact
# behaviour is not visible in this notebook.
params = load_params(model_name, model_type)
model = train_model(model_name, model_type, params, x_train, y_train)

In [10]:
# Predict on the validation features, then floor negative predictions at zero.
raw_valid_pred = model.predict(x_valid)
y_valid_pred = np.where(raw_valid_pred < 0, 0, raw_valid_pred)

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  return func(**kwargs)


In [11]:
# Score the validation predictions with the imported regression metrics.
y_valid_mae = mean_absolute_error(y_valid, y_valid_pred)
y_valid_mse = mean_squared_error(y_valid, y_valid_pred)
y_valid_rmse = np.sqrt(y_valid_mse)  # RMSE is the square root of the MSE above
y_valid_r2 = r2_score(y_valid, y_valid_pred)
y_valid_mape = mean_absolute_percentage_error(y_valid, y_valid_pred)
y_valid_evs = explained_variance_score(y_valid, y_valid_pred)

# MSLE is computed last and guarded: on ValueError a warning is printed and the
# metric stays None, so the cell does not abort.
y_valid_msle = None
try:
    y_valid_msle = mean_squared_log_error(y_valid, y_valid_pred)
except ValueError:
    print("Warning: MSLE could not be calculated due to negative values.")



In [12]:
# Report each validation metric to two decimal places.

# MAE: mean of the absolute errors; lower is better.
print(f"MAE: {y_valid_mae:.2f}")
# MSE: mean of the squared errors; lower is better.
print(f"MSE: {y_valid_mse:.2f}")
# RMSE: square root of the MSE; lower is better.
print(f"RMSE: {y_valid_rmse:.2f}")
# R2: explanatory power of the model; closer to 1 means more accurate predictions.
print(f"R2: {y_valid_r2:.2f}")
# MSLE: mean squared error of the log difference between predictions and
# actual values; lower is better. It is None when it could not be computed.
msle_line = "MSLE: Not available" if y_valid_msle is None else f"MSLE: {y_valid_msle:.2f}"
print(msle_line)
# MAPE: absolute error expressed relative to the actual value; lower is better.
print(f"MAPE: {y_valid_mape:.2f}")
# EVS: explained variance between predicted and actual values; closer to 1 is better.
print(f"EVS: {y_valid_evs:.2f}")

MAE: 39.24
MSE: 5754.84
RMSE: 75.86
R2: 0.42
MSLE: Not available
MAPE: 3124671807488.00
EVS: 0.50


In [13]:
# Stack the train and validation splits row-wise (concat's default axis) and
# refit with the same parameters; this rebinds `model` to the refitted estimator.
x_total = pd.concat([x_train, x_valid])
y_total = pd.concat([y_train, y_valid])
model = train_model(model_name, model_type, params, x_total, y_total)

In [18]:
# Drop the target column from the test features if it is present;
# errors="ignore" makes this a no-op when the column does not exist.
X_test = X_test.drop(columns=["train_heatheat_demand"], errors="ignore")
test_pred = model.predict(X_test)
# Floor negative predictions at zero, the same post-processing the validation
# predictions receive before scoring, so the submitted values are the ones the
# reported metrics describe.
test_pred = np.where(test_pred < 0, 0, test_pred)

# Write the predictions into the submission template and save it as CSV
# (utf-8-sig adds a BOM to the file).
submission_df['heat_demand'] = test_pred
submission_df.to_csv(save_name, index=False, encoding='utf-8-sig')