# Single LGBM

---

In [None]:
# DataFrame
import pandas as pd
import numpy as np
import random
import time

# Visualization
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore')

# Save the log
import os
import pickle


# LGBM
from lightgbm import LGBMRegressor

# Metric 
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

In [None]:
# set the seed
def set_seed(seed: int):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module and NumPy's global generator, and
    exports ``PYTHONHASHSEED`` into ``os.environ``.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

In [None]:
# Draw negative tick labels with an ASCII hyphen rather than the Unicode minus
# glyph, which some fonts cannot render.
plt.rcParams['axes.unicode_minus'] = False
# Use a serif font family for all figure text.
matplotlib.rc('font', family='Serif')

In [None]:
def split_data(product_df, cutoff='2022-08-01'):
    """Split a product's rows into train and test sets by date.

    Args:
        product_df: DataFrame with a ``'Date'`` column that can be compared
            against ``cutoff``.
        cutoff: First date that belongs to the test set. Rows strictly
            before it go to the training set. Defaults to ``'2022-08-01'``,
            the split previously hard-coded here.

    Returns:
        ``(train_df, test_df)``, each with a fresh 0-based index.
    """
    dates = product_df['Date']
    # Two explicit comparisons (not a negated mask) so rows with a missing
    # date land in neither set, exactly as before.
    train_df = product_df[dates < cutoff].reset_index(drop=True)
    test_df = product_df[dates >= cutoff].reset_index(drop=True)
    return train_df, test_df

In [None]:
# Rolling Forecasting
def LGBM_single(target_df):
    """Run a rolling one-step forecast with a default ``LGBMRegressor``.

    For every date in the test split, the model is refit on all rows of
    ``target_df`` dated strictly before that date and then asked to predict
    the first row carrying that date. Negative predictions are replaced
    with 0.

    Args:
        target_df: DataFrame holding ``'Date'``, ``'y'`` and ``'Product'``
            columns; every remaining column is used as a model feature.

    Returns:
        A copy of the test split with an added ``'Pred'`` column, indexed by
        ``'Date'``.
    """
    non_feature_cols = ['Date', 'y', 'Product']

    _, test_df = split_data(target_df)
    regressor = LGBMRegressor()
    dates = target_df['Date']

    predictions = []
    for test_date in test_df['Date']:
        # Training window: everything observed before the day being forecast.
        history = target_df[dates < test_date]
        fitted = regressor.fit(history.drop(columns=non_feature_cols), history['y'])

        # Features of the day being forecast; only the first match is used.
        current = target_df[dates == test_date].drop(columns=non_feature_cols)
        raw_prediction = fitted.predict(current)[0]

        predictions.append(max(0, raw_prediction))  # clip negatives to 0

    return test_df.assign(Pred=predictions).set_index('Date')

In [None]:
def save_model(product_code, best_model):
    """Pickle ``best_model`` to ``Result/LGBM/Model/<product_code>.pkl``.

    The target directory is created when it does not exist yet.

    Args:
        product_code: Identifier used as the file name (without extension).
        best_model: Any picklable object, typically a fitted model.

    Returns:
        ``best_model`` unchanged, so the call can be chained.
    """
    # Build the directory from components rather than a backslash literal:
    # '\L' and '\M' are invalid escape sequences, and on POSIX the old string
    # named a single directory that load_model ('Result/LGBM/Model/...')
    # could never read back from.
    folder_path = os.path.join('Result', 'LGBM', 'Model')
    save_path = os.path.join(folder_path, f'{product_code}.pkl')

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(folder_path, exist_ok=True)

    with open(save_path, 'wb') as f:
        pickle.dump(best_model, f)
    return best_model

In [None]:
def load_model(file_name):
    """Load a pickled object from ``Result/LGBM/Model/<file_name>.pkl``.

    Args:
        file_name: File name without the ``.pkl`` extension.

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that this
    # project wrote itself.
    with open(f'Result/LGBM/Model/{file_name}.pkl', 'rb') as handle:
        return pickle.load(handle)

In [None]:
def actual_pred_plot(product_code, res_df, metric_df):
    """
    Plot actual vs. predicted values, save the figure, and save the metrics.

    Both files are written to ``Result/LGBM/<product_code>/``: the chart as
    ``<product_code>_plot.png`` and ``metric_df`` as
    ``<product_code>_metric.csv``. The figure is also shown, and all open
    figures are closed afterwards.

    Args:
        product_code: Name used for the output folder, file names and title.
        res_df: DataFrame with ``'y'`` (actual) and ``'Pred'`` columns.
        metric_df: DataFrame of metrics written alongside the figure.
    """
    actual_series, pred_series = res_df['y'], res_df['Pred']

    out_dir = os.path.join("Result", "LGBM", product_code)
    plot_file = os.path.join(out_dir, f'{product_code}_plot.png')
    metric_file = os.path.join(out_dir, f'{product_code}_metric.csv')

    # Draw both series on one figure
    plt.figure(figsize=(16, 8))
    plt.plot(actual_series, label='Actual', color='r')
    plt.plot(pred_series, label='Prediction', color='b')
    plt.title(f"Pred Actual Plot - {product_code}", fontsize=20)
    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Order Demand", fontsize=14)
    plt.legend(loc="upper right")

    # Make sure the output folder exists before saving the plot
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Save first, then display
    plt.savefig(plot_file)
    plt.show()

    # Store the metrics for the whole result next to the figure
    metric_df.to_csv(metric_file, encoding="utf-8-sig")
    plt.close('all')  # close all figures to free up memory

In [None]:
def mape(actual, pred):
    """Return the mean absolute percentage error with a +1 shifted denominator.

    Each absolute error is divided by ``actual + 1`` rather than ``actual``,
    so an actual value of 0 does not cause a division by zero.

    Args:
        actual: Array-like of observed values.
        pred: Array-like of predicted values, same shape as ``actual``.

    Returns:
        The mean of ``|(actual - pred) / (actual + 1)|``.
    """
    observed = np.array(actual)
    forecast = np.array(pred)
    scaled_error = (observed - forecast) / (observed + 1)
    return np.mean(np.abs(scaled_error))

def nrmse(y_true, y_pred):
    """Return the RMSE normalized by the mean of the true values.

    No guard is applied for a zero mean of ``y_true``.

    Args:
        y_true: Observed values.
        y_pred: Predicted values.

    Returns:
        ``root_mean_squared_error(y_true, y_pred) / mean(y_true)``.
    """
    rmse = root_mean_squared_error(y_true, y_pred)
    return rmse / np.mean(y_true)

def nmae(y_true, y_pred):
    """Return the MAE normalized by the mean of the true values.

    No guard is applied for a zero mean of ``y_true``.

    Args:
        y_true: Observed values.
        y_pred: Predicted values.

    Returns:
        ``mean_absolute_error(y_true, y_pred) / mean(y_true)``.
    """
    abs_error = mean_absolute_error(y_true, y_pred)
    return abs_error / np.mean(y_true)

In [None]:
def calculate_metrics(product_code, res_df):
    """Compute the evaluation metrics for one product's forecast.

    Args:
        product_code: Label used as the index of the returned row.
        res_df: DataFrame with ``'y'`` (actual) and ``'Pred'`` columns.

    Returns:
        A one-row DataFrame indexed by ``product_code`` with the columns
        MAPE, RMSE, MAE, NRMSE, NMAE and R2, each rounded to 4 decimals.
    """
    actual, pred = res_df['y'], res_df['Pred']

    # Insertion order here fixes the column order of the result.
    scores = {
        'MAPE': mape(actual, pred),
        'RMSE': root_mean_squared_error(actual, pred),
        'MAE': mean_absolute_error(actual, pred),
        'NRMSE': nrmse(actual, pred),
        'NMAE': nmae(actual, pred),
        'R2': r2_score(actual, pred),
    }

    rounded = {name: [round(value, 4)] for name, value in scores.items()}
    return pd.DataFrame(rounded, index=[product_code])

In [None]:
def execute_single_LGBM(df, product_code, str_product_code):
    """Forecast, score and plot a single product, reporting the run time.

    Args:
        df: Full dataset containing a ``'Product'`` column.
        product_code: Value of ``'Product'`` selecting the rows to model;
            also used as the index label of the metric row.
        str_product_code: String form of the product code, used for output
            folder and file names.

    Returns:
        ``(metric_df, res_df)``: the one-row metric table and the test
        rows with their predictions.
    """
    started_at = time.time()

    is_product = df['Product'] == product_code
    product_df = df[is_product].reset_index(drop=True)

    # Model persistence via save_model is currently not performed here.
    res_df = LGBM_single(product_df)
    metric_df = calculate_metrics(product_code, res_df)
    actual_pred_plot(str_product_code, res_df, metric_df)

    elapsed_time_minutes = (time.time() - started_at) / 60
    print("실행 시간: {:.2f} 분".format(elapsed_time_minutes))
    return metric_df, res_df

---

In [None]:
# Load the dataset, parse 'Date' into datetimes and drop the '년월' column.
df = (
    pd.read_csv("../Data/dataset.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .drop(columns=['년월'])
)

In [None]:
set_seed(1234)

# Per-product outputs are collected in lists and concatenated once at the end,
# rather than repeatedly concatenating onto an empty DataFrame.
metric_frames = []
result_frames = []

target_code = ['Office Product', 'Packaging material', 'Pharmaceuticals']
for code in target_code:

    print("==================================")
    print(f"========== { code } ==========")
    print("==================================")

    all_metric, all_result = execute_single_LGBM(df, code, str(code))

    metric_frames.append(all_metric)
    # all_result is indexed by 'Date'. Turn the index back into a column so
    # that ignore_index=True below does not throw the dates away.
    result_frames.append(all_result.reset_index())

# One metric row per product, indexed by product code.
metric_df = pd.concat(metric_frames)
# All test rows with predictions, with a fresh 0-based index and 'Date' kept.
result_df = pd.concat(result_frames, ignore_index=True)
metric_df