# LSTM Single Model

## Data Description

    - Raw data: Historical Product Demand.csv

    - Input data: Data on 8x augmentation of demand records by selecting 8 representative items

    - Product code: 'Product_0025', 'Product_0739', 'Product_0901', 'Product_1154',
                    'Product_1248', 'Product_1295', 'Product_1378', 'Product_2004'
            

    - Size of Data: 116392 rows × 4 columns

    - Features: Date, Product_Code, Product_Category, Order_Demand

    - Period: 2012-01-01 ~ 2017-01-09

---

In [1]:
# DataFrame
import pandas as pd
import numpy as np
import random
from datetime import datetime, date

# Preprocessing
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Save the log
import os
import time
import pickle 

# LSTM
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Activation
from tensorflow.keras import layers

from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MSE

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau

from keras_tuner.tuners import RandomSearch

from keras_tuner import RandomSearch
import tempfile

# Metric 
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import r2_score

## Data Explore

In [2]:
# # Data Loading
# df = pd.read_csv('Data\HPD_0416.csv')
# # convert the string to the datetype
# df['Date'] = pd.to_datetime(df['Date'])

In [3]:
df = pd.read_csv('Data\\train.csv')
df = df[(df['store']==1)]

pd.to_datetime(df['date'].max()) - pd.to_datetime(df['date'].min())
df = df.reset_index(drop=True)
df.rename(columns={'date': 'Date', 'item':'Product_Code', 'sales':'Order_Demand'}, inplace=True)
df = df[['Date', 'Product_Code', 'Order_Demand']]
df['Date'] = pd.to_datetime(df['Date'])
df['Product_Code'] = df['Product_Code'].astype('str')

In [17]:
df = pd.read_csv('Data\\train.csv')
df

Unnamed: 0,date,store,item,sales
0,2013-01-01,1,1,13
1,2013-01-02,1,1,11
2,2013-01-03,1,1,14
3,2013-01-04,1,1,13
4,2013-01-05,1,1,10
...,...,...,...,...
912995,2017-12-27,10,50,63
912996,2017-12-28,10,50,59
912997,2017-12-29,10,50,74
912998,2017-12-30,10,50,62


In [4]:
# print(df.info())
# print('-------------------------')
# print("")
# print("The Number of unique")
# print('-------------------------')
# print('Product code:\t', df.Product_Code.nunique())
# print('Category:\t', df.Product_Category.nunique())
# print('-------------------------')
# print("The Product Code:")
# print("")
# for i, code in enumerate(df['Product_Code'].unique()):
#     print(i+1, code)

---

### Split the train and test set
- Input
     data: dataframe with dates and Demand data
     
- output
    - train:  2012-01-01 ~ 2015-08-31 
    - valid:  2015-09-01 ~ 2016-08-31
    - test :  2016-09-01 ~

In [5]:
def split_data(product_df, time_steps, pred_days): 
    pred_days -= 1
    total_len = len(product_df)
    train_end = int(total_len * 0.8) # 80% of the data for training
    val_end = int(total_len * 0.9) # next 10% for validation, remaining 10% for test
    y = product_df.filter(['y']).values # y(수요량) 값
    
    # Minmax로 0~1 사이에 값이 오도록 정규화
    sc = MinMaxScaler() # 객체 생성
    y_scaled = sc.fit_transform(y) # 전체 y값 정규화
    
    # Train Data
    y_train_scaled = y_scaled[:train_end,:]

    X_train = [] 
    y_train = []
    for i in range(time_steps, train_end - pred_days): # pred_days days shifted
        X_train.append(y_train_scaled[i-time_steps:i,0]) # time steps 만큼 sliding window
        y_train.append(y_train_scaled[i+pred_days,0]) # pred_days days shifted

    X_train = np.array(X_train)
    y_train = np.array(y_train)
    
    # Reshape X_train for LSTM -> (batch_size, time_steps, features)
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1],1))

    # Validation Data
    y_val_scaled = y_scaled[train_end : val_end, :]

    X_val = []
    y_val = []
    for i in range(time_steps, len(y_val_scaled) - pred_days): # pred_days days shifted
        X_val.append(y_val_scaled[i-time_steps : i, 0])
        y_val.append(y_val_scaled[i+pred_days, 0]) # pred_days days shifted

    X_val = np.array(X_val)
    y_val = np.array(y_val)

    # Reshape X_val for LSTM -> (batch_size, time_steps, features)
    X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1],1))
    
    # Test Data
    y_test_scaled = y_scaled[val_end:,:]
    
    X_test = []
    y_test = product_df.iloc[val_end+time_steps+pred_days:] # pred_days days shifted
    y_test['y_norm'] = y_test_scaled[time_steps:].reshape(-1).copy() # pred_days days shifted
    # test data 개수만큼 반복
    for i in range(time_steps, len(y_test_scaled) - pred_days): # pred_days days shifted
        X_test.append(y_test_scaled[i-time_steps : i, 0])
    
    X_test = np.array(X_test)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1],1))

    
    return X_train, y_train, X_val, y_val, X_test, y_test, sc

### LSTM

### Optimize Parameters using Keras Tuner
    - (Random search)

In [1]:
def build_model(hp):
    model = Sequential()
    # 2개의 LSTM Layers
    model.add(LSTM(units=hp.Int('units_1', min_value=32, max_value=480, step=64),
                   activation=hp.Choice('activation_1', ['relu', 'tanh']),
                   return_sequences=True, 
                   input_shape=(None,1)))
    
    model.add(LSTM(units=hp.Int('units_2', min_value=32, max_value=256, step=32),
                   activation=hp.Choice('activation_3', ['relu', 'tanh']),
                   return_sequences=False))
    # Dense Layers 는 1~2개 
    for i in range(hp.Int('num_layers', 1, 2)):
        model.add(Dense(units=hp.Int('dense_units_' + str(i), min_value=16, max_value=64, step=16),
                        activation=hp.Choice('dense_activation_'+ str(i), ['relu', 'tanh'])))
        
    model.add(Dense(1))

    model.compile(optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
                  loss='mean_squared_error',
                  metrics=['mape','mae'])

    return model

def optimize_model(X_train, y_train, X_val, y_val, X_test, sc, epochs, trials):
    # 진행 상황 저장 할 필요없어서, 임시 경로 생성
    with tempfile.TemporaryDirectory() as temp_dir:
        tuner = RandomSearch(
            build_model,
            objective='val_loss',
            max_trials= trials,
            directory=temp_dir,
            project_name='temp_project')

    tuner.search_space_summary()

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    # 최적의 조합 탐색
    tuner.search(X_train, y_train,
                 epochs=epochs,
                 batch_size=32,
                 validation_data=(X_val, y_val),
                 callbacks=[early_stopping])

    tuner.results_summary()

    best_model = tuner.get_best_models(num_models=1)[0]

    # Test 데이터 예측
    pred = best_model.predict(X_test) # 예측값 얻기
    pred_norm = pred # 예측값을 저장하되, normalize된 값 저장
    pred = sc.inverse_transform(pred) # denormalize된 예측값 저장
    
    best_model.summary()
    # 모델 객체와 예측값 반환
    return best_model, pred, pred_norm

### LSTM Single Model

In [2]:
def LSTM_single(product_df, time_steps, epochs):

    # 학습 데이터와 테스트 데이터 분리
    X_train, y_train, X_val, y_val, X_test, y_test, sc = split_data(product_df, time_steps, pred_days=1)
    
    # LSTM 모델 학습 및 예측
    best_model, pred, pred_norm = optimize_model(X_train, y_train, X_val, y_val, X_test, sc, epochs, trials=10)
    
    # 예측 결과 저장
    y_test.reset_index(drop=True, inplace=True)
    pred_df = pd.DataFrame({'Pred': pred.reshape(-1) ,'Pred_norm': pred_norm.reshape(-1)})
    res_df = pd.concat([y_test, pred_df], axis=1)
    res_df.set_index('Date', inplace=True)
    res_df.loc[res_df['Pred']<0, 'Pred']=0
    # res_df: ['y', 'y_norm', 'Pred', 'Pred_norm'], index='Date'
        
    # 모델과 result_df
    return best_model, res_df

## Plot the result

In [3]:
def actual_pred_plot(product_code, res_df, metric_df, normalize):
    today = date.today()
    """
    Plot the actual vs predition and save the figure in the given directory
    """
    
    save_path = os.path.join("Result", "Single_LSTM_Result", product_code)
    save_name = f'{product_code}_all_result'
    
    title = f"Pred Actual Plot - {product_code}"
    actual = res_df['y']
    pred = res_df['Pred']
    
    if normalize: 
        title += "(Normalized)"
        actual = res_df['y_norm']
        pred = res_df['Pred_norm']
        save_name += "_normalized"
    # Plot   
    plt.figure(figsize=(16, 8))
    plt.title(title, fontsize=20)
    plt.xlabel("Time", fontsize=14)
    plt.ylabel("Order Demand", fontsize=14)
    plt.plot(actual, label ='Actual', marker='o', ms=3)
    plt.plot(pred, label='Prediction', marker='o', ms=3)
    plt.legend(loc="upper right")
        
    # Plot 결과 저장
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    # save the figure
    today_date = f'_{today.month:02d}{today.day:02d}'
    plt.savefig(os.path.join(save_path, save_name+'.png'))
    # Metric도 함께 저장
    metric_df.to_csv(os.path.join(save_path, save_name+'.csv'))
        
    plt.close('all') # close all figures to free up memory

## Save and Load the model 

In [4]:
def save_model(product_code, best_model):
    today = date.today()
    folder_path = 'Result/Single_LSTM_Result/Model'
    file_name = f'{product_code}_{today.month:02d}{today.day:02d}.pkl'
    save_path = os.path.join(folder_path, file_name)
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    # 객체를 pickle 파일로 저장
    with open(save_path, 'wb') as f:
        pickle.dump(best_model, f)
    return best_model

In [5]:
def load_model(file_path):
    file_path = f'Result/Single_LSTM_Result/Model/{file_name}'
    
    with open(file_path, 'rb') as file:
        model_dict= pickle.load(file)
    
    return best_model

## Metrics

In [6]:
# Model Metric
def mase(training_series, testing_series, prediction_series):
    n = training_series.shape[0]
    d = np.abs(np.diff(training_series)).sum() / (n-1)
    
    errors = np.abs(testing_series - prediction_series)
    return errors.mean() / d

def mape(actual, pred): 
    actual, pred = np.array(actual), np.array(pred)
    return np.mean(np.abs((actual - pred) / (actual+1)))

# 정규화 된 지표
def nrmse(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred, squared=False)
    target_mean = np.mean(y_true)
    nrmse = mse / target_mean
    return nrmse

# 정규화 된 지표
def nmae(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    target_mean = np.mean(y_true)
    nmae = mae / target_mean
    return nmae

In [7]:
def calculate_metrics(product_code, res_df, normalize):
    # 정규화 옵션이 True인 경우 정규화된 데이터 사용, 그렇지 않으면 원래 데이터 사용
    if normalize:
        actual = res_df['y_norm']
        pred = res_df['Pred_norm']
    else:
        actual = res_df['y']
        pred = res_df['Pred']

    # 메트릭 계산
    # MASE = mase(np.array(train_series), np.array(actual), pred) 
    MAPE = mape(actual, pred) 
    RMSE = mean_squared_error(actual, pred)**0.5 
    MAE = mean_absolute_error(actual,pred) 
    NRMSE = nrmse(actual,pred) 
    NMAE = nmae(actual,pred)
    R2 = r2_score(actual,pred) 
    # RMSLE = mean_squared_log_error(actual, pred)**0.5 

    # 계산된 메트릭을 데이터프레임에 추가
    metric_df = pd.DataFrame({'MAPE':[round(MAPE, 4)],
                           'RMSE':[round(RMSE, 4)],
                           'MAE':[round(MAE, 4)],
                           'NRMSE':[round(NRMSE, 4)],
                           'NMAE':[round(NMAE, 4)],
                           'R2':[round(R2, 4)]},
                            index= [product_code])

    return metric_df

---

## Check the Result

In [8]:
def execute_single_LSTM(product_code, time_steps=30, epochs=100):
    start_time = time.time()
    product_code = product_code # 예측하고자 하는 코드 입력
    product_df = df[df['Product_Code']== product_code].reset_index(drop=True)
    product_df = product_df[['Date', 'Order_Demand']]
    product_df.rename(columns={'Order_Demand': 'y'}, inplace=True)

    # LSTM 단일 모델
    model, res_df = LSTM_single(product_df, time_steps, epochs) #dictionary, time_steps, epochs
    save_model(product_code, model)
    # 모델 Metric과 Pred_Actual Plot 저장
    metric_df_norm = calculate_metrics(product_code, res_df, True)
    metric_df= calculate_metrics(product_code, res_df, False)
    
    actual_pred_plot(product_code, res_df, metric_df_norm, True)
    actual_pred_plot(product_code, res_df, metric_df, False)
    # 실행시간 확인
    elapsed_time_seconds = time.time() - start_time
    elapsed_time_minutes = elapsed_time_seconds / 60
    print("실행 시간: {:.2f} 분".format(elapsed_time_minutes))
    return metric_df

---

## Whole Process
    - product_code에 str으로 예측하고자 하는 코드를 입력
    - ['Product_0025', 'Product_0739', 'Product_0901', 'Product_1154',
       'Product_1248', 'Product_1295', 'Product_1378', 'Product_2004']

In [9]:
# 새로운 데이터
codes = ['4', '5', '6', '7', '8']

In [15]:
execute_single_LSTM('4')

Trial 10 Complete [00h 00m 35s]
val_loss: 0.022640511393547058

Best val_loss So Far: 0.020697925239801407
Total elapsed time: 00h 18m 43s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in C:\Users\7info\AppData\Local\Temp\tmpeilnxpg5\temp_project
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 05 summary
Hyperparameters:
units_1: 480
activation_1: tanh
units_2: 160
activation_3: relu
num_layers: 1
dense_units_0: 48
dense_activation_0: relu
learning_rate: 0.001
dense_units_1: 16
dense_activation_1: relu
Score: 0.020697925239801407

Trial 07 summary
Hyperparameters:
units_1: 160
activation_1: tanh
units_2: 160
activation_3: tanh
num_layers: 1
dense_units_0: 64
dense_activation_0: relu
learning_rate: 0.01
dense_units_1: 32
dense_activation_1: tanh
Score: 0.021361257880926132

Trial 01 summary
Hyperparameters:
units_1: 480
activation_1: relu
units_2: 128
activation_3: tanh
num_layers: 2
dense_units_0: 32
dense_activation_0: relu
learning_rate: 0.01

Unnamed: 0,MAPE,RMSE,MAE,NRMSE,NMAE,R2
4,0.2092,5.2689,4.1862,0.2438,0.1937,0.1759


In [18]:
for code in codes:
    print("==================================")
    print(f"========== { code } ==========")
    print("==================================")
    execute_single_LSTM(code, 1, 1)

Trial 3 Complete [00h 00m 03s]
val_loss: 0.034922946244478226

Best val_loss So Far: 0.034922946244478226
Total elapsed time: 00h 00m 12s

Search: Running Trial #4

Value             |Best Value So Far |Hyperparameter
32                |160               |units_1
relu              |relu              |activation_1
96                |192               |units_2
relu              |relu              |activation_3
1                 |1                 |num_layers
64                |16                |dense_units_0
tanh              |tanh              |dense_activation_0
0.001             |0.001             |learning_rate



KeyboardInterrupt: 