## LSTM method with 36 months for training and testing, another 18 months for future validation

In [None]:
import pandas as pd
from tqdm import tqdm
import keras
import pickle 
from model_preparation import plotting_3618
from models import calculate_smape_3618, predictfuture_3618, LSTM_HyperParameter_Tuning
import matplotlib.pyplot as plt

In [None]:
# Load the demand data and the stock-price series (both are CSV files)
df_origin = pd.read_csv('20230411_SummerTerm23_Data_Challenge_Infineon_Data.csv')
stockprice = pd.read_csv('stockprice.csv')
# Drop discontinued products: keep only products that still have a row at the
# reporting time 2023-02-01T00:00:00.000Z
df_continue = df_origin[df_origin['reporting_time'] == '2023-02-01T00:00:00.000Z']
df_continue_origin = df_origin[df_origin['product_name'].isin(df_continue['product_name'])]

# Number of data points per product
product_counts = df_continue_origin['product_name'].value_counts()
# Number of products with more than 50 data points, counted in one vectorized
# pass instead of a per-product Python loop.
# NOTE(review): the training cell selects products with `> 49` data points,
# so this count uses a stricter threshold than the one actually applied
# there - confirm which one is intended.
i = int((product_counts > 50).sum())

# Result containers, one per outcome of the modelling pipeline
result = {}                    # baseline model reached the accuracy target
result_temp = []               # names still below target after tuning
result_less80 = {}             # baseline model below the accuracy target
result_other = {}              # smape outside the open interval (0, 1)
result_hype = {}               # target reached after hyperparameter tuning
result_stock = {}              # target reached after adding stock prices
result_stock_hype = {}         # target reached with stock prices + tuning
result_stock_hype_other = {}   # still below target after stock prices + tuning
result_stock_other = {}        # smape outside (0, 1) in the stock-price run

# Use previous 15 data to predict next demand
step = 15
# Number of periods forecast and compared against the held-out actuals
future_periods = 18
# Search grid handed to LSTM_HyperParameter_Tuning
# NOTE(review): presumably candidate values for three hyperparameters -
# confirm the meaning of each sub-list against that function.
config = [[25, 50], [32, 64], [0.1, 0.2]]

In [None]:
# Fit one model per product and file the outcome into a result container
# depending on the accuracy (1 - smape) reported by calculate_smape_3618:
#   result        - baseline model exceeds the accuracy target
#   result_hype   - target exceeded only after hyperparameter tuning
#   result_less80 - baseline inputs of every product that needed tuning
#   result_temp   - names still below target after tuning (retried later)
#   result_other  - smape outside the open interval (0, 1)
# NOTE(review): `smape_loss` is neither imported nor defined in the visible
# part of this notebook; it has to be in scope before this cell runs -
# confirm where it comes from.
for name in tqdm(df_continue_origin['product_name'].unique()):
    # Skip products without enough data points
    if product_counts[name] <= 49:
        continue

    df_product = df_continue_origin.loc[df_continue_origin['product_name'] == name]
    df_product = df_product.sort_values(by='reporting_time', ascending=True)
    (dataset, model, X_train, X_test, y_train, y_test, predicted_demand,
     smape, scaler, real_demand, future_y) = calculate_smape_3618(df_product, step)

    # Compute the accuracy before branching so that every container stores
    # the value of the current product. Previously the out-of-range branch
    # reused the accuracy of an earlier product, or raised NameError when the
    # first product already fell into it.
    accuracy = 1.00 - smape

    if not 0.00 < smape < 1.00:
        result_other[name] = [accuracy, smape, X_train, X_test, y_train, y_test]
        continue

    if accuracy > 0.79999999:
        chosen_model = model
        chosen_accuracy, chosen_smape = accuracy, smape
        chosen_prediction = predicted_demand
        bucket, tuned = result, False
        # model.save("LSTM_model/%s.keras" % name)
    else:
        result_less80[name] = [accuracy, smape, X_train, X_test, y_train, y_test]
        # Hyperparameter tuning
        new_accuracy, new_smape, new_model, new_predicted_demand, hist = LSTM_HyperParameter_Tuning(
            config, X_train, y_train, X_test, y_test, scaler, accuracy)
        if not new_accuracy > 0.79999999:
            # Still below target: remember the product for the stock-price run
            result_temp.append(name)
            continue
        chosen_model = new_model
        chosen_accuracy, chosen_smape = new_accuracy, new_smape
        chosen_prediction = new_predicted_demand
        bucket, tuned = result_hype, True
        # new_model.save("LSTM_model/hyper/%s.keras" % name)

    # Forecast `future_periods` steps and score them against the last
    # `future_periods` rows of the real demand.
    future = predictfuture_3618(dataset, step, chosen_model, future_periods)
    future_demand = scaler.inverse_transform(future)[:, 0]
    actual = pd.DataFrame(real_demand.reset_index(drop=True))
    future = pd.DataFrame(future_demand)
    holdout_start = len(actual) - future_periods
    # Give the forecast the same index labels as the held-out actual rows
    future.index = list(range(holdout_start, len(actual)))
    future_accuracy = 1 - smape_loss(actual.iloc[holdout_start:len(actual), :], future)

    bucket[name] = [chosen_accuracy, chosen_smape, future_demand, chosen_prediction[:, 0],
                    real_demand, future_accuracy]
    plotting_3618(name, bucket[name], future_periods, hyper=tuned)


In [None]:
# Persist the result containers of the first modelling pass as pickle files.
import os

# Create the output directory up front so that a missing folder cannot
# discard the results of the whole training run with a FileNotFoundError.
os.makedirs("LSTM_results/3618", exist_ok=True)

for filename, payload in (
    ("result_80up.pkl", result),
    ("result_80down.pkl", result_less80),
    ("result_afterhype.pkl", result_hype),
    ("result_temp.pkl", result_temp),
    ("result_other.pkl", result_other),
):
    with open("LSTM_results/3618/" + filename, 'wb') as f:
        pickle.dump(payload, f)

In [None]:
# Second pass over the products that stayed below the accuracy target:
# refit with the stock-price columns appended as external indicators and
# file the outcome into
#   result_stock            - baseline model exceeds the accuracy target
#   result_stock_hype       - target exceeded after hyperparameter tuning
#   result_stock_hype_other - still below target after tuning
#   result_stock_other      - smape outside the open interval (0, 1)
# NOTE(review): `smape_loss` is neither imported nor defined in the visible
# part of this notebook; it has to be in scope before this cell runs -
# confirm where it comes from.
for name in tqdm(result_temp):
    df_product = df_continue_origin.loc[df_continue_origin['product_name'] == name]
    df_product = df_product.sort_values(by='reporting_time', ascending=True)
    df_product = df_product.reset_index(drop=True)
    # Using stock price as external indicators: take the last len(df_product)
    # rows and join them column-wise by position. Building a fresh frame
    # avoids an in-place reset on a slice of `stockprice`.
    # NOTE(review): assumes those trailing stock-price rows line up with the
    # product's reporting times - confirm against stockprice.csv.
    stock_price_cut = stockprice.iloc[-len(df_product):].reset_index(drop=True)
    df_update = pd.concat([df_product, stock_price_cut], axis=1)
    (dataset, model, X_train, X_test, y_train, y_test, predicted_demand,
     smape, scaler, real_demand, future_y) = calculate_smape_3618(df_update, step)

    # Compute the accuracy before branching so that every container stores
    # the value of the current product. Previously the out-of-range branch
    # reused a stale accuracy left over from an earlier iteration.
    accuracy = 1.00 - smape

    if not 0.00 < smape < 1.00:
        result_stock_other[name] = [accuracy, smape, X_train, X_test, y_train, y_test]
        continue

    if accuracy > 0.79999999:
        chosen_model = model
        chosen_accuracy, chosen_smape = accuracy, smape
        chosen_prediction = predicted_demand
        bucket = result_stock
        plot_options = {'stock': True}
        # model.save("LSTM_model/stock/%s.keras" % name)
    else:
        # Hyperparameter tuning again
        new_accuracy, new_smape, new_model, new_predicted_demand, hist = LSTM_HyperParameter_Tuning(
            config, X_train, y_train, X_test, y_test, scaler, accuracy)
        if not new_accuracy > 0.79999999:
            result_stock_hype_other[name] = [new_accuracy, new_smape]
            print("cannot exceed 80% after adding stock price and hyperparameter tunning: ")
            print(name)
            continue
        chosen_model = new_model
        chosen_accuracy, chosen_smape = new_accuracy, new_smape
        chosen_prediction = new_predicted_demand
        bucket = result_stock_hype
        plot_options = {'hyper': True, 'stock': True}
        # new_model.save("LSTM_model/stock/hyper/%s.keras" % name)

    # Forecast `future_periods` steps and score them against the last
    # `future_periods` rows of the real demand.
    future = predictfuture_3618(dataset, step, chosen_model, future_periods)
    future_demand = scaler.inverse_transform(future)[:, 0]
    actual = pd.DataFrame(real_demand.reset_index(drop=True))
    future = pd.DataFrame(future_demand)
    holdout_start = len(actual) - future_periods
    # Give the forecast the same index labels as the held-out actual rows
    future.index = list(range(holdout_start, len(actual)))
    future_accuracy = 1 - smape_loss(actual.iloc[holdout_start:len(actual), :], future)

    bucket[name] = [chosen_accuracy, chosen_smape, future_demand, chosen_prediction[:, 0],
                    real_demand, future_accuracy]
    plotting_3618(name, bucket[name], future_periods, **plot_options)


In [None]:
# Persist the result containers of the stock-price pass as pickle files.
import os

# Create the output directory up front so that a missing folder cannot
# discard the results of the whole run with a FileNotFoundError.
os.makedirs("LSTM_results/3618", exist_ok=True)

for filename, payload in (
    ("result_afterstock.pkl", result_stock),
    ("result_afterstockhype.pkl", result_stock_hype),
    ("result_still80downafteralltunning.pkl", result_stock_hype_other),
    ("result_stock_other.pkl", result_stock_other),
):
    with open("LSTM_results/3618/" + filename, 'wb') as f:
        pickle.dump(payload, f)