# In [None]:

#!pip install pmdarima



import sys,os

import pandas as pd
import numpy as np 

from matplotlib import pyplot as plt 
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import math

from pmdarima import auto_arima
from pmdarima import pipeline
from pmdarima import model_selection
from pmdarima import preprocessing as ppc
from pmdarima import arima


from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Bidirectional
from tensorflow.keras.layers import Dense, Dropout

from contextlib import contextmanager
@contextmanager
def suppress_stdout():
    """Temporarily silence everything written to ``sys.stdout``.

    While the ``with`` body runs, ``sys.stdout`` is pointed at
    ``os.devnull``. The stream that was active on entry is put back when
    the body finishes, including when it raises.
    """
    previous_stream = sys.stdout
    with open(os.devnull, "w") as sink:
        sys.stdout = sink
        try:
            yield
        finally:
            # Restore first; the enclosing ``with`` closes the sink afterwards.
            sys.stdout = previous_stream
     
###################################################################################################################################################
# Shared result frame: metrics() writes into row 0 of this object and returns
# it, so every call overwrites the values of the previous one.
df_metrics=pd.DataFrame()
def metrics(test,prediction_test,final):
    """Score the ARIMA-only and the hybrid forecast against the actual values.

    Args:
        test: actual values of the evaluation window.
        prediction_test: ARIMA forecast for that window.
        final: hybrid forecast for that window.

    Returns:
        The module-level ``df_metrics`` frame, whose row 0 holds MAE, RMSE,
        MSE and MAPE for both forecasts (columns ``<METRIC>_ARIMA`` and
        ``<METRIC>_Hybrid``).
    """
    def _rmse(actual, forecast):
        # RMSE is the square root of sklearn's mean squared error.
        return math.sqrt(mean_squared_error(actual, forecast))

    # (column, scorer, forecast) in the order the columns are created.
    plan = (
        ("MAE_ARIMA", mean_absolute_error, prediction_test),
        ("MAE_Hybrid", mean_absolute_error, final),
        ("RMSE_ARIMA", _rmse, prediction_test),
        ("RMSE_Hybrid", _rmse, final),
        ("MSE_ARIMA", mean_squared_error, prediction_test),
        ("MSE_Hybrid", mean_squared_error, final),
        ("MAPE_ARIMA", mean_absolute_percentage_error, prediction_test),
        ("MAPE_Hybrid", mean_absolute_percentage_error, final),
    )
    for column, scorer, forecast in plan:
        df_metrics.at[0, column] = scorer(test, forecast)
    return df_metrics

###################################################################################################################################################
def get_bilstm_dataframe(df,train,test):
  """Build the BiLSTM input frame: ARIMA residuals plus the price feature.

  Args:
    df: frame with ``products_quantity`` and ``price_per_unit`` columns.
      Its rows must line up with ``train`` followed by ``test``.
    train: predictions for the leading rows of ``df`` (Series or any
      1-D array-like).
    test: predictions for the remaining rows of ``df`` (Series or any
      1-D array-like).

  Returns:
    A new frame on a fresh 0..n-1 index where ``products_quantity`` holds
    actual minus predicted values and ``price_per_unit`` is copied from
    ``df``.

  Raises:
    ValueError: if ``len(train) + len(test)`` differs from ``len(df)``.
  """
  fitted=np.concatenate([np.asarray(train).ravel(),np.asarray(test).ravel()])
  if len(fitted)!=len(df):
    raise ValueError(
        "train and test predictions cover %d rows but df has %d"
        % (len(fitted), len(df)))

  # Subtract as an (n, 1) column from the (n, 1) frame so the operation is
  # row-wise. Mixing a 1-D vector with an (n, 1) array would instead
  # broadcast into an n x n matrix.
  df_bilstm=pd.DataFrame(df.products_quantity)-fitted.reshape(-1,1)
  df_bilstm=df_bilstm.reset_index(drop=True)
  # to_numpy() assigns by position, ignoring whatever index df carries.
  df_bilstm['price_per_unit']=df['price_per_unit'].to_numpy()
  return df_bilstm

###################################################################################################################################################
def plot_arima(df_data,prediction_test,prediction_train):
    """Draw the observed series together with the ARIMA predictions.

    The three arguments are plotted on one shared axes in the order
    observed data, test predictions, train predictions, and the figure is
    then shown.
    """
    _, axes = plt.subplots(1, 1, figsize=(25, 10))
    for series in (df_data, prediction_test, prediction_train):
        axes.plot(series)
    # Faint horizontal reference line at y = 0.
    plt.axhline(0, color="black", alpha=0.3)
    plt.show()

def plot_loss(history):
    """Plot the training and validation loss curves of a fit.

    Args:
        history: object whose ``history`` mapping has ``'loss'`` and
            ``'val_loss'`` entries (as returned by ``model.fit`` in BiLSTM).
    """
    plt.subplots(1, 1, figsize=(10, 4))
    curves = (('loss', 'Training loss'), ('val_loss', 'Validation loss'))
    for key, caption in curves:
        plt.plot(history.history[key], label=caption)
    plt.legend()
    plt.show()
def plot_bilstm(df_bilstm,df_bilstm_prdictions):
    """Draw the residual series and the BiLSTM predictions on one axes.

    Args:
        df_bilstm: frame whose ``products_quantity`` column is plotted.
        df_bilstm_prdictions: predictions to overlay on the same axes.
    """
    _, axes = plt.subplots(1, 1, figsize=(25, 10))
    for series in (df_bilstm.products_quantity, df_bilstm_prdictions):
        axes.plot(series)
    # Faint horizontal reference line at y = 0.
    plt.axhline(0, color="black", alpha=0.3)
    plt.show()

###################################################################################################################################################
def Arima(df):
  """Fit a Fourier + ARIMA pipeline on the first 80% of the quantity series.

  Args:
    df: frame with ``products_quantity`` and ``price_per_unit`` columns.

  Returns:
    A tuple ``(prediction_test, df_bilstm, prediction_train, test, pipe)``:
    the forecast for the held-out 20%, the residual frame built by
    ``get_bilstm_dataframe``, the in-sample predictions, the held-out
    actual values, and the fitted pipeline.
  """
  # Differencing order estimated on the full series, then handed to auto_arima.
  d=arima.ndiffs(df.products_quantity)
  df_data = df.products_quantity.to_numpy()
  train, test = model_selection.train_test_split(df_data, train_size=int(len(df_data)*0.8))

  # create a pipeline with multiple stages... the dataset is
  # seasonal, so we'll include a FourierFeaturizer so we can fit it without
  # seasonality
  #
  # NOTE(review): auto_arima(...) is called here, so the "arima" stage is
  # whatever that call returns for `train` alone; pipe.fit(train) then fits
  # it again behind the Fourier stage. If the order search is meant to see
  # the Fourier terms, the stage should presumably be arima.AutoARIMA(...)
  # -- confirm before changing, since it alters the selected model.
  # NOTE(review): `supress_warnings` is spelled differently from pmdarima's
  # `suppress_warnings`; left as-is, verify how the library treats it.
  with suppress_stdout():
    pipe = pipeline.Pipeline([
        ("fourier", ppc.FourierFeaturizer(m=52, k=26)),
        ("arima", auto_arima(train,start_p=0, d=d, start_q=0, 
                                          max_p=7, max_d=1, max_q=7,
                                          seasonal=False, 
                                          error_action='ignore',trace = True,
                                          supress_warnings=False,stepwise = True))])
    pipe.fit(train)

  prediction_test= pipe.predict(n_periods=len(test))
  # Computed once; the residuals themselves are derived in
  # get_bilstm_dataframe, so no separate residual vector is kept here.
  prediction_train=pipe.predict_in_sample()

  df_bilstm=get_bilstm_dataframe(df,prediction_train,prediction_test)

  return prediction_test,df_bilstm,prediction_train,test,pipe

###################################################################################################################################################
def BiLSTM(df_bilstm):
    """Train a bidirectional LSTM on the leading 80% of ``df_bilstm``.

    The first column of ``df_bilstm`` is the target; all columns are used
    as input features. Inputs are standardised with a scaler fitted on the
    training rows only, and cut into windows of 3 past steps that predict
    the next step.

    Args:
        df_bilstm: frame whose column 0 is the series to model.

    Returns:
        A tuple ``(df_bilstm_prdictions, history)``: a one-column frame of
        predictions in original units, indexed like the trailing 20% of
        ``df_bilstm``, and the Keras fit history.
    """
    size=int(len(df_bilstm)*0.8)
    train_frame=df_bilstm[:size]

    # LSTM gates are sensitive to magnitude, so standardise the inputs.
    scaler = StandardScaler().fit(train_frame)
    scaled = scaler.transform(train_frame)

    n_future = 1   # how many steps ahead the target lies
    n_past = 3     # how many past steps form one input window
    n_features = train_frame.shape[1]

    # Shape the data as (n_samples, timesteps, n_features) with targets
    # (n_samples, 1) taken from column 0.
    window_ends = range(n_past, len(scaled) - n_future + 1)
    trainX = np.array([scaled[end - n_past:end, 0:n_features] for end in window_ends])
    trainY = np.array([scaled[end + n_future - 1:end + n_future, 0] for end in window_ends])

    # define the model
    model = Sequential()
    recurrent = LSTM(32, activation='relu',
                     input_shape=(trainX.shape[1], trainX.shape[2]),
                     return_sequences=False)
    model.add(Bidirectional(recurrent))
    model.add(Dropout(0.2))
    model.add(Dense(trainY.shape[1]))

    model.compile(optimizer='adam', loss='mse')

    holdout_len = len(df_bilstm) - size
    with suppress_stdout():
        # fit the model
        history = model.fit(trainX, trainY, epochs=30, batch_size=16, validation_split=0.1, verbose=1)
        # The network is fed the last `holdout_len` *training* windows.
        # NOTE(review): confirm this is intended rather than windows built
        # from the held-out rows.
        prediction = model.predict(trainX[-holdout_len:])

    # The scaler was fitted on every column, so tile the single prediction
    # column to that width before inverting and keep column 0 only.
    prediction_copies = np.repeat(prediction, df_bilstm.shape[1], axis=-1)
    y_pred_future = scaler.inverse_transform(prediction_copies)[:,0]
    df_bilstm_prdictions=pd.DataFrame(y_pred_future)
    df_bilstm_prdictions.index=df_bilstm[size:].index

    return df_bilstm_prdictions,history

###################################################################################################################################################
def format_data_for_future_predictions(df_new,price):
  """Append placeholder rows for the periods to be forecast, in place.

  The number of rows added is ``len(df_new) // 4 + len(df_new) % 4``,
  measured before any row is appended. Each new row is ``[1, price, 0, 0]``
  and gets the label ``len(df_new)`` at the time it is added.

  Args:
    df_new: four-column frame to extend; it is modified in place.
    price: value written into the second column of every new row.

  Returns:
    The same ``df_new`` object.
  """
  # NOTE(review): the remainder is added rather than rounded up; confirm
  # this row count is the intended forecast horizon.
  quarter, remainder = divmod(len(df_new), 4)
  for _ in range(quarter + remainder):
    df_new.loc[len(df_new)] = [1, price, 0, 0]
  return df_new


###################################################################################################################################################
# Module-level placeholder; predict() works with its own local result and
# does not write to this name.
df_temp=pd.DataFrame()
def predict(df,plot_graphs=False,training=False,Price=0):
  """Run the hybrid forecast: ARIMA first, then a BiLSTM on its residuals.

  Args:
    df: frame with ``products_quantity`` and ``price_per_unit`` columns.
      When ``training`` is false, placeholder rows are appended to it for
      the duration of the call and removed again before returning.
    plot_graphs: when true, show the ARIMA and BiLSTM plots.
    training: when true, ``df`` is used as-is; when false, it is first
      extended by ``format_data_for_future_predictions``.
    Price: price written into the placeholder rows (unused when training).

  Returns:
    A tuple ``(final_pred, prediction_test, df_bilstm_prdictions,
    metrics_frame, model)``: the hybrid forecast and the ARIMA forecast as
    one-column frames, the BiLSTM predictions, the frame returned by
    ``metrics``, and the fitted ARIMA pipeline.
  """
  n=len(df)

  try:
    if not training:
      df=format_data_for_future_predictions(df,Price)

    # ARIMA prediction
    prediction_test,df_bilstm,prediction_train,test,model=Arima(df)

    if plot_graphs:
      plot_arima(df.products_quantity.to_numpy(),prediction_test,prediction_train)

    # BiLSTM correction (the fit history can be passed to plot_loss)
    df_bilstm_prdictions,_history=BiLSTM(df_bilstm)

    if plot_graphs:
      plot_bilstm(df_bilstm,df_bilstm_prdictions)

    # Hybrid forecast = ARIMA forecast + predicted residual. The metrics are
    # computed in both modes; outside training `test` is the placeholder rows.
    final_pred=df_bilstm_prdictions[0]+prediction_test
    metrics_frame=metrics(test,prediction_test,final_pred)

    return final_pred.to_frame(),prediction_test.to_frame(),df_bilstm_prdictions,metrics_frame,model
  finally:
    if not training:
      # Remove only the rows appended above (everything past the original
      # n rows), so the caller's frame is returned to its original content
      # even when a step above raised.
      df.drop(df.index[n:],inplace=True)