# Define Constants

In [None]:
# Upper bound on training epochs handed to model.fit in Fit_Model; the
# EarlyStopping callback used there may end training sooner.
Epochs = 100
# Mini-batch size handed to model.fit in Fit_Model.
Batch_Size = 32

# Import Libraries

In [None]:
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout,RepeatVector, TimeDistributed, LayerNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
from sklearn.metrics import mean_absolute_error, mean_squared_error
from datetime import datetime
import os
import shutil
import joblib
import gc

warnings.filterwarnings('ignore')

In [None]:
%run Common.ipynb

# Define Functions

## Create dataset Sequence

In [None]:
def Create_Seq(data, p , Horizon):
    """Slice a price history into sliding (input, target) windows.

    Window ``i`` uses rows ``i .. i + p - 1`` as the model input and the
    ``Horizon`` rows that follow as the target.

    Args:
        data: DataFrame with a ``Scaled_Close`` column and a ``Date`` column.
            Rows are consumed in their existing order.
        p: Number of past rows in each input window.
        Horizon: Number of future rows in each target window.

    Returns:
        Tuple ``(X, y, dates)``. With ``n = len(data) - p - Horizon + 1``
        windows, ``X`` has shape ``(n, p, 1)``, ``y`` has shape
        ``(n, Horizon, 1)`` and ``dates`` is a list of ``n`` lists holding the
        ``Date`` values of each target window. When the data is too short
        for a single window, both arrays are empty and ``dates`` is ``[]``.
    """
    # Extract both columns once. Slicing the DataFrame inside the loop builds
    # two throwaway frames per window, which is pure loop-invariant overhead.
    scaled_close = data['Scaled_Close'].values
    all_dates = data['Date'].tolist()

    X, y, dates = [], [], []
    window_count = len(data) - p - Horizon + 1

    # range() of a non-positive count is empty, so short inputs yield nothing.
    for start in range(window_count):
        split = start + p
        stop = split + Horizon

        X.append(scaled_close[start:split].reshape(-1, 1))
        y.append(scaled_close[split:stop].reshape(-1, 1))
        dates.append(all_dates[split:stop])

    return np.array(X), np.array(y), dates

## Build RNN model

In [None]:
def Build_Model( Horizon, p, units = 64, dropout = 0.2, layers = 2):
    """Build and compile an LSTM encoder-decoder for multi-step forecasting.

    The network stacks ``layers`` sequence-returning LSTM layers, a final
    LSTM that collapses the sequence to one vector, a ``RepeatVector`` that
    copies that vector ``Horizon`` times, ``layers`` decoder LSTM layers and
    a per-timestep ``Dense(1)`` head. Every LSTM is followed by ``Dropout``.

    Args:
        Horizon: Number of future steps the model outputs.
        p: Number of past steps in each input window; the model input is
            ``(p, 1)`` per sample.
        units: Hidden size of every LSTM layer.
        dropout: Rate used by every ``Dropout`` layer.
        layers: Number of stacked LSTM layers in the encoder and, separately,
            in the decoder (the bottleneck LSTM is additional).

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss, MAE metric)
        whose output has shape ``(Horizon, 1)`` per sample.
    """
    model = Sequential()

    # Declare the input shape once with an explicit Input layer. The earlier
    # form passed ``input_shape=None`` to every LSTM after the first, which is
    # not a valid shape value for a layer constructor.
    model.add(tf.keras.Input(shape=(p, 1)))

    # -------------- Encoder --------------
    for _ in range(layers):
        model.add(LSTM(units, return_sequences=True))
        model.add(Dropout(dropout))

    # -------------- Bottleneck --------------
    # return_sequences=False keeps only the last hidden state, giving a
    # fixed-size summary of the whole input window.
    model.add(LSTM(units, return_sequences=False))
    model.add(Dropout(dropout))

    # -------------- RepeatVector --------------
    # Feed the same summary vector to the decoder at each of the Horizon steps.
    model.add(RepeatVector(Horizon))

    # -------------- Decoder --------------
    for _ in range(layers):
        model.add(LSTM(units, return_sequences=True))
        model.add(Dropout(dropout))

    # One scalar prediction per forecast step.
    model.add(TimeDistributed(Dense(1)))

    # ---------- Compile ----------
    model.compile(
        optimizer='adam',
        loss='mse',
        metrics=['mae']
    )

    return model


## Fit model

In [None]:
def Fit_Model(  X_train, y_train , X_test, y_test , Horizon, p , patience = 35):
    """Build a fresh model and train it with early stopping.

    Args:
        X_train: Training input windows.
        y_train: Training target windows.
        X_test: Input windows used as validation data during training.
        y_test: Target windows used as validation data during training.
        Horizon: Forecast length, forwarded to ``Build_Model``.
        p: Input window length, forwarded to ``Build_Model``.
        patience: Number of epochs without ``val_loss`` improvement that
            ``EarlyStopping`` tolerates before halting.

    Returns:
        The trained model, with the best weights seen during training
        restored by the early-stopping callback.
    """
    # Halt once validation loss stops improving by at least min_delta.
    stopper = EarlyStopping(
        monitor='val_loss',
        min_delta=1e-6,
        patience=patience,
        verbose=0,
        restore_best_weights=True,
    )

    # Training runs silently for at most Epochs epochs in Batch_Size batches.
    fit_options = dict(
        validation_data=(X_test, y_test),
        epochs=Epochs,
        batch_size=Batch_Size,
        callbacks=[stopper],
        verbose=0,
    )

    network = Build_Model(Horizon, p)
    network.fit(X_train, y_train, **fit_options)

    return network


## Handle Predict Result and Aggregate

In [None]:
def Predict_Result_Agg(predicts,dates, scaler):
    """Flatten windowed predictions and aggregate them per calendar date.

    Overlapping forecast windows predict the same date several times. Each
    individual prediction is numbered in emission order (``Order``, starting
    at 1), then predictions are grouped by date.

    Args:
        predicts: Iterable of prediction windows; each window is an iterable
            of items whose element ``[0]`` is the scaled predicted value.
        dates: Iterable of date lists aligned with ``predicts``; each entry
            must expose ``.date()``.
        scaler: Object with ``inverse_transform`` that maps scaled values back
            to prices.

    Returns:
        Tuple ``(aggregated_result, result_df)``.

        ``aggregated_result`` has one row per date with columns ``Date``,
        ``count`` (number of predictions for that date), ``Close_Pred``
        (plain mean) and ``Weighted_Mean`` (mean weighted by ``Order``, so
        later predictions count more). Both prediction columns are in price
        units.

        ``result_df`` has one row per individual prediction with columns
        ``Date``, ``Close_Pred`` (price units) and ``Order``.
    """
    rows = []
    order_counter = 1

    for predict_list, date_list in zip(predicts, dates):
        for predict, date in zip(predict_list, date_list):
            rows.append({"Date": date.date(), "Predict": predict[0], "Order": order_counter})
            order_counter += 1

    result_df = pd.DataFrame(rows)

    # Precompute the weighted term as a column so the aggregation is a plain
    # sum instead of a lambda that reaches back into the outer frame.
    weighted_input = result_df.assign(Predict_Order=result_df['Predict'] * result_df['Order'])
    aggregated_result = weighted_input.groupby(['Date']).agg(
        count=('Predict', 'count'),
        Close_Pred=('Predict', 'mean'),
        Predict_Order_Sum=('Predict_Order', 'sum'),
        Order_Sum=('Order', 'sum'),
    ).reset_index()

    # Weighted Predict
    aggregated_result['Weighted_Mean'] = aggregated_result['Predict_Order_Sum'] / aggregated_result['Order_Sum']
    aggregated_result = aggregated_result.drop(columns=['Predict_Order_Sum', 'Order_Sum'])

    # Map every prediction column back to price units. Weighted_Mean was
    # previously left in scaled space, unlike Close_Pred in the same frame.
    for column in ('Close_Pred', 'Weighted_Mean'):
        restored = scaler.inverse_transform(aggregated_result[[column]])
        aggregated_result[column] = np.asarray(restored).ravel()

    result_df = result_df.rename(columns={'Predict': 'Close_Pred'})
    restored = scaler.inverse_transform(result_df[['Close_Pred']])
    result_df["Close_Pred"] = np.asarray(restored).ravel()

    return aggregated_result, result_df

## Overall RNN Process for Each Input DataFrame

In [None]:
def RNN(input_df, p, t, h , id, d ):
    """Train the forecasting model on one price history and score it.

    The ``Close`` column is min-max scaled, cut into sliding windows, split
    chronologically into training and validation windows, used to fit a model,
    and the validation predictions are compared against the actual closes.

    Args:
        input_df: DataFrame with ``Code``, ``Date`` and ``Close`` columns.
            It is not modified; all work happens on a copy.
        p: Number of past rows in each input window.
        t: Fraction of windows used for training; the first
            ``int(len(X) * t)`` windows train and the rest validate.
        h: Forecast horizon (rows predicted per window).
        id: Not used by this function.
        d: Not used by this function.

    Returns:
        Tuple ``(mae, mae_model, model, data_df, indi_df)``:
        ``mae`` is the mean absolute error between ``Close`` and the per-date
        mean prediction; ``mae_model`` is the mean absolute error over every
        individual prediction; ``model`` is the fitted model; ``data_df`` is
        the tail of the actual-vs-predicted frame (``len(predicts) + 50``
        rows); ``indi_df`` lists each individual prediction with its actual
        close and order.

    Raises:
        ValueError: If the split leaves no training or no validation windows.
    """
    # Work on a copy so the caller's frame does not gain a Scaled_Close column.
    work_df = input_df.copy()

    # NOTE(review): the scaler is fitted on the full history, including the
    # rows later used for validation - confirm this is intended.
    scaler = MinMaxScaler((0,1))
    scaler.fit(work_df[['Close']])
    work_df['Scaled_Close'] = scaler.transform(work_df[['Close']])

    X, y, dates = Create_Seq(work_df, p , h)

    # Split Data to training and Validation dataset
    v = int(len(X) * t)
    if v == 0 or v == len(X):
        raise ValueError(
            f'Cannot split {len(X)} window(s) with t={t}: '
            'both the training and validation sets must be non-empty'
        )

    X_train, X_test = X[:v], X[v:]
    y_train, y_test = y[:v], y[v:]
    d_test = dates[v:]

    # Fit Model and predict validation dataset
    model = Fit_Model(  X_train, y_train , X_test, y_test , h, p)

    predicts = model.predict(X_test)
    agg_df,indi_df = Predict_Result_Agg(predicts,d_test , scaler)

    # Join 2 df
    # Predict_Result_Agg emits plain date objects; convert back so the merge
    # keys match the Date column of the source frame.
    agg_df['Date'] = pd.to_datetime(agg_df['Date'])
    merged_df = pd.merge(work_df[['Code', 'Date', 'Close']], agg_df[['Date', 'Close_Pred']], on='Date', how='left')

    indi_df['Date'] = pd.to_datetime(indi_df['Date'])
    indi_df = pd.merge( indi_df[['Date', 'Close_Pred' , 'Order']] , work_df[['Code', 'Date', 'Close']] , on='Date', how='left')
    indi_df = indi_df[['Date', 'Close' , 'Close_Pred' , 'Order']]

    # Get Mean Absolute Error
    # Only dates that received a prediction take part in the aggregated score.
    merged_df_cleaned = merged_df.dropna(subset=['Close_Pred']).copy()
    mae = mean_absolute_error(merged_df_cleaned['Close'], merged_df_cleaned['Close_Pred'])

    mae_loss = tf.keras.losses.MeanAbsoluteError()
    mae_model = float(mae_loss(indi_df['Close'], indi_df['Close_Pred']).numpy())

    print(f'Mean Absolute Error: {round(mae,3)} + {round(mae_model,3)}')

    # Omit the old data for better presentation
    data_df = merged_df.tail( len(predicts) + 50)

    return mae,mae_model, model, data_df, indi_df

    