# Define Constants

In [None]:
# Maximum number of training epochs passed to model.fit in Fit_Model
# (the EarlyStopping callback there may end training sooner).
Epochs = 100
# Mini-batch size passed to model.fit in Fit_Model.
Batch_Size = 32

# Import Libraries

In [None]:
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout,RepeatVector, TimeDistributed
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
from sklearn.metrics import mean_absolute_error, mean_squared_error
from datetime import datetime
import os
import joblib

# Suppress every Python warning for the rest of the session.
# NOTE(review): this is a global filter, so deprecation warnings from the
# imported libraries are hidden too — consider narrowing it by category.
warnings.filterwarnings('ignore')

In [None]:
# Execute Common.ipynb (IPython magic).
# NOTE(review): RNN_PATH and Plot_Result are used further down but are not
# defined in this notebook — presumably they come from Common.ipynb; confirm.
%run Common.ipynb

# Define Functions

## Create dataset Sequence

In [None]:
def Create_Seq(data, p, Horizon):
    """Slice a series into sliding windows with one-step-ahead targets.

    Sample ``i`` uses ``data[i:i + p]`` as its input window and
    ``data[i + p]`` as its target.

    Args:
        data: Indexable, sliceable sequence of observations.
        p: Window length (number of past observations per sample).
        Horizon: Accepted for interface compatibility but not used; every
            target is the observation immediately after its window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the windows and the
        targets. Both arrays are empty when ``len(data) <= p``.
    """
    n_samples = len(data) - p

    windows = [data[start:start + p] for start in range(n_samples)]
    targets = [data[start + p] for start in range(n_samples)]

    return np.array(windows), np.array(targets)

## Build RNN model

In [None]:
def Build_Model(X_train, Horizon, p, times=2):
    """Build and compile the stacked-LSTM regressor.

    Layers, in order: LSTM(50, return_sequences=True) -> Dropout(0.2) ->
    LSTM(50) -> Dropout(0.2) -> Dense(25) -> Dense(1). The model is compiled
    with the Adam optimizer and Huber loss.

    Args:
        X_train: Training inputs; only ``X_train.shape[1]`` (the window
            length) is read. The input shape is declared as
            ``(window_length, 1)``, i.e. one feature per time step.
        Horizon: Not used by the current architecture; kept so existing
            callers keep working.
        p: Not used by the current architecture; kept so existing callers
            keep working.
        times: Not used by the current architecture; kept so existing
            callers keep working.

    Returns:
        The compiled Keras ``Sequential`` model (untrained).
    """
    window_length = X_train.shape[1]

    model = Sequential([
        # First LSTM returns the full sequence so the second LSTM can consume it.
        LSTM(50, return_sequences=True, input_shape=(window_length, 1)),
        Dropout(0.2),
        # Second LSTM collapses the sequence to a single vector per sample.
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),
        Dense(1),
    ])

    model.compile(optimizer='adam', loss='huber')

    return model

## Fit model

In [None]:
def Fit_Model(X_train, y_train, X_test, y_test, Horizon, p, patience=12):
    """Build a model with Build_Model and train it with early stopping.

    Training runs for at most ``Epochs`` epochs with batches of
    ``Batch_Size`` and stops once ``val_loss`` has not improved for
    ``patience`` epochs.

    NOTE(review): ``X_test``/``y_test`` are passed as ``validation_data``,
    so they drive early stopping; metrics computed on them afterwards are
    not an independent hold-out estimate.

    Args:
        X_train: Training input windows.
        y_train: Training targets.
        X_test: Inputs used as validation data during training.
        y_test: Targets used as validation data during training.
        Horizon: Forwarded to Build_Model.
        p: Forwarded to Build_Model.
        patience: Epochs without ``val_loss`` improvement before stopping.

    Returns:
        The trained Keras model.
    """
    model = Build_Model(X_train, Horizon, p)

    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=patience,
        restore_best_weights=True,
        verbose=1,
    )

    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=Epochs,
        batch_size=Batch_Size,
        callbacks=[early_stop],
        verbose=0,
    )

    # The length of the history is the epoch at which training stopped, not
    # the epoch with the best weights. Report the epoch with the lowest
    # monitored val_loss instead (1-based, to match Keras' epoch numbering).
    val_losses = history.history['val_loss']
    best_epoch = int(np.argmin(val_losses)) + 1

    print(f"\nTraining complete. Best model is in memory (epoch {best_epoch}).")

    return model

## Overall RNN process for one (sequence length, training fraction) configuration

In [None]:
def RNN(data_df, p, t, Horizon):
    """Train and evaluate one LSTM configuration on the 'Close' column.

    The 'Close' values are min-max scaled to [0, 1], cut into windows of
    length ``p`` by Create_Seq, split chronologically (first ``t`` fraction
    of the samples for training, the rest for evaluation), and fitted with
    Fit_Model. Predictions for the evaluation part are converted back to
    price units and stored in a 'Close_Pred' column.

    Args:
        data_df: DataFrame with a 'Close' column. It is not modified; the
            function works on a copy.
        p: Window length (number of past observations per sample).
        t: Fraction of the samples used for training (0 < t < 1).
        Horizon: Forwarded to Create_Seq and Fit_Model.

    Returns:
        Tuple ``(mae, model, data_df, scaler)``:
        mean absolute error in price units on the evaluation part, the
        trained model, a copy of the input trimmed to the evaluation rows
        plus the 90 rows before them (with 'Close_Pred' filled for the
        evaluation rows), and the fitted MinMaxScaler.

    Raises:
        ValueError: If the split leaves the training or the evaluation set
            empty (series too short for ``p``, or ``t`` too close to 0 or 1).
    """
    # Work on a copy so the caller's DataFrame does not silently gain a
    # 'Close_Pred' column on every call.
    data_df = data_df.copy()

    scaler = MinMaxScaler((0, 1))
    closes = data_df['Close'].values.reshape(-1, 1)
    # NOTE(review): the scaler is fitted on the whole series, including the
    # evaluation part, so the min/max of the evaluation data leak into
    # training. Left unchanged here because the fitted scaler is persisted
    # by the caller; confirm whether fitting on closes[:v + p] is wanted.
    scale_data = scaler.fit_transform(closes)

    X, y = Create_Seq(scale_data, p, Horizon)

    # Chronological split: the first v samples train, the rest evaluate.
    v = int(len(X) * t)
    if v <= 0 or v >= len(X):
        raise ValueError(
            f"Cannot split {len(X)} samples with p={p} and t={t}: "
            "training or evaluation set would be empty."
        )

    X_train, X_test = X[:v], X[v:]
    y_train, y_test = y[:v], y[v:]

    model = Fit_Model(X_train, y_train, X_test, y_test, Horizon, p)

    pred_scaled = model.predict(X_test)

    # Back to price units so the MAE is interpretable.
    pred_price = scaler.inverse_transform(pred_scaled).flatten()
    actu_price = scaler.inverse_transform(y_test).flatten()

    # Evaluation sample k predicts row v + p + k, so the predictions line up
    # with the last len(pred_price) rows. Assign by position so a non-unique
    # index cannot misalign them.
    close_pred = np.full(len(data_df), np.nan)
    close_pred[v + p:] = pred_price
    data_df['Close_Pred'] = close_pred

    # Skip part of the old data for better presentation
    data_df = data_df.tail(len(pred_price) + 90)

    mae = mean_absolute_error(actu_price, pred_price)

    return mae, model, data_df, scaler


# Main Process of Notebook

In [None]:
def Main_RNN(data, Horizon, min_p, gp):
    """Search window lengths for the best LSTM and save the winner.

    For each training fraction (currently only 0.6) every window length in
    ``range(min_p, len(data) // 4, min_p // gp)`` is trained through RNN and
    the configuration with the lowest MAE is kept. The previous
    ``<RNN_PATH>/<Code>/Current`` folder is renamed to a dated backup, then
    the plot, the model (``.keras``) and the scaler (``.pkl``) of each best
    result are written to a fresh ``Current`` folder.

    Args:
        data: DataFrame with at least 'Code' and 'Close' columns; the first
            'Code' value names the output folder and files.
        Horizon: Forwarded to RNN.
        min_p: Smallest window length to try.
        gp: Divisor that sets the search step, ``int(min_p / gp)``
            (at least 1).

    Raises:
        ValueError: If no window length can be tried (``min_p`` is not below
            a quarter of the series length).
    """
    Code = data['Code'].drop_duplicates().values[0]

    # Not enough training data if the window is longer than a quarter of the series.
    max_p = int(len(data) / 4)
    # range() rejects a zero step, which int(min_p / gp) yields when min_p < gp.
    step = max(1, int(min_p / gp))

    best_result_List = []

    # For each training fraction
    for i in range(6, 7):

        t = i / 10
        best_result = None

        # For each seq_len
        for p in range(min_p, max_p, step):

            print("=" * 100)
            print(f"Training for Seq_len = {p}/{max_p} with training fraction = {t} ")

            mae, model, return_df, scaler = RNN(data, p, t, Horizon)

            if best_result is None or mae < best_result[0]:
                best_result = [mae, model, return_df.copy(), t, p, scaler]

        if best_result is not None:
            best_result_List.append(best_result)

    # Fail before touching the output folders when nothing was trained.
    if not best_result_List:
        raise ValueError(
            f"No sequence length to try: min_p={min_p} must be smaller than "
            f"len(data) / 4 = {max_p}."
        )

    # BackUp Previous Folder
    ThisFolder = f"{RNN_PATH}/{Code}"
    CurrFolder = f"{ThisFolder}/Current"
    fileName = f'RNN_{Code}'

    if os.path.exists(CurrFolder):

        now = datetime.now()
        # Single quotes inside the f-string: reusing the outer quote
        # character is a syntax error before Python 3.12.
        BackUpName = f"{ThisFolder}/{now.strftime('%Y%m%d')}"

        # A second run on the same day would collide with the existing
        # backup; add the time to keep both.
        if os.path.exists(BackUpName):
            BackUpName = f"{ThisFolder}/{now.strftime('%Y%m%d_%H%M%S')}"

        os.rename(CurrFolder, BackUpName)

    os.makedirs(CurrFolder, exist_ok=True)

    # Save Best Result
    for display_id, best_result in enumerate(best_result_List, start=1):

        # Unpack the scaler that belongs to this result; the loop variable
        # left over from training is the scaler of the last run, not the best.
        _mae, model, df, t, p, scaler = best_result

        title = f'Id: {display_id}\nRNN({p} with training Fraction = {t})'
        name = f'RNN({p})'

        Plot_Result(df, title, Code, model = 'RNN' , name = name , IsSave = True)

        # Save Model + Scaler
        model.save(f"{CurrFolder}/{fileName}_{display_id}.keras")
        joblib.dump(scaler, f"{CurrFolder}/{fileName}_{display_id}_scaler.pkl")
