# Model Building

Given the processed data and the model architecture, this notebook fits each model, exports its predictions, and writes the TensorBoard logs for:

- LSTM: 1 Hidden Layer no Tuning
- LSTM: 2 Hidden Layers no Tuning
- LSTM: 3 Hidden Layers no Tuning

#

- LSTM: 1 Hidden Layer with Tuning
- LSTM: 2 Hidden Layers with Tuning
- LSTM: 3 Hidden Layers with Tuning

In [None]:
# Install Requirements
# `%pip` is an IPython magic: it installs the packages listed in
# ../requirements.txt into the environment of the running kernel.
%pip install -r ../requirements.txt

### Import Libraries

In [None]:
# Warnings
import warnings
warnings.filterwarnings('ignore')

# Import Basic
import os
import datetime as dt
from pathlib import Path
import pydot

# Statistics
import random # functions for generating random numbers

# Data Manipulation
import numpy as np
import pandas as pd
import pandas_ta as ta # Extends the capabilities of Pandas for financial technical analysis


# Plotting & Outputs
from pprint import pprint
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and later
# releases dropped the old names, so use whichever name this installation has.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Import custom transformer
# from helper import DayTransformer, TimeTransformer

## Feature Engineering

# Feature Selection
from boruta import BorutaPy

# Feature Transformation
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder

# Organize Training and Testing Data
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit, cross_val_score

## Build and Evaluate Model

# Tensorflow - LSTM Models 
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator 

# Model Architecture and Optimization
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import LSTM, BatchNormalization
from tensorflow.keras.losses import BinaryCrossentropy 
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.layers import Dropout, Dense, Flatten
from tensorflow.keras.utils import plot_model

from tensorflow.keras.optimizers import Adam, RMSprop 

# Output Metrics
from tensorflow.keras.metrics import BinaryAccuracy, Accuracy, AUC, Precision, Recall

# Model Evaluation
from sklearn.metrics import accuracy_score
from sklearn.metrics import RocCurveDisplay, ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_curve, auc, accuracy_score, precision_score, recall_score, f1_score

# Tuning - Kerastuner
import keras_tuner as kt
from kerastuner import HyperParameter, HyperParameters
from kerastuner.tuners import RandomSearch, Hyperband, BayesianOptimization

# Raise pandas' display limits to 30 columns and 1000 rows
for _option, _limit in (('display.max_columns', 30), ('display.max_rows', 1000)):
    pd.set_option(_option, _limit)

### Import Other functions and Architecture

In [None]:
# Import Other Functions
from aux_functions import *

# Import Model Architecture
from mod_architecture import *

# Import Tune Models
from mod_tuning import *

### Set Path

In [None]:
## Results Path
# Output locations used by the rest of the notebook, relative to the working
# directory of the kernel.
m_fit_path = Path('../Output/Model_fit')           # fitted-model checkpoints (*.h5)
m_arch_path = Path('../Plot/Model_architecture')   # architecture diagrams (plot_model)
tb_path = Path('../Output/TensorBoard')            # TensorBoard logs
tun_path = Path('../Output/Tuning')                # Keras Tuner project folders
text_path = Path('../Text')

# exist_ok=True makes the creation idempotent and removes the check-then-create
# race of `if not p.exists(): p.mkdir(...)`.
for out_dir in (m_fit_path, m_arch_path, tb_path, tun_path, text_path):
    out_dir.mkdir(parents=True, exist_ok=True)

### Load Data

In [None]:
## Random
# Named `seed` rather than `random` so the stdlib `random` module imported at the
# top of the notebook is not replaced by an integer.
seed = 10
set_seeds(seed=seed)

## Load dataset
# NOTE(review): this path goes two levels up ('../../Input') while the other
# paths in this notebook go one level up ('../Output', '../Plot') - confirm.
df = pd.read_csv('../../Input/feat_final.csv', index_col=0)

## Set Features and Target Variable
# Every column except the last is a feature; the last column is the target.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

## Split data into training and testing
# shuffle=False keeps the row order, so the test set is the final 20% of rows.
# random_state has no effect when shuffle=False; it is kept for explicitness.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=seed, shuffle=False
)

## Output the train and test data size
print(f"Train and Test Size {X_train.shape}, {y_train.shape}, {X_test.shape}, {y_test.shape}")

## Sequence length
# Read the single stored value and convert the NumPy scalar to a plain int so it
# can be used directly as a window length / layer dimension.
seqlen = int(pd.read_csv('../Output/Variables/seqlen.csv', header=None).iloc[0, 0])

## Number of features
numfeat = X.shape[1]

### Generate train and test sequence data

In [None]:
## Generate train and test sequence data
# Both generators share the same window length and batch size.
batch_size = 64
_tsg_kwargs = dict(length=seqlen, batch_size=batch_size)
tsg_train = TimeseriesGenerator(X_train, y_train, **_tsg_kwargs)
tsg_test = TimeseriesGenerator(X_test, y_test, **_tsg_kwargs)

## Verify Sequence (disabled)
# for i in range(len(tsg_train)): # len(tsg_train) is the number of batches
#     a, b = tsg_train[i]
#     print(a.shape, b.shape) # (batch, length, features) and (batch,)

## 1.1 - LSTM: 1 Hidden Layer no Tuning
### Fit/Prediction

In [None]:
## Instantiate Model
lstm_11 = create_lstm_h1_noTun(hu=10, lookback=seqlen, features=numfeat)

## Plot Model Architecture (disabled)
# plot_model(lstm_11,
#           to_file= (m_arch_path / 'model11.pdf').as_posix(),
#           show_shapes=True, # Include the shapes of the data (input and output shapes of the layers)
#           show_layer_names=True) # Display the names of the layers in the diagram

## Summary (disabled)
# lstm_11.summary()

## Model fitting - the data is balanced, so class_weight is not needed
# No validation data is passed to fit, so monitor='loss' is the training loss.
lstm_11.fit(
    tsg_train,  # batches produced by the TimeseriesGenerator
    epochs=400,  # 500
    verbose=1,
    shuffle=False,
    callbacks=create_callbacks(
        filepath=(m_fit_path / 'm11.h5').as_posix(),
        log_dir=os.path.join(
            "../Output/TensorBoard/Fiting/m11_logs",
            dt.datetime.now().strftime("%Y%m%d-%H%M%S"),  # one log folder per run
        ),
        monitor='loss',
        patience=10,
    ),
)

## Load Model
# model = load_model( (m_fit_path / 'm11.h5').as_posix() )

### Export Predictions

## Predicted probabilities
# Each generator is scored exactly once. The class labels below are derived from
# these same arrays, so the saved labels and probabilities always agree and the
# test set is not run through the network a second time.
# NOTE(review): these come from the in-memory model after fit; whether that
# matches the m11.h5 checkpoint depends on create_callbacks - confirm.
yprob_train = lstm_11.predict(tsg_train, verbose=False)
yprob = lstm_11.predict(tsg_test, verbose=False)

## Class labels at the 0.5 threshold
ypred_train = np.where(yprob_train > 0.5, 1, 0)
ypred_test = np.where(yprob > 0.5, 1, 0)

## Save
pd.DataFrame(ypred_train).to_csv('../Output/Variables/m11_ypred_train.csv', index=False)
pd.DataFrame(ypred_test).to_csv('../Output/Variables/m11_ypred_test.csv', index=False)
pd.DataFrame(yprob).to_csv('../Output/Variables/m11_yprob.csv', index=False)

## 2.1 - LSTM: 2 Hidden Layers no Tuning
### Fit/Prediction

In [None]:
## Instantiate Model
lstm_21 = create_lstm_h2_noTun(hu=10, dropout=0.5, lookback=seqlen, features=numfeat)

## Plot Model Architecture (disabled)
# plot_model(lstm_21,
#           to_file= (m_arch_path / 'model21.pdf').as_posix(),
#           show_shapes=True, # Include the shapes of the data (input and output shapes of the layers)
#           show_layer_names=True) # Display the names of the layers in the diagram

## Summary (disabled)
# lstm_21.summary()

## Model fitting - the data is balanced, so class_weight is not needed
# No validation data is passed to fit, so monitor='loss' is the training loss.
lstm_21.fit(
    tsg_train,  # batches produced by the TimeseriesGenerator
    epochs=500,  # 1000
    verbose=1,
    shuffle=False,
    callbacks=create_callbacks(
        filepath=(m_fit_path / 'm21.h5').as_posix(),
        log_dir=os.path.join(
            "../Output/TensorBoard/Fiting/m21_logs",
            dt.datetime.now().strftime("%Y%m%d-%H%M%S"),  # one log folder per run
        ),
        monitor='loss',
        patience=10,
    ),
)

## Load Model
# model = load_model( (m_fit_path / 'm21.h5').as_posix() )

### Export Predictions

## Predicted probabilities
# Each generator is scored exactly once. The class labels below are derived from
# these same arrays, so the saved labels and probabilities always agree and the
# test set is not run through the network a second time.
# NOTE(review): these come from the in-memory model after fit; whether that
# matches the m21.h5 checkpoint depends on create_callbacks - confirm.
yprob_train = lstm_21.predict(tsg_train, verbose=False)
yprob = lstm_21.predict(tsg_test, verbose=False)

## Class labels at the 0.5 threshold
ypred_train = np.where(yprob_train > 0.5, 1, 0)
ypred_test = np.where(yprob > 0.5, 1, 0)

## Save
pd.DataFrame(ypred_train).to_csv('../Output/Variables/m21_ypred_train.csv', index=False)
pd.DataFrame(ypred_test).to_csv('../Output/Variables/m21_ypred_test.csv', index=False)
pd.DataFrame(yprob).to_csv('../Output/Variables/m21_yprob.csv', index=False)

## 3.1 - LSTM: 3 Hidden Layers no Tuning
### Fit/Prediction

In [None]:
## Instantiate Model
lstm_31 = create_lstm_h3_noTun(hu=10, dropout=0.5, lookback=seqlen, features=numfeat)

## Plot Model Architecture (disabled)
# plot_model(lstm_31,
#           to_file= (m_arch_path / 'model31.pdf').as_posix(),
#           show_shapes=True, # Include the shapes of the data (input and output shapes of the layers)
#           show_layer_names=True) # Display the names of the layers in the diagram

## Summary (disabled)
# lstm_31.summary()

## Model fitting - the data is balanced, so class_weight is not needed
# No validation data is passed to fit, so monitor='loss' is the training loss.
lstm_31.fit(
    tsg_train,  # batches produced by the TimeseriesGenerator
    epochs=300,
    verbose=1,
    shuffle=False,
    callbacks=create_callbacks(
        filepath=(m_fit_path / 'm31.h5').as_posix(),
        log_dir=os.path.join(
            "../Output/TensorBoard/Fiting/m31_logs",
            dt.datetime.now().strftime("%Y%m%d-%H%M%S"),  # one log folder per run
        ),
        monitor='loss',
        patience=10,
    ),
)

## Load Model
# model = load_model( (m_fit_path / 'm31.h5').as_posix() )

### Export Predictions

## Predicted probabilities
# Each generator is scored exactly once. The class labels below are derived from
# these same arrays, so the saved labels and probabilities always agree and the
# test set is not run through the network a second time.
# NOTE(review): these come from the in-memory model after fit; whether that
# matches the m31.h5 checkpoint depends on create_callbacks - confirm.
yprob_train = lstm_31.predict(tsg_train, verbose=False)
yprob = lstm_31.predict(tsg_test, verbose=False)

## Class labels at the 0.5 threshold
ypred_train = np.where(yprob_train > 0.5, 1, 0)
ypred_test = np.where(yprob > 0.5, 1, 0)

## Save
pd.DataFrame(ypred_train).to_csv('../Output/Variables/m31_ypred_train.csv', index=False)
pd.DataFrame(ypred_test).to_csv('../Output/Variables/m31_ypred_test.csv', index=False)
pd.DataFrame(yprob).to_csv('../Output/Variables/m31_yprob.csv', index=False)

## 1.2 - LSTM: 1 Hidden Layer with Tuning
### Tuning

In [None]:
## Validation data for tuning
# The tuner ranks trials by val_loss. Scoring that on tsg_test would select the
# hyperparameters on the same rows later used to report test results, so the
# most recent part of the training rows is held out for tuning instead.
# The held-out slice must contain more than `seqlen` rows, otherwise
# TimeseriesGenerator raises a ValueError.
tune_val_frac = 0.2
tune_cut = int(len(X_train) * (1 - tune_val_frac))
tsg_tune_train = TimeseriesGenerator(X_train[:tune_cut], y_train[:tune_cut],
                                     length=seqlen, batch_size=batch_size)
tsg_tune_val = TimeseriesGenerator(X_train[tune_cut:], y_train[tune_cut:],
                                   length=seqlen, batch_size=batch_size)

## Fit Hyperband Algorithm
# NOTE(review): overwrite=False resumes trials already stored under
# ../Output/Tuning/hb_m1; delete that folder once so trials that were scored
# against the previous validation data are not reused.
hbtuner1 = kt.Hyperband(
    tune_lstm_h1,  # Model
    objective="val_loss",
    max_epochs=20,  # 20
    hyperband_iterations=2,  # 3
    directory="../Output/Tuning",
    project_name="hb_m1",
    overwrite=False,
    seed=10,
)

## Run Hyperband Algorithm - the data is balanced, so class_weight is not needed
# NOTE(review): Hyperband presumably sets the per-trial epoch count itself
# (bounded by max_epochs), which would override `epochs` here - verify.
hbtuner1.search(
    tsg_tune_train,
    epochs=15,  # 25 more restrictive
    validation_data=tsg_tune_val,
    callbacks=tune_callbacks(monitor='loss', patience=3),  # Set patience
    shuffle=False,
    verbose=0,  # disable progress bars
)

## Display tuning results
# hbtuner1.results_summary()

### Fit/Prediction

In [None]:
## Best Parameters
best_hb1 = hbtuner1.get_best_hyperparameters()[0]

## Instantiate Model
lstm_12 = create_lstm_h1_Tun(best_hb1, lookback=seqlen, features=numfeat)

## Print
print("Best Hyperparameters:")
for hp in best_hb1.space:
    print(f"{hp.name}: {best_hb1.get(hp.name)}")

## Plot Model Architecture (disabled)
# plot_model(lstm_12,
#           to_file= (m_arch_path / 'model12.pdf').as_posix(),
#           show_shapes=True, # Include the shapes of the data (input and output shapes of the layers)
#           show_layer_names=True) # Display the names of the layers in the diagram

## Summary (disabled)
# lstm_12.summary()

## Model fitting - the data is balanced, so class_weight is not needed
# No validation data is passed to fit, so monitor='loss' is the training loss.
lstm_12.fit(
    tsg_train,  # batches produced by the TimeseriesGenerator
    epochs=200,
    verbose=1,
    shuffle=False,
    callbacks=create_callbacks(
        filepath=(m_fit_path / 'm12.h5').as_posix(),
        log_dir=os.path.join(
            "../Output/TensorBoard/Fiting/m12_logs",
            dt.datetime.now().strftime("%Y%m%d-%H%M%S"),  # one log folder per run
        ),
        monitor='loss',
        patience=5,
    ),
)

## Load Model
# model = load_model( (m_fit_path / 'm12.h5').as_posix() )

### Export Predictions

## Predicted probabilities
# Each generator is scored exactly once. The class labels below are derived from
# these same arrays, so the saved labels and probabilities always agree and the
# test set is not run through the network a second time.
# NOTE(review): these come from the in-memory model after fit; whether that
# matches the m12.h5 checkpoint depends on create_callbacks - confirm.
yprob_train = lstm_12.predict(tsg_train, verbose=False)
yprob = lstm_12.predict(tsg_test, verbose=False)

## Class labels at the 0.5 threshold
ypred_train = np.where(yprob_train > 0.5, 1, 0)
ypred_test = np.where(yprob > 0.5, 1, 0)

## Save
pd.DataFrame(ypred_train).to_csv('../Output/Variables/m12_ypred_train.csv', index=False)
pd.DataFrame(ypred_test).to_csv('../Output/Variables/m12_ypred_test.csv', index=False)
pd.DataFrame(yprob).to_csv('../Output/Variables/m12_yprob.csv', index=False)

## 2.2 - LSTM: 2 Hidden Layers with Tuning
### Tuning

In [None]:
## Validation data for tuning
# The tuner ranks trials by val_loss. Scoring that on tsg_test would select the
# hyperparameters on the same rows later used to report test results, so the
# most recent part of the training rows is held out for tuning instead.
# The held-out slice must contain more than `seqlen` rows, otherwise
# TimeseriesGenerator raises a ValueError.
tune_val_frac = 0.2
tune_cut = int(len(X_train) * (1 - tune_val_frac))
tsg_tune_train = TimeseriesGenerator(X_train[:tune_cut], y_train[:tune_cut],
                                     length=seqlen, batch_size=batch_size)
tsg_tune_val = TimeseriesGenerator(X_train[tune_cut:], y_train[tune_cut:],
                                   length=seqlen, batch_size=batch_size)

## Fit Hyperband Algorithm
# NOTE(review): overwrite=False resumes trials already stored under
# ../Output/Tuning/hb_m2; delete that folder once so trials that were scored
# against the previous validation data are not reused.
hbtuner2 = kt.Hyperband(
    tune_lstm_h2,  # Model
    objective="val_loss",
    max_epochs=20,  # 20
    hyperband_iterations=2,  # 3
    directory="../Output/Tuning",
    project_name="hb_m2",
    overwrite=False,
    seed=10,
)

## Run Hyperband Algorithm - the data is balanced, so class_weight is not needed
# NOTE(review): Hyperband presumably sets the per-trial epoch count itself
# (bounded by max_epochs), which would override `epochs` here - verify.
hbtuner2.search(
    tsg_tune_train,
    epochs=15,  # 25  # more restrictive
    validation_data=tsg_tune_val,
    shuffle=False,
    callbacks=tune_callbacks(monitor='loss', patience=3),  # Set patience
    verbose=0,  # disable progress bars
)


## Display tuning results
# hbtuner2.results_summary()

### Fit/Prediction

In [None]:
## Best Parameters
best_hb2 = hbtuner2.get_best_hyperparameters()[0]

## Instantiate Model
lstm_22 = create_lstm_h2_Tun(best_hb2, lookback=seqlen, features=numfeat)

## Print (disabled)
# print("Best Hyperparameters:")
# for hp in best_hb2.space:
#     print(f"{hp.name}: {best_hb2.get(hp.name)}")


## Plot Model Architecture (disabled)
# plot_model(lstm_22,
#           to_file= (m_arch_path / 'model22.pdf').as_posix(),
#           show_shapes=True, # Include the shapes of the data (input and output shapes of the layers)
#           show_layer_names=True) # Display the names of the layers in the diagram

## Summary (disabled)
# lstm_22.summary()

## Model fitting - the data is balanced, so class_weight is not needed
# No validation data is passed to fit, so monitor='loss' is the training loss.
lstm_22.fit(
    tsg_train,  # batches produced by the TimeseriesGenerator
    epochs=200,  # 500 1000
    verbose=1,
    shuffle=False,
    callbacks=create_callbacks(
        filepath=(m_fit_path / 'm22.h5').as_posix(),
        log_dir=os.path.join(
            "../Output/TensorBoard/Fiting/m22_logs",
            dt.datetime.now().strftime("%Y%m%d-%H%M%S"),  # one log folder per run
        ),
        monitor='loss',
        patience=5,
    ),
)

## Load Model
# model = load_model( (m_fit_path / 'm22.h5').as_posix() )

### Export Predictions

## Predicted probabilities
# Each generator is scored exactly once. The class labels below are derived from
# these same arrays, so the saved labels and probabilities always agree and the
# test set is not run through the network a second time.
# NOTE(review): these come from the in-memory model after fit; whether that
# matches the m22.h5 checkpoint depends on create_callbacks - confirm.
yprob_train = lstm_22.predict(tsg_train, verbose=False)
yprob = lstm_22.predict(tsg_test, verbose=False)

## Class labels at the 0.5 threshold
ypred_train = np.where(yprob_train > 0.5, 1, 0)
ypred_test = np.where(yprob > 0.5, 1, 0)

## Save
pd.DataFrame(ypred_train).to_csv('../Output/Variables/m22_ypred_train.csv', index=False)
pd.DataFrame(ypred_test).to_csv('../Output/Variables/m22_ypred_test.csv', index=False)
pd.DataFrame(yprob).to_csv('../Output/Variables/m22_yprob.csv', index=False)

## 3.2 - LSTM: 3 Hidden Layers with Tuning
### Tuning

In [None]:
## Validation data for tuning
# The tuner ranks trials by val_loss. Scoring that on tsg_test would select the
# hyperparameters on the same rows later used to report test results, so the
# most recent part of the training rows is held out for tuning instead.
# The held-out slice must contain more than `seqlen` rows, otherwise
# TimeseriesGenerator raises a ValueError.
tune_val_frac = 0.2
tune_cut = int(len(X_train) * (1 - tune_val_frac))
tsg_tune_train = TimeseriesGenerator(X_train[:tune_cut], y_train[:tune_cut],
                                     length=seqlen, batch_size=batch_size)
tsg_tune_val = TimeseriesGenerator(X_train[tune_cut:], y_train[tune_cut:],
                                   length=seqlen, batch_size=batch_size)

## Fit Hyperband Algorithm
# NOTE(review): overwrite=False resumes trials already stored under
# ../Output/Tuning/hb_m3; delete that folder once so trials that were scored
# against the previous validation data are not reused.
hbtuner3 = kt.Hyperband(
    tune_lstm_h3,  # Model
    objective="val_loss",
    max_epochs=20,  # 5
    hyperband_iterations=2,  # 15
    directory="../Output/Tuning",
    project_name="hb_m3",
    overwrite=False,
    seed=10,
)

## Run Hyperband Algorithm - the data is balanced, so class_weight is not needed
# NOTE(review): Hyperband presumably sets the per-trial epoch count itself
# (bounded by max_epochs), which would override `epochs` here - verify.
hbtuner3.search(
    tsg_tune_train,
    epochs=15,  # 50  # more restrictive
    validation_data=tsg_tune_val,
    callbacks=tune_callbacks(monitor='loss', patience=5),  # Set patience
    shuffle=False,
    verbose=0,  # disable progress bars
)

## Display tuning results
# hbtuner3.results_summary()

### Fit/Prediction

In [None]:
## Best Parameters
best_hb3 = hbtuner3.get_best_hyperparameters()[0]

## Instantiate Model
lstm_32 = create_lstm_h3_Tun(best_hb3, lookback=seqlen, features=numfeat)

## Print (disabled)
# print("Best Hyperparameters:")
# for hp in best_hb3.space:
#     print(f"{hp.name}: {best_hb3.get(hp.name)}")


## Plot Model Architecture (disabled)
# plot_model(lstm_32,
#           to_file= (m_arch_path / 'model32.pdf').as_posix(),
#           show_shapes=True, # Include the shapes of the data (input and output shapes of the layers)
#           show_layer_names=True) # Display the names of the layers in the diagram

## Summary (disabled)
# lstm_32.summary()

## Model fitting - the data is balanced, so class_weight is not needed
# No validation data is passed to fit, so monitor='loss' is the training loss.
lstm_32.fit(
    tsg_train,  # batches produced by the TimeseriesGenerator
    epochs=200,  # 500
    verbose=1,
    shuffle=False,
    callbacks=create_callbacks(
        filepath=(m_fit_path / 'm32.h5').as_posix(),
        log_dir=os.path.join(
            "../Output/TensorBoard/Fiting/m32_logs",
            dt.datetime.now().strftime("%Y%m%d-%H%M%S"),  # one log folder per run
        ),
        monitor='loss',
        patience=5,
    ),
)

## Load Model
# model = load_model( (m_fit_path / 'm32.h5').as_posix() )

### Export Predictions

## Predicted probabilities
# Each generator is scored exactly once. The class labels below are derived from
# these same arrays, so the saved labels and probabilities always agree and the
# test set is not run through the network a second time.
# NOTE(review): these come from the in-memory model after fit; whether that
# matches the m32.h5 checkpoint depends on create_callbacks - confirm.
yprob_train = lstm_32.predict(tsg_train, verbose=False)
yprob = lstm_32.predict(tsg_test, verbose=False)

## Class labels at the 0.5 threshold
ypred_train = np.where(yprob_train > 0.5, 1, 0)
ypred_test = np.where(yprob > 0.5, 1, 0)

## Save
pd.DataFrame(ypred_train).to_csv('../Output/Variables/m32_ypred_train.csv', index=False)
pd.DataFrame(ypred_test).to_csv('../Output/Variables/m32_ypred_test.csv', index=False)
pd.DataFrame(yprob).to_csv('../Output/Variables/m32_yprob.csv', index=False)

## TensorBoard

In [None]:
# Load the TensorBoard notebook extension (IPython magic).
%load_ext tensorboard 
# Access: http://localhost:6006
# To reload the extension in a running kernel: %reload_ext tensorboard
# To clear a stuck TensorBoard instance (Windows):
#   cmd prompt --> taskkill /IM "tensorboard.exe" /F
#   then remove AppData/Local/Temp/.tensorboard-info

# Show the logs written by the fit cells, which all log under this directory.
%tensorboard --logdir ../Output/TensorBoard/Fiting