In [None]:
import logging
import tensorflow as tf


from src.common.plots import Visualize as V
from src.data.get_data import CSVsLoader
from src.common.logs import setup_logging, log_model_info
from src.features.build_features import FeatureEngineering as FE

from src.models_service.models_service import TensorflowDataPreparation as TFDataPrep
from src.models_service.models_service import TensorflowModelService as TFModelService
from src.models_service.errors import ErrorsCalculation as ErrorCalc
from env import Env


# Notebook-wide logger; both the console and the log-file level are set to INFO.
logger = setup_logging(
    logger_name=__name__,
    console_level=logging.INFO,
    log_file_level=logging.INFO,
)

# Directory handed to CSVsLoader, built from the project root exposed by Env.
DATA_DIR_PROCESSED = f'{Env.PROJECT_ROOT}/data/03_processed/daily_full'

# Every tunable of this notebook lives in this one nested mapping.
config = dict(
    # Data-source section (presumably Alpha Vantage; 'key' looks like an API
    # key placeholder -- confirm against the data-download code).
    AV=dict(
        key='',
        ticker='MSFT',
        outputsize='full',
        key_adjusted_close='Adj Close',
        key_volume='Volume',
    ),
    # Value passed as the test size to the train/test split helper.
    data=dict(
        test_size=0.05,
    ),
    # Model naming and training hyper-parameters.
    model=dict(
        name='LSTM',
        window=[10, 20, 40, 60, 120, 240],
        batch_size=32,
        epochs=500,
        shuffle_buffer_size=5500,  # https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle
        optimizer='adam',
        loss='huber_loss',
    ),
    # Plot options: 'loss_zoom' is the trailing fraction of epochs drawn in the
    # training-curve plot; 'show' is forwarded to the plotting helper.
    plots=dict(
        loss_zoom=0.9,
        show=False,
    ),
)

In [None]:
# -----------------------------Data----------------------------------------
# Column used as the prediction target; all remaining columns are model inputs.
target_column = 'Adj Close'

# Load the data for the configured ticker and run feature engineering on it.
df = FE.create_features(
    CSVsLoader(ticker=config['AV']['ticker'], directory=DATA_DIR_PROCESSED),
    logger,
)
df_train, df_test = TFDataPrep.split_train_test(df, config['data']['test_size'], logger)

# Separate inputs (X) from the target (y) for both partitions.
df_train_X = df_train.drop(columns=[target_column])
df_train_y = df_train[target_column]

# Only the test inputs go through rename_shifted_columns.
df_test_X = FE.rename_shifted_columns(df_test.drop(columns=[target_column]))
df_test_y = df_test[target_column]

# Arguments shared by the X and y windowing helpers.
# NOTE(review): config['model']['window'] is a list of sizes, not a single
# integer -- confirm that the windowing helpers accept a list.
windowing_kwargs = dict(
    window_size=config['model']['window'],
    logger=logger,
    verbose=False,
)
train_dataset_X, scalers_X = TFDataPrep.windowed_dataset_X(df_train_X, **windowing_kwargs)
train_dataset_y = TFDataPrep.windowed_dataset_y(df_train_y, **windowing_kwargs)

# Merge inputs and target into the single dataset consumed by model.fit().
train_dataset = TFDataPrep.combine_datasets(
    train_dataset_X,
    train_dataset_y,
    config,
    logger,
    verbose=True,
)

In [None]:
# -----------------------------Model Architecture--------------------------
# Recurrent part: two stacked LSTMs. The first one returns the full sequence
# so that the second LSTM receives one vector per time step.
# input_shape=(None, 2) means variable-length sequences with 2 features per step.
# NOTE(review): the 2 is hard-coded -- presumably it must equal the number of
# input features in train_dataset; confirm when features change.
recurrent_layers = [
    tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(None, 2)),
    tf.keras.layers.LSTM(32),
]

# Dense head ending in a single linear output unit.
dense_layers = [
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(1),
]

model = tf.keras.models.Sequential(recurrent_layers + dense_layers)

# Give the model its project-specific name and log its details.
model = TFModelService.name_model(model, config)
log_model_info(config, model, logger)


# -----------------------------Model Training-------------------------------
# Loss and optimizer are taken from the config; MAE and MAPE are tracked as
# additional metrics.
# NOTE(review): no random seed is set in the visible cells, so results may
# differ between runs unless a helper sets one -- confirm.
training_cfg = config['model']
model.compile(
    loss=training_cfg['loss'],
    optimizer=training_cfg['optimizer'],
    metrics=['mae', 'mape'],
)

# `history.history` is read afterwards for the per-epoch 'mae' and 'loss' values.
history = model.fit(train_dataset, epochs=training_cfg['epochs'])

# Plot MAE and Loss
# Per-epoch training curves recorded by model.fit().
mae = history.history['mae']
loss = history.history['loss']

# Use the number of epochs actually recorded rather than the configured count,
# so the x axis always has the same length as the curves (they would disagree
# if training ended early or the config was edited after training).
epochs_run = len(mae)

# Number of trailing epochs to display. Note that a zoom of 0 selects the whole
# history, because the slice [-0:] is the full sequence.
zoom = int(epochs_run * config['plots']['loss_zoom'])

V.plot_series(x=range(epochs_run)[-zoom:],
              y=(mae[-zoom:], loss[-zoom:]),
              model_name=config['model']['name'],
              title='MAE_and_Loss',
              xlabel='Epochs',
              ylabel='MAE and Loss',
              legend=['MAE', f'Loss - {config["model"]["loss"]}'],
              show=config['plots']['show'],
              )

In [None]:
# Save the model
# Persist the trained model first, then the X scalers under the model's name.
TFModelService.save_model(model=model, logger=logger)
TFModelService.save_scalers(
    scalers=scalers_X,
    model_name=model._name,
    logger=logger,
)


# ------------------------Load the model if necessary--------------------------
# Uncomment the lines below to load a previously saved model and its scalers
# by name.
# model_scaler_name = 'MSFT_LSTM_W10_SBS5500_B32_E500_P42113_2023_10_09__15_49'
# model = TFModelService.load_model(model_name=model_scaler_name, logger=logger)
# scalers_X = TFModelService.load_scalers(model_name=model_scaler_name, logger=logger)

In [None]:
# -----------------------------Predictions-----------------------------------
# The window size is taken from the model's name rather than from the config.
forecast_window = TFModelService.get_window_size_from_model_name(model._name)

# Forecast on the test inputs, passing the scalers obtained from the training inputs.
results = TFModelService.model_forecast(
    model=model,
    df=df_test_X,
    window_size=forecast_window,
    scalers=scalers_X,
    verbose=False,
)

# Target series prepared for plotting (presumably trimmed to align with
# `results` -- confirm in prep_test_df_shape).
df_test_plot_y = TFModelService.prep_test_df_shape(df_test_y, config)

V.plot_series(
    x=df_test_plot_y.index,  # as dates
    y=(df_test_plot_y, results),
    model_name=config['model']['name'],
    title='Predictions',
    xlabel='Date',
    ylabel='Price',
    legend=['Actual', 'Predicted'],
    show=config['plots']['show'],
)

In [None]:
# -----------------------Calculate Errors----------------------------------
# Naive baseline over the whole frame, then restricted to the plotted test dates.
naive_forecast_all = ErrorCalc.get_naive_forecast(df)
naive_forecast = naive_forecast_all.loc[df_test_plot_y.index]  # Getting same days as results

# Note: `mae` is rebound here from the per-epoch training list to a single value.
rmse, mae, mape, mase = ErrorCalc.calc_errors(df_test_plot_y, results, naive_forecast)

# Record the four metrics under the model's name.
error_metrics = {'rmse': rmse, 'mae': mae, 'mape': mape, 'mase': mase}
ErrorCalc.save_errors_to_table(model._name, error_metrics)