In [1]:
!pip install --upgrade mlflow dagshub -q
!pip install pyngrok -q
!pip install --upgrade keras_tuner -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.0/29.0 MB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m73.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m260.1/260.1 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.9/114.9 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.9/139.9 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Mount Google Drive; force_remount=True is passed so an existing mount is replaced.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Make the project folder importable (the later `from config import *` presumably
# resolves to a config.py stored there — verify).
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/Stock_Market_Prediction')

import os
import joblib
import random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from tabulate import tabulate

from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column, row
from bokeh.palettes import Category10
from bokeh.models import ColumnDataSource, HoverTool, Legend

import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import (
    LSTM,
    Dense,
    Dropout,
    Input,
    BatchNormalization,
    Bidirectional
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau
)
from tensorflow.keras.regularizers import L1L2, l2
from tensorflow.keras.metrics import (
    MeanAbsolutePercentageError,
    RootMeanSquaredError,
    MeanAbsoluteError,
    MeanSquaredError
)

from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    root_mean_squared_error
)

from config import *

from mlflow.models.signature import infer_signature
from bokeh.plotting import output_file, save

import dagshub
import mlflow

# Configure Bokeh to render figures inline in the notebook.
output_notebook()

Mounted at /content/drive


In [3]:
# Connect this session to the DagsHub repository (triggers the OAuth prompt shown in the output).
# NOTE(review): with mlflow=True, dagshub.init presumably already configures the MLflow
# tracking URI, which would make the explicit set_tracking_uri call redundant — confirm.
dagshub.init(repo_owner='bojte.csongor', repo_name='stock_market_prediction_thesis', mlflow=True)

# Point MLflow at the repository's tracking server and select the "GRU" experiment.
mlflow.set_tracking_uri("https://dagshub.com/bojte.csongor/stock_market_prediction_thesis.mlflow")
mlflow.set_experiment(experiment_name="GRU")

Output()



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=b9847575-f661-42e7-ad06-8d003e9bfa5b&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=7dac3c2a270e3f5cd57bd9dd0f31ab2d768e78d7e04bd84e0c46ca5e226e84c2




<Experiment: artifact_location='mlflow-artifacts:/d10aec8d63d244508f6c54660dee98d4', creation_time=1745051301607, experiment_id='2', last_update_time=1745051301607, lifecycle_stage='active', name='GRU', tags={}>

In [None]:
# Seed every random source used by the notebook (Python, NumPy, TensorFlow).
seed=42
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up; setting it here
# presumably only affects child processes — confirm it has the intended effect.
os.environ['PYTHONHASHSEED'] = str(seed)
# NOTE(review): TensorFlow is already imported when this runs; confirm the installed
# version still honours this variable when it is set after import.
os.environ["TF_DETERMINISTIC_OPS"] = "1"
np.random.seed(seed)
random.seed(seed)
tf.random.set_seed(seed)

In [None]:
def load_datasets(data_path, dataset_name):
    """Load the train/val/test CSV splits and the fitted scaler of one dataset.

    Reads ``{data_path}/{dataset_name}_{train,val,test}.csv`` (first column as
    index) and ``{data_path}/{dataset_name}_scaler.joblib``.

    Each split's index is converted to a UTC ``DatetimeIndex`` on a best-effort
    basis: a non-numeric index that cannot be parsed as dates is kept as-is, and
    a numeric index is never converted.

    Args:
        data_path: Directory that contains the split files and the scaler.
        dataset_name: File-name prefix shared by the split files and the scaler.

    Returns:
        Tuple ``(train_data, val_data, test_data, scaler)``.
    """
    def _read_split(split_name):
        # One CSV per split; the first column becomes the index.
        frame = pd.read_csv(f"{data_path}/{dataset_name}_{split_name}.csv", index_col=0)

        # Numeric indexes are left alone: to_datetime would not fail on them but
        # silently reinterpret the numbers as epoch offsets.
        if not pd.api.types.is_numeric_dtype(frame.index):
            try:
                # Assign the DatetimeIndex itself. The previous version had a trailing
                # comma here, which wrapped it in a 1-tuple, made the assignment raise
                # and (because of a bare except) left every index unconverted.
                frame.index = pd.to_datetime(frame.index, utc=True)
            except (ValueError, TypeError):
                # Best effort: keep the raw index when it does not hold parseable dates.
                pass
        return frame

    train_data = _read_split("train")
    val_data = _read_split("val")
    test_data = _read_split("test")

    # NOTE: joblib.load unpickles the file — only load scalers from a trusted location.
    scaler = joblib.load(f"{data_path}/{dataset_name}_scaler.joblib")
    return train_data, val_data, test_data, scaler

In [None]:
def create_sequences(data, feature_cols, target_cols, sequence_length):
    """Cut a frame into overlapping feature windows with one target row per window.

    Window ``w`` holds the feature rows ``w .. w + sequence_length - 1``; its
    target is the target row at the window's last position
    (``w + sequence_length - 1``). ``len(data) - sequence_length`` windows are
    produced.

    Args:
        data: DataFrame that contains the feature and target columns.
        feature_cols: List of feature column names.
        target_cols: List of target column names.
        sequence_length: Number of consecutive rows per window.

    Returns:
        ``(X, y)`` with ``X`` of shape ``(windows, sequence_length, n_features)``
        and ``y`` of shape ``(windows, n_targets)``, both float arrays.
    """
    feature_matrix = data[feature_cols].values
    target_matrix = data[target_cols].values

    window_count = len(data) - sequence_length

    X = np.zeros((window_count, sequence_length, len(feature_cols)))
    y = np.zeros((window_count, len(target_cols)))

    # Row w of `positions` lists the source rows of window w.
    window_starts = np.arange(window_count)
    positions = window_starts[:, None] + np.arange(sequence_length)

    X[...] = feature_matrix[positions]
    y[...] = target_matrix[window_starts + sequence_length - 1]

    return X, y

In [None]:
def create_all_sequences(train,val,test, feature_cols, target_cols, sequence_length):
    """Window the train, validation and test frames with identical settings.

    Applies ``create_sequences`` to each split in the order train, val, test.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    windowed = []
    for split_frame in (train, val, test):
        X_split, y_split = create_sequences(
            data=split_frame,
            feature_cols=feature_cols,
            target_cols=target_cols,
            sequence_length=sequence_length,
        )
        windowed += [X_split, y_split]
    return tuple(windowed)

In [None]:
# Columns and window length used to build the model inputs.
feature_cols_custom = ['Custom_Normalized']
target_cols_custom = ['Target']
# Also read as a module-level global inside inverse_transform_custom.
sequence_length = 10

# Load the "log_data" splits and their scaler from the custom_split_first folder.
train_log_data_custom, val_log_data_custom, test_log_data_custom, custom_log_scaler = load_datasets(
    data_path=f"{PROCESSED_DATA_PATH}/custom_split_first",
    dataset_name="log_data"
)

# Turn every split into (windows, targets) arrays.
X_train_log_custom, y_train_log_custom, X_val_log_custom, y_val_log_custom, X_test_log_custom, y_test_log_custom = create_all_sequences(
    train=train_log_data_custom,
    val=val_log_data_custom,
    test=test_log_data_custom,
    feature_cols=feature_cols_custom,
    target_cols=target_cols_custom,
    sequence_length=sequence_length,
)

In [None]:
# Sanity check: X is (windows, sequence_length, n_features), y is (windows, n_targets).
print("X train shape:",X_train_log_custom.shape)
print("y train shape:", y_train_log_custom.shape)

X train shape: (1925, 10, 1)
y train shape: (1925, 1)


In [None]:
def inverse_transform_simple(df, scaler, log_scaled=False):
    """Undo the scaler (and optionally a log transform) on a copy of ``df``.

    Args:
        df: Scaled values; must provide ``.copy()`` and be accepted by
            ``scaler.inverse_transform``.
        scaler: Object exposing ``inverse_transform``.
        log_scaled: When true, ``np.exp`` is applied after the inverse scaling.

    Returns:
        The inverse-transformed values as returned by the scaler
        (exponentiated when ``log_scaled`` is true).
    """
    restored = scaler.inverse_transform(df.copy())
    return np.exp(restored) if log_scaled else restored

In [None]:
def inverse_custom_normalize(normalized_value, last_value, index, n):
    """Map one custom-normalized value back using its position ``index``.

    Computes ``last_value * index / n`` plus ``normalized_value`` scaled by
    ``sqrt(index**2 + (last_value * index / n)**2) / index``.

    Returns:
        The reconstructed value, or ``0`` when ``index`` is 0 (the scale
        factor would divide by zero there).
    """
    if index == 0:
        return 0

    line_value = last_value * (index / n)
    scale = np.sqrt(index**2 + ((last_value * index) / n)**2) / index
    return line_value + normalized_value * scale

def add_first_value(data, first_value):
    """Return ``data`` shifted by ``first_value`` (plain ``+``, so arrays broadcast)."""
    shifted = data + first_value
    return shifted

def inverse_transform_custom(arr, scaler, n, first_value, last_value, train_data,val_data,column_name='Custom_Normalized', log_scaled=False, seq_len=None):
    """Undo scaling and the custom normalization for values that follow train + val.

    Steps: inverse-scale ``arr`` and keep its first column, undo the custom
    normalization row by row with ``inverse_custom_normalize``, add
    ``first_value`` back and, if requested, exponentiate.

    Row ``i`` of ``arr`` is treated as position
    ``len(train_data) + len(val_data) + i + seq_len``.

    Args:
        arr: 2-D array accepted by ``scaler.inverse_transform``; only column 0
            of the result is used.
        scaler: Object exposing ``inverse_transform``.
        n: Passed to ``inverse_custom_normalize`` as its ``n`` argument.
        first_value: Offset added back after the inverse normalization.
        last_value: Passed to ``inverse_custom_normalize``.
        train_data: Training split; only its length is used.
        val_data: Validation split; only its length is used.
        column_name: Kept for backward compatibility; no longer used.
        log_scaled: When true, ``np.exp`` is applied to the result.
        seq_len: Window length added to every position. Defaults to the
            notebook-level ``sequence_length``, which earlier versions of this
            function always read implicitly.

    Returns:
        1-D NumPy array of reconstructed values.
    """
    if seq_len is None:
        # Backward-compatible fallback to the notebook global.
        seq_len = sequence_length

    start_index = len(train_data) + len(val_data)
    positions = np.arange(start_index, start_index + len(arr)) + seq_len

    inverse_minmax = scaler.inverse_transform(arr)[:,0]

    # inverse_custom_normalize branches on a scalar index, so it is applied per row.
    real_values = np.array([
        inverse_custom_normalize(value, last_value, position, n)
        for value, position in zip(inverse_minmax, positions)
    ])

    real_values = add_first_value(real_values, first_value)

    if log_scaled:
        real_values = np.exp(real_values)

    return real_values

In [None]:
# Read the constants stored alongside the custom-normalized dataset; the first row of
# the CSV supplies first_value, last_value and last_index.
log_data_path = f"{PROCESSED_DATA_PATH}/custom_split_first/log_data_custom_scaler.csv"
raw_data_log = pd.read_csv(log_data_path)

first_value_log = raw_data_log['first_value'].iloc[0]
# NOTE(review): first_index_log is not referenced anywhere else in the visible cells.
first_index_log = 0
last_value_log = raw_data_log['last_value'].iloc[0]
last_index_log = raw_data_log['last_index'].iloc[0]

In [None]:
def log_run_metadata(params: dict, tags: dict):
    """Log run parameters and tags to MLflow in one batch each.

    Uses the batch APIs instead of one call per key, which avoids a separate
    round-trip to the tracking server for every entry.

    Args:
        params: Mapping of parameter name to value.
        tags: Mapping of tag name to value.
    """
    # Empty mappings are skipped so that, as with the former per-item loop,
    # no MLflow call is made when there is nothing to log.
    if params:
        mlflow.log_params(params)
    if tags:
        mlflow.set_tags(tags)

In [None]:
def evaluate_and_log_metrics(model, NORM_TYPE, X_test, y_test, scaler, model_name, custom, log_scaled, last_index, first_value, last_value, train_data,val_data):
    """Predict on the test windows, map values back to the original scale and log metrics.

    Predictions and targets are padded with a zero column before the inverse
    scaling (presumably because the scaler was fitted on two columns — verify).
    With ``custom`` set they go through ``inverse_transform_custom``, otherwise
    through ``inverse_transform_simple`` (first column, reshaped to ``(-1, 1)``).
    The metrics 'mape', 'mse', 'mae', 'mpd' and 'rmse' returned by
    ``evaluate_predictions`` are logged to MLflow.

    ``log_scaled`` is forced to True when ``NORM_TYPE`` equals
    'minmax_split_first_log'.

    Returns:
        ``(y_test_real, y_pred_real)`` on the original scale.
    """
    if NORM_TYPE == 'minmax_split_first_log':
        log_scaled = True

    def _to_original_scale(values):
        # Pad with a zero column, then undo the scaling for this branch.
        padded = np.hstack([values, np.zeros_like(values)])
        if custom:
            return inverse_transform_custom(padded, scaler, last_index, first_value, last_value, train_data,val_data,log_scaled=log_scaled)
        return inverse_transform_simple(padded, scaler,log_scaled=log_scaled)[:, 0].reshape(-1, 1)

    y_pred = model.predict(X_test)

    y_pred_real = _to_original_scale(y_pred)
    y_test_real = _to_original_scale(y_test)

    metrics = evaluate_predictions(model_name, y_test_real, y_pred_real, should_print=True)

    for metric_name in ('mape', 'mse', 'mae', 'mpd', 'rmse'):
        mlflow.log_metric(metric_name, metrics[metric_name])

    return y_test_real, y_pred_real

In [None]:
def get_callbacks(model_name, save_path):
    """Build the Keras callbacks used for training; all of them monitor ``val_loss``.

    Args:
        model_name: Not used by this function.
        save_path: File path the best model is checkpointed to.

    Returns:
        ``[ModelCheckpoint, EarlyStopping, ReduceLROnPlateau]``.
    """
    # A single-argument join leaves the path text unchanged.
    checkpoint_path = os.path.join(save_path)

    # Save the full model (not just weights) whenever val_loss improves.
    best_model_saver = ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_loss',
        mode='min',
        save_best_only=True,
        save_weights_only=False,
        verbose=1,
    )

    # Stop after 10 epochs without improvement and restore the best weights.
    early_stopper = EarlyStopping(
        monitor='val_loss',
        patience=10,
        mode='min',
        restore_best_weights=True,
        verbose=0,
    )

    # Halve the learning rate after 5 stagnant epochs, never below 1e-7.
    lr_reducer = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=1,
        mode='min',
    )

    return [best_model_saver, early_stopper, lr_reducer]

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, GRU, Dense, LayerNormalization, Dropout
from tensorflow.keras.regularizers import l2

def create_gru_model(
    input_shape,
    units,
    activation,
    loss,
    optimizer,
    use_layer_norm = False,
    use_regularization = False,
    dropout = 0.0
):
    """Build and compile a stacked GRU regressor with a single-unit Dense output.

    Args:
        input_shape: Shape passed to the ``Input`` layer.
        units: Sequence with the unit count of each GRU layer, in order.
        activation: Activation passed to every GRU layer.
        loss: Loss passed to ``compile``.
        optimizer: Optimizer passed to ``compile``.
        use_layer_norm: Add ``LayerNormalization`` after every GRU layer.
        use_regularization: Attach default ``l2()`` recurrent, activity and
            kernel regularizers to every GRU layer.
        dropout: When greater than 0, add ``Dropout(dropout)`` after every GRU
            layer (after the normalization, if enabled).

    Returns:
        The compiled ``Sequential`` model.
    """
    network = Sequential()
    network.add(Input(shape=input_shape))

    final_position = len(units) - 1

    for position, layer_units in enumerate(units):
        # Every GRU layer except the last one emits the full sequence so the
        # next GRU layer can consume it.
        gru_options = dict(
            units=layer_units,
            activation=activation,
            return_sequences=position < final_position,
        )

        if use_regularization:
            gru_options["recurrent_regularizer"] = l2()
            gru_options["activity_regularizer"] = l2()
            gru_options["kernel_regularizer"] = l2()

        network.add(GRU(**gru_options))

        if use_layer_norm:
            network.add(LayerNormalization())

        if dropout > 0.0:
            network.add(Dropout(dropout))

    network.add(Dense(1))
    network.compile(optimizer=optimizer, loss=loss)

    return network

In [None]:
from mlflow.models.signature import infer_signature
import time
from bokeh.plotting import reset_output

def run_experiment_group(
    model_type: str,
    units: list[int],
    optimizer_config: dict,
    learning_rate: float,
    loss: str,
    activation: str,
    experiment_name,
    batch_size: int,
    epochs: int,
    sequence_length: int,
    X_train_data,
    y_train_data,
    X_val_data,
    y_val_data,
    X_test_data,
    y_test_data,
    scaler,
    feature_cols,
    norm_type: str,
    create_model_fn,
    use_layer_norm: bool = True,
    use_regularization: bool = True,
    dropout: float = 0.0,
    save_dir: str = CHECKPOINTS_PATH,
    custom=False,
    log_scaled=False,
    train_data=None,
    val_data=None,
    extra_name: str = "v1",
    num_runs: int = 1
):
    """Train, evaluate and log ``num_runs`` models of one configuration to MLflow.

    Each iteration opens its own MLflow run and then:
      1. tags the run and logs the configuration as parameters,
      2. builds a model with ``create_model_fn`` and fits it with the callbacks
         from ``get_callbacks`` (``shuffle=False``),
      3. reloads the checkpoint written to
         ``{save_dir}/{model_type}/{model_type}_{run_index}_{run_id}.keras``,
      4. evaluates it with ``evaluate_and_log_metrics``,
      5. logs the model, its summary and two Bokeh HTML plots as artifacts.

    Args:
        optimizer_config: Dict with a ``"name"`` (used for logging) and a
            ``"create"`` callable that returns a new optimizer for every run.
        learning_rate: Only logged as a parameter here; the optimizer itself is
            produced by ``optimizer_config["create"]``.
        sequence_length: Only used for the description, parameters and tags; the
            windows themselves arrive pre-built in the ``X_*``/``y_*`` arrays.
        experiment_name: Stored as the ``experiment_name`` tag of every run.
        train_data, val_data: Forwarded to ``evaluate_and_log_metrics``.

    NOTE(review): ``last_index_log``, ``first_value_log`` and ``last_value_log``
    are read from notebook globals rather than passed in.
    NOTE(review): ``generate_description``, ``evaluate_predictions``,
    ``plot_predictions_bokeh`` and ``plot_training_history`` are defined in cells
    further down the notebook; those cells must be executed before this function
    is called.
    """
    # Generate the description from parameters
    description = generate_description(
        model_type=model_type,
        layers=len(units),
        units=units,
        activation=activation,
        norm=use_layer_norm,
        reg=use_regularization,
        dropout=dropout,
        seq_len=sequence_length,
        loss=loss,
        optimizer_name=optimizer_config["name"],
        extra=extra_name
    )

    for run_index in range(num_runs):
        with mlflow.start_run() as run:
            run_id = run.info.run_id
            model_name = f"{model_type}_{run_index}_{run_id}"
            checkpoint_path = os.path.join(save_dir, model_type, f"{model_name}.keras")

            model_dir = os.path.dirname(checkpoint_path)
            os.makedirs(model_dir, exist_ok=True)

            # Per-sample shape, i.e. everything after the batch axis.
            input_shape = X_train_data.shape[1:]

            # --- Logging setup ---
            # NOTE(review): `description` already begins with model_type, so the
            # model type appears twice in run_group.
            run_group = f"{model_type}_{description}"
            mlflow.set_tag("mlflow.runName", model_name)
            mlflow.set_tag("architecture_type", description)
            mlflow.set_tag("run_group", run_group)
            mlflow.set_tag("run_index", run_index)
            mlflow.set_tag("experiment_name", experiment_name)

            log_run_metadata(
                params={
                    "normalization_method": norm_type,
                    "extra_name": extra_name,
                    "input_shape": input_shape,
                    "sequence_length": sequence_length,
                    "features": feature_cols,
                    "activation": activation,
                    "optimizer": optimizer_config['name'],
                    "learning_rate": learning_rate,
                    "lossfn": loss,
                    "layers": len(units),
                    "units": "_".join(map(str, units)),
                    "batch_size": batch_size,
                    "epochs": epochs,
                    "use_layer_norm": use_layer_norm,
                    "use_regularization": use_regularization,
                    "dropout": dropout
                },
                tags={
                    "model": model_type,
                    "architecture": "_".join(map(str, units)),
                    "description": description,
                    "sequence_length": sequence_length
                }
            )

            # --- Model creation & training ---
            # A fresh optimizer instance is created for every run.
            model = create_model_fn(
                input_shape=input_shape,
                units=units,
                activation=activation,
                loss=loss,
                optimizer=optimizer_config['create'](),
                use_layer_norm=use_layer_norm,
                use_regularization=use_regularization,
                dropout=dropout
            )

            callbacks = get_callbacks(model_name, checkpoint_path)

            history = model.fit(
                X_train_data, y_train_data,
                validation_data=(X_val_data, y_val_data),
                epochs=epochs,
                batch_size=batch_size,
                callbacks=callbacks,
                shuffle=False,
                verbose=0
            )

            # --- Load best model ---
            # Evaluate the checkpointed model rather than the in-memory one.
            loaded_model = load_model(checkpoint_path)

            # --- Evaluation & logging ---
            y_test_real, y_pred_real = evaluate_and_log_metrics(
                loaded_model, norm_type, X_test_data, y_test_data,
                scaler, model_name, custom=custom, log_scaled=log_scaled,
                last_index=last_index_log, first_value=first_value_log, last_value=last_value_log, train_data=train_data, val_data=val_data
            )

            # Quick look at the last five de-normalized targets.
            print(y_test_real[-5:])

            # --- Artifact logging ---
            # Relative to the current working directory.
            artifact_dir = os.path.join("artifacts", model_type, run_id)
            os.makedirs(artifact_dir, exist_ok=True)

            # Log model
            mlflow.keras.log_model(loaded_model, artifact_path="best_model")

            # Save model summary
            model_summary_path = os.path.join(artifact_dir, "model_summary.txt")
            with open(model_summary_path, "w") as f:
                loaded_model.summary(print_fn=lambda x: f.write(x + "\n"))

            # Save predictions plot
            fig = plot_predictions_bokeh(y_test_real, y_pred_real)
            pred_plot_path = os.path.join(artifact_dir, "predictions_plot.html")
            save(fig, filename=pred_plot_path)

            # Save training plot
            training_plot = plot_training_history(history)
            history_plot_path = os.path.join(artifact_dir, "training_history_plot.html")
            save(training_plot, filename=history_plot_path)

            # Log all artifacts at once
            mlflow.log_artifacts(artifact_dir)

            print(f"[✓] Run {run_index + 1}/{num_runs} complete — {model_name}")

architecture - with_drop -
hyperparameter - optimizer - activation_fn - batch_size

Selected configuration:
- activation: silu
- learning rate: 0.002
- 1 layer, 16 neurons
- optimizer: Lamb with loss scaling
- batch size: 128
- regularization enabled


In [None]:
from keras.optimizers import Lamb, Lion,Adamax, LossScaleOptimizer

# Final GRU configuration: one layer of 16 units, SiLU activation, loss-scaled Lamb optimizer.
learning_rate = 0.002
run_experiment_group(
    model_type="GRU",
    num_runs=5,
    extra_name="",
    # NOTE(review): "final_verison" looks like a typo of "final_version"; it is stored as a
    # tag value, so renaming it would separate new runs from the ones already logged.
    experiment_name="final_verison",
    units=[16],
    batch_size=128,
    epochs=500,
    optimizer_config={
        # NOTE(review): "LosScaleOptimizer" (single "s") is the label logged to MLflow.
        "name": "LosScaleOptimizer-Lamb",
        # Factory, so that every run gets its own optimizer instance.
        "create": lambda: LossScaleOptimizer(Lamb(learning_rate))
    },
    learning_rate=learning_rate,
    loss="mse",
    activation="silu",
    use_layer_norm=False,
    use_regularization=True,
    dropout=0,
    sequence_length=sequence_length,
    feature_cols=feature_cols_custom,
    norm_type='custom_log_split_first',
    custom=True,
    log_scaled=True,
    scaler=custom_log_scaler,
    create_model_fn=create_gru_model,
    X_train_data=X_train_log_custom,
    y_train_data=y_train_log_custom,
    X_val_data=X_val_log_custom,
    y_val_data=y_val_log_custom,
    X_test_data=X_test_log_custom,
    y_test_data=y_test_log_custom,
    train_data=train_log_data_custom,
    val_data=val_log_data_custom,
)




Epoch 1: val_loss improved from inf to 0.68522, saving model to /content/drive/MyDrive/Colab Notebooks/Stock_Market_Prediction/checkpoints/GRU/GRU_0_10ab968352434ed8987c023d1a5ba9ae.keras

Epoch 2: val_loss improved from 0.68522 to 0.64770, saving model to /content/drive/MyDrive/Colab Notebooks/Stock_Market_Prediction/checkpoints/GRU/GRU_0_10ab968352434ed8987c023d1a5ba9ae.keras

Epoch 3: val_loss improved from 0.64770 to 0.61242, saving model to /content/drive/MyDrive/Colab Notebooks/Stock_Market_Prediction/checkpoints/GRU/GRU_0_10ab968352434ed8987c023d1a5ba9ae.keras

Epoch 4: val_loss improved from 0.61242 to 0.57943, saving model to /content/drive/MyDrive/Colab Notebooks/Stock_Market_Prediction/checkpoints/GRU/GRU_0_10ab968352434ed8987c023d1a5ba9ae.keras

Epoch 5: val_loss improved from 0.57943 to 0.54878, saving model to /content/drive/MyDrive/Colab Notebooks/Stock_Market_Prediction/checkpoints/GRU/GRU_0_10ab968352434ed8987c023d1a5ba9ae.keras

Epoch 6: val_loss improved from 0.5487



[5867.08007812 5930.85009766 5974.06982422 6040.04003906 6037.58984375]




  save(fig, filename=pred_plot_path)
  save(fig, filename=pred_plot_path)


  save(training_plot, filename=history_plot_path)
  save(training_plot, filename=history_plot_path)


[✓] Run 1/6 complete — GRU_0_10ab968352434ed8987c023d1a5ba9ae
🏃 View run GRU_0_10ab968352434ed8987c023d1a5ba9ae at: https://dagshub.com/bojte.csongor/stock_market_prediction_thesis.mlflow/#/experiments/2/runs/10ab968352434ed8987c023d1a5ba9ae
🧪 View experiment at: https://dagshub.com/bojte.csongor/stock_market_prediction_thesis.mlflow/#/experiments/2
🏃 View run GRU_1_b7eeb6a61d1844c89fe5da6297c6975d at: https://dagshub.com/bojte.csongor/stock_market_prediction_thesis.mlflow/#/experiments/2/runs/b7eeb6a61d1844c89fe5da6297c6975d
🧪 View experiment at: https://dagshub.com/bojte.csongor/stock_market_prediction_thesis.mlflow/#/experiments/2


KeyboardInterrupt: 

In [None]:
# Re-evaluate a checkpoint saved under CHECKPOINTS_PATH/GRU.
# NOTE(review): despite its name, `run_id` holds the checkpoint/model name
# ("GRU_<run index>_<MLflow run id>"), not a bare MLflow run id.
run_id = "GRU_0_10ab968352434ed8987c023d1a5ba9ae"
loaded_model = load_model(f"{CHECKPOINTS_PATH}/GRU/{run_id}.keras")


# NOTE(review): evaluate_and_log_metrics calls mlflow.log_metric, but no
# mlflow.start_run() is visible in this cell — confirm which run receives these metrics.
y_test_real, y_pred_real = evaluate_and_log_metrics(
                loaded_model, "custom_log_split_first", X_test_log_custom, y_test_log_custom,
                custom_log_scaler, model_name="Final GRU", custom=True, log_scaled=True,
                last_index=last_index_log, first_value=first_value_log, last_value=last_value_log, train_data=train_log_data_custom, val_data=val_log_data_custom
            )

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step

Model Performance Metrics:
--------------------------------------------------
MAPE: 0.6607%
RMSE: 43.18498075
MSE: 1864.94256241
MAE: 33.07525848
MPD (Maximum Percentage Deviation): 4.0411%

Point of Maximum Deviation (Index 304):
--------------------------------------------------
True Value: 5186.330078
Predicted Value: 5395.914673
Absolute Difference: 209.584595
Percentage Deviation: 4.04%


In [None]:
def generate_description(
    model_type: str,
    layers: int,
    units: list[int],
    activation: str,
    norm: bool,
    reg: bool,
    dropout: float,
    seq_len: int,
    loss: str,
    optimizer_name: str,
    extra: str = ""
) -> str:
    """Build an underscore-separated label that summarizes a model configuration.

    Layout:
    ``{model_type}_L{layers}_U{units joined by '-'}_{activation}[_norm][_reg]``
    ``[_drop{dropout}]_seq{seq_len}_{loss}_{optimizer_name}[_{extra}]``.
    The optional parts appear only when ``norm``/``reg`` are truthy,
    ``dropout > 0`` and ``extra`` is non-empty.
    """
    unit_spec = '-'.join(map(str, units))
    segments = [f"{model_type}", f"L{layers}", f"U{unit_spec}", f"{activation}"]

    if norm:
        segments.append("norm")
    if reg:
        segments.append("reg")
    if dropout > 0:
        segments.append(f"drop{dropout}")

    segments += [f"seq{seq_len}", f"{loss}", f"{optimizer_name}"]

    if extra:
        segments.append(f"{extra}")

    return "_".join(segments)

In [None]:
def calculate_mpd(y_true, y_pred):
    """Compute the Maximum Percentage Deviation (MPD) between truth and prediction.

    The per-point deviation is ``|y_true - y_pred| / max(|y_true|, eps) * 100``.
    Taking the magnitude of the denominator keeps the result finite and unbiased
    for negative or near-zero true values (the former ``y_true + eps`` divided by
    zero for ``y_true == -eps``).

    Args:
        y_true: Array-like of true values (any shape; flattened).
        y_pred: Array-like of predicted values (any shape; flattened).

    Returns:
        Dict with keys ``'mpd'`` (largest deviation in percent), ``'index'``
        (its flat position), ``'true_value'``, ``'pred_value'`` (the values at
        that position) and ``'all_deviations'`` (per-point deviations).

    Raises:
        ValueError: If ``y_true`` is empty.
    """
    # Convert inputs to flat numpy arrays if they aren't already
    y_true = np.array(y_true).flatten()
    y_pred = np.array(y_pred).flatten()

    if y_true.size == 0:
        raise ValueError("calculate_mpd requires at least one value")

    # Floor the denominator's magnitude to avoid division by zero
    epsilon = 1e-7
    denominator = np.maximum(np.abs(y_true), epsilon)
    percentage_deviations = np.abs(y_true - y_pred) / denominator * 100

    # Position and size of the worst deviation
    max_deviation_idx = np.argmax(percentage_deviations)
    max_deviation = percentage_deviations[max_deviation_idx]

    return {
        'mpd': max_deviation,
        'index': max_deviation_idx,
        'true_value': y_true[max_deviation_idx],
        'pred_value': y_pred[max_deviation_idx],
        'all_deviations': percentage_deviations
    }

In [None]:
def evaluate_predictions(model_name, y_true, y_pred, n_samples=None, should_print=False):
    """Compute MAPE, RMSE, MSE, MAE and MPD for a set of predictions.

    Args:
        model_name: Not used by the active code (only by a disabled
            ``save_model_metrics`` call that used to persist the metrics).
        y_true: Array of true values; flattened before use.
        y_pred: Array of predicted values; flattened before use.
        n_samples: When truthy and ``should_print`` is set, also print a table
            with the first ``n_samples`` predictions.
        should_print: Print a metrics report and the point of maximum deviation.

    Returns:
        Dict with keys ``'mape'`` (percent), ``'mse'``, ``'rmse'``, ``'mae'``,
        ``'mpd'``, ``'mpd_index'`` and ``'percentage_deviations'``.
    """
    # Work on flat copies so the caller's arrays are untouched
    predicted = y_pred.copy().flatten()
    actual = y_true.copy().flatten()

    mape = mean_absolute_percentage_error(actual, predicted) * 100
    rmse = root_mean_squared_error(actual, predicted)
    mse = mean_squared_error(actual, predicted)
    mae = mean_absolute_error(actual, predicted)

    # Maximum percentage deviation and where it occurs
    worst = calculate_mpd(actual, predicted)
    mpd = worst['mpd']
    mpd_index = worst['index']
    percentage_deviations = worst['all_deviations']

    results = {
        'mape': mape,
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'mpd': mpd,
        'mpd_index': mpd_index,
        'percentage_deviations': percentage_deviations}

    if not should_print:
        return results

    rule = "-" * 50

    # Summary metrics
    print("\nModel Performance Metrics:")
    print(rule)
    print(f"MAPE: {mape:.4f}%")
    print(f"RMSE: {rmse:.8f}")
    print(f"MSE: {mse:.8f}")
    print(f"MAE: {mae:.8f}")
    print(f"MPD (Maximum Percentage Deviation): {mpd:.4f}%")

    # Point of maximum deviation
    worst_true = actual[mpd_index]
    worst_pred = predicted[mpd_index]
    print(f"\nPoint of Maximum Deviation (Index {mpd_index}):")
    print(rule)
    print(f"True Value: {worst_true:.6f}")
    print(f"Predicted Value: {worst_pred:.6f}")
    print(f"Absolute Difference: {abs(worst_true - worst_pred):.6f}")
    print(f"Percentage Deviation: {percentage_deviations[mpd_index]:.2f}%")

    # Optional table of the first predictions
    if n_samples:
        print(f"\nFirst {n_samples} Predictions:")
        print(rule)
        print("Index    True Value    Predicted    Difference    % Deviation")
        print("-" * 65)
        for row in range(min(n_samples, len(actual))):
            diff = actual[row] - predicted[row]
            dev = percentage_deviations[row]
            print(f"{row:<8d} {actual[row]:11.6f}  {predicted[row]:11.6f}  {diff:11.6f}  {dev:11.2f}%")

    return results

In [None]:
def plot_training_history(history):
    """Plot training and validation loss per epoch on a log-scaled Bokeh figure.

    Args:
        history: Object whose ``history`` dict holds the ``'loss'`` and
            ``'val_loss'`` lists (as returned by ``model.fit``).

    Returns:
        The Bokeh figure, which is also shown in the notebook.
    """
    output_notebook()

    # Epochs are numbered from 1
    epoch_numbers = list(range(1, len(history.history['loss']) + 1))

    # A log axis cannot display non-positive values, so clamp them to a tiny floor
    floor = 1e-10
    train_curve = [max(val, floor) for val in history.history['loss']]
    val_curve = [max(val, floor) for val in history.history['val_loss']]

    loss_source = ColumnDataSource(data={
        'epoch': epoch_numbers,
        'train_loss': train_curve,
        'val_loss': val_curve
    })

    loss_figure = figure(
        title='Model Loss Over Time (Log Scale)',
        x_axis_label='Epoch',
        y_axis_label='Loss (log)',
        width=600,
        height=400,
        y_axis_type="log",
    )

    # Tooltip with both losses for the hovered epoch
    loss_figure.add_tools(HoverTool(tooltips=[
        ('Epoch', '@epoch'),
        ('Training Loss', '@train_loss{0.000}'),
        ('Validation Loss', '@val_loss{0.000}')
    ]))

    # One line per loss curve
    loss_figure.line('epoch', 'train_loss', line_color=Category10[3][0],
                     line_width=2, source=loss_source, legend_label='Training Loss')
    loss_figure.line('epoch', 'val_loss', line_color=Category10[3][1],
                     line_width=2, source=loss_source, legend_label='Validation Loss')

    # Clicking a legend entry hides its line
    loss_figure.legend.click_policy = "hide"
    loss_figure.legend.location = "top_right"
    loss_figure.grid.grid_line_alpha = 0.3

    show(loss_figure)
    return loss_figure

In [None]:
def plot_predictions_bokeh(y_test, y_pred, n_samples=None):
    """Plot actual against predicted values as two lines on a Bokeh figure.

    Inputs are flattened first, so ``(n, 1)`` column vectors (as produced by the
    non-custom branch of ``evaluate_and_log_metrics``) and 1-D arrays are handled
    alike and every data-source column is a plain 1-D array.

    Args:
        y_test: Array-like of actual values.
        y_pred: Array-like of predicted values.
        n_samples: Number of leading points to plot; all points when ``None``.

    Returns:
        The Bokeh figure, which is also shown in the notebook.
    """
    output_notebook()

    # Flatten so that the data source never receives 2-D columns
    actual = np.asarray(y_test).ravel()
    predicted = np.asarray(y_pred).ravel()

    if n_samples is None:
        n_samples = len(actual)
    else:
        n_samples = min(n_samples, len(actual))

    # Prepare data
    actual = actual[:n_samples]
    predicted = predicted[:n_samples]
    source = ColumnDataSource(data={
        'index': list(range(n_samples)),
        'actual': actual,
        'predicted': predicted,
        'error': actual - predicted
    })

    # Create time series plot
    p1 = figure(title='Actual vs Predicted Values',
                x_axis_label='Sample Index',
                y_axis_label='Value',
                width=800, height=400)

    # Add hover tool
    hover = HoverTool(tooltips=[
        ('Index', '@index'),
        ('Actual', '@actual{0.000}'),
        ('Predicted', '@predicted{0.000}'),
        ('Error', '@error{0.000}')
    ])
    p1.add_tools(hover)

    # Plot lines
    p1.line('index', 'actual', line_color=Category10[3][0],
            line_width=2, source=source, legend_label='Actual')
    p1.line('index', 'predicted', line_color=Category10[3][1],
            line_width=2, source=source, legend_label='Predicted')

    # Show plots
    show(row(p1))

    return p1