In [None]:
# ===============================================================
# 1) IMPORTS
# ===============================================================
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    r2_score, mean_absolute_percentage_error,
    mean_squared_error, mean_absolute_error
)

import tensorflow as tf
import tensorflow_probability as tfp
import openpyxl  # engine for Excel I/O

from google.colab import drive

In [None]:
# ===============================================================
# 2) MOUNT GOOGLE DRIVE
# ===============================================================
# The input workbook and every output path used in this notebook are
# located under this mount point, so it must be mounted before any I/O.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# ===============================================================
# 3) LOAD DATA
# ===============================================================
# Read the workbook from Drive (pandas reads the first sheet by default).
file_path = '/content/drive/My Drive/Objective1/FFP_Data.xlsx'
data = pd.read_excel(io=file_path)

# Column convention used here: the LAST column is the regression target,
# every column before it is an input feature.
features_df = data.iloc[:, :-1]
target_series = data.iloc[:, -1]

X = features_df.values    # 2-D array: rows x feature columns
y = target_series.values  # 1-D array: one target value per row

In [None]:
# ===============================================================
# 4) SETUP: OUTPUT PATHS, LOSS, AND CONTAINERS
# ===============================================================
iterations = 30  # number of random-subsampling runs

# Result workbooks (one metrics summary, plus per-iteration train/test predictions).
metrics_file_path = '/content/drive/My Drive/Objective1/Maxlike_ANN/Metrics_Maxlike_ANN_2.xlsx'
training_predictions_file_path = '/content/drive/My Drive/Objective1/Maxlike_ANN/Training_Predictions_Maxlike_ANN.xlsx'
testing_predictions_file_path  = '/content/drive/My Drive/Objective1/Maxlike_ANN/Testing_Predictions_Maxlike_ANN.xlsx'

# Create the output folder(s) up front. pandas does not create missing parent
# directories when writing Excel files, and the metrics workbook is only written
# after all training has finished -- failing there would waste the whole run.
for _output_path in (
    metrics_file_path,
    training_predictions_file_path,
    testing_predictions_file_path,
):
    os.makedirs(os.path.dirname(_output_path), exist_ok=True)

tfd = tfp.distributions


def negloglik(y_true, rv_y):
    """Negative log-likelihood loss for a model that outputs a distribution.

    Args:
        y_true: observed target values.
        rv_y: the distribution object produced by the model's final layer
            (anything exposing ``log_prob``).

    Returns:
        ``-rv_y.log_prob(y_true)``, i.e. the per-sample negative log-likelihood.
    """
    return -rv_y.log_prob(y_true)


# Filled by the training loop: one metrics dict and one training-loss curve per iteration.
metrics = []
all_losses = []

In [None]:
# ===============================================================
# 5) TRAIN/EVAL LOOP (REPEATED RANDOM SUBSAMPLING)
# ===============================================================
# Per iteration: 70/15/15 train/val/test split -> scale features on the training
# split -> fit a small network whose output is a Normal distribution -> record
# point-prediction metrics and per-sample predictive mean / std / 95% interval.

EPOCHS = 1000         # training epochs per iteration
LEARNING_RATE = 0.01  # Adam step size
Z_95 = 1.96           # two-sided 95% quantile of the standard normal


def _build_model(n_features):
    """Build and compile the probabilistic regressor for ``n_features`` inputs.

    Two ReLU hidden layers (30 and 20 units) feed a 2-unit linear layer. Its first
    output is used as the Normal mean; its second output goes through a scaled
    softplus plus a 1e-3 floor so the standard deviation is always positive.
    The model is compiled with Adam and the ``negloglik`` loss.
    """
    net = tf.keras.Sequential([
        tf.keras.layers.Dense(30, activation='relu', input_shape=(n_features,)),
        tf.keras.layers.Dense(20, activation='relu'),
        tf.keras.layers.Dense(2),  # params for loc & (pre-activation) scale
        tfp.layers.DistributionLambda(
            lambda t: tfd.Normal(
                loc=t[..., :1],
                scale=1e-3 + tf.math.softplus(0.01 * t[..., 1:])
            )
        ),
    ])
    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss=negloglik
    )
    return net


def _summarize_distribution(dist):
    """Return flat ``(mean, std, lower, upper)`` arrays for a predictive distribution.

    ``lower``/``upper`` are the mean -/+ ``Z_95`` standard deviations.
    """
    mean = dist.mean().numpy().flatten()
    std = dist.stddev().numpy().flatten()
    return mean, std, mean - Z_95 * std, mean + Z_95 * std


def _results_frame(actual, mean, std, lower, upper):
    """Assemble the per-sample prediction table written to each Excel sheet."""
    return pd.DataFrame({
        'Actual': actual.flatten(),
        'Predicted_Mean': mean,
        'Predicted_StdDev': std,
        'Lower_95CI': lower,
        'Upper_95CI': upper
    })


# Start from empty containers so re-running this cell on its own does not
# append a second set of rows to the results of a previous run.
metrics = []
all_losses = []

with pd.ExcelWriter(training_predictions_file_path, engine='openpyxl') as train_writer, \
     pd.ExcelWriter(testing_predictions_file_path,  engine='openpyxl') as test_writer:

    for iteration in range(iterations):
        print(f'Iteration {iteration + 1}/{iterations}')

        # Release the previous iteration's Keras state so memory does not grow
        # across runs, then seed TensorFlow so weight initialisation and batch
        # shuffling are repeatable (the data splits below are already seeded).
        tf.keras.backend.clear_session()
        tf.random.set_seed(iteration)

        # -----------------------------
        # 5.1 Splits: Train / Val / Test
        # -----------------------------
        # 30% is held out, then halved into validation and test.
        X_train, X_temp, y_train, y_temp = train_test_split(
            X, y, test_size=0.30, random_state=iteration
        )
        X_val, X_test, y_val, y_test = train_test_split(
            X_temp, y_temp, test_size=0.50, random_state=iteration
        )

        # -----------------------------
        # 5.2 Scale features (fit on train)
        # -----------------------------
        # Fitting on the training split only keeps val/test statistics out of training.
        scaler = StandardScaler()
        X_train_s = scaler.fit_transform(X_train)
        X_val_s   = scaler.transform(X_val)
        X_test_s  = scaler.transform(X_test)

        # -----------------------------
        # 5.3 / 5.4 Build, compile & train the probabilistic model
        # -----------------------------
        model = _build_model(X_train_s.shape[1])

        history = model.fit(
            X_train_s, y_train,
            epochs=EPOCHS,
            verbose=False,
            validation_data=(X_val_s, y_val)
        )

        # Track loss history (training loss only)
        all_losses.append(history.history['loss'])

        # -----------------------------
        # 5.5 Plot training/validation loss
        # -----------------------------
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.plot(history.history['loss'], label='Training Loss')
        ax.plot(history.history['val_loss'], label='Validation Loss')
        # \u2014 is an em dash; written as an escape so it cannot be mangled by
        # a file-encoding round trip again.
        ax.set_title(f'Loss vs Epoch \u2014 Iteration {iteration + 1}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Negative Log-Likelihood')
        ax.legend()
        ax.grid(True)
        plt.show()
        plt.close(fig)  # one figure per iteration; close it to avoid accumulating them

        # -----------------------------
        # 5.6 Predictions (mean, std, 95% CI)
        # -----------------------------
        yhat_train = model(X_train_s)
        yhat_test  = model(X_test_s)

        mean_train, std_train, lower_train, upper_train = _summarize_distribution(yhat_train)
        mean_test,  std_test,  lower_test,  upper_test  = _summarize_distribution(yhat_test)

        # -----------------------------
        # 5.7 Metrics (train & test)
        # -----------------------------
        # Metrics score the predictive mean as a point estimate.
        # NOTE: sklearn's MAPE is a fraction (0.05 == 5%), not a percentage.
        metrics.append({
            'Iteration': iteration + 1,
            'R2_Train':  r2_score(y_train, mean_train),
            'R2_Test':   r2_score(y_test,  mean_test),
            'MAPE_Train': mean_absolute_percentage_error(y_train, mean_train),
            'MAPE_Test':  mean_absolute_percentage_error(y_test,  mean_test),
            'MAE_Train':  mean_absolute_error(y_train, mean_train),
            'MAE_Test':   mean_absolute_error(y_test,  mean_test),
            'RMSE_Train': np.sqrt(mean_squared_error(y_train, mean_train)),
            'RMSE_Test':  np.sqrt(mean_squared_error(y_test,  mean_test)),
        })

        # -----------------------------
        # 5.8 Save per-iteration predictions to Excel
        # -----------------------------
        # One sheet per iteration in each workbook.
        train_results = _results_frame(y_train, mean_train, std_train, lower_train, upper_train)
        train_results.to_excel(train_writer, sheet_name=f'Iteration_{iteration + 1}', index=False)

        test_results = _results_frame(y_test, mean_test, std_test, lower_test, upper_test)
        test_results.to_excel(test_writer, sheet_name=f'Iteration_{iteration + 1}', index=False)

In [None]:
# ===============================================================
# 6) SAVE METRICS SUMMARY
# ===============================================================
# One row per iteration (each dict collected in `metrics` becomes a row);
# the DataFrame index is not written to the workbook.
metrics_df = pd.DataFrame(data=metrics)
metrics_df.to_excel(excel_writer=metrics_file_path, index=False)