In [None]:
# ===============================================================
# 1) IMPORTS
# ===============================================================
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    r2_score, mean_absolute_percentage_error,
    mean_squared_error, mean_absolute_error
)

import tensorflow as tf
import tensorflow_probability as tfp
from google.colab import drive
import openpyxl

In [None]:
# ===============================================================
# 2) MOUNT GOOGLE DRIVE
# ===============================================================
# Mount Google Drive at /content/drive; every path used by later cells
# (output folder and input workbook) lives under this mount point.
drive.mount('/content/drive')

In [None]:
# ===============================================================
# 3) DEFINE FILE PATHS AND ENSURE DIRECTORIES EXIST
# ===============================================================
# Output folder on the mounted Drive; created on first run.
base_dir = '/content/drive/My Drive/Optimization of Monte Carlo Dropout-ANN'
os.makedirs(base_dir, exist_ok=True)

# One workbook for the per-iteration metrics, one each for train/test predictions.
metrics_file_path = f'{base_dir}/Metrics_MCD_ANN_Live.xlsx'
training_predictions_file_path = f'{base_dir}/Training_Predictions_MCD_ANN_Live.xlsx'
testing_predictions_file_path  = f'{base_dir}/Testing_Predictions_MCD_ANN_Live.xlsx'


def _write_workbook_if_missing(workbook_path, frame, **to_excel_options):
    """Create `workbook_path` from `frame` unless a file already exists there.

    Existing workbooks are left untouched so results from earlier runs are
    not wiped when this cell is re-executed.
    """
    if os.path.exists(workbook_path):
        return
    with pd.ExcelWriter(workbook_path, engine='openpyxl') as writer:
        frame.to_excel(writer, index=False, **to_excel_options)


# ---------------------------------------------------------------
# Create empty Excel placeholders
# ---------------------------------------------------------------
# The prediction workbooks are later opened in append mode, so they must
# already exist; an empty 'Init' sheet is enough to make them valid files.
for _predictions_path in (training_predictions_file_path, testing_predictions_file_path):
    _write_workbook_if_missing(_predictions_path, pd.DataFrame(), sheet_name='Init')

# The metrics workbook starts as a header-only table; rows are added per iteration.
_metric_columns = [
    'Iteration', 'R2_Train', 'R2_Test',
    'MAPE_Train', 'MAPE_Test',
    'MAE_Train', 'MAE_Test',
    'RMSE_Train', 'RMSE_Test'
]
_write_workbook_if_missing(metrics_file_path, pd.DataFrame(columns=_metric_columns))

In [None]:
# ===============================================================
# 4) LOAD DATA
# ===============================================================
# Input workbook on the mounted Drive.
data_path = '/content/drive/My Drive/Objective1/FFP_Data.xlsx'
data = pd.read_excel(data_path)

# Convention used here: the last column is the target, all preceding
# columns are the input features.
feature_columns = data.iloc[:, :-1]
target_column = data.iloc[:, -1]
X = feature_columns.values
y = target_column.values

In [None]:
# ===============================================================
# 5) SETTINGS
# ===============================================================
# Number of outer repetitions of the split/train/evaluate loop.
iterations = 30
# Number of stochastic forward passes per call to mc_predict_full_distribution.
mc_iterations = 5000
# Short alias for the TensorFlow Probability distributions module (used by build_model).
tfd = tfp.distributions

In [None]:
# ===============================================================
# 6) NEGATIVE LOG-LIKELIHOOD LOSS
# ===============================================================
def NLL(y_true, distr):
    """Negative log-likelihood of `y_true` under the predicted distribution.

    `distr` is whatever the model outputs; it only needs a `log_prob`
    method. The result is the element-wise negated log-probability, which
    is what the optimiser minimises.
    """
    log_likelihood = distr.log_prob(y_true)
    return -log_likelihood

In [None]:
# ===============================================================
# 7) MODEL BUILDER (MCD-ANN)
# ===============================================================
def build_model(n_features=None, hidden_units=(30, 20), dropout_rate=0.1, learning_rate=0.009):
    """Define and compile a Monte Carlo Dropout ANN with a Normal output.

    Parameters
    ----------
    n_features : int, optional
        Width of the input layer. When omitted, falls back to the
        notebook-level feature matrix (`X.shape[1]`), which is what the
        original zero-argument call relied on implicitly.
    hidden_units : sequence of int
        Units of each hidden Dense layer; every hidden layer is followed by
        a Dropout layer. The default reproduces the original 30 -> 20 stack.
    dropout_rate : float
        Dropout rate applied after every hidden layer.
    learning_rate : float
        Learning rate passed to the Adam optimiser.

    Returns
    -------
    tf.keras.Model
        Compiled model whose output is a `tfd.Normal` distribution, trained
        with the `NLL` loss.
    """
    if n_features is None:
        # Backward-compatible default: size the input from the global data.
        n_features = X.shape[1]

    inputs = tf.keras.Input(shape=(n_features,))
    x = inputs
    for units in hidden_units:
        x = tf.keras.layers.Dense(units, activation='relu')(x)
        # training=True keeps dropout active at prediction time too; the
        # repeated stochastic forward passes are what MC Dropout samples.
        x = tf.keras.layers.Dropout(dropout_rate)(x, training=True)

    # Two raw outputs per sample: column 0 -> mean, column 1 -> scale parameter.
    x = tf.keras.layers.Dense(2)(x)
    outputs = tfp.layers.DistributionLambda(
        lambda t: tfd.Normal(
            loc=t[..., :1],
            # softplus keeps the scale non-negative; the 1e-3 offset puts a
            # floor under it so the Normal never degenerates to zero width.
            scale=1e-3 + tf.math.softplus(0.05 * t[..., 1:])
        )
    )(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss=NLL)
    return model

In [None]:
# ===============================================================
# 8) MONTE CARLO PREDICTION FUNCTION
# ===============================================================
def mc_predict_full_distribution(X_input, mc_model=None, n_passes=None):
    """Generate mean, epistemic & aleatoric stds, and 95% CI via MC sampling.

    Parameters
    ----------
    X_input : array-like
        Inputs passed unchanged to the model on every forward pass.
    mc_model : callable, optional
        Model to sample from. Calling it must return an object exposing
        `mean()` and `stddev()`, each with a `.numpy()` method. Defaults to
        the notebook-level `model` so existing one-argument calls still work.
    n_passes : int, optional
        Number of stochastic forward passes. Defaults to the notebook-level
        `mc_iterations`.

    Returns
    -------
    tuple of np.ndarray
        `(mean_pred, epistemic_std, aleatoric_std, lower_95, upper_95)`, each
        reduced over the forward passes (axis 0).

    Raises
    ------
    ValueError
        If the number of passes is smaller than 1 (the statistics would be
        computed over an empty set and come back as NaN).
    """
    # Resolve explicit arguments first, then fall back to the globals the
    # original implementation read implicitly.
    net = model if mc_model is None else mc_model
    n_passes = int(mc_iterations if n_passes is None else n_passes)
    if n_passes < 1:
        raise ValueError(f"n_passes must be a positive integer, got {n_passes}")

    preds_mean, preds_std = [], []
    for _ in range(n_passes):
        # Each call yields a different distribution when the model is
        # stochastic at inference time (e.g. dropout kept active).
        pred = net(X_input)
        preds_mean.append(pred.mean().numpy().flatten())
        preds_std.append(pred.stddev().numpy().flatten())

    preds_mean = np.array(preds_mean)   # shape: (n_passes, n_outputs)
    preds_std = np.array(preds_std)     # shape: (n_passes, n_outputs)

    # One draw from every per-pass Normal, done in a single vectorised call
    # instead of one call per pass.
    full_samples = np.random.normal(loc=preds_mean, scale=preds_std)

    mean_pred = np.mean(preds_mean, axis=0)
    # Spread of the predicted means across passes -> model (epistemic) uncertainty.
    epistemic_std = np.std(preds_mean, axis=0)
    # Root of the average predicted variance -> data (aleatoric) uncertainty.
    aleatoric_std = np.sqrt(np.mean(preds_std ** 2, axis=0))
    # Empirical 95% interval of the sampled predictive distribution.
    lower_95 = np.percentile(full_samples, 2.5, axis=0)
    upper_95 = np.percentile(full_samples, 97.5, axis=0)

    return mean_pred, epistemic_std, aleatoric_std, lower_95, upper_95

In [None]:
# ===============================================================
# 9) TRAINING LOOP WITH MCD PREDICTIONS AND EXCEL EXPORT
# ===============================================================
total_start_time = time.time()

for iteration in tqdm(range(iterations), desc="Iterations"):
    iter_start_time = time.time()

    # -----------------------------
    # Reset framework state and seed RNGs
    # -----------------------------
    # A fresh model is built on every iteration; clearing the Keras session
    # first releases the global state left behind by the previous model so
    # memory does not grow over the run.
    tf.keras.backend.clear_session()
    # The data split is already seeded with `iteration`; seed NumPy (used for
    # the predictive sampling) and TensorFlow the same way so a re-run is
    # repeatable. Best effort: some GPU kernels may remain nondeterministic.
    np.random.seed(iteration)
    tf.random.set_seed(iteration)

    # -----------------------------
    # Split data (70/15/15)
    # -----------------------------
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=iteration)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=iteration)

    # -----------------------------
    # Normalize features
    # -----------------------------
    # Fit the scaler on the training split only, then reuse it for the
    # validation and test splits so no held-out statistics leak into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    # -----------------------------
    # Build and train model
    # -----------------------------
    # NOTE: the name `model` is read as a global by mc_predict_full_distribution.
    model = build_model()
    # Validation data is only monitored here; it does not stop training early.
    model.fit(X_train, y_train, epochs=1000, verbose=False, validation_data=(X_val, y_val))

    # -----------------------------
    # Predict with full MC sampling
    # -----------------------------
    mean_train, epi_train, alea_train, low_train, up_train = mc_predict_full_distribution(X_train)
    mean_test, epi_test, alea_test, low_test, up_test = mc_predict_full_distribution(X_test)

    # -----------------------------
    # Save predictions (train/test)
    # -----------------------------
    # One sheet per iteration; 'replace' makes re-running an iteration
    # overwrite its own sheet instead of failing or duplicating it.
    train_df = pd.DataFrame({
        'Actual': y_train.flatten(),
        'Predicted_Mean': mean_train,
        'Epistemic_Std': epi_train,
        'Aleatoric_Std': alea_train,
        'Lower_95CI': low_train,
        'Upper_95CI': up_train
    })
    with pd.ExcelWriter(training_predictions_file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        train_df.to_excel(writer, sheet_name=f'Iter_{iteration+1}', index=False)

    test_df = pd.DataFrame({
        'Actual': y_test.flatten(),
        'Predicted_Mean': mean_test,
        'Epistemic_Std': epi_test,
        'Aleatoric_Std': alea_test,
        'Lower_95CI': low_test,
        'Upper_95CI': up_test
    })
    with pd.ExcelWriter(testing_predictions_file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        test_df.to_excel(writer, sheet_name=f'Iter_{iteration+1}', index=False)

    # -----------------------------
    # Save metrics
    # -----------------------------
    # Key order must match the header row of the metrics workbook because the
    # row is written positionally (header=False).
    metrics_row = pd.DataFrame([{
        'Iteration': iteration + 1,
        'R2_Train': r2_score(y_train, mean_train),
        'R2_Test':  r2_score(y_test, mean_test),
        'MAPE_Train': mean_absolute_percentage_error(y_train, mean_train),
        'MAPE_Test':  mean_absolute_percentage_error(y_test, mean_test),
        'MAE_Train':  mean_absolute_error(y_train, mean_train),
        'MAE_Test':   mean_absolute_error(y_test, mean_test),
        'RMSE_Train': np.sqrt(mean_squared_error(y_train, mean_train)),
        'RMSE_Test':  np.sqrt(mean_squared_error(y_test, mean_test)),
    }])

    # Upsert instead of blind append: if the workbook already holds a row for
    # this iteration (e.g. from a previous run of the notebook) overwrite it
    # in place, otherwise write to the first free row. This keeps the metrics
    # file consistent with the prediction sheets, which are replaced on re-run.
    existing_metrics = pd.read_excel(metrics_file_path)
    if 'Iteration' in existing_metrics.columns:
        same_iteration_rows = np.flatnonzero(
            existing_metrics['Iteration'].to_numpy() == iteration + 1
        )
    else:
        same_iteration_rows = np.array([], dtype=int)
    if same_iteration_rows.size:
        target_row = int(same_iteration_rows[0])
    else:
        target_row = len(existing_metrics)

    with pd.ExcelWriter(metrics_file_path, engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
        # +1 skips the header row, which occupies sheet row 0.
        metrics_row.to_excel(writer, index=False, header=False, startrow=target_row + 1)

    # -----------------------------
    # Timing info
    # -----------------------------
    iter_time = time.time() - iter_start_time
    print(f"Iteration {iteration+1} took {iter_time:.2f} seconds")

    if iteration == 0:
        # Rough projection: assume every iteration costs about as much as the first.
        est = iter_time * iterations
        print(f"Estimated total time: {est/60:.2f} minutes")

In [None]:
# ===============================================================
# 10) RUNTIME SUMMARY
# ===============================================================
# Wall-clock duration of the whole training loop, measured from the
# timestamp taken just before the loop started.
total_end_time = time.time()
elapsed_minutes = (total_end_time - total_start_time) / 60
print(f"Actual total time: {elapsed_minutes:.2f} minutes")