<a href="https://colab.research.google.com/github/Parviz-Tafazzoli/Probabilistic-Models-for-FFP-Interpretation/blob/main/Deterministic_MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===============================================================
# 1. IMPORT LIBRARIES
# ===============================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    r2_score, mean_absolute_percentage_error,
    mean_absolute_error, mean_squared_error
)
from google.colab import drive

In [None]:
# ===============================================================
# 2. MOUNT GOOGLE DRIVE
# ===============================================================
# Mounts Google Drive at /content/drive; the dataset path and the output
# paths used further down in this notebook all live beneath that mount point.
# NOTE(review): `drive` is also imported in the imports cell; the repeat here
# presumably lets this cell be run on its own - confirm before removing.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# ===============================================================
# 3. LOAD DATASET
# ===============================================================
# Read the Excel workbook from the mounted Drive into a DataFrame.
file_path = '/content/drive/My Drive/Objective1/ANN/FFP_Data.xlsx'
data = pd.read_excel(file_path)

# The last column is taken as the target; every column before it is a
# predictor. Both are extracted as plain arrays via `.values`.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

In [None]:
# ===============================================================
# 4. INITIALISE STORAGE LISTS AND PARAMETERS
# ===============================================================
# Number of random subsampling iterations.
num_iterations = 30

# Accumulators filled once per iteration; each name gets its own empty list.
train_metrics, test_metrics = [], []
train_actual_pred, test_actual_pred = [], []
loss_history = []

# Reference timestamp used later to report the overall runtime.
overall_start_time = time.time()

In [None]:
# ===============================================================
# 5. MODEL TRAINING LOOP (REPEATED RANDOM SUBSAMPLING)
# ===============================================================
def _regression_metrics(y_true, y_pred):
    """Return [R2, MAPE, MAE, RMSE] for one set of predictions.

    MAPE is whatever sklearn's `mean_absolute_percentage_error` returns
    (a fraction, not a value scaled to 0-100).
    """
    return [
        r2_score(y_true, y_pred),
        mean_absolute_percentage_error(y_true, y_pred),
        mean_absolute_error(y_true, y_pred),
        np.sqrt(mean_squared_error(y_true, y_pred)),
    ]


for i in range(num_iterations):
    iteration_start_time = time.time()
    print(f"\nStarting iteration {i+1}/{num_iterations}...")

    # A fresh model is built on every pass; clearing Keras' global state
    # first keeps memory from growing across the iterations.
    tf.keras.backend.clear_session()

    # -----------------------------------------------------------
    # 5.1 Split data into train, validation, and test sets
    # -----------------------------------------------------------
    # 15 % is held out for testing; 0.1765 of the remaining 85 % is
    # ~15 % of the full data, giving roughly a 70/15/15 split.
    # random_state=None means a new random split on every iteration.
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=0.15, random_state=None
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=0.1765, random_state=None
    )

    # -----------------------------------------------------------
    # 5.2 Standardize input features
    # -----------------------------------------------------------
    # The scaler is fitted on the training split only and then applied to
    # validation/test, so no statistics leak from held-out data.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)

    # -----------------------------------------------------------
    # 5.3 Define Neural Network architecture
    # -----------------------------------------------------------
    # Keras classes are reached through the already-imported `tf` module;
    # the bare names Sequential/Dense/Adam are not imported in this file.
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(X_train_scaled.shape[1],)),
        tf.keras.layers.Dense(30, activation='relu'),
        tf.keras.layers.Dense(20, activation='relu'),
        tf.keras.layers.Dense(1)
    ])

    # -----------------------------------------------------------
    # 5.4 Compile and train the model
    # -----------------------------------------------------------
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    # The validation split is only monitored during training; no callbacks
    # are passed, so all 1000 epochs always run.
    history = model.fit(
        X_train_scaled, y_train,
        validation_data=(X_val_scaled, y_val),
        epochs=1000,
        verbose=0
    )

    # Keep the per-epoch loss/metric curves for the plotting cell.
    loss_history.append(history.history)

    # -----------------------------------------------------------
    # 5.5 Make predictions
    # -----------------------------------------------------------
    y_train_pred = model.predict(X_train_scaled, verbose=0)
    y_test_pred = model.predict(X_test_scaled, verbose=0)

    # -----------------------------------------------------------
    # 5.6 Compute metrics
    # -----------------------------------------------------------
    train_r2, train_mape, train_mae, train_rmse = _regression_metrics(
        y_train, y_train_pred
    )
    test_r2, test_mape, test_mae, test_rmse = _regression_metrics(
        y_test, y_test_pred
    )

    # -----------------------------------------------------------
    # 5.7 Store results
    # -----------------------------------------------------------
    train_metrics.append([train_r2, train_mape, train_mae, train_rmse])
    test_metrics.append([test_r2, test_mape, test_mae, test_rmse])

    # Predictions are flattened so each frame has two plain 1-D columns.
    train_actual_pred.append(pd.DataFrame({
        'Actual': y_train.flatten(),
        'Predicted': y_train_pred.flatten()
    }))
    test_actual_pred.append(pd.DataFrame({
        'Actual': y_test.flatten(),
        'Predicted': y_test_pred.flatten()
    }))

    # -----------------------------------------------------------
    # 5.8 Report iteration time
    # -----------------------------------------------------------
    iteration_time = time.time() - iteration_start_time
    print(f"Iteration {i+1} completed in {iteration_time:.2f} seconds.")

In [None]:
# ===============================================================
# 6. TIMING SUMMARY
# ===============================================================
# Wall-clock time since `overall_start_time` was recorded, and that
# total divided evenly over the configured number of iterations.
total_time = time.time() - overall_start_time
avg_iteration_time = total_time / num_iterations

# Both lines go out through one print call, separated by a newline.
print(
    f"\nAverage time per iteration: {avg_iteration_time:.2f} seconds",
    f"Total runtime: {total_time:.2f} seconds",
    sep="\n",
)

In [None]:
# ===============================================================
# 7. SAVE RESULTS TO EXCEL FILES
# ===============================================================
def _write_iteration_sheets(path, frames):
    """Write each DataFrame in *frames* to its own 'Iteration_<k>' sheet of *path*."""
    with pd.ExcelWriter(path, engine='openpyxl') as writer:
        for sheet_no, frame in enumerate(frames, start=1):
            frame.to_excel(writer, sheet_name=f'Iteration_{sheet_no}', index=False)


# All outputs go to the same Drive folder; it is defined once here.
output_dir = '/content/drive/My Drive/Objective1/ANN'
metric_names = ['R2', 'MAPE', 'MAE', 'RMSE']

# (1) Metrics summary: one row per iteration, train columns then test columns.
# Both halves get explicit column names and are joined side by side, rather
# than assigning an integer-labelled frame onto new columns.
metrics_df = pd.concat(
    [
        pd.DataFrame(train_metrics, columns=[f'Train {m}' for m in metric_names]),
        pd.DataFrame(test_metrics, columns=[f'Test {m}' for m in metric_names]),
    ],
    axis=1,
)
metrics_file = f'{output_dir}/metrics_MLP.xlsx'
metrics_df.to_excel(metrics_file, index=False)

# (2) Train actual vs predicted: one sheet per iteration
train_actual_pred_file = f'{output_dir}/train_actual_vs_pred_MLP.xlsx'
_write_iteration_sheets(train_actual_pred_file, train_actual_pred)

# (3) Test actual vs predicted: one sheet per iteration
test_actual_pred_file = f'{output_dir}/test_actual_vs_pred_MLP.xlsx'
_write_iteration_sheets(test_actual_pred_file, test_actual_pred)

print(f'\nMetrics saved to: {metrics_file}')
print(f'Training actual vs predicted saved to: {train_actual_pred_file}')
print(f'Testing actual vs predicted saved to: {test_actual_pred_file}')

In [None]:
# ===============================================================
# 8. PLOT TRAINING & VALIDATION LOSS CURVES
# ===============================================================
# One figure per iteration. The loop variables have their own names so the
# `history` object left behind by the training cell is not overwritten with
# a plain dict.
for run_no, run_history in enumerate(loss_history, start=1):
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(run_history['loss'], label='Training Loss')
    ax.plot(run_history['val_loss'], label='Validation Loss')
    ax.set_title(f'Iteration {run_no} - Loss vs Epochs')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss (MSE)')
    ax.legend()
    ax.grid(True)
    plt.show()
    # Release the figure explicitly so they do not accumulate when the
    # backend keeps figures open after show().
    plt.close(fig)