In [1]:
# === 0. Imports and Seed Setup ===
import os, random
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

# Set reproducibility
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)

# === 1. Load and Preprocess Data ===
df = pd.read_csv("combined_feedstocks_dataset.csv")

df.drop(columns=[
    'syn_H2', 'syn_CO', 'syn_CO2', 'syn_CH4', 'syn_C2Hn', 'syn_N2',
    'syn_LHV', 'syn_tar_content', 'syn_yield', 'syn_char_yield'
], errors='ignore', inplace=True)

groups = df["Iteration"]
df.drop(columns=["Feedstock_ID", "Year"], inplace=True)

X = df.drop(columns=["ANNUALENERGY_H2_kwh", "Iteration"]).values
y = df[["ANNUALENERGY_H2_kwh"]].values

for i in range(X.shape[1]):
    if np.isnan(X[:, i]).any():
        X[:, i][np.isnan(X[:, i])] = np.nanmedian(X[:, i])

scaler_X = MinMaxScaler()
X = scaler_X.fit_transform(X)

gss = GroupShuffleSplit(n_splits=1, test_size=0.3, random_state=42)
train_idx, val_idx = next(gss.split(X, y, groups))
X_train, X_val = X[train_idx], X[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

# === 2. Define Heavy Grid Combos (v9) ===
combos = [
    ([238, 238, 238], 'relu', 0.05, 0.0005),
    ([256, 128, 64], 'relu', 0.05, 0.0005),
    ([128, 128, 64, 32], 'relu', 0.05, 0.0005),
    ([238, 128, 64, 32], 'relu', 0.1, 0.0005),
    ([238, 238, 128, 64, 32], 'relu', 0.05, 0.0001)
]

results = []

def build_model(neurons, activation, dropout, l2_strength):
    model = Sequential()
    model.add(Dense(neurons[0], activation=activation, input_shape=(X_train.shape[1],),
                    kernel_regularizer=regularizers.l2(l2_strength)))
    model.add(Dropout(dropout))
    for n in neurons[1:]:
        model.add(Dense(n, activation=activation, kernel_regularizer=regularizers.l2(l2_strength)))
        model.add(Dropout(dropout))
    model.add(Dense(1, activation='linear'))
    return model

# === 3. Grid Search ===
for idx, (neurons, activation, dropout, l2_strength) in enumerate(combos):
    print(f"\nüîÅ Running combo {idx+1}/{len(combos)}: {neurons}, {activation}, dropout={dropout}, l2={l2_strength}")
    model = build_model(neurons, activation, dropout, l2_strength)
    model.compile(optimizer='adam', loss='mean_absolute_error')
    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    history = model.fit(
        X_train, y_train, epochs=200, batch_size=64, verbose=1,
        validation_data=(X_val, y_val), callbacks=[early_stop]
    )

    y_pred = model.predict(X_val).flatten()
    y_true = y_val.flatten()

    results.append({
        'neurons': str(neurons),
        'activation': activation,
        'dropout': dropout,
        'l2_strength': l2_strength,
        'val_loss': min(history.history['val_loss']),
        'r2': r2_score(y_true, y_pred),
        'mae': mean_absolute_error(y_true, y_pred),
        'rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
        'epochs_ran': len(history.history['val_loss'])
    })

# === 4. Save Grid Results ===
results_df = pd.DataFrame(results)
results_df.sort_values(by='val_loss', inplace=True)
results_df.to_csv("grid_search_results_NN1_v9.csv", index=False)

# === 5. Train Best Model ===
top = results_df.iloc[0]
neurons = eval(top['neurons'])
activation = top['activation']
dropout = top['dropout']
l2_strength = top['l2_strength']

model = build_model(neurons, activation, dropout, l2_strength)
model.compile(optimizer='adam', loss='mean_absolute_error')
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    X_train, y_train, epochs=200, batch_size=64, verbose=1,
    validation_data=(X_val, y_val), callbacks=[early_stop]
)

# === 6. Evaluate ===
y_pred = model.predict(X).flatten()
y_true = y.flatten()

percentage_diff = ((y_pred - y_true) / y_true) * 100
r2 = r2_score(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
msd = np.mean(y_true - y_pred)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
val_loss = model.evaluate(X_val, y_val, verbose=0)

# === 7. Save Outputs ===
model.save("best_model_allfeedstocks_NN1_v9.keras")
pd.DataFrame({
    'Row ID': range(1, len(y_true)+1),
    'Actual H2 (kWh/year)': y_true,
    'Predicted H2 (kWh/year)': y_pred,
    'Percentage Difference (%)': percentage_diff
}).to_csv("predicted_vs_actual_ANNUALENERGY_H2_kwh_NN1_v9.csv", index=False)

pd.DataFrame({
    'Characteristic': ['Model Version', 'Number of Layers', 'Neurons per Layer', 'Activation Functions',
                       'Epochs', 'Train/Test Split', 'Optimizer', 'Regularisation Strength', 'Dropout Rate'],
    'Value': ['NN1_v9', len(model.layers), neurons,
              [layer.activation.__name__ for layer in model.layers if hasattr(layer, 'activation')],
              len(history.history['loss']), f"{X_train.shape[0]}:{X_val.shape[0]}",
              'adam', l2_strength, dropout]
}).to_csv("model_characteristics_NN1_v9.csv", index=False)

# === 8. Plots ===
plt.figure(figsize=(10, 6))
plt.scatter(y_true, y_pred, alpha=0.5, edgecolors='k')
plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--')
plt.xlabel("Actual H2 (kWh/year)")
plt.ylabel("Predicted H2 (kWh/year)")
plt.title("All Feedstocks - Actual vs Predicted Hydrogen Yield (NN1_v9)")
plt.grid(True)
plt.tight_layout()
plt.savefig("h2_prediction_plot_NN1_v9.png", dpi=300)
plt.close()

plt.figure(figsize=(8, 5))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel("Epoch")
plt.ylabel("MAE (kWh)")
plt.title("Training vs Validation Loss (NN1_v9)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("loss_curve_NN1_v9.png", dpi=300)
plt.close()

print("‚úÖ NN1_v9 complete ‚Äî top model trained, saved, and evaluated.")


2025-06-25 13:54:47.722793: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-25 13:54:47.725783: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-25 13:54:47.761073: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-25 13:54:47.761917: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.



üîÅ Running combo 1/5: [238, 238, 238], relu, dropout=0.05, l2=0.0005
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
E

In [2]:
# === 9. Print Final Model Performance Summary ===
print("\n" + "="*25 + " FINAL MODEL PERFORMANCE METRICS (NN1_v9) " + "="*25)

print(f"Best Neural Configuration: {neurons}, activation={activation}, dropout={dropout}, L2={l2_strength}")
print(f"R¬≤: {round(r2, 4)}")
print(f"MAE (kWh): {round(mae, 2):,}")
print(f"RMSE (kWh): {round(rmse, 2):,}")
print(f"Mean Signed Deviation (MSD) (kWh): {round(msd, 2):,}")
print(f"Mean Absolute Percentage Error (MAPE): {round(mape, 2)}%")
print(f"Validation Loss (MAE, kWh): {round(val_loss, 2):,}")
print(f"Epochs Trained: {len(history.history['loss'])}")

print("\nNetwork Architecture:")
for i, layer in enumerate(model.layers):
    config = layer.get_config()
    layer_type = type(layer).__name__
    units = config.get('units', '-')
    act = config.get('activation', '-')
    print(f"  Layer {i+1}: {layer_type:<12} | Units: {units:<4} | Activation: {act}")

print("="*85)



Best Neural Configuration: [238, 238, 128, 64, 32], activation=relu, dropout=0.05, L2=0.0001
R¬≤: 0.9994
MAE (kWh): 3,588,551.81
RMSE (kWh): 9,304,747.19
Mean Signed Deviation (MSD) (kWh): -282,537.58
Mean Absolute Percentage Error (MAPE): 5.61%
Validation Loss (MAE, kWh): 3,712,926.5
Epochs Trained: 111

Network Architecture:
  Layer 1: Dense        | Units: 238  | Activation: relu
  Layer 2: Dropout      | Units: -    | Activation: -
  Layer 3: Dense        | Units: 238  | Activation: relu
  Layer 4: Dropout      | Units: -    | Activation: -
  Layer 5: Dense        | Units: 128  | Activation: relu
  Layer 6: Dropout      | Units: -    | Activation: -
  Layer 7: Dense        | Units: 64   | Activation: relu
  Layer 8: Dropout      | Units: -    | Activation: -
  Layer 9: Dense        | Units: 32   | Activation: relu
  Layer 10: Dropout      | Units: -    | Activation: -
  Layer 11: Dense        | Units: 1    | Activation: linear


In [1]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('combined_feedstocks_dataset.csv')

# Print all column names
print(df.columns.tolist())


['ANNUALENERGY_H2_kwh', 'Population', 'Waste_per_capita', 'WasteTotal', 'recovery_H2_separation', 'recovery_H2_storage', 'eff_CGE', 'eff_CCE', 'ratio_H2_stored', 'ratio_H2_CHP', 'is_plasma_cleanup', 'fs_shape_pellets', 'fs_shape_fibres', 'fs_shape_dust', 'fs_shape_chips', 'fs_shape_particles', 'fs_shape_other', 'fs_size', 'fs_lhv', 'fs_C', 'fs_H', 'fs_N', 'fs_S', 'fs_O', 'fs_ash', 'fs_moisture', 're_temp', 're_mode_continuous', 're_mode_batch', 're_ER', 're_steambiomass_ratio', 're_agent_air', 're_agent_air_and_steam', 're_agent_oxygen', 're_agent_steam', 're_agent_other', 're_type_fluidised_bed', 're_type_fixed_bed', 're_type_other', 're_material_olivine', 're_material_silica', 're_material_dolomite', 're_material_alumina', 're_material_calcium_oxide', 're_catalyst', 're_scale_pilot', 're_scale_lab', 'syn_N2', 'syn_H2', 'syn_CO', 'syn_CO2', 'syn_CH4', 'syn_C2Hn', 'syn_LHV', 'syn_tar_content', 'syn_yield', 'syn_char_yield', 'Feedstock_ID', 'Iteration', 'Year', 'feedstock_type_HB', 'fee