# **Part 2: Feedforward network Case study – Multi-layer Perceptron model for predicting heat influx into a home**

## 2. Develop feed-forward neural network models (train a few networks)

### (ii) Gradient descent with adaptive learning rate

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adagrad
import tensorflow.keras.backend as K

# Training hyper-parameters shared by the functions below
config = dict(
    batch_size=10,
    epochs=500,
    patience=30,
    hidden_neurons=5,  # best hidden-layer size found with the previous model
    seed=42,
)

def load_and_preprocess_data(seed):
    """Load the heat-influx CSV, split it 60/20/20 and min-max scale it.

    The rows are split *before* scaling and the ``MinMaxScaler`` is fitted on
    the training rows only, so the minimum/maximum of the validation and test
    rows never influence preprocessing (no data leakage). As a consequence,
    scaled validation/test values, and therefore ``X_all``/``y_all``, may fall
    slightly outside ``[0, 1]``.

    Args:
        seed: Random state passed to both ``train_test_split`` calls so the
            split is reproducible.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test, X_all, y_all)``
        of NumPy arrays. Each ``X_*`` holds the scaled feature columns and each
        ``y_*`` the scaled ``HeatFlux`` column; the ``*_all`` arrays cover every
        row of the file in its original order.
    """
    # Load data
    file_path = 'Heat_Influx_insulation_east_south_north.csv'
    data = pd.read_csv(file_path)

    # Define features and target
    features = ['Insulation', 'East', 'South', 'North']
    target = 'HeatFlux'
    columns = features + [target]
    raw = data[columns]

    # Split the raw rows first (60% train, 20% validation, 20% test)
    train_raw, temp_raw = train_test_split(raw, train_size=0.6, random_state=seed)
    val_raw, test_raw = train_test_split(temp_raw, train_size=0.5, random_state=seed)

    # Fit the scaler on the training rows only, then reuse it everywhere
    scaler = MinMaxScaler()
    scaler.fit(train_raw)

    n_features = len(features)

    def _scaled_xy(frame):
        """Scale *frame* with the train-fitted scaler and split into (X, y)."""
        scaled = scaler.transform(frame)
        return scaled[:, :n_features], scaled[:, n_features]

    # Prepare data sets
    X_train, y_train = _scaled_xy(train_raw)
    X_val, y_val = _scaled_xy(val_raw)
    X_test, y_test = _scaled_xy(test_raw)
    X_all, y_all = _scaled_xy(raw)

    return X_train, y_train, X_val, y_val, X_test, y_test, X_all, y_all

def build_and_train_model_adagrad(X_train, y_train, X_val, y_val, learning_rate=None):
    """Build and train a one-hidden-layer MLP with the Adagrad optimizer.

    The network has ``config['hidden_neurons']`` sigmoid units followed by a
    single linear output and is trained on mean squared error with early
    stopping on the validation loss.

    Args:
        X_train: 2-D array of training inputs; its second dimension sets the
            width of the input layer.
        y_train: Training targets.
        X_val: Validation inputs used for early stopping.
        y_val: Validation targets.
        learning_rate: Optional initial learning rate for Adagrad. ``None``
            (the default) keeps the Keras default. The Keras documentation
            notes that Adagrad tends to benefit from a higher initial learning
            rate than other optimizers, so this is the first knob to try when
            the loss barely moves.

    Returns:
        Tuple ``(model, history)``: the fitted Keras model, with the weights of
        the best validation epoch restored when early stopping triggers, and
        the ``History`` object returned by ``model.fit``.
    """
    # Reset Keras state and seed TensorFlow so repeated runs are comparable
    K.clear_session()
    tf.random.set_seed(config['seed'])

    # Size the input layer from the data instead of assuming four features
    n_inputs = np.shape(X_train)[1]

    # Define the model
    initializer = tf.keras.initializers.GlorotUniform(seed=config['seed'])
    model = Sequential([
        Input(shape=(n_inputs,)),
        Dense(config['hidden_neurons'], activation='sigmoid', kernel_initializer=initializer),
        Dense(1, activation='linear', kernel_initializer=initializer)
    ])

    # Use the Keras default learning rate unless the caller overrides it
    if learning_rate is None:
        optimizer = Adagrad()
    else:
        optimizer = Adagrad(learning_rate=learning_rate)
    model.compile(loss='mean_squared_error', optimizer=optimizer)

    # Early stopping callback
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=config['patience'],
        restore_best_weights=True
    )

    # Train the model
    history = model.fit(
        X_train, y_train,
        epochs=config['epochs'],
        batch_size=config['batch_size'],
        validation_data=(X_val, y_val),
        callbacks=[early_stopping],
        verbose=1
    )

    return model, history

def evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test, X_all, y_all):
    """Score the model on the train, validation, test and full data sets.

    Returns:
        Tuple ``(metrics, y_all_pred)``. ``metrics`` maps ``MSE_<split>`` and
        ``R2_<split>`` (split in ``Trn``, ``Val``, ``Test``, ``All``) to the
        corresponding score; ``y_all_pred`` is the flattened prediction for
        ``X_all``.
    """
    splits = (
        ('Trn', X_train, y_train),
        ('Val', X_val, y_val),
        ('Test', X_test, y_test),
        ('All', X_all, y_all),
    )

    # One predict call per split, flattened to 1-D to line up with the targets
    predictions = {
        tag: model.predict(inputs).flatten()
        for tag, inputs, _ in splits
    }

    # All MSE entries first, then all R2 entries, each in split order
    metrics = {}
    for prefix, scorer in (('MSE', mean_squared_error), ('R2', r2_score)):
        for tag, _, targets in splits:
            metrics[f'{prefix}_{tag}'] = scorer(targets, predictions[tag])

    return metrics, predictions['All']

def plot_results(history, y_all, y_all_pred):
    """Save a two-panel figure: loss curves and predicted-vs-actual scatter.

    The left panel shows training and validation loss per epoch taken from
    ``history.history``; the right panel plots ``y_all_pred`` against
    ``y_all`` with a dashed identity line. The figure is written to
    ``model_results_adagrad.png`` and then closed.
    """
    fig, (loss_ax, scatter_ax) = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: loss per epoch
    curves = history.history
    loss_ax.plot(curves['loss'], label='Training Loss')
    loss_ax.plot(curves['val_loss'], label='Validation Loss')
    loss_ax.set_title('Model Loss During Training (Adagrad)')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Mean Squared Error')
    loss_ax.legend()

    # Right panel: predictions against targets, with the y = x reference line
    lowest, highest = y_all.min(), y_all.max()
    scatter_ax.scatter(y_all, y_all_pred, alpha=0.5)
    scatter_ax.plot([lowest, highest], [lowest, highest], 'r--', lw=2)
    scatter_ax.set_xlabel('Actual Values')
    scatter_ax.set_ylabel('Predicted Values')
    scatter_ax.set_title('Predictions vs Actual Values (Adagrad)')

    fig.tight_layout()
    fig.savefig('model_results_adagrad.png')
    plt.close(fig)

# Script entry point: load data, train, evaluate, plot, save
if __name__ == "__main__":
    # Stage 1: data
    print("Loading and preprocessing data...")
    (
        X_train, y_train,
        X_val, y_val,
        X_test, y_test,
        X_all, y_all,
    ) = load_and_preprocess_data(config['seed'])

    # Stage 2: training
    print("\nTraining model with Adagrad optimizer...")
    model_adagrad, history_adagrad = build_and_train_model_adagrad(
        X_train, y_train, X_val, y_val
    )

    # Stage 3: metrics on every split
    print("\nEvaluating model...")
    metrics_adagrad, y_all_pred_adagrad = evaluate_model(
        model_adagrad,
        X_train, y_train,
        X_val, y_val,
        X_test, y_test,
        X_all, y_all,
    )

    print("\nModel Performance Metrics (Adagrad):")
    for key, value in metrics_adagrad.items():
        print(f"{key}: {value:.6f}")

    # Stage 4: figure on disk
    print("\nPlotting results...")
    plot_results(history_adagrad, y_all, y_all_pred_adagrad)

    # Stage 5: persist the trained network
    print("\nSaving model...")
    save_model(model_adagrad, 'best_heat_flux_model_adagrad.keras')

    print("\nProcess completed successfully.")

### Results Table

| Number of Neurons in Hidden Layer | MSE    |        |        |        | R²      |         |         |         |
|----------------------------------|--------|--------|--------|--------|---------|---------|---------|---------|
|                                  | Trn    | Val    | Test   | All    | Trn     | Val     | Test    | All     |
| 5                                | 0.3291 | 0.2949 | 0.3095 | 0.3183 | -5.7059 | -4.9968 | -3.3841 | -4.9401 |

### Analysis of Results

The Adagrad optimizer, which automatically adapts the learning rate during training, showed significantly different performance compared to the constant learning rate method:

1. The model demonstrated high Mean Squared Error (MSE) values across all datasets (training, validation, test, and all data).
2. The negative R² values indicate that the model's predictions are worse than simply using the mean of the target values.
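3. The adaptive learning rate did not lead to better convergence in this case. A likely cause is the step size rather than the architecture: the optimizer was left at the Keras default learning rate (0.001), and Adagrad divides each update by the root of the accumulated squared gradients, so the effective step only shrinks as training proceeds. Adagrad usually needs a larger initial learning rate than other optimizers to make progress, so the network probably stayed close to its initial weights.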
