# The snippet below implements a back-propagation neural network (BPNN) with a sigmoid-activated hidden layer of 2-3 neurons and a linear output activation function. You can also use this code to compare different activation functions, namely 'relu', 'tanh', 'sigmoid', 'linear', 'elu', and 'leaky_relu', and to graph the results of each run with the defined regression metrics.

In [1]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LeakyReLU, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, Callback


In [None]:
print("...............Reading the Dataset and Dataset Pre-Processing ................")
start_time = time.time()

# Load the dataset (adjust the path so it points at your CSV file).
data = pd.read_csv('path_to_your_data')

# Log-transform the target to make its distribution more symmetric.
# log1p(x) equals log(x + 1) but stays accurate for values close to zero.
data['Target_log'] = np.log1p(data['Target'])

# Drop the irrelevant columns (replace the placeholder with your own list).
X = data.drop(columns=['List_of_columns_to_drop'])
# Make sure neither the raw nor the transformed target stays among the
# features: leaving them in would leak the answer into the model.
# errors='ignore' makes this a no-op when they were already dropped above.
X = X.drop(columns=['Target', 'Target_log'], errors='ignore')
y = data['Target_log']  # Regression target

# Encode categorical (object-dtype) columns as integer labels.
for col in X.select_dtypes(include='object').columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])

# Hold out 30% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training split only, then reuse it for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Turn the targets into column vectors of shape (n_samples, 1).
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)

end_time = time.time()
total_time = end_time - start_time
print("Time Cost for Pre-processing and Reading the Dataset: %f seconds \n " % total_time)

def build_bpnn_model(input_dim, activation_function='relu'):
    """Build and compile the small back-propagation network (BPNN).

    Layers: Dense(128, <activation_function>) -> Dense(3, sigmoid) ->
    Dense(1, linear). The model is compiled with Adam (learning rate 0.001),
    mean-squared-error loss and MAE as an additional metric.

    Args:
        input_dim: Number of input features.
        activation_function: Activation of the first dense layer. The value
            'leaky_relu' is realised as a separate LeakyReLU(alpha=0.01)
            layer, the same way the other model builders in this file do it,
            because that string is not a recognised activation name in every
            Keras version.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    if activation_function == 'leaky_relu':
        model.add(Dense(128, input_dim=input_dim))
        model.add(LeakyReLU(alpha=0.01))
    else:
        model.add(Dense(128, activation=activation_function, input_dim=input_dim))
    model.add(Dense(3, activation='sigmoid'))  # Hidden layer with 3 sigmoid neurons
    model.add(Dense(1, activation='linear'))  # Linear output for regression
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])
    return model


class R2Callback(Callback):
    """Keras callback that records and prints train/validation R² per epoch.

    Args:
        train_data: Tuple (features, targets) of the training split.
        validation_data: Tuple (features, targets) of the validation split.

    After training, `train_r2s` and `validation_r2s` hold one score per epoch.
    """

    def __init__(self, train_data, validation_data):
        super().__init__()
        self.train_data = train_data
        self.validation_data = validation_data
        self.train_r2s = []
        self.validation_r2s = []

    def on_epoch_end(self, epoch, logs=None):
        """Score the current weights on both splits and store the results."""
        # verbose=0 keeps predict() from printing its own progress bar inside
        # the progress output of fit().
        y_train_pred = self.model.predict(self.train_data[0], verbose=0)
        train_r2 = r2_score(self.train_data[1], y_train_pred)
        self.train_r2s.append(train_r2)
        y_val_pred = self.model.predict(self.validation_data[0], verbose=0)
        val_r2 = r2_score(self.validation_data[1], y_val_pred)
        self.validation_r2s.append(val_r2)
        print(f" - train_r2: {train_r2:.4f} - val_r2: {val_r2:.4f}")


activation_functions = ['relu', 'tanh', 'sigmoid', 'linear', 'elu', 'leaky_relu']
histories = {}      # activation name -> Keras History of that run
r2_callbacks = {}   # activation name -> R2Callback holding the per-epoch R² lists
results = []        # one summary row (dict) per trained model

for activation_function in activation_functions:
    print(f"Training BPNN model with {activation_function} activation function")
    # The activation is passed explicitly, and the data is referenced by the
    # names the pre-processing step defines (X_train / X_test).
    ann_model = build_bpnn_model(X_train.shape[1], activation_function)
    early_stop = EarlyStopping(monitor='val_loss', patience=10)
    r2_callback = R2Callback(train_data=(X_train, y_train), validation_data=(X_test, y_test))
    history = ann_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stop, r2_callback], verbose=1)
    histories[activation_function] = history
    r2_callbacks[activation_function] = r2_callback
    # Summarise the run with the metrics of its last completed epoch.
    results.append({
        'Model': f'BPNN_{activation_function}',
        'Final Train Loss': history.history['loss'][-1],
        'Final Validation Loss': history.history['val_loss'][-1],
        'Final Train MAE': history.history['mae'][-1],
        'Final Validation MAE': history.history['val_mae'][-1],
        'Final Train R²': r2_callback.train_r2s[-1],
        'Final Validation R²': r2_callback.validation_r2s[-1]
    })

# One figure with a 3-row grid: loss on top, MAE in the middle, R² at the
# bottom, and one column per activation function.
num_columns = len(activation_functions)
plt.figure(figsize=(20, 15))

for i, activation_function in enumerate(activation_functions):
    epoch_logs = histories[activation_function].history
    r2_tracker = r2_callbacks[activation_function]
    # (label, train curve, validation curve) for each grid row, top to bottom.
    panels = (
        ('Loss', epoch_logs['loss'], epoch_logs['val_loss']),
        ('MAE', epoch_logs['mae'], epoch_logs['val_mae']),
        ('R²', r2_tracker.train_r2s, r2_tracker.validation_r2s),
    )
    for row, (metric_label, train_curve, val_curve) in enumerate(panels):
        plt.subplot(3, num_columns, i + 1 + row * num_columns)
        plt.plot(train_curve, label=f'Train {metric_label}')
        plt.plot(val_curve, label=f'Validation {metric_label}')
        plt.title(f'{activation_function} {metric_label}')
        plt.xlabel('Epoch')
        plt.ylabel(metric_label)
        plt.legend()

plt.tight_layout()
plt.show()

# Tabulate the per-model summary rows collected during training.
results_df = pd.DataFrame(data=results)
print("Tabulated Results: ", results_df, sep="\n")


# Instead of running a BPNN algorithm in which some layer information is already defined, you can run the ANN code below to search for the best hyperparameters for each activation function. If your local machine struggles to run the code because of the intensive computation needed, use the example code further below, which is restricted to the sigmoid activation function. You can use that code for other activation functions too by changing the name of the activation function.

In [None]:
class R2Callback(Callback):
    """Keras callback that computes train/validation R² after every epoch.

    The scores are appended to `train_r2s` / `validation_r2s` and, when Keras
    supplies a `logs` dict, also stored in it under 'train_r2' / 'val_r2'.
    The plotting code of this cell reads those two keys from
    `history.history`, so they must be published here.

    Args:
        train_data: Tuple (features, targets) of the training split.
        validation_data: Tuple (features, targets) of the validation split.
    """

    def __init__(self, train_data, validation_data):
        super().__init__()
        self.train_data = train_data
        self.validation_data = validation_data
        self.train_r2s = []
        self.validation_r2s = []

    def on_epoch_end(self, epoch, logs=None):
        """Score the current weights on both splits and record the results."""
        X_train, y_train = self.train_data
        X_val, y_val = self.validation_data
        # verbose=0 keeps predict() from printing its own progress bar inside
        # the progress output of fit().
        y_train_pred = self.model.predict(X_train, verbose=0)
        y_val_pred = self.model.predict(X_val, verbose=0)
        train_r2 = r2_score(y_train, y_train_pred)
        val_r2 = r2_score(y_val, y_val_pred)
        self.train_r2s.append(train_r2)
        self.validation_r2s.append(val_r2)
        # logs defaults to None, so only publish when a dict was passed in.
        if logs is not None:
            logs['train_r2'] = train_r2
            logs['val_r2'] = val_r2

print("...............Reading the Dataset and Dataset Pre-Processing ................")
start_time = time.time()

# Load the dataset (adjust the path so it points at your CSV file).
data = pd.read_csv('path_to_your_data')

# Log-transform the target to make its distribution more symmetric.
# log1p(x) equals log(x + 1) but stays accurate for values close to zero.
data['Target_log'] = np.log1p(data['Target'])

# Drop the irrelevant columns (replace the placeholder with your own list).
X = data.drop(columns=['List_of_columns_to_drop'])
# Make sure neither the raw nor the transformed target stays among the
# features: leaving them in would leak the answer into the model.
# errors='ignore' makes this a no-op when they were already dropped above.
X = X.drop(columns=['Target', 'Target_log'], errors='ignore')
y = data['Target_log']  # Regression target

# Encode categorical (object-dtype) columns as integer labels.
for col in X.select_dtypes(include='object').columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])

# Hold out 30% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training split only, then reuse it for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

def build_model(input_dim, num_hidden_layers, activation='relu', is_leaky_relu=False):
    """Build and compile a fully connected regression network.

    Layers: Dense(128) + activation + Dropout(0.2), then
    `num_hidden_layers - 1` repetitions of Dense(64) + activation +
    Dropout(0.2), then Dense(32) + activation and a linear Dense(1) output.
    The model is compiled with Adam (learning rate 0.001),
    mean-squared-error loss and MAE as an additional metric.

    Args:
        input_dim: Number of input features.
        num_hidden_layers: Controls how many Dense(64) blocks are added
            (`num_hidden_layers - 1` of them).
        activation: Name of the activation used by every hidden layer.
        is_leaky_relu: When True (or when `activation` is 'leaky_relu'),
            every hidden layer is followed by LeakyReLU(alpha=0.01) instead
            of a named activation.

    Returns:
        The compiled Sequential model.
    """
    use_leaky_relu = is_leaky_relu or activation == 'leaky_relu'

    def hidden_activation():
        # A fresh layer object is needed for every position in the model.
        return LeakyReLU(alpha=0.01) if use_leaky_relu else Activation(activation)

    model = Sequential()
    model.add(Dense(128, input_dim=input_dim))
    model.add(hidden_activation())
    model.add(Dropout(0.2))

    for _ in range(num_hidden_layers - 1):
        model.add(Dense(64))
        model.add(hidden_activation())
        model.add(Dropout(0.2))

    # The 32-unit layer used to receive the activation *string* in every
    # case, so the leaky-ReLU variant handed 'leaky_relu' straight to Dense
    # and skipped the LeakyReLU layer used everywhere else.
    if use_leaky_relu:
        model.add(Dense(32))
        model.add(LeakyReLU(alpha=0.01))
    else:
        model.add(Dense(32, activation=activation))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])
    return model


# Hyperparameters
activation_functions = ['relu', 'tanh', 'sigmoid', 'linear', 'elu', 'leaky_relu']
num_hidden_layers_list = [1, 2, 3, 4, 5]
additional_learning_rates = [0.01, 0.0001]
additional_batch_sizes = [32, 128]

# dict.fromkeys removes repeated values while keeping their order. Without
# it, batch size 32 (present in both lists) would be trained twice for every
# configuration.
batch_sizes = list(dict.fromkeys([32, 64] + additional_batch_sizes))
learning_rates = list(dict.fromkeys([0.001] + additional_learning_rates))

early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

best_val_r2 = float('-inf')
# Best run so far: (activation_function, num_hidden_layers, history, r2_callback)
best_model = None
results = []

# Define a directory to save the plots
save_directory = "/saving_directory_for_plots/"

for activation_function in activation_functions:
    for num_hidden_layers in num_hidden_layers_list:
        for batch_size in batch_sizes:
            for learning_rate in learning_rates:
                print(f"Training model with activation_function={activation_function}, num_hidden_layers={num_hidden_layers}, batch_size={batch_size}, learning_rate={learning_rate}")

                r2_callback = R2Callback(train_data=(X_train, y_train), validation_data=(X_test, y_test))
                # Keep the rate constant for 10 epochs, then decay it by 10% per epoch.
                lr_schedule_callback = LearningRateScheduler(lambda epoch, lr: lr if epoch < 10 else lr * 0.9)

                is_leaky_relu = activation_function == 'leaky_relu'
                ann_model = build_model(X_train.shape[1], num_hidden_layers, activation_function, is_leaky_relu)
                # build_model compiles with a fixed rate; override it for this run.
                # `learning_rate` is the canonical attribute name (`lr` is only an
                # alias that is not available in every Keras version).
                ann_model.optimizer.learning_rate.assign(learning_rate)

                history = ann_model.fit(X_train, y_train, epochs=100, batch_size=batch_size,
                        validation_data=(X_test, y_test), verbose=1,
                        callbacks=[r2_callback, lr_schedule_callback, early_stop])

                final_train_r2 = r2_callback.train_r2s[-1] if r2_callback.train_r2s else float('-inf')
                final_val_r2 = r2_callback.validation_r2s[-1] if r2_callback.validation_r2s else float('-inf')

                # Remember the run with the highest final validation R² so it
                # can be plotted after the search.
                if final_val_r2 > best_val_r2:
                    best_val_r2 = final_val_r2
                    best_model = (activation_function, num_hidden_layers, history, r2_callback)

                results.append({
                    'Activation Function': activation_function,
                    'Number of Hidden Layers': num_hidden_layers,
                    'Batch Size': batch_size,
                    'Learning Rate': learning_rate,
                    'Final Train Loss': history.history['loss'][-1] if 'loss' in history.history else float('nan'),
                    'Final Validation Loss': history.history['val_loss'][-1] if 'val_loss' in history.history else float('nan'),
                    'Final Train MAE': history.history['mae'][-1] if 'mae' in history.history else float('nan'),
                    'Final Validation MAE': history.history['val_mae'][-1] if 'val_mae' in history.history else float('nan'),
                    'Final Train R²': final_train_r2,
                    'Final Validation R²': final_val_r2
                })

results_df = pd.DataFrame(results)
print(results_df)

# Plot the best model
if best_model is not None:
    activation_function, num_hidden_layers, history, r2_callback = best_model
    metrics = ['loss', 'mae', 'R²']
    num_metrics = len(metrics)
    fig, axs = plt.subplots(1, num_metrics, figsize=(5 * num_metrics, 5))

    # The R² curves are taken from the callback's own per-epoch lists, which
    # are always filled, rather than from history.history.
    train_metrics = [history.history['loss'], history.history['mae'], r2_callback.train_r2s]
    val_metrics = [history.history['val_loss'], history.history['val_mae'], r2_callback.validation_r2s]

    for i, metric in enumerate(metrics):
        axs[i].plot(train_metrics[i], label=f'Train {metric}')
        axs[i].plot(val_metrics[i], label=f'Validation {metric}')
        axs[i].set_title(f'{activation_function}_{num_hidden_layers}hidden {metric}')
        axs[i].set_xlabel('Epoch')
        axs[i].set_ylabel(metric)
        axs[i].legend()

    plt.tight_layout()
    # Saving the plot in PNG format with 1000 dpi
    plt.savefig(f"{save_directory}best_model_plot.png", dpi=1000)
    # Saving the plot in TIFF format with 1000 dpi
    plt.savefig(f"{save_directory}best_model_plot.tiff", dpi=1000)
    plt.show()

# Notebook-style echo of the directory the plots were written to.
save_directory


# Individual activation functions: to investigate a different activation function, replace "sigmoid" in the code below with the name of that activation function.

In [None]:
class R2Callback(Callback):
    """Keras callback that adds train/validation R² to the epoch logs.

    After every epoch the model is scored on both splits and the results are
    written into the `logs` dict under 'train_r2' and 'val_r2'; the rest of
    this cell reads them back from `history.history`.

    Args:
        train_data: Tuple (features, targets) of the training split.
        validation_data: Tuple (features, targets) of the validation split.
    """

    def __init__(self, train_data, validation_data):
        super().__init__()
        self.train_data = train_data
        self.validation_data = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Score the current weights on both splits and publish the results."""
        # logs defaults to None; without this guard the item assignments
        # below would raise a TypeError when no dict is supplied.
        if logs is None:
            return
        X_train, y_train = self.train_data
        X_val, y_val = self.validation_data
        # verbose=0 keeps predict() from printing its own progress bar inside
        # the progress output of fit().
        y_train_pred = self.model.predict(X_train, verbose=0)
        y_val_pred = self.model.predict(X_val, verbose=0)
        train_r2 = r2_score(y_train, y_train_pred)
        val_r2 = r2_score(y_val, y_val_pred)
        logs['train_r2'] = train_r2
        logs['val_r2'] = val_r2


print("...............Reading the Dataset and Dataset Pre-Processing ................")
start_time = time.time()

# Load the dataset (adjust the path so it points at your CSV file).
data = pd.read_csv('path_to_your_data')

# Log-transform the target to make its distribution more symmetric.
# log1p(x) equals log(x + 1) but stays accurate for values close to zero.
data['Target_log'] = np.log1p(data['Target'])

# Drop the irrelevant columns (replace the placeholder with your own list).
X = data.drop(columns=['List_of_columns_to_drop'])
# Make sure neither the raw nor the transformed target stays among the
# features: leaving them in would leak the answer into the model.
# errors='ignore' makes this a no-op when they were already dropped above.
X = X.drop(columns=['Target', 'Target_log'], errors='ignore')
y = data['Target_log']  # Regression target

# Encode categorical (object-dtype) columns as integer labels.
for col in X.select_dtypes(include='object').columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])

# Hold out 30% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training split only, then reuse it for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


def build_model(input_dim, num_hidden_layers, activation='sigmoid'):
    """Build and compile a fully connected regression network.

    Layers: Dense(128) + activation + Dropout(0.2), then
    `num_hidden_layers - 1` repetitions of Dense(64) + activation +
    Dropout(0.2), then Dense(32) + activation and a linear Dense(1) output.
    The model is compiled with Adam (learning rate 0.001),
    mean-squared-error loss and MAE as an additional metric.

    Args:
        input_dim: Number of input features.
        num_hidden_layers: Controls how many Dense(64) blocks are added
            (`num_hidden_layers - 1` of them).
        activation: Name of the activation used by every hidden layer. The
            value 'leaky_relu' is realised with LeakyReLU(alpha=0.01) layers.

    Returns:
        The compiled Sequential model.
    """
    use_leaky_relu = activation == 'leaky_relu'

    def hidden_activation():
        # A fresh layer object is needed for every position in the model.
        return LeakyReLU(alpha=0.01) if use_leaky_relu else Activation(activation)

    model = Sequential()
    model.add(Dense(128, input_dim=input_dim))
    model.add(hidden_activation())
    model.add(Dropout(0.2))

    for _ in range(num_hidden_layers - 1):
        model.add(Dense(64))
        model.add(hidden_activation())
        model.add(Dropout(0.2))

    # The 32-unit layer used to receive the activation *string* in every
    # case, so the leaky-ReLU variant handed 'leaky_relu' straight to Dense
    # and skipped the LeakyReLU layer used everywhere else.
    if use_leaky_relu:
        model.add(Dense(32))
        model.add(LeakyReLU(alpha=0.01))
    else:
        model.add(Dense(32, activation=activation))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])
    return model


# Hyperparameter grid for a single activation function.
activation_functions = ['sigmoid']
num_hidden_layers_list = [1, 2, 3, 4, 5]
additional_batch_sizes = [32, 128]
additional_learning_rates = [0.01, 0.0001]

# dict.fromkeys removes repeated values while keeping their order. Without
# it, batch size 32 (present in both lists) would be trained twice for every
# configuration.
batch_sizes = list(dict.fromkeys([32, 64] + additional_batch_sizes))
learning_rates = list(dict.fromkeys([0.001] + additional_learning_rates))

early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
best_val_r2 = float('-inf')
# Best run so far: (activation_function, num_hidden_layers, history, r2_callback)
best_model = None
results = []

for activation_function in activation_functions:
    for num_hidden_layers in num_hidden_layers_list:
        for batch_size in batch_sizes:
            for learning_rate in learning_rates:
                print(f"Training model with activation_function={activation_function}, num_hidden_layers={num_hidden_layers}, batch_size={batch_size}, learning_rate={learning_rate}")

                r2_callback = R2Callback(train_data=(X_train, y_train), validation_data=(X_test, y_test))
                # Keep the rate constant for 10 epochs, then decay it by 10% per epoch.
                lr_schedule_callback = LearningRateScheduler(lambda epoch, lr: lr if epoch < 10 else lr * 0.9)

                ann_model = build_model(X_train.shape[1], num_hidden_layers, activation_function)
                # build_model compiles with a fixed rate; override it for this run.
                # `learning_rate` is the canonical attribute name (`lr` is only an
                # alias that is not available in every Keras version).
                ann_model.optimizer.learning_rate.assign(learning_rate)

                history = ann_model.fit(X_train, y_train, epochs=100, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1,
                                        callbacks=[r2_callback, lr_schedule_callback, early_stop])
                # Print the metrics of the first epoch to verify the results.
                print("Train Loss:", history.history['loss'][0])
                print("Validation Loss:", history.history['val_loss'][0])
                print("Train MAE:", history.history['mae'][0])
                print("Validation MAE:", history.history['val_mae'][0])
                print("Train R²:", history.history['train_r2'][0])
                print("Validation R²:", history.history['val_r2'][0])
                final_val_r2 = history.history['val_r2'][-1] if 'val_r2' in history.history else float('-inf')

                # Remember the run with the highest final validation R².
                if final_val_r2 > best_val_r2:
                    best_val_r2 = final_val_r2
                    best_model = (activation_function, num_hidden_layers, history, r2_callback)

                results.append({
                    'Activation Function': activation_function,
                    'Number of Hidden Layers': num_hidden_layers,
                    'Batch Size': batch_size,
                    'Learning Rate': learning_rate,
                    'Final Train Loss': history.history['loss'][-1] if 'loss' in history.history else float('nan'),
                    'Final Validation Loss': history.history['val_loss'][-1] if 'val_loss' in history.history else float('nan'),
                    'Final Train MAE': history.history['mae'][-1] if 'mae' in history.history else float('nan'),
                    'Final Validation MAE': history.history['val_mae'][-1] if 'val_mae' in history.history else float('nan'),
                    'Final Train R²': history.history['train_r2'][-1] if 'train_r2' in history.history else float('nan'),
                    'Final Validation R²': final_val_r2
                })

results_df = pd.DataFrame(results)
print(results_df)

# Define a directory to save the plots
save_directory = "/saving_directory_for_plots/"



# Plot the best model
if best_model is not None:
    activation_function, num_hidden_layers, history, r2_callback = best_model
    metrics = ['loss', 'mae', 'R²']
    num_metrics = len(metrics)
    fig, axs = plt.subplots(1, num_metrics, figsize=(5 * num_metrics, 5))

    # The R² curves were written into the epoch logs by R2Callback.
    train_metrics = [history.history['loss'], history.history['mae'], history.history['train_r2']]
    val_metrics = [history.history['val_loss'], history.history['val_mae'], history.history['val_r2']]

    for i, metric in enumerate(metrics):
        axs[i].plot(train_metrics[i], label=f'Train {metric}')
        axs[i].plot(val_metrics[i], label=f'Validation {metric}')
        axs[i].set_title(f'{activation_function}_{num_hidden_layers}hidden {metric}')
        axs[i].set_xlabel('Epoch')
        axs[i].set_ylabel(metric)
        axs[i].legend()

    plt.tight_layout()
    # Saving the plot in PNG format with 1000 dpi
    plt.savefig(f"{save_directory}best_model_plot.png", dpi=1000)
    # Saving the plot in TIFF format with 1000 dpi
    plt.savefig(f"{save_directory}best_model_plot.tiff", dpi=1000)
    plt.show()

# Notebook-style echo of the directory the plots were written to.
save_directory


After running each code cell, you can save the results_df DataFrame to a CSV file to inspect the results. You can also save the plots of the best results.

In [None]:
# Write the tabulated results to disk; index=False leaves the row index out
# of the file. Adjust the path to your own output location.
results_csv_path = 'path_to_saving_directory/file_name.csv'
results_df.to_csv(results_csv_path, index=False)
