# Neural Networks for Non-Linear Regression

## Table of Contents

#### Import the Dataset
#### Review the Data
#### Train the Model
#### Evaluate Model Performance

## Import the dataset

In [None]:
#Import libraries
from tabulate import tabulate
import pandas as pd

In [None]:
#Update the folder variable. Enter it as a string and make sure to use forward slashes (/) or double back slashes (\\)
import os

folder = 'filelocation'

#os.path.join inserts the path separator of the current operating system, so the
#file locations are valid on Windows as well as on macOS/Linux (a hard-coded '\\'
#separator only works on Windows)
data_file_location = os.path.join(folder, 'concrete_data.csv')
save_file_location = os.path.join(folder, 'concrete_data_with_predictions.csv')

In [None]:
#Load the semicolon-delimited CSV file into a dataframe
import pandas as pd

#header=0: the first row of the file supplies the column names
df = pd.read_csv(
    data_file_location,
    header=0,
    sep=";",
)

#Render the dataframe as a psql-style table to check that it loaded as expected
print(tabulate(df, tablefmt='psql', headers='keys'))

In [None]:
#Normalize the data
from sklearn.preprocessing import StandardScaler

#Separate the features (X) from the target (y)
X = df.drop(['compressive_strength'], axis=1)
y = df.compressive_strength

#Scale every feature column with StandardScaler
#NOTE(review): the scaler is fitted on the full dataset, i.e. before the train/test
#split performed later in the notebook - confirm this is acceptable for the exercise
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

#Save the normalized values to a NEW dataframe that keeps X's column names and index.
#A plain "X_norm = X" would only create a second name for the same dataframe, so
#writing the normalized values into it would also overwrite X. Building the dataframe
#in one step also avoids a slow row-by-row loop, does not depend on a hard-coded
#column list or on the index being 0..n-1, and never writes floats into integer
#columns (which newer pandas versions warn about or reject).
#X keeps the original, un-normalized feature values.
X_norm = pd.DataFrame(X_normalized, columns=X.columns, index=X.index)

print(tabulate(X_norm, headers='keys', tablefmt='psql'))

## Review the Data

It's always a good idea to plot the features and target of your data set to understand it.

In [None]:
#Scatter-plot selected features against the target (strength)

import matplotlib.pyplot as plt

#Each entry pairs a feature column with the marker colour used to draw it;
#the column name doubles as the legend label
for feature_name, marker_colour in (('cement', 'r'), ('age', 'm')):
    plt.scatter(X[feature_name], y, s=25, c=marker_colour, label=feature_name)

plt.title('Feature and Strength Visualization')
plt.xlabel('Feature Value')
plt.ylabel('Strength')
plt.legend()
plt.show()


In [None]:
#Plot 2 features against the target (strength) with strength represented in the Z axis
#Use the Z filter to visualize points with strengths above a threshold

import numpy as np

#set Z filter value here
z_filter = 60

#One figure with two side-by-side panels: a 3D view (left) and a 2D top-down view (right)
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(121, projection='3d')

#Boolean masks: points at or above the threshold, and points below it
mask_over_filter = y >= z_filter
mask_under_filter = y < z_filter

#3D scatter: points at/above the threshold in solid blue, the rest in faded orange
ax.scatter(X['cement'][mask_over_filter], X['age'][mask_over_filter], y[mask_over_filter], marker='^', color='blue')
ax.scatter(X['cement'][mask_under_filter], X['age'][mask_under_filter], y[mask_under_filter], marker='^', color='orange', alpha = 0.2)

ax.set_xlabel('cement')
ax.set_ylabel('age')
ax.set_zlabel('strength')

#Top-down view: only the two features are plotted. On a 2D axes the third positional
#argument of scatter() is the marker size, so the strength values must not be passed
#here (they would silently resize the markers instead of acting as a Z coordinate).
ax2 = fig.add_subplot(122)
ax2.scatter(X['cement'][mask_over_filter], X['age'][mask_over_filter], marker='^', color='blue')
ax2.scatter(X['cement'][mask_under_filter], X['age'][mask_under_filter], marker='^', color='orange', alpha = 0.2)

# Set labels for the top view plot
ax2.set_xlabel('cement')
ax2.set_ylabel('age')
ax2.set_title('Top-Down View')

#Render the figure explicitly, like the other plotting cells do
plt.show()

## Train the model

In [None]:
# Bring in the helper that splits a dataset into training and test subsets
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing and train on the remaining 70%;
# random_state is fixed so the same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(
    X_norm,
    y,
    random_state=42,
    test_size=0.3,
)

In [None]:
#Train a regression model using the MLPRegressor function
#https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor

from sklearn.neural_network import MLPRegressor

#Three hidden layers of 50 units each, ReLU activation, Adam solver, at most 300 iterations.
#random_state is fixed (same seed as the train/test split) so that the weight
#initialisation, and therefore the metrics computed below, are repeatable between runs.
mlp = MLPRegressor(
    hidden_layer_sizes=(50, 50, 50),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=42,
)
mlp.fit(X_train, y_train)

#Predictions on the data the model was trained on and on the held-out test data
predict_train = mlp.predict(X_train)
y_pred = mlp.predict(X_test)

print('training complete')

In [None]:
#Plot the loss values the model recorded during training (mlp.loss_curve_)
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(mlp.loss_curve_)
#Label both axes in a single call
ax.set(xlabel='Number of iterations', ylabel='Loss')
plt.show()

## Evaluate model performance

Check the documentation to see what the .score() function returns
https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor.score

In [None]:
#Score the trained model on the held-out test split. This is the last expression of
#the cell, so the notebook displays the returned value.
mlp.score(X_test, y_test)

Look at other metrics, such as the Root Mean Squared Error (RMSE) and the Mean Absolute Error (MAE).

In [None]:
from sklearn.metrics import root_mean_squared_error

#Predict on the test features and compare against the true test targets
y_pred = mlp.predict(X_test)
y_true = y_test

#Root mean squared error between the true and the predicted values
rmse = root_mean_squared_error(y_true=y_true, y_pred=y_pred)
print(rmse)

In [None]:
from sklearn.metrics import mean_absolute_error

#Mean absolute error between the true test targets and the test predictions
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(mae)

These metrics are relative to the scale of your output data. In our dataset, we measure the strength in MPa. It can be helpful to compare the errors against values in your dataset to understand the scale of the error.

In [None]:
#Summary statistics of the target column, used below to put the errors into context
strength = df['compressive_strength']

lowest_value = strength.min()
highest_value = strength.max()
median_value = strength.median()
mean_value = strength.mean()

# Print the results, one statistic per line
print(
    f"Lowest value: {lowest_value}",
    f"Highest value: {highest_value}",
    f"Median value: {median_value}",
    f"Mean value: {mean_value}",
    sep="\n",
)

In [None]:
#Express the RMSE as a percentage (rounded to 2 decimals) of the median, minimum
#and maximum target values
percentage_median, percentage_min, percentage_max = (
    round(rmse / reference * 100, 2)
    for reference in (median_value, lowest_value, highest_value)
)

print("RMSE is {}% of the median value of the dataset".format(percentage_median))
print("RMSE is {}% of the minimum value of the dataset".format(percentage_min))
print("RMSE is {}% of the maximum value of the dataset".format(percentage_max))

In [None]:
#Express the MAE as a percentage (rounded to 2 decimals) of the median, minimum
#and maximum target values
percentage_median = round(mae / median_value*100, 2)
percentage_min = round(mae / lowest_value*100, 2)
percentage_max = round(mae / highest_value*100, 2)

#These values are computed from the MAE, so the output is labelled "MAE"
#(the labels previously said "RMSE", a copy-paste slip from the cell above)
print("MAE is {}% of the median value of the dataset".format(percentage_median))
print("MAE is {}% of the minimum value of the dataset".format(percentage_min))
print("MAE is {}% of the maximum value of the dataset".format(percentage_max))