### Import Packages

In [1]:
#Import packages
import numpy as np
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error 

### Exploratory Data Analysis (EDA)

In [None]:
# Read the concrete dataset from a CSV file in the working directory.
# The file can be downloaded from: https://cocl.us/concrete_data
concrete = pd.read_csv(filepath_or_buffer='concrete_data.csv')

# Preview the first five rows; as the last expression of the cell it is
# rendered as the cell's output.
concrete.head(n=5)

In [None]:
# Perform EDA
# Summary statistics for every column. A notebook only renders the *last*
# bare expression of a cell, so this table has to be shown explicitly;
# otherwise it is computed and silently discarded. display() is provided as a
# builtin by the IPython kernel.
summary_stats = concrete.describe()
display(summary_stats)

# Count the null (missing) values in each column. Being the last expression of
# the cell, this Series is rendered automatically.
# If every count is 0 there is no missing data, and we can proceed to sorting
# the data and building the model.
concrete.isnull().sum()


In [24]:
# Split the data into predictors and target.
# 'Strength' is the variable we want to predict (the target); every other
# column is used as a predictor.
concrete_columns = concrete.columns

predictors = concrete.drop(columns='Strength')
target = concrete['Strength']

# Normalize the predictors by subtracting each column's mean and dividing by
# its standard deviation. A column with zero standard deviation (a constant
# column) would give 0/0 = NaN, so its divisor is replaced by 1 and the column
# becomes all zeros instead.
predictors_std = predictors.std().replace(0, 1)
predictors_norm = (predictors - predictors.mean()) / predictors_std

# Number of predictors; used as the number of inputs of the network.
n_cols = predictors_norm.shape[1]

# Preview the normalized predictors. This has to be the last expression of the
# cell to be rendered; placed before an assignment it would show nothing.
predictors_norm.head()


### Build and Run Keras Model

In [26]:
# Build the neural network through a function so a fresh, untrained model can
# be created for every run.
def regression_model(n_inputs=None, n_hidden_layers=3, n_units=10):
    """Build and compile a fully connected regression network.

    The network is a stack of ``n_hidden_layers`` Dense layers with ReLU
    activation and ``n_units`` nodes each, followed by a single linear output
    node. It is compiled with the Adam optimizer and mean squared error loss.

    Called with no arguments it builds the same model as before: three hidden
    layers of 10 nodes, with the input width taken from the notebook-level
    ``n_cols``.

    Parameters
    ----------
    n_inputs : int, optional
        Number of input features. Defaults to the notebook-level ``n_cols``.
    n_hidden_layers : int, default 3
        Number of hidden layers; must be at least 1.
    n_units : int, default 10
        Number of nodes in each hidden layer.

    Returns
    -------
    The compiled ``Sequential`` model.

    Raises
    ------
    ValueError
        If ``n_hidden_layers`` is smaller than 1.
    """
    if n_hidden_layers < 1:
        raise ValueError("n_hidden_layers must be at least 1")
    if n_inputs is None:
        # Fall back to the global set when the predictors were prepared.
        n_inputs = n_cols

    model = Sequential()
    # Only the first layer needs to be told the shape of the input.
    model.add(Dense(n_units, activation='relu', input_shape=(n_inputs,)))
    for _ in range(n_hidden_layers - 1):
        model.add(Dense(n_units, activation='relu'))
    # Single output node without activation: the predicted value.
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


In [None]:
# Experiment settings, named so they can be tuned in one place.
N_RUNS = 50      # number of independent split/train/evaluate repetitions
N_EPOCHS = 50    # training epochs per repetition
TEST_SIZE = 0.3  # fraction of the rows held out for evaluation

# NOTE(review): predictors_norm was standardized with the mean and standard
# deviation of the full dataset before it is split below, so the held-out rows
# contribute to the scaling statistics. Left unchanged to keep the reported
# experiment as is; consider fitting the scaling on the training split only.

# List where the MSE of every run is stored
mse_list = []

# Repeat the split / train / evaluate steps N_RUNS times
for i in range(N_RUNS):
    # Reset the global state Keras keeps for created models; without this,
    # building many models in a loop accumulates state from earlier ones.
    keras.backend.clear_session()

    # Split the data; random_state=i gives every run its own repeatable split
    X_train, X_test, y_train, y_test = train_test_split(
        predictors_norm, target, test_size=TEST_SIZE, random_state=i)
    # Build a fresh, untrained model
    model = regression_model()
    # Train the model on the training set
    model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=0)

    # Evaluate on the held-out set; verbose=0 keeps predict() from printing a
    # progress bar on every run
    y_pred = model.predict(X_test, verbose=0)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)

print(f"Mean of the Mean Squared Errors over {N_RUNS} runs is: {mean_mse}")
print(f"Standard Deviation of the Mean Squared Errors over {N_RUNS} runs is: {std_mse}")

### Model Evaluation

In [None]:
'''
A)Unnormalized data with 50 epochs:
Mean of the Mean Squared Errors over 50 runs is: 840.7152468023643
Standard Deviation of the Mean Squared Errors over 50 runs is: 2107.079341831513

B)Normalized data with 50 epochs: 
Mean of the Mean Squared Errors over 50 runs is: 694.2724336560079
Standard Deviation of the Mean Squared Errors over 50 runs is: 143.70527562917263

The mean of the mean squared errors is lower than with the unnormalized data, which indicates that the model performs better. The standard deviation
also decreased drastically, showing more stable performance across runs. Normalizing the data improved performance. 

C)Normalized data with 100 epochs:
Mean of the Mean Squared Errors over 50 runs is: 168.13926165692024
Standard Deviation of the Mean Squared Errors over 50 runs is: 56.37702409980878

The mean of the mean squared errors is significantly lower compared to part B, bringing it closer to the strength range in the data. Increasing
training epochs allowed the model to learn more patterns in the data and improve predictive accuracy. More epochs also led to reduced error rates, 
resulting in the smaller MSE and smaller STDEV. 

D)Normalized data with 50 epochs, three hidden layers, each of 10 nodes and ReLU activation function:
Mean of the Mean Squared Errors over 50 runs is: 182.37070142096144
Standard Deviation of the Mean Squared Errors over 50 runs is: 106.0576653820055

Increasing the number of hidden layers from 1 to 3 yielded better performance compared to the model parameters in part B and this was done without
the need to train with more epochs. 

Overall, the best performance was seen with the model parameters used in part C, suggesting that training a single-layer model for more epochs was more
beneficial than increasing the number of layers and keeping the number of epochs the same. This most likely has to do with the extra time allotted
for the model to learn the data more effectively with a simpler architecture. 

'''