In [77]:
import sklearn   #importing necessary libraries
import pandas as pd
import numpy as np

In [78]:
filepath='https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv' #importing dataset
concrete_data = pd.read_csv(filepath)

concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [79]:
predictors = concrete_data.drop('Strength', axis=1) #splitting data into predictors and target
target = concrete_data['Strength']

In [80]:
from sklearn.model_selection import train_test_split #importing necessary scikit-learn modules
from sklearn.metrics import mean_squared_error

In [81]:
from keras import Sequential #importing necessary keras modules
from keras.layers import Dense, Input

Part A: Baseline Model

In [82]:
def baseline_model(): #defining a baseline model

    # random_state_list = []  #initializing random state list
    random_state = np.random.randint(1,1000) #choosing a random state
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=random_state) #splitting with 30% for testing randomly
    # random_state_list.append(random_state) #storing the random state number
    shape = (predictors.shape[1],)#assigning input shape
    model = Sequential() #initialize the model
    model.add(Input(shape=shape))
    model.add(Dense(10, activation='relu')) #adding 10 hidden layers nodes and input shape
    model.add(Dense(1))#adding output layer

    model.compile(optimizer='adam', loss='mean_squared_error') #compiling the model

    model.fit(X_train, y_train, epochs=50, verbose=0) #fitting the model

    y_pred = model.predict(X_test) #predicting the model

    mse = mean_squared_error(y_test, y_pred) #calculating the mean squared error

    return mse #returning the mean squared error

In [83]:
mse_list = [] #intialising empty list of mean squared errors
for i in range(50):
    mse = baseline_model() 
    mse_list.append(mse) #appending each mean squared error to the list

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [84]:
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)


print("Mean MSE:", mean_mse)
print("Standard Deviation of MSE:", std_mse) 

Mean MSE: 407.8208750009391
Standard Deviation of MSE: 955.7130614291005


Mean MSE: 407.8208750009391, 
Standard Deviation of MSE: 955.7130614291005 


Part B: Normalized Data

In [85]:
normalized_predictors = (predictors - predictors.mean()) / predictors.std() #normalizing the predictors data

In [86]:
normalized_predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [87]:
predictors = normalized_predictors #re-assigning to predictors because predictors is defined in the model

In [88]:
n_mse_list = [] #empty list for normalized data mean squared errors

for i in range(50):
    n_mse = baseline_model()
    n_mse_list.append(n_mse)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [89]:
mean_mse = np.mean(n_mse_list)
std_mse = np.std(n_mse_list)


print("Mean MSE:", mean_mse)
print("Standard Deviation of MSE:", std_mse)

Mean MSE: 373.55482438416146
Standard Deviation of MSE: 98.48302664868066


Mean MSE: 373.55482438416146
Standard Deviation of MSE: 98.48302664868066

Compared to above mean there is decrease compared to part A

Part C: Increase Number of Epoch to 100

In [92]:
def epoch_model(): #defining another model because of change in the number of epochs

    # random_state_list = []  #initializing random state list
    random_state = np.random.randint(1,1000)
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=random_state)
    # random_state_list.append(random_state)
    shape = (predictors.shape[1],)
    model = Sequential()
    model.add(Input(shape=shape))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')

    model.fit(X_train, y_train, epochs=100, verbose=0)#changed number of epochs from 50 to 100

    y_pred = model.predict(X_test) #predicting the model

    mse = mean_squared_error(y_test, y_pred) #calculating the mean squared error

    return mse #returning the mean squared error

In [93]:
e_mse_list = [] #empty list for mean squared errors of epoch_model
for i in range(50):
    e_mse = epoch_model()
    e_mse_list.append(e_mse)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [94]:
mean_mse = np.mean(e_mse_list)
std_mse = np.std(e_mse_list)


print("Mean MSE:", mean_mse)
print("Standard Deviation of MSE:", std_mse)

Mean MSE: 165.98667934054484
Standard Deviation of MSE: 17.58385190615498


Mean MSE: 165.98667934054484
Standard Deviation of MSE: 17.58385190615498

As we can see there is significant decrease in mean of mean squared errors compared to part B

Part D: Addition of two Hidden Layers

In [97]:
def layer_model(): #define another model because of change in the number of layers

    # random_state_list = []  #initializing random state list
    random_state = np.random.randint(1,1000) #choosing a random state
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=random_state)
    # random_state_list.append(random_state) #storing the random state number
    shape = (predictors.shape[1],)
    model = Sequential() #initialize the model
    model.add(Input(shape=shape))
    model.add(Dense(10, activation='relu')) #added an extra layer with 10 nodes
    model.add(Dense(10, activation='relu')) #another layer with 10 nodes
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')

    model.fit(X_train, y_train, epochs=50, verbose=0)

    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    return mse

In [None]:
l_mse_list = [] #empty list for mean squared errors of layer model
for i in range(50):
    l_mse = layer_model()
    l_mse_list.append(l_mse)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
mean_mse = np.mean(l_mse_list)
std_mse = np.std(l_mse_list)


print("Mean MSE:", mean_mse)
print("Standard Deviation of MSE:", std_mse)

Mean MSE: 126.34715403383832
Standard Deviation of MSE: 17.02369565998775

As we see the mean of mean squared errors compared to that of part B decreased even more with addition of two hidden layers compared to adding 100 epochs and 1 hidden layer