Author: Joan Mas Castella

Part A

Importing Libararies 

In [46]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split  
from sklearn.metrics import mean_squared_error 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import keras

In [None]:
#Downloading dataset
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Cleaning data

In [None]:
concrete_data.isnull().sum()

In [None]:
concrete_data.head()

In [50]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data.drop(columns=['Strength'])
target = concrete_data['Strength'] 

In [None]:
predictors.shape

In [None]:
target

Creating Model/Testing

In [None]:
def regression_model(input_dim):
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(input_dim,)))  # Hidden layer with 10 nodes
    model.add(Dense(1))  # Output layer
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Repeat the training and evaluation 50 times
n_repeats = 50
mse_list = []

for _ in range(n_repeats):
    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)
    
    # Create and fit the model
    model = regression_model(input_dim=X_train.shape[1])
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=0)  # Setting verbose to 0 to suppress output
    
    # Predict on the test set and calculate mean squared error
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mse_list.append(mse)

# Calculate mean and standard deviation of MSE
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)

print(f"Mean MSE: {mean_mse}")
print(f"Standard Deviation of MSE: {std_mse}")

Part B

In [54]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [55]:
n_cols = predictors_norm.shape[1] # number of predictors

In [58]:
def regression_model(n_cols):
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols)))  # Hidden layer with 10 nodes
    model.add(Dense(1))  # Output layer
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Repeat the training and evaluation 50 times
n_repeats = 50
mse_list = []

for _ in range(n_repeats):
    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=42)
    
    # Create and fit the model
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=0)  # Setting verbose to 0 to suppress output
    
    # Predict on the test set and calculate mean squared error
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mse_list.append(mse)

# Calculate mean and standard deviation of MSE
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)

print(f"Mean MSE: {mean_mse}")
print(f"Standard Deviation of MSE: {std_mse}")

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 274us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 265us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 318us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 281us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 290us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 275us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 267us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 273us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 344us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 267us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 259us/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 251us/step
[1m10/10[0m [32m━━━━━━