# Assignment: Build a Regression Model in Keras

Install Required Libraries


In [None]:
# Install all Libraries required for this assignment are listed below. 

!pip install numpy==2.0.2
!pip install pandas==2.2.2
!pip install tensorflow==2.18.0
!pip install scikit-learn

# A. Build a baseline model (5 marks) 

Use the Keras library to build a neural network with the following:

- One hidden layer of 10 nodes, and a ReLU activation function

- Use the adam optimizer and the mean squared error  as the loss function.

1. Randomly split the data into training and test sets by holding out 30% of the data for testing. You can use the `train_test_split` helper function from Scikit-learn.

2. Train the model on the training data using 50 epochs.

3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

5. Report the mean and the standard deviation of the mean squared errors.

Submit your Jupyter Notebook with your code and comments.


In [None]:
# import the Keras, Pandas, and Numpy Libraries
import pandas as pd
import numpy as np
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the concrete dataset from a CSV file into a pandas DataFrame.
# The path is relative, so concrete_data.csv must already be present in the
# working directory before this cell is run.
filepath = 'concrete_data.csv'
concrete_data = pd.read_csv(filepath_or_buffer=filepath)

# Last expression of the cell: preview the first rows of the DataFrame.
concrete_data.head()


In [None]:
# Collect the column labels of the dataset and show them
concrete_data_columns = concrete_data.columns
print(concrete_data_columns)

# Separate the model inputs from the value to be predicted
feature_columns = concrete_data_columns[concrete_data_columns != 'Strength']
predictors = concrete_data[feature_columns]  # every column except Strength
target = concrete_data['Strength']           # the Strength column alone




In [None]:
# Import Keras packages
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input


In [None]:
# define regression model
def regression_model(n_features=8, hidden_layers=1, units=10):
    """Build and compile a fully connected Keras regression network.

    Called with no arguments it returns the baseline network: 8 inputs, one
    hidden layer of 10 ReLU nodes, and a single output node.

    Args:
        n_features: Number of input columns the network expects.
        hidden_layers: Number of hidden Dense layers to stack.
        units: Number of nodes in each hidden layer.

    Returns:
        A compiled Sequential model that uses the adam optimizer and the
        mean squared error loss.
    """
    # create model
    model = Sequential()
    model.add(Input(shape=(n_features,)))
    for _ in range(hidden_layers):
        model.add(Dense(units, activation='relu'))
    # single output node, added without an activation argument
    model.add(Dense(1))

    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [None]:
# Seed the Python, NumPy and backend random generators so that weight
# initialisation and batch shuffling are repeatable, in line with the fixed
# random_state used for the split below
keras.utils.set_random_seed(42)

# build the model
model = regression_model()

# Split the data into training and test sets, holding out 30% for testing
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)

# Train the model on the training data using 50 epochs; the test split is
# passed as validation data so that its loss is reported alongside the
# training loss
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=2)




In [None]:
# List to store the mean squared error of each repetition
mse_list = []

# Repeat the split, train and evaluate steps 50 times
for i in range(50):
    # Seed Keras (Python, NumPy and backend generators) with the same value as
    # the split so that every repetition, and hence the whole list, is reproducible
    keras.utils.set_random_seed(i)

    # Split the data into training and test sets (a different split per repetition)
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=i)

    # Create a fresh model and train it on the training data only; the test set
    # is not passed to fit() because it is scored separately below
    model = regression_model()
    model.fit(X_train, y_train, epochs=50, verbose=0)

    # Predict on the test set; verbose=0 avoids a progress bar per repetition
    y_pred = model.predict(X_test, verbose=0)

    # Calculate the mean squared error between actual and predicted strength
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

# Print the list of mean squared errors
print(mse_list)

In [None]:
# Summarise the repeated runs: mean and standard deviation of the collected MSEs
mse_values = np.asarray(mse_list)
mean_mse = mse_values.mean()  # average MSE over all repetitions
std_mse = mse_values.std()    # spread of the MSEs around that average

# Show both statistics
print(f"Mean of Mean Squared Errors: {mean_mse}")
print(f"Standard Deviation of Mean Squared Errors: {std_mse}")

# B. Normalize the data (5 marks) 

Repeat Part A but use a normalized version of the data. Recall that one way to normalize the data is by subtracting the mean from the individual predictors and dividing by the standard deviation.

How does the mean of the mean squared errors compare to that from Step A?

In [None]:

# Normalize the predictors: subtract each column's mean and divide by its standard deviation
# NOTE(review): the statistics are computed over all rows before the train/test
# split, so test rows contribute to them — confirm this is acceptable here
predictors_norm = (predictors - predictors.mean()) / predictors.std()

# List to store the mean squared error of each repetition
mse_list = []


# Repeat the split, train and evaluate steps 50 times
for i in range(50):
    # Seed Keras (Python, NumPy and backend generators) with the same value as
    # the split so that every repetition is reproducible
    keras.utils.set_random_seed(i)

    # Split the normalized data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=i)

    # Create a fresh model and train it on the training data only; the test set
    # is not passed to fit() because it is scored separately below
    model = regression_model()
    model.fit(X_train, y_train, epochs=50, verbose=0)

    # Predict on the test set; verbose=0 avoids a progress bar per repetition
    y_pred = model.predict(X_test, verbose=0)

    # Calculate the mean squared error between actual and predicted strength
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

# Print the list of mean squared errors
print(mse_list)

# Report the mean and the standard deviation of the mean squared errors.

mean_mse = np.mean(mse_list)  # Calculate the mean of the MSEs
std_mse = np.std(mse_list)    # Calculate the standard deviation of the MSEs

# Print the results
print(f"Mean of Mean Squared Errors: {mean_mse}")
print(f"Standard Deviation of Mean Squared Errors: {std_mse}")

#### How does the mean of the mean squared errors compare to that from Step A?

#### Answer: Normalizing the predictors reduced the mean of the mean squared errors from 389.29 (Step A) to 368.07 (Step B).

# C. Increase the number of epochs (5 marks)

Repeat Part B but use 100 epochs this time for training.



In [None]:

# Repeat the split and train steps 50 times, training for 100 epochs each time
mse_list = []

for i in range(50):
    # Seed Keras (Python, NumPy and backend generators) with the same value as
    # the split so that every repetition is reproducible
    keras.utils.set_random_seed(i)

    # Split the normalized data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=i)

    # Create a fresh model and train it on the training data only; the test set
    # is not passed to fit() because it is scored separately below
    model = regression_model()
    model.fit(X_train, y_train, epochs=100, verbose=0)

    # Predict on the test set; verbose=0 avoids a progress bar per repetition
    y_pred = model.predict(X_test, verbose=0)

    # Calculate the mean squared error between actual and predicted strength
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

# Print the list of mean squared errors
print(mse_list)

# Report the mean and the standard deviation of the mean squared errors.

mean_mse = np.mean(mse_list)  # Calculate the mean of the MSEs
std_mse = np.std(mse_list)    # Calculate the standard deviation of the MSEs

# Print the results
print(f"Mean of Mean Squared Errors: {mean_mse}")
print(f"Standard Deviation of Mean Squared Errors: {std_mse}")

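#### How does the mean of the mean squared errors compare to that from Step B?

#### Answer: TODO — compare the mean printed above (100 epochs) with the Step B mean (50 epochs) and state the result here.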

# D. Increase the number of hidden layers (5 marks)

Repeat part B but use a neural network with the following instead:

- Three hidden layers, each of 10 nodes and ReLU activation function.

How does the mean of the mean squared errors compare to that from Step B?

In [None]:
# Increase the number of hidden layers to 3, each of 10 nodes with a ReLU activation function.
mse_list = []

def deep_regression_model():
    """Build and compile the network with three hidden layers.

    The function has its own name so that it does not replace the
    single-hidden-layer regression_model() when earlier cells are re-run.

    Returns:
        A compiled Sequential model with 8 inputs, three hidden Dense layers
        of 10 ReLU nodes each and one output node, using the adam optimizer
        and the mean squared error loss.
    """
    # create model
    model = Sequential()
    model.add(Input(shape=(8,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))

    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Repeat the split, train and evaluate steps 50 times
for i in range(50):
    # Seed Keras (Python, NumPy and backend generators) with the same value as
    # the split so that every repetition is reproducible
    keras.utils.set_random_seed(i)

    # Split the normalized data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=i)

    # Create a fresh three-hidden-layer model and train it on the training data
    # only; the test set is not passed to fit() because it is scored below
    model = deep_regression_model()
    model.fit(X_train, y_train, epochs=50, verbose=0)

    # Predict on the test set
    y_pred = model.predict(X_test, verbose=0)

    # Calculate the mean squared error between actual and predicted strength
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

# Print the list of mean squared errors
print(mse_list)

# Report the mean and the standard deviation of the mean squared errors.

mean_mse = np.mean(mse_list)  # Calculate the mean of the MSEs
std_mse = np.std(mse_list)    # Calculate the standard deviation of the MSEs

# Print the results
print(f"Mean of Mean Squared Errors: {mean_mse}")
print(f"Standard Deviation of Mean Squared Errors: {std_mse}")