In [54]:
# for array computations and loading data
import numpy as np

# for ploting
import matplotlib.pyplot as plt

# for building linear regression models and preparing data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# for building and training neural networks
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# suppress warnings
tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)

In [69]:
#load the dataset
data = np.loadtxt('/Users/berekettesfaye/Desktop/ML_projects/coursera_evaluation/data_w3_ex1.csv',delimiter = ',')

# split the inputs and  outputs into separate arrays
x = data[:, 0]
y = data[:, 1]

# convert 1-D arrays into 2-S because the commandes later will requaire it
x = np.expand_dims(x, axis=1)
y = np.expand_dims(y, axis=1)

print(f"The shape of the input(x): {x.shape}")
print(f"The shape of the target value(y): {y.shape}")


The shape of the input(x): (50, 1)
The shape of the target value(y): (50, 1)



----Split the dataset into training, cross validation, and test sets----

training set - used to train the model

cross validation set (also called validation, development, or dev set) - used to evaluate the different model configurations you are choosing from. For example, you can use this to make a decision on what polynomial features to add to your dataset.

test set - used to give a fair estimate of your chosen model's performance against new examples. This should not be used to make decisions while you are still developing the models.

In [56]:
# get the 60% of the dataset as the training set. and save the rest 40% in x_,y_
x_train,x_,y_train,y_ = train_test_split(x,y,test_size=0.40, random_state=1)

#use  x_ and y_ to creat cross validation and test set
x_cv,x_test, y_cv, y_test = train_test_split(x_,y_,test_size= 0.50, random_state=1)

# delete x_, y_ because we are not gonna use it
del x_, y_

print(f"the shape of the training set (input) is: {x_train.shape}")
print(f"the shape of the training set (target) is: {y_train.shape}\n")
print(f"the shape of the cross validation set (input) is: {x_cv.shape}")
print(f"the shape of the cross validation set (target) is: {y_cv.shape}\n")
print(f"the shape of the test set (input) is: {x_test.shape}")
print(f"the shape of the test set (target) is: {y_test.shape}")

the shape of the training set (input) is: (30, 1)
the shape of the training set (target) is: (30, 1)

the shape of the cross validation set (input) is: (10, 1)
the shape of the cross validation set (target) is: (10, 1)

the shape of the test set (input) is: (10, 1)
the shape of the test set (target) is: (10, 1)


- i will perform feature scaleing on the training dataset to help my model to converge faster
  

In [57]:
# Intialize the class
scaler_neural = StandardScaler()

# Computes the mean and standard deviation of the training set then transform it
x_train_scaled = scaler_neural.fit_transform(x_train)
x_cv_scaled = scaler_neural.transform(x_cv)
x_test_scaled = scaler_neural.transform(x_test)

print(f"Computed mean of the training set: {scaler_neural.mean_.squeeze():.2f}")
print(f"Computed standard deviation of the training set: {scaler_neural.scale_.squeeze():.2f}")


Computed mean of the training set: 2504.06
Computed standard deviation of the training set: 574.85


* i will be evaluating between three neural network architectures
  1. model_1 has 1 input layer with 25 units and activation is relu, 1 hidden layer 15 units activation is relu and 1 output layer with 1 unit and activation is linear
  2. model_2 has 1 input layer with 20 units activation is relu, 3 hidden layer the first two has 12 units and the last one has 20 units and activation is relu and it has one output layer it has 1 unit  activation is linear
  3. model_3 has 1 input layer with 32 units and activation is relu and it has 4 hidden layer (1 h_layer 16 units, 2 h_layer 8 units, 3 h_layer 4 units, 4 h_layer 12 units) and activation is relu for all of them and it has 1 output layer with one unit and activation is linear

In [61]:
def build_models():
    tf.random.set_seed(20)

    model_1 = Sequential(
        [
            Dense(25, activation = 'relu'),
            Dense(15, activation = 'relu'),
            Dense(1, activation = 'linear')
        ],
        name = 'model_1'
    )

    tf.random.set_seed(20)
    model_2 = Sequential(
        [
            Dense(20, activation = 'relu'),
            Dense(12, activation = 'relu'),
            Dense(12, activation = 'relu'),
            Dense(20, activation = 'relu'),
            Dense(1, activation = 'linear')
        ],
        name = 'model_2'
    )
    
    tf.random.set_seed(20)
    model_3 = Sequential(
        [
            Dense(32, activation = 'relu'),
            Dense(16, activation = 'relu'),
            Dense(8, activation = 'relu'),
            Dense(4, activation = 'relu'),
            Dense(12, activation = 'relu'),
            Dense(1, activation = 'linear')
        ],
        name = 'model_3'
    )

    model_list = [model_1, model_2, model_3]

    return model_list
    

* Build and train the models
    -For each model, i will  record the training and cross validation errors.

In [62]:
# Initialize lists that will contain the errors for each model
nn_train_mses = []
nn_cv_mses = []

# build the models
nn_models = build_models()
# Print summaries to ensure the models are built

# loop over the models 
for model in nn_models:
     
    #setup the loss and optimizer
    model.compile(
        loss = 'mse',
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.1),
    )
    print(f"Training {model.name}...")

    # Train the model
    model.fit(
        x_train_scaled,y_train, 
        epochs=300,
        verbose=0
    )
    print("Done!\n")
  

    # Recored the training MSE
    yhat = model.predict(x_train_scaled)
    train_mse = mean_squared_error(y_train,yhat) / 2
    nn_train_mses.append(train_mse)

    # Record the cross validation MSE
    yhat = model.predict(x_cv_scaled)
    cv_mse = mean_squared_error(y_cv,yhat) / 2
    nn_cv_mses.append(cv_mse)

#print the results
print("Results: ")
for model_num in range(len(nn_train_mses)):
    print(
        f"Model {model_num + 1}: Training MSE: {nn_train_mses[model_num]:.2f}, " +
        f"CV MSE: {nn_cv_mses[model_num]:.2f}"
    )

Training model_1...
Done!

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Training model_2...
Done!

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Training model_3...
Done!

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Results: 
Model 1: Training MSE: 406.19, CV MSE: 551.78
Model 2: Training MSE: 73.40, CV MSE: 112.28
Model 3: Training MSE: 73.78, CV MSE: 112.96


* From the recorded errors , i will decide the best model for my application. and use it to estimate the error on the test set to know how well it generalizesn to new examples

In [68]:
# Select the model with the lowest CV MSE
model_num = 2
# compute the test MSE
yhat = nn_models[model_num - 1 ].predict(x_test_scaled)
test_mse = mean_squared_error(y_test,yhat)/2

print(f"Selected Model: {model_num}")
print(f"Training MSE: {nn_train_mses[model_num-1]:.2f}")
print(f"Cross Validation MSE: {nn_cv_mses[model_num-1]:.2f}")
print(f"Test MSE: {test_mse:.2f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Selected Model: 2
Training MSE: 73.40
Cross Validation MSE: 112.28
Test MSE: 131.57
