In [None]:
%load_ext autoreload
%autoreload 1
# custom functions being developed interactively
%aimport utils_practice_version
import utils_practice_version as utils

import numpy as np

# for building linear regression models and preparing data
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# for building and training neural networks
import tensorflow as tf

# Standard-library logging is needed below to quiet TensorFlow's logger
import logging

# Print NumPy arrays with two digits of precision
np.set_printoptions(precision=2)

# Quiet TensorFlow: raise its logger to ERROR, turn off Keras interactive
# logging, and set AutoGraph verbosity to 0
tf_logger = logging.getLogger("tensorflow")
tf_logger.setLevel(logging.ERROR)
tf.keras.utils.disable_interactive_logging()
tf.autograph.set_verbosity(0)

In [None]:
# Part 1 - Regression
# Read the comma-separated file into one NumPy array
data = np.loadtxt("./data/data_w3_ex1.csv", delimiter=",")

# Column 0 holds the inputs and column 1 the targets. A new trailing axis is
# added to each so they are 2-D, which the later commands require.
x = data[:, 0][:, np.newaxis]
y = data[:, 1][:, np.newaxis]

print(f"The shape of the input x is : {x.shape}")
print(f"The shape of the output y is : {y.shape}")

In [None]:
# Plot all (x, y) pairs before any splitting, using the project plotting helper
utils.plot_dataset(x=x, y=y, title="input vs. target")

In [None]:
# Split the dataset into training, cross validation, and test sets
# First cut: 60% goes to training; the other 40% is held back temporarily.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    x, y, test_size=0.40, random_state=1
)

# Second cut: the held-back 40% is divided evenly into the cross validation
# set and the test set.
x_cv, x_test, y_cv, y_test = train_test_split(
    x_holdout,
    y_holdout,
    test_size=0.50,
    random_state=1,
)

# The held-back arrays are not needed once the second cut is done
del x_holdout, y_holdout
print(f"the shape of the training set (input) is: {x_train.shape}")
print(f"the shape of the training set (target) is: {y_train.shape}\n")
print(f"the shape of the cross validation set (input) is: {x_cv.shape}")
print(f"the shape of the cross validation set (target) is: {y_cv.shape}\n")
print(f"the shape of the test set (input) is: {x_test.shape}")
print(f"the shape of the test set (target) is: {y_test.shape}")

In [None]:
# Plot the training, cross validation, and test subsets with the project helper
utils.plot_train_cv_test(
    x_train,
    y_train,
    x_cv,
    y_cv,
    x_test,
    y_test,
    title="input vs. target",
)

In [None]:
# Fit a Linear Model
# Feature Scaling
# Create the scaler, then learn the training-set statistics and apply them to
# the training inputs in a single call
scaler_linear = StandardScaler()
X_train_scaled = scaler_linear.fit_transform(x_train)

# Report the statistics the scaler learned from the training set
train_mean = scaler_linear.mean_.squeeze()
train_std = scaler_linear.scale_.squeeze()
print(f"Computed mean of the training set: {train_mean:.2f}")
print(f"Computed standard deviation of the training set: {train_std:.2f}")

# Plot the scaled inputs against the (unscaled) targets
utils.plot_dataset(x=X_train_scaled, y=y_train, title="scaled input vs. target")

In [None]:
# Train the model
# Linear regression with default settings
linear_model = LinearRegression()

# Fit on the scaled training inputs. This stays the last expression of the
# cell so the notebook displays the fitted estimator.
linear_model.fit(X=X_train_scaled, y=y_train)

In [None]:
# Evaluate the model
# Predictions for the scaled training inputs
yhat = linear_model.predict(X_train_scaled)

# scikit-learn's MSE, halved so it matches the manual formula below
sklearn_half_mse = mean_squared_error(y_train, yhat) / 2
print(f"training MSE (using sklearn function): {sklearn_half_mse}")

# Same quantity by hand: sum of squared errors divided by twice the number of examples
squared_errors = (yhat - y_train) ** 2
mse = np.sum(squared_errors) / (2 * len(yhat))
print(f"training MSE (manually calculated): {mse}")

In [None]:
# Scale the cross validation inputs with the statistics learned from the
# training set (transform only, so the scaler is not refit)
X_cv_scaled = scaler_linear.transform(x_cv)

print(f"Mean used to scale the CV set: {np.squeeze(scaler_linear.mean_):.2f}")
print(
    f"Standard deviation used to scale the CV set: {np.squeeze(scaler_linear.scale_):.2f}"
)

# Predictions for the scaled cross validation inputs
yhat = linear_model.predict(X_cv_scaled)

# scikit-learn's MSE, halved
cv_half_mse = mean_squared_error(y_cv, yhat) / 2
print(f"Cross validation MSE: {cv_half_mse}")

In [None]:
# Adding Polynomial Features
# Create the additional features
# Degree-2 polynomial expansion without the constant (bias) column
poly = PolynomialFeatures(degree=2, include_bias=False)

# Fit the expansion on the training inputs and apply it in the same call
X_train_mapped = poly.fit_transform(x_train)

# Show the first 5 rows: the left column is `x`, the right column is `x^2`
print(X_train_mapped[0:5])

In [None]:
# Scale the inputs as before.
# A separate scaler is used for the polynomial features; it learns its mean
# and standard deviation from the mapped training set and transforms it.
scaler_poly = StandardScaler()
X_train_mapped_scaled = scaler_poly.fit_transform(X_train_mapped)

# Show the first 5 rows of the scaled training set
print(X_train_mapped_scaled[0:5])

In [None]:
# Create the regressor and fit it on the scaled polynomial features
# (`fit` returns the estimator itself, so this can be one statement)
model = LinearRegression().fit(X_train_mapped_scaled, y_train)

# Training error: halved MSE of the predictions on the training set
yhat = model.predict(X_train_mapped_scaled)
train_half_mse = mean_squared_error(y_train, yhat) / 2
print(f"Training MSE: {train_half_mse}")

# Send the cross validation inputs through the already-fitted transforms:
# polynomial expansion first, then scaling with the training-set statistics
X_cv_mapped = poly.transform(x_cv)
X_cv_mapped_scaled = scaler_poly.transform(X_cv_mapped)

# Cross validation error: halved MSE of the predictions on the CV set
yhat = model.predict(X_cv_mapped_scaled)
cv_half_mse = mean_squared_error(y_cv, yhat) / 2
print(f"Cross Validation MSE: {cv_half_mse}")

In [None]:
# Highest polynomial degree to try. The loop and the plot both use `degrees`,
# so changing this single value keeps them in sync.
max_degree = 10
degrees = range(1, max_degree + 1)

# Per-degree results. Each list is appended to once per iteration, so index i
# holds the entry for degree i + 1.
train_mses = []
cv_mses = []
models = []
polys = []
scalers = []

# Fit one model per degree, each one degree higher than the last
for degree in degrees:

    # Add polynomial features to the training set
    poly = PolynomialFeatures(degree, include_bias=False)
    X_train_mapped = poly.fit_transform(x_train)
    polys.append(poly)

    # Scale the training set (statistics are learned from the training set only)
    scaler_poly = StandardScaler()
    X_train_mapped_scaled = scaler_poly.fit_transform(X_train_mapped)
    scalers.append(scaler_poly)

    # Create and train the model
    model = LinearRegression()
    model.fit(X_train_mapped_scaled, y_train)
    models.append(model)

    # Compute the training MSE (halved)
    yhat = model.predict(X_train_mapped_scaled)
    train_mse = mean_squared_error(y_train, yhat) / 2
    train_mses.append(train_mse)

    # Apply the fitted polynomial mapping and scaler to the cross validation set
    X_cv_mapped = poly.transform(x_cv)
    X_cv_mapped_scaled = scaler_poly.transform(X_cv_mapped)

    # Compute the cross validation MSE (halved)
    yhat = model.predict(X_cv_mapped_scaled)
    cv_mse = mean_squared_error(y_cv, yhat) / 2
    cv_mses.append(cv_mse)

# Plot training and cross validation error against the polynomial degree
utils.plot_train_cv_mses(
    degrees, train_mses, cv_mses, title="degree of polynomial vs. train and CV MSEs"
)

In [None]:
# Choosing the best model
# Position of the smallest cross validation MSE in the results list
best_index = np.argmin(cv_mses)

# List positions start at 0 while degrees start at 1, so add 1 to get the degree
degree = best_index + 1
print(f"Lowest CV MSE is found in the model with degree={degree}")

In [None]:
# Look up the transforms and model stored for the chosen degree
# (the lists are 0-indexed, degrees are 1-indexed)
best = degree - 1
chosen_poly, chosen_scaler, chosen_model = polys[best], scalers[best], models[best]

# Add polynomial features to the test set, then scale it
X_test_mapped = chosen_poly.transform(x_test)
X_test_mapped_scaled = chosen_scaler.transform(X_test_mapped)

# Compute the test MSE (halved)
yhat = chosen_model.predict(X_test_mapped_scaled)
test_mse = mean_squared_error(y_test, yhat) / 2

print(f"Training MSE: {train_mses[best]:.2f}")
print(f"Cross Validation MSE: {cv_mses[best]:.2f}")
print(f"Test MSE: {test_mse:.2f}")

In [None]:
# Part 2 - Neural Networks
# The same model selection process can also be used when choosing between
# different neural network architectures.
# Step 1 - Prepare the data
# The training, cross validation, and test sets from the previous section are
# reused. Neural networks can learn non-linear relationships, so adding
# polynomial features is optional. The code is kept so you can try it later
# and see its effect on the results. With the default degree of 1, x_train,
# x_cv, and x_test are used as is (i.e. without additional polynomial features).
# Add polynomial features
degree = 1
poly = PolynomialFeatures(degree, include_bias=False)

# Fit the mapping on the training set, then apply the fitted mapping to the
# cross validation and test sets
X_train_mapped = poly.fit_transform(x_train)
X_cv_mapped, X_test_mapped = (poly.transform(part) for part in (x_cv, x_test))

In [None]:
# Scale the features using the z-score
# The scaler learns its statistics from the training set only; the cross
# validation and test sets are transformed with those same statistics.
scaler = StandardScaler()
X_train_mapped_scaled = scaler.fit_transform(X_train_mapped)
X_cv_mapped_scaled, X_test_mapped_scaled = (
    scaler.transform(part) for part in (X_cv_mapped, X_test_mapped)
)

In [None]:
# # Initialize dicts that will contain the errors for each model (keyed by model name) and the training histories (keyed by seed)
# nn_train_mses = {"model_1": [], "model_2": [], "model_3": []}
# nn_cv_mses = {"model_1": [], "model_2": [], "model_3": []}
# histories = {42: [], 1: [], 123: [], 99: [], 2026: [], 20: []}

# seeds = [42, 1, 123, 99, 2026, 20]  # 6 different starting points
# alpha = 0.1
# epochs = 300
# for seed in seeds:
#     tf.random.set_seed(seed)

#     # Build the models
#     nn_models = utils.build_models()
#     print("\n----------------------------------------------------------")
#     print(f"SEED = {seed}, Learning Rate = {alpha}, epochs = {epochs}")
#     print("----------------------------------------------------------")

#     # Loop over the models
#     for i, model in enumerate(nn_models):

#         # Setup the loss and optimizer
#         model.compile(
#             loss="mse", optimizer=tf.keras.optimizers.Adam(learning_rate=alpha)
#         )

#         # print(f"Training {model.name}...")

#         # Train the model
#         history = model.fit(X_train_mapped_scaled, y_train, epochs=epochs, verbose=0)
#         histories[seed].append(history)

#         # print("Done!\n")

#         # Record the training MSEs
#         yhat = model.predict(X_train_mapped_scaled)
#         train_mse = mean_squared_error(y_train, yhat) / 2
#         nn_train_mses[model.name].append(train_mse)

#         # Record the cross validation MSEs
#         yhat = model.predict(X_cv_mapped_scaled)
#         cv_mse = mean_squared_error(y_cv, yhat) / 2
#         nn_cv_mses[model.name].append(cv_mse)

#     # print results
#     print("RESULTS:")
#     for model in nn_models:
#         print(
#             f"Model {model.name}: Training MSE: {nn_train_mses[model.name][-1]:.2f}, CV MSE: {nn_cv_mses[model.name][-1]:.2f}"
#         )
# # Analyze results
# print("\n------------------------Averages----------------------------------")
# for name, errors in nn_train_mses.items():
#     mean_err = np.mean(errors)
#     std_err = np.std(errors)
#     print(f"{name}: Mean Training MSE = {mean_err:.2f} (+/- {std_err:.2f})")

# for name, errors in nn_cv_mses.items():
#     mean_err = np.mean(errors)
#     std_err = np.std(errors)
#     print(f"{name}: Mean CV MSE = {mean_err:.2f} (+/- {std_err:.2f})")

# utils.plot_nn_loss_curves(histories, alpha, seeds)

In [None]:
# Training and cross validation errors, one entry per model in build order
nn_train_mses, nn_cv_mses = [], []

# Build the candidate networks with the project helper
# NOTE(review): the classification section calls build_models2(random_state=20);
# presumably the positional 20 here is the same argument - confirm.
nn_models = utils.build_models2(20)

# Loop over the models
for model in nn_models:

    # Mean-squared-error loss with the Adam optimizer
    adam = tf.keras.optimizers.Adam(learning_rate=0.1)
    model.compile(loss="mse", optimizer=adam)

    print(f"Training {model.name}...")

    # Train silently on the scaled training set
    model.fit(X_train_mapped_scaled, y_train, epochs=300, verbose=0)

    print("Done!\n")

    # Training error: halved MSE on the training set
    yhat = model.predict(X_train_mapped_scaled)
    train_mse = mean_squared_error(y_train, yhat) / 2
    nn_train_mses.append(train_mse)

    # Cross validation error: halved MSE on the CV set
    yhat = model.predict(X_cv_mapped_scaled)
    cv_mse = mean_squared_error(y_cv, yhat) / 2
    nn_cv_mses.append(cv_mse)


# Print one line per model; model numbers are shown starting at 1
print("RESULTS:")
for model_num, (train_err, cv_err) in enumerate(zip(nn_train_mses, nn_cv_mses)):
    print(
        f"Model {model_num+1}: Training MSE: {train_err:.2f}, "
        + f"CV MSE: {cv_err:.2f}"
    )

In [None]:
# Select the model with the lowest CV MSE.
# The choice is computed from the recorded errors instead of being hard-coded,
# so it stays correct when retraining changes the results. np.argmin returns
# the first position on ties; 1 is added because model numbers start at 1
# while list indices start at 0.
model_num = int(np.argmin(nn_cv_mses)) + 1

# Compute the test MSE (halved) for the selected model
yhat = nn_models[model_num - 1].predict(X_test_mapped_scaled)
test_mse = mean_squared_error(y_test, yhat) / 2

print(f"Selected Model: {model_num}")
print(f"Training MSE: {nn_train_mses[model_num-1]:.2f}")
print(f"Cross Validation MSE: {nn_cv_mses[model_num-1]:.2f}")
print(f"Test MSE: {test_mse:.2f}")

In [None]:
# Model evaluation for a classification task
# Step 1 - Load the dataset
data = np.loadtxt("./data/data_w3_ex2.csv", delimiter=",")

# Every column except the last is an input; the last column is the target
x_bc = data[:, :-1]

# The targets get a trailing axis so they are 2-D, which the later commands
# require (x is already 2-D)
y_bc = data[:, -1][:, np.newaxis]

print(f"the shape of the inputs x is: {x_bc.shape}")
print(f"the shape of the targets y is: {y_bc.shape}")

In [None]:
# Plot the classification dataset with the project helper
utils.plot_bc_dataset(
    x=x_bc,
    y=y_bc,
    title="x1 vs. x2",
)

In [None]:
# Step 2 - Preprocessing - Split the dataset
# First cut: 60% goes to training; the other 40% is held back temporarily.
x_bc_train, x_bc_holdout, y_bc_train, y_bc_holdout = train_test_split(
    x_bc,
    y_bc,
    test_size=0.40,
    random_state=1,
)

# Second cut: the held-back 40% is divided evenly into the cross validation
# set and the test set.
x_bc_cv, x_bc_test, y_bc_cv, y_bc_test = train_test_split(
    x_bc_holdout,
    y_bc_holdout,
    test_size=0.50,
    random_state=1,
)

# The held-back arrays are not needed once the second cut is done
del x_bc_holdout, y_bc_holdout

print(f"the shape of the training set (input) is: {x_bc_train.shape}")
print(f"the shape of the training set (target) is: {y_bc_train.shape}")
print(f"the shape of the cross validation set (input) is: {x_bc_cv.shape}")
print(f"the shape of the cross validation set (target) is: {y_bc_cv.shape}")
print(f"the shape of the test set (input) is: {x_bc_test.shape}")
print(f"the shape of the test set (target) is: {y_bc_test.shape}")

In [None]:
# Preprocessing - Scale the features
# NOTE(review): this rebinds `scaler_linear`, the name used for the regression
# scaler in Part 1; re-running Part 1 cells after this one would use this
# scaler instead. Consider a dedicated name if cells are run out of order.
scaler_linear = StandardScaler()

# Learn the mean and standard deviation from the training set and transform it,
# then transform the cross validation and test sets with those same statistics
x_bc_train_scaled = scaler_linear.fit_transform(x_bc_train)
x_bc_cv_scaled, x_bc_test_scaled = (
    scaler_linear.transform(part) for part in (x_bc_cv, x_bc_test)
)

In [None]:
# Understanding classification model performance evaluation through an example
# The performance metric is the fraction of the data that the model has misclassified.
# Sample model output
probabilities = np.array([0.2, 0.6, 0.7, 0.3, 0.8])

# Decision threshold, defined once and reused below: a probability greater
# than or equal to it is classified as 1, anything lower as 0.
threshold = 0.5
predictions = np.where(probabilities >= threshold, 1, 0)

# Ground truth labels
ground_truth = np.array([1, 1, 1, 1, 1])

print(f"probabilities: {probabilities}")
print(f"predictions with threshold={threshold}: {predictions}")
print(f"targets: {ground_truth}")
print(f"fraction of misclassified data : {np.mean(predictions != ground_truth)}")

# Same metric computed from the boolean mask directly. The threshold is still
# applied; only the np.where conversion to 0/1 is skipped (True == 1 and
# False == 0 when compared with the integer labels).
print(
    f"fraction of misclassified data without np.where: {np.mean((probabilities >= threshold) != ground_truth)}"
)

In [None]:
# Step 3 - Build and train the model
# Fraction of misclassified examples, one entry per model in build order
nn_train_error = []
nn_cv_error = []

# Threshold for classification: a probability greater than or equal to it is
# classified as 1. It does not change between models, so it is set once here.
threshold = 0.5

# Build the models
models_bc = utils.build_models2(random_state=20)

# Loop over each model
for model in models_bc:

    # Setup the loss and optimizer. from_logits=True tells the loss to treat the
    # model output as logits, which is why a sigmoid is applied to the
    # predictions below before thresholding.
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    )

    print(f"Training {model.name}...")

    # Train the model
    model.fit(x_bc_train_scaled, y_bc_train, epochs=200, verbose=0)

    print("Done!\n")

    # Record the fraction of misclassified examples for the training set.
    # The sigmoid output is converted to a NumPy array first so that the
    # thresholding and the comparison with the NumPy labels are both plain
    # NumPy operations rather than a TF boolean tensor compared with a float array.
    yhat = np.asarray(tf.math.sigmoid(model.predict(x_bc_train_scaled)))
    train_error = np.mean((yhat >= threshold) != y_bc_train)
    nn_train_error.append(train_error)

    # Record the fraction of misclassified examples for the cross validation set
    yhat = np.asarray(tf.math.sigmoid(model.predict(x_bc_cv_scaled)))
    cv_error = np.mean((yhat >= threshold) != y_bc_cv)
    nn_cv_error.append(cv_error)

# Print one line per model; model numbers are shown starting at 1
for model_num, (train_err, cv_err) in enumerate(zip(nn_train_error, nn_cv_error)):
    print(
        f"Model {model_num+1}: Training Set Classification Error: {train_err:.5f}, "
        + f"CV Set Classification Error: {cv_err:.5f}"
    )

In [None]:
# Select the model with the lowest cross validation error.
# The choice is computed from the recorded errors instead of being hard-coded,
# so it stays correct when retraining changes the results. np.argmin returns
# the first position on ties; 1 is added because model numbers start at 1
# while list indices start at 0.
model_num = int(np.argmin(nn_cv_error)) + 1

# Compute the test error with the same `threshold` that was set in the
# training cell. The sigmoid output is converted to a NumPy array so the
# comparison with the NumPy labels is a plain NumPy operation.
yhat = models_bc[model_num - 1].predict(x_bc_test_scaled)
yhat = np.asarray(tf.math.sigmoid(yhat))
nn_test_error = np.mean((yhat >= threshold) != y_bc_test)

print(f"Selected Model: {model_num}")
print(f"Training Set Classification Error: {nn_train_error[model_num-1]:.4f}")
print(f"CV Set Classification Error: {nn_cv_error[model_num-1]:.4f}")
print(f"Test Set Classification Error: {nn_test_error:.4f}")