In [1]:
# Problem 3

import numpy as np
from sklearn.model_selection import train_test_split

# Calculate MSE given y_pred and y
def MSE(y_pred, y):
	"""Return the halved mean squared error between predictions and targets.

	The value is ``sum((y_pred - y) ** 2) / (2 * n)`` with ``n = y.shape[0]``;
	the extra factor of 1/2 is part of this definition.
	"""
	n_samples = y.shape[0]
	squared_residuals = np.square(y_pred - y)
	return (1/(2 * n_samples)) * np.sum(squared_residuals)

# Calculate the MSE given the prediction parameters and actual y value
def calculate_error(x, w, b, y):
	"""Score the linear model ``x.dot(w) + b`` against ``y`` using ``MSE``.

	No regularisation term is included in the returned value.
	"""
	predictions = x.dot(w) + b
	error = MSE(predictions, y)
	return error

# Loop through the samples based on the batch_size hyperparameter
def batch_loop(x, y, size):
	"""Yield shuffled mini-batches of ``(x, y)`` with rows kept aligned.

	Args:
		x: Array of samples, indexed along its first axis.
		y: Array of targets with the same first-axis length as ``x``.
		size: Maximum number of samples per batch; the final batch may be
			smaller when the sample count is not a multiple of ``size``.

	Yields:
		Tuples ``(batch_x, batch_y)`` where row ``i`` of ``batch_x`` still
		corresponds to element ``i`` of ``batch_y``.

	Raises:
		ValueError: If ``x`` and ``y`` have different lengths. Because this
			is a generator, the error surfaces on the first iteration.
	"""
	if len(x) != len(y):
		raise ValueError(
			f"x and y must have the same number of samples, got {len(x)} and {len(y)}"
		)

	# Use ONE permutation for both arrays. Shuffling x and y independently
	# would pair every sample with a random target and destroy the signal.
	order = np.random.permutation(y.shape[0])
	shuffled_x = x[order]
	shuffled_y = y[order]

	for start in range(0, y.shape[0], size):
		yield shuffled_x[start:start + size], shuffled_y[start:start + size]

# Perform gradient_descent using the prediction parameters, actual y, learning rate, and regularized term
def gradient_descent(x, y, b, w, learn_rate, alpha = 0.1):
	"""Take one gradient step on the L2-regularised squared-error loss.

	Args:
		x: Samples of the current batch.
		y: Targets of the current batch; ``y.shape[0]`` is the batch size.
		b: Current bias.
		w: Current weights.
		learn_rate: Step size applied to both gradients.
		alpha: Regularisation strength; it only affects the weight gradient.

	Returns:
		The updated ``(w, b)`` pair. The inputs are not modified in place.
	"""
	batch_len = y.shape[0]
	residual = (x.dot(w) + b) - y

	# Data-fit gradients, averaged over the batch
	grad_w = (1/batch_len) * x.T.dot(residual)
	grad_b = (1/batch_len) * np.sum(residual)

	# The penalty is applied to the weights only; the bias is left unregularised
	grad_w += ((alpha/batch_len) * w)

	stepped_w = w - (learn_rate * grad_w)
	stepped_b = b - (learn_rate * grad_b)
	return stepped_w, stepped_b

# Perform SGD
def stochastic_gradient_descent(x, y, b, w, learn_rate: float, num_of_epoch: int, batch_size: int, dataset_size: int, alpha):
	"""Train ``w`` and ``b`` with mini-batch SGD for ``num_of_epoch`` epochs.

	Args:
		x: Training samples.
		y: Training targets.
		b: Initial bias.
		w: Initial weights.
		learn_rate: Step size passed to ``gradient_descent``.
		num_of_epoch: Number of full passes over the data.
		batch_size: Number of samples per mini-batch.
		dataset_size: Unused; kept so existing callers keep working.
		alpha: Regularisation strength passed to ``gradient_descent``.

	Returns:
		The trained ``(w, b)`` pair.
	"""
	# range(num_of_epoch), not range(num_of_epoch - 1): the earlier bound ran
	# one epoch fewer than requested and did no training at all for 1 epoch.
	for _ in range(num_of_epoch):
		for mini_batch_x, mini_batch_y in batch_loop(x, y, batch_size):
			w, b = gradient_descent(mini_batch_x, mini_batch_y, b, w, learn_rate, alpha)

	return w, b

# Find the lowest error by performing SGD
def find_lowest_error(x, y, learn_rate, num_of_epoch, batch_size, alpha):
	"""Randomly initialise the model parameters and train them with SGD.

	Weights are drawn uniformly from [0, 1) and scaled by
	``sqrt(1 / (n_features + n_outputs))``; the bias is drawn uniformly from
	[0, 1) without scaling.

	Returns:
		The ``(w, b)`` pair produced by ``stochastic_gradient_descent``.
	"""
	n_features = x.shape[1]
	# Give y a trailing axis so its size can be read off as the output count
	n_outputs = np.expand_dims(y, axis=-1).shape[1]
	init_scale = np.sqrt(1/(n_features + n_outputs))

	# Draw the weights before the bias to keep the random stream order stable
	initial_w = np.random.rand(n_features) * init_scale
	initial_b = np.random.rand(n_outputs)

	final_w, final_b = stochastic_gradient_descent(
		x, y, initial_b, initial_w, learn_rate, num_of_epoch, batch_size, len(y), alpha
	)
	return final_w, final_b

# Implementation of grid_search to tune our hyperparameters by looping through the various values we have for each
def grid_search():
	"""Yield every hyperparameter combination of the search grid.

	Each item is a tuple ``(num_of_epoch, batch_size, learn_rate, alpha)``.
	The epoch count varies slowest and alpha varies fastest.
	"""
	hyperparameters = {
		"learn_rate": [0.1, 0.01, 0.001, 0.0001],
		"num_of_epoch": [5, 10, 15, 20],
		"batch_size": [10, 20, 50, 100],
		"alpha": [0.75, 0.5, 0.25, 0.1],
	}
	for epochs in hyperparameters["num_of_epoch"]:
		for batch in hyperparameters["batch_size"]:
			for rate in hyperparameters["learn_rate"]:
				for reg in hyperparameters["alpha"]:
					yield epochs, batch, rate, reg

# This function will train the age regressor based on various hyperparameters provided in the grid_search() function
def train_age_regressor():
	"""Grid-search the SGD hyperparameters and report the best model.

	Loads the four ``age_regression_*.npy`` files from the current working
	directory, flattens each image to a 48*48 vector, holds out 20% of the
	training data for validation, trains one model per combination from
	``grid_search()`` and keeps the one with the lowest validation error.

	Returns:
		The ``(w, b)`` pair of the model with the lowest validation error.

	Raises:
		FileNotFoundError: If any of the ``.npy`` files is missing.
	"""
	# Load data
	starting_x_tr = np.reshape(np.load("age_regression_Xtr.npy"), (-1, 48*48))
	starting_y_tr = np.load("age_regression_ytr.npy")
	x_te = np.reshape(np.load("age_regression_Xte.npy"), (-1, 48*48))
	y_te = np.load("age_regression_yte.npy")
	x_tr, x_val, y_tr, y_val = train_test_split(starting_x_tr, starting_y_tr, train_size=0.8)

	# Start from +inf so the first finite validation error always wins; a
	# fixed sentinel such as 1000000 could reject every combination.
	best_error = float("inf")
	best_num_of_epoch = -1
	best_batch_size = -1
	best_learn_rate = -1
	best_alpha = -1
	best_w = None
	best_b = None

	# Loop through each combination of the hyperparameters in grid_search() to find the best combination to minimize MSE
	for num_of_epoch, batch_size, learn_rate, alpha in grid_search():

		w_trained, b_trained = find_lowest_error(x_tr, y_tr, learn_rate, num_of_epoch, batch_size, alpha)

		# Calculate the MSE from the validation dataset
		error = calculate_error(x_val, w_trained, b_trained, y_val)
		print("train/validation unregularized MSE: ", error)
		print(x_val.shape)

		# Keep the hyperparameters AND the parameters of the best model, so the
		# final evaluation does not use whichever combination ran last.
		if error < best_error:
			best_error = error
			best_learn_rate = learn_rate
			best_num_of_epoch = num_of_epoch
			best_batch_size = batch_size
			best_alpha = alpha
			best_w = w_trained
			best_b = b_trained

	# If every run diverged (inf/NaN error never compares as smaller), fall
	# back to the last trained parameters instead of failing on None.
	if best_w is None:
		best_w, best_b = w_trained, b_trained

	# Finally, calculate the test error using the best weights and bias found
	error = calculate_error(x_te, best_w, best_b, y_te)
	print("\n")
	print("Results of training:")
	print("best error from validation dataset: ", best_error)
	print("best learning rate: ", best_learn_rate)
	print("best number of epochs: ", best_num_of_epoch)
	print("best batch size: ", best_batch_size)
	print("best reg term: ", best_alpha)
	print("unregularized MSE from test dataset: ", error)

	return best_w, best_b

def main():
	"""Print the problem header and run the full training pipeline."""
	print("Problem 3 Output:")
	# The trained parameters are returned but not used any further here
	_weights, _bias = train_age_regressor()

# Run the pipeline only when executed as a script, not when imported
if __name__ == '__main__':
	main()

Problem 3 Output:


FileNotFoundError: [Errno 2] No such file or directory: 'age_regression_Xtr.npy'