In [1]:
import numpy as np
import math

hyperparameters = {
    number_of_epochs: 20, 
    batch_size: 100, 
    learning_rate: .01, 
    regularized_rate: .01
    }

preferences = {
    "epochs": [50, 100, 150, 200, 250, 300, 350, 400, 450, 500],
    "batch": [50, 75, 100, 125, 150, 200, 250, 300],
    "learning_rate": [.1, .01, .001, .0001, .5, .05, .005, .0005],
    "regularized_rate": [.1, .01, .001, .0001, .5, .05, .005, .0005]
}

In [3]:
def linear_regression (x, y):
    X_t = x.T
    X_y = X_t.dot(y) # x is a column vector so it needs to be transformed
    X_XT = X_t.dot(x)
    w = np.linalg.solve(X_XT, X_y)
    return w

In [None]:
def model(x, w, b):
    y_hat = x.dot(w) + b
    return y_hat

In [4]:
def mean_square_error(y_hat, y):
    n = len(y)
    mse = (1/(2 * n)) * np.sum(np.square(y_hat-y))
    return mse

In [None]:
def l2(w, alpha):
    wt_w = (w).dot(w.T) # (w^T)w
    reg = (1/(2 * n) * alpha) * wt_w # (alpha/2n) * ((w^T)w) 
    return reg

In [5]:
def regularized_mean_square_error(y, y_hat, reg):
    n = len(y)
    mse = mean_square_error(y_hat, y)
    return mse + reg

In [None]:
#Train/Test Split performed by randomly taking inputs and their associated labels and assigning them to either a training group or a test group
#---------------------------------------------------#----------------#
#                                                   |                |
#                  Training set                     |   Testing Set  |
#                                                   |                |
#---------------------------------------------------#----------------#
#train_perc should be a value between 0 and 1, eg. train_perc 0.8
def random_train_test_split(dataset, train_perc = 0.8):
    perc_of_dataset = dataset.shape[1] * train_perc
    numpy.random.shuffle(dataset)

    train = dataset_arrx[:perc_of_dataset,:] 
    test = dataset_arr[perc_of_dataset:,:]

    return train, test

In [6]:
#Validation/Test Split performed by randomly taking inputs and their associated labels and assigning them to either a validation group or a test group
#---------------------------------------------------#----------------#
#                                                   |                |
#                  Training set                     |   Testing Set  |
#                                                   |                |
#---------------------------------------------------#----------------#
#                                                             V
#                                                |------------#---------#
#                                                |            |         |
#                                                | Validation | Testing |
#                                                |     Set    |   Set   |
#                                                |------------#---------#
#train_perc should be a value between 0 and 1, eg. train_perc 0.5 for a 50/50 split of the test set 
def random_test_validation_split(parameter, labels, train_perc = 0.8):
    dataset_row_size, dataset_col_size = dataset.shape
    validation_set_size = dataset_row_size * train_perc
    test_set_size = dataset_row_size - validation_set_size

    indices = numpy.random.permutation(parameter.shape[0])
    validation_p_index = indices[:validation_set_size]
    test_p_index = indices[test_set_size:]

    validation_l, test_l = labels[validation_p_idx,:], labels[test_p_idx,:]
    validation_p, test_p = parameter[validation_p_idx,:], parameter[test_p_idx,:]

    return validation_p, validation_l, test_p, test_l

In [None]:
#X is the input 
#y is the real result
#w is the weight 
#epsilon is the learning rate
#alpha is the regularized term
#reg_bool if you wish to use the regularized term
def gradient_descent(x, y, b, w, epsilon, alpha, reg_bool):
    X_t = x.t
    n = len(y)
    y_hat = model(x, w, b)

    #taking derivative 
    derivative_of_w = (1/n) * np.sum(x.dot(y_hat - y))
    derivative_of_b = (1/n) * np.sum(y_hat - y)

    # if regularized term is wanted then add that to the derivative with respect to w
    if reg_bool == true:
        derivative_of_w += ((alpha/n) * w)

    #this is gradient descent
    w_new = w - (epsilon * derivative_of_w)
    b_new = b - (epsilon * derivative_of_b)

    return w_new, b_new

In [None]:
def stochastic_gradient_descent(x, y, w, learning_rate: float, num_of_epochs: int, size_of_batch: int, alpha: float, epsilon: float, n: int):

    past_cost = 10000000
    epochs_since_improved = 0
    current_cost = 0 
    for e in range(num_of_epochs - 1):
        #TODO shuffle set
        for b in range((math.ceil(n/size_of_batch)) - 1):
            #TODO draw from batch iterator
            y_hat = model(x, w, b)
            current cost = regularized_mean_square_error(y, y_hat, reg)

            if (past_cost < current_cost):
                epoch_since_improved++

                if epoch_since_improved >= 3:
                    lr /= 10
                
            else:
                epoch_since_improved = 0 
                past_cost = current_cost

            w, b = gradient_descent(x, y, b, w, epsilon, alpha, reg_bool)
    


In [None]:
def load_data():
    #training set
    x_tr = np.reshape(np.load("age_regression_Xtr.npy"), (-1, 48*48))
    y_tr = np.load("age_regression_ytr.npy")

    #testing set
    x_te = np.reshape(np.load("age_regression_Xte.npy"), (-1, 48*48))
    y_te = np.load("age_regression_yte.npy")

    return x_tr, y_tr, x_te, y_te

In [7]:
#Function Arguments:
#mse_bool is the boolean value that determines if mse will be performed without or with regularization, default is True
#alpha is the value for regularization, if used, default is 0.0
#ttv_val is the value associated with the type of split wanted. A Train/Test Split is 0, and a train/validation/split is 1, No Split Needed is 2. Default is 2
#learning rate is the hyperparameter associated with the gradient descent
def train_age_regressor(mse_bool: bool = True, alpha: float = 0.0, ttv_val: int = 2, learning_rate: float, num_of_epochs: int):
    # Load data
    x_tr, y_tr, x_te, y_te = load_data()

    if ttv_val == 0:
        print("I made this before I realized train/test split is already given to you")

    elif ttv_val == 1:
        x_val, y_val, x_te, y_te = random_test_validation_split(x_te, y_te, train_perc = 0.8)

    #set intial w value 
    w = linear_regression(X_tr, ytr)

    #set initial bias value 
    b = np.zeros(())

    w_new, b_new = stochastic_gradient_descent(learning_rate = 0.01, num_of_epochs = 20, size_of_batch = 100, alpha= .01, n = len(y))
    

    if mse_bool == True:
        train = mean_square_error(ytr, X_tr, w, b)
        test = mean_square_error(yte, X_te, w, b)
    else:
        train = regularized_mean_square_error(ytr, X_tr, w, b_val, alpha)
        test = regularized_mean_square_error(yte, X_te, w, b_val, alpha)


    return train, test


    # Report fMSE cost on the training and testing data (separately)
    # ...

NameError: name 'boolean' is not defined

In [18]:
print(train_age_regressor(mse_bool=False, b_val=2, alpha=1.0))

(94.98633435331956, 358.60415394117115)
