In [371]:
import numpy as np

In [372]:
def split_data(X_tr, ytr):
    train_split = int(0.8*X_tr.shape[0])
    X, y = X_tr[:train_split], ytr[:train_split]
    X_v, y_v = X_tr[train_split:],ytr[train_split:]
    return X,y,X_v,y_v

In [373]:
def generate_batches(batch_size, X, y):
    y = y.reshape((y.shape[0],1))
    batches = []
    mini_batches = X.shape[0]//batch_size
    data = np.hstack((X,y))
    for i in range(mini_batches + 1):
        mini_batch = data[i*batch_size:(i+1)*batch_size,:]
        X_i = mini_batch[:,:-1]
        y_i = mini_batch[:,-1].reshape((-1,1))
        if y_i.shape[0] != 0:
            batches.append((X_i,y_i))
    return batches
    

In [374]:
def compute_loss(y_pred, y):
    meanSquareDiff = np.sum(np.square(y_pred - y.reshape((-1,1))))
    L = meanSquareDiff/(2*y.shape[0])
    return L

In [375]:
def predict(X, w, b):
        return X.dot(w) + b

In [376]:
def get_final_loss(X,y,w,b):
    y_pred = predict(X,w,b)
    return compute_loss(y_pred, y)

In [377]:
def dL_dw(X, y, y_pred, w, alpha):
    n = X.shape[0]
    return -1*(((X.T).dot(y - y_pred)) + (alpha*w))/n

In [378]:
def dL_db(y, y_pred):
    n = y.shape[0]
    return -1*(np.sum(y-y_pred))/n

In [379]:
def update_weights(X, y, w, b, e):
    y_pred = predict(X,w,b)
    w = w.reshape((-1,1))
    dw = dL_dw(X, y, y_pred, w, alpha)
    db = dL_db(y, y_pred)
    w = w - e*dw
    b = b - e*db
    return w,b

In [380]:
X_tr = np.reshape(np.load("age_regression_Xtr.npy"), (-1, 48*48))
ytr = np.load("age_regression_ytr.npy")
X_te = np.reshape(np.load("age_regression_Xte.npy"), (-1, 48*48))
yte = np.load("age_regression_yte.npy")

In [381]:
n,m = X_tr.shape

In [382]:
hyperparams = [[1000,100,0.01,0.001],[2000,64,0.05,0.003],[3000,30,0.1,0.005],[5000,10,0,0.002]]

In [383]:
X_train,y_train,X_valid,y_valid = split_data(X_tr,ytr)

In [384]:
train_loss = []
validation_loss = []
test_loss = []
weights =[]

In [386]:
for params in hyperparams:
    epochs = params[0]
    batch_size = params[1]
    alpha = params[2]
    e = params[3]
    w = np.zeros((m,1))
    b = 0
    loss = []
    for i in range(epochs):
        batches = generate_batches(batch_size,X_train,y_train)
        for batch in batches:
            X_b, y_b = batch
            y_pred = predict(X_b,w,b)
            dw = dL_dw(X_b, y_b, y_pred, w, alpha)
            db = dL_db(y_b, y_pred)
            w,b = update_weights(X_b,y_b,w,b,e)
            loss.append(compute_loss(y_pred,y_b))
        if i%100 == 0:
            print(i,params,loss[-1])
        train_loss.append(loss)
    weights.append((w,b))
    validation_loss.append(get_final_loss(X_valid, y_valid, w, b))
    test_loss.append(get_final_loss(X_te,yte,w,b))

0 [1000, 100, 0.01, 0.001] 96.23941036530834
100 [1000, 100, 0.01, 0.001] 70.68332233936609
200 [1000, 100, 0.01, 0.001] 65.00008661308684
300 [1000, 100, 0.01, 0.001] 61.64376183654303
400 [1000, 100, 0.01, 0.001] 59.25693858768546
500 [1000, 100, 0.01, 0.001] 57.46095757949947
600 [1000, 100, 0.01, 0.001] 56.07762682404146
700 [1000, 100, 0.01, 0.001] 54.99661959342083
800 [1000, 100, 0.01, 0.001] 54.14150148071346
900 [1000, 100, 0.01, 0.001] 53.456993774164424
0 [2000, 64, 0.05, 0.003] 247.6275497754986
100 [2000, 64, 0.05, 0.003] 77.12876547652189
200 [2000, 64, 0.05, 0.003] 65.83134735869501
300 [2000, 64, 0.05, 0.003] 60.34944599284981
400 [2000, 64, 0.05, 0.003] 56.404398869003074
500 [2000, 64, 0.05, 0.003] 53.34070166519528
600 [2000, 64, 0.05, 0.003] 50.95400190305258
700 [2000, 64, 0.05, 0.003] 49.02798845406698
800 [2000, 64, 0.05, 0.003] 47.39433780280619
900 [2000, 64, 0.05, 0.003] 45.946425526983845
1000 [2000, 64, 0.05, 0.003] 44.62327128807233
1100 [2000, 64, 0.05, 0.

  meanSquareDiff = np.sum(np.square(y_pred - y.reshape((-1,1))))


100 [3000, 30, 0.1, 0.005] nan
200 [3000, 30, 0.1, 0.005] nan
300 [3000, 30, 0.1, 0.005] nan
400 [3000, 30, 0.1, 0.005] nan
500 [3000, 30, 0.1, 0.005] nan
600 [3000, 30, 0.1, 0.005] nan
700 [3000, 30, 0.1, 0.005] nan
800 [3000, 30, 0.1, 0.005] nan
900 [3000, 30, 0.1, 0.005] nan
1000 [3000, 30, 0.1, 0.005] nan
1100 [3000, 30, 0.1, 0.005] nan
1200 [3000, 30, 0.1, 0.005] nan
1300 [3000, 30, 0.1, 0.005] nan
1400 [3000, 30, 0.1, 0.005] nan
1500 [3000, 30, 0.1, 0.005] nan
1600 [3000, 30, 0.1, 0.005] nan
1700 [3000, 30, 0.1, 0.005] nan
1800 [3000, 30, 0.1, 0.005] nan
1900 [3000, 30, 0.1, 0.005] nan
2000 [3000, 30, 0.1, 0.005] nan
2100 [3000, 30, 0.1, 0.005] nan
2200 [3000, 30, 0.1, 0.005] nan
2300 [3000, 30, 0.1, 0.005] nan
2400 [3000, 30, 0.1, 0.005] nan
2500 [3000, 30, 0.1, 0.005] nan
2600 [3000, 30, 0.1, 0.005] nan
2700 [3000, 30, 0.1, 0.005] nan
2800 [3000, 30, 0.1, 0.005] nan
2900 [3000, 30, 0.1, 0.005] nan
0 [5000, 10, 0, 0.002] 91.87427316033788
100 [5000, 10, 0, 0.002] 49.449088351242

In [387]:
validation_loss

[82.40408920637144,
 82.40408920637144,
 96.08023960503395,
 nan,
 145.17170621853117]

In [388]:
test_loss

[86.62227004538484, 101.19339626349785, nan, 161.94303406487404]