In [25]:
import numpy as np
from numpy.linalg import *
from linreg import LinearRegression
from sklearn.linear_model import LinearRegression as sklLinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

## Implementation of Essential Functions

In [46]:
def Train_model(dataPath, alpha, n_iter):
    """Fit the project's gradient-descent linear regression on a CSV data set.

    The file is parsed as comma-separated numbers. Every column except the
    last is treated as a feature and the last column as the target. Features
    are standardized with their own column-wise mean and standard deviation,
    a leading column of ones is added for the bias term, and training starts
    from a randomly drawn (standard normal) parameter vector.

    Parameters
    ----------
    dataPath : str or path-like
        Location of the comma-delimited data file.
    alpha : float
        Forwarded to ``LinearRegression`` as ``alpha``.
    n_iter : int
        Forwarded to ``LinearRegression`` as ``n_iter``.

    Returns
    -------
    tuple
        ``(theta, loss)``: ``theta`` is ``lr_model.theta`` after fitting and
        ``loss`` is twice the first entry of the last ``JHist`` record.
        NOTE(review): presumably ``JHist`` stores a half-MSE cost so that the
        doubling yields the MSE -- confirm against ``linreg``.
    """
    # Read inside a context manager so the handle is closed as soon as the
    # data is parsed, even if parsing raises.
    with open(dataPath, 'r') as data_file:
        allData = np.loadtxt(data_file, delimiter=',')

    X = np.matrix(allData[:, :-1])
    y = np.matrix((allData[:, -1])).T

    n, d = X.shape

    # Standardize each feature column to zero mean and unit variance
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mean) / std

    # Add a column of ones for the bias term
    X = np.c_[np.ones((n, 1)), X]

    # One parameter per feature plus one for the bias column
    init_theta = np.matrix(np.random.randn((d + 1))).T

    lr_model = LinearRegression(init_theta=init_theta, alpha=alpha, n_iter=n_iter)
    lr_model.fit(X, y)
    return lr_model.theta, lr_model.JHist[-1][0] * 2


def calculate_loss(y, y_predicted):
    """Return the mean squared error and root mean squared error.

    Parameters
    ----------
    y, y_predicted : array-like supporting ``-``, ``.T`` and ``@``
        Observed and predicted targets of matching shape.

    Returns
    -------
    tuple
        ``(MSE, RMSE)``. The squared residuals are summed with
        ``residual.T @ residual`` and divided by ``len(y)``; the RMSE is the
        square root of that value.
    """
    residual = y - y_predicted
    sample_count = len(y)

    # Inner product of the residual with itself = sum of squared errors
    mean_squared = residual.T @ residual / sample_count
    root_mean_squared = np.sqrt(mean_squared)

    return mean_squared, root_mean_squared


# Load the training set into notebook-level variables.
# X / y defined here are the globals that model_evaluation() fits on, and
# mean / std are reused below to standardize the hold-out set.
with open("data/multivariateData.dat",'r') as file:
    allData = np.loadtxt(file, delimiter=',')

    # All columns but the last are features; the last column is the target
    X = np.matrix(allData[:,:-1])
    y = np.matrix((allData[:,-1])).T

    n,d = X.shape

    # Standardize each feature column with the training mean and std
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mean) / std

    # Add a column of ones for the bias term
    X = np.c_[np.ones((n,1)), X]


# Load the hold-out set used for scoring.
# NOTE: despite their names, X_train / y_train hold the contents of
# holdout.npz; model_evaluation() predicts on them, it does not fit on them.
with open('data/holdout.npz', 'rb') as file:
    allData = np.load(file)['arr_0']
    
    # All columns but the last are features; the last column is the target
    X_train = np.matrix(allData[:, :-1])
    y_train = np.matrix((allData[:, -1])).T
    
    # Rebinds the notebook-level n, d to the hold-out shape
    n, d = X_train.shape
    
    # Standardize with the mean/std computed from multivariateData.dat
    X_train = (X_train - mean) / std
    
    # Add a column of ones for the bias term
    X_train = np.c_[np.ones((n, 1)), X_train]


def model_evaluation(tetha, alpha, n_iter):
    """Fit a fresh ``LinearRegression`` and score it on the hold-out data.

    The model is fitted on the notebook-level ``X`` / ``y`` and its
    predictions for the notebook-level ``X_train`` are compared with
    ``y_train`` (both loaded from ``data/holdout.npz`` in this notebook).

    Parameters
    ----------
    tetha : initial parameter vector, forwarded as ``init_theta``.
    alpha : forwarded to ``LinearRegression`` as ``alpha``.
    n_iter : forwarded to ``LinearRegression`` as ``n_iter``.

    Returns
    -------
    Whatever ``calculate_loss`` returns for the hold-out targets and the
    model's predictions (an ``(MSE, RMSE)`` pair).
    """
    regressor = LinearRegression(init_theta=tetha, alpha=alpha, n_iter=n_iter)
    regressor.fit(X, y)

    holdout_predictions = regressor.predict(X_train)
    return calculate_loss(y_train, holdout_predictions)
# Baseline run: random initial parameters, alpha=0.05, 2000 iterations.
# NOTE(review): the hard-coded 2+1 length presumably means two features plus
# the bias column -- confirm against the data.
model_evaluation(tetha=np.matrix(np.random.randn((2+1))).T, alpha=0.05, n_iter=2000)

(matrix([[2.30600774e+09]]), matrix([[48020.90935979]]))

## Hyper-Parameter Tuning and Loss Calculation

<!-- TODO -->

In [42]:
def grid_search():
    """Exhaustively search ``alpha`` and ``n_iter`` for the lowest error.

    Every combination of ``n_iter`` in ``np.arange(1020, 1040, 1)`` and
    ``alpha`` in ``np.arange(0.074, 0.075, 0.0001)`` is evaluated with
    ``model_evaluation`` from a fresh random initial parameter vector, and
    the first element of its result is used as the error to minimize.
    Anything written to standard output during the search is discarded.

    Returns
    -------
    dict or None
        ``{'alpha': ..., 'n_iter': ..., 'error': ...}`` for the best
        combination found, or ``None`` if no evaluation produced an error
        below ``inf``.
    """
    import contextlib
    import os

    best_error = float('inf')
    setting = None

    # redirect_stdout puts back whatever sys.stdout was on entry (in Jupyter
    # that is the kernel's stream, not sys.__stdout__) and does so even if an
    # evaluation raises; the with-block also closes the devnull handle.
    with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
        for n_iter in np.arange(1020, 1040, 1):
            # NOTE: np.arange with a float step; the exact number of alpha
            # values is subject to floating-point rounding.
            for alpha in np.arange(0.074, 0.075, 0.0001):
                error = model_evaluation(
                    alpha=alpha, n_iter=n_iter,
                    tetha=np.matrix(np.random.randn((2 + 1))).T)[0]
                # Strict comparison keeps the first setting on ties
                if best_error > error:
                    best_error = error
                    setting = {'alpha': alpha, 'n_iter': n_iter, 'error': best_error}
    return setting
# Run the search; the returned settings dict is the cell's displayed result.
grid_search()

{'alpha': 0.07470000000000002,
 'n_iter': 1026,
 'error': matrix([[2.30600774e+09]])}

In [28]:
# Re-evaluate with alpha=0.068 and n_iter=1096 from a random initial theta.
# NOTE(review): these values differ from the setting shown in the
# grid_search() output above (alpha ~0.0747, n_iter=1026) -- confirm which
# setting is intended.
model_evaluation(tetha=np.matrix(np.random.randn((2+1))).T, alpha=0.068, n_iter=1096)

(matrix([[2.30600774e+09]]), matrix([[48020.90935979]]))

In [44]:
# Reference fit with scikit-learn on the same files, for comparison with the
# gradient-descent results.
#
# Dedicated *_skl names are used on purpose: model_evaluation() reads the
# notebook-level X, y, X_train and y_train (standardized matrices with a bias
# column), so rebinding those names here would break every later call to it.
with open("data/multivariateData.dat", 'r') as file:
    allData = np.loadtxt(file, delimiter=',')

# All columns but the last are features; the last column is the target
X_skl = np.array(allData[:, :-1])
y_skl = np.array(allData[:, -1])

with open('data/holdout.npz', 'rb') as file:
    allData = np.load(file)['arr_0']

X_holdout_skl = np.array(allData[:, :-1])
y_holdout_skl = np.array(allData[:, -1])

# Fit the scaler on the training features only, then apply it to both sets
scaler = StandardScaler()
scaler.fit(X_skl)
X_skl_scaled = scaler.transform(X_skl)
X_holdout_skl_scaled = scaler.transform(X_holdout_skl)

model = sklLinearRegression()
model.fit(X_skl_scaled, y_skl)

# (MSE, RMSE) on the hold-out set; displayed as the cell output
calculate_loss(y_holdout_skl, model.predict(X_holdout_skl_scaled))


(2306007735.7410774, 48020.90935978907)

In [49]:
# Relative difference, in percent, between the scikit-learn hold-out MSE and
# the gradient-descent hold-out MSE.
# The gradient-descent model is trained once and its MSE reused, so the
# numerator and denominator refer to the same run (and the same random
# initial theta) instead of two separate trainings.
skl_mse = 2306007735.7410774  # copied from the scikit-learn cell's output
gd_mse = model_evaluation(tetha=np.matrix(np.random.randn((2+1))).T, alpha=0.068, n_iter=1096)[0]
(skl_mse - gd_mse) / gd_mse * 100

matrix([[3.72204686e-13]])