In [None]:
%run algebra.py
%run cache.py
%run costs.py
%run features.py
%run gradients.py
%run helpers.py
%run model.py
%run splits.py

import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import csv
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Run-wide switch. When SUB_SAMPLE is True, cache files and submissions are
# placed under the "test/" sub-directories instead of the top-level ones.
SUB_SAMPLE = False

if SUB_SAMPLE:
    CACHE_DIR = "test/cache/"
    SUBMISSIONS_DIR = "test/submissions/"
else:
    CACHE_DIR = "cache/"
    SUBMISSIONS_DIR = "submissions/"

In [None]:
# Load the training set from data/train.csv. SUB_SAMPLE is forwarded as the
# second argument (presumably to load only a subset of rows -- TODO confirm in
# load_csv_data). The three returned values are bound to y, x and ids.
y, x, ids = load_csv_data('data/train.csv', SUB_SAMPLE)

# 1 - Analytical Results

### Ridge Regression with Fixed Degree

##### Without Validation

In [None]:
class RidgeRegression_MSE_FixedDegree_Model(Model):
    """Ridge regression on polynomial features, scored with the MSE.

    Hyper-parameters are read from the mapping ``h``:

    * ``h['degree']`` -- cast to ``int`` and handed to ``build_poly``.
    * ``h['lambda']`` -- cast to ``float`` and handed to ``ridge_regression``.
    """

    def prepare(self, x, y, h):
        """Clean ``x`` and expand it into polynomial features.

        The cleaning helpers are applied in a fixed order (``remove_errors``,
        ``remove_outliers``, ``standardize_all``, ``remove_nan_features``)
        before ``build_poly(x, degree)`` is called.

        Args:
            x: feature data to transform.
            y: targets, returned untouched.
            h: hyper-parameter mapping; must contain ``'degree'``.

        Returns:
            The tuple ``(x, y)`` with ``x`` transformed.
        """
        degree = int(h['degree'])

        x = remove_errors(x)
        x = remove_outliers(x)
        x = standardize_all(x)
        x = remove_nan_features(x)
        x = build_poly(x, degree)

        return x, y

    def fit(self, x, y, h):
        """Fit the model by calling ``ridge_regression(y, x, lambda_)``.

        Args:
            x: features as returned by ``prepare``.
            y: targets.
            h: hyper-parameter mapping; must contain ``'lambda'``.

        Returns:
            Whatever ``ridge_regression`` returns (presumably the weight
            vector -- TODO confirm).
        """
        # The polynomial degree is consumed in prepare(); only the penalty
        # strength is needed to fit.
        lambda_ = float(h['lambda'])

        return ridge_regression(y, x, lambda_)

    def test(self, x, y, w, h):
        """Score weights ``w`` on ``(x, y)``.

        Returns:
            A dict with the single key ``'mse'`` holding
            ``compute_mse(y, x, w)``.
        """
        return {'mse': compute_mse(y, x, w)}

In [None]:
# Grid search over polynomial degrees 4..15 and seven log-spaced penalties
# from 1e-8 to 1e-2, without a validation split.
myModel = RidgeRegression_MSE_FixedDegree_Model()

hs = {'degree': np.arange(4, 16), 'lambda': np.logspace(-8, -2, 7)}

# The file name is built from CACHE_DIR (presumably evaluate() caches its
# results there -- TODO confirm).
res = myModel.evaluate(
    x, y, hs,
    filename=CACHE_DIR + 'RidgeRegression_MSE_FixedDegree',
)

# Extract the 'mse' entry of every result record into a plain array.
res_mse = np.vectorize(lambda record: record['mse'])(res)

plot_heatmap(res, hs, 'mse', 'degree', 'lambda')
find_arg_min(res, 'mse')

##### Using Cross-Validation

Here, we evaluate the same model wrapped in `CrossValidationModel` (with `k_fold = 4`) and keep the hyperparameters that minimise `avg_mse_te`.

In [None]:
# Same grid as the run without validation, now wrapped in CrossValidationModel.
ridge_model = RidgeRegression_MSE_FixedDegree_Model()
myModel = CrossValidationModel(ridge_model)

# 'k_fold' and 'seed' are fixed scalars; 'degree' and 'lambda' are swept.
hs = {
    'degree': np.arange(4, 16),
    'lambda': np.logspace(-8, -2, 7),
    'k_fold': 4,
    'seed': 0,
}

res = myModel.evaluate(
    x, y, hs,
    CACHE_DIR + 'RidgeRegression_MSE_FixedDegree_CrossValidation',
)

plot_heatmap(res, hs, 'avg_mse_te', 'degree', 'lambda')

# Keep the hyper-parameters with the smallest 'avg_mse_te' and display them.
best_h = find_arg_min(res, 'avg_mse_te')
best_h

In [None]:
# Run predict() with the hyper-parameters stored in best_h by the
# cross-validation cell. The last argument is built from SUBMISSIONS_DIR
# (presumably the output path of the submission -- TODO confirm in predict).
myModel.predict(best_h, x, y, SUBMISSIONS_DIR + 'RidgeRegression_MSE_FixedDegree_CrossValidation_Model')

## Gradient Descent

#### Least Squares

In [None]:
class MSE_Gradient_FixedDegree_Model(Model):
    """Least-squares model on polynomial features, exposing its gradient.

    ``h['degree']`` (cast to ``int``) selects the degree given to
    ``build_poly``.
    """

    def prepare(self, x, y, h):
        """Clean ``x``, expand it with ``build_poly`` and return ``(x, y)``."""
        poly_degree = int(h['degree'])

        # The cleaning helpers run in this exact order before the expansion.
        cleaning_steps = (
            remove_errors,
            remove_outliers,
            standardize_all,
            remove_nan_features,
        )
        for step in cleaning_steps:
            x = step(x)

        return build_poly(x, poly_degree), y

    def compute_gradient(self, y, x, w, h):
        """Return ``-x.T . (y - x @ w) / n`` where ``n = len(y - x @ w)``."""
        residual = y - x @ w
        n_samples = len(residual)

        return -x.T.dot(residual) / n_samples

    def test(self, x, y, w, h):
        """Return ``{'mse': compute_mse(y, x, w)}``."""
        mse = compute_mse(y, x, w)
        return {'mse': mse}

#### Ridge Regression

In [None]:
class MSE_Gradient_RidgeRegression_FixedDegree_Model(Model):
    """Ridge-penalised least squares on polynomial features, with gradient.

    Reads ``h['degree']`` (cast to ``int``) for ``build_poly`` and
    ``h['lambda']`` (cast to ``float``) for the penalty term.
    """

    def prepare(self, x, y, h):
        """Clean ``x``, expand it with ``build_poly`` and return ``(x, y)``."""
        poly_degree = int(h['degree'])

        # The cleaning helpers run in this exact order before the expansion.
        cleaning_steps = (
            remove_errors,
            remove_outliers,
            standardize_all,
            remove_nan_features,
        )
        for step in cleaning_steps:
            x = step(x)

        return build_poly(x, poly_degree), y

    def compute_gradient(self, y, x, w, h):
        """Return ``(-x.T . e + 2 * lambda * w) / n`` with ``e = y - x @ w``."""
        penalty = float(h['lambda'])

        residual = y - x @ w
        n_samples = len(residual)

        # NOTE(review): the penalty term is divided by n together with the
        # data term, so the effective strength is lambda / n -- confirm this
        # matches the convention used by the closed-form ridge_regression.
        data_term = -x.T.dot(residual)
        penalty_term = 2 * penalty * w

        return (data_term + penalty_term) / n_samples

    def test(self, x, y, w, h):
        """Return ``{'mse': compute_mse(y, x, w)}`` (penalty not included)."""
        mse = compute_mse(y, x, w)
        return {'mse': mse}

#### Lasso

In [None]:
class MSE_Gradient_Lasso_FixedDegree_Model(Model):
    """L1-penalised least squares on polynomial features, with (sub)gradient.

    Reads ``h['degree']`` (cast to ``int``) for ``build_poly`` and
    ``h['lambda']`` (cast to ``float``) for the penalty term.
    """

    def prepare(self, x, y, h):
        """Clean ``x``, expand it with ``build_poly`` and return ``(x, y)``."""
        poly_degree = int(h['degree'])

        # The cleaning helpers run in this exact order before the expansion.
        cleaning_steps = (
            remove_errors,
            remove_outliers,
            standardize_all,
            remove_nan_features,
        )
        for step in cleaning_steps:
            x = step(x)

        return build_poly(x, poly_degree), y

    def compute_gradient(self, y, x, w, h):
        """Return ``(-x.T . e + lambda * sign(w)) / n`` with ``e = y - x @ w``."""
        penalty = float(h['lambda'])

        residual = y - x @ w
        n_samples = len(residual)

        # np.sign(w) is 0 where w == 0, so those coordinates get no penalty.
        # NOTE(review): the penalty term is divided by n together with the
        # data term -- confirm this is the intended scaling of lambda.
        data_term = -x.T.dot(residual)
        penalty_term = penalty * np.sign(w)

        return (data_term + penalty_term) / n_samples

    def test(self, x, y, w, h):
        """Return ``{'mse': compute_mse(y, x, w)}`` (penalty not included)."""
        mse = compute_mse(y, x, w)
        return {'mse': mse}

In [None]:
# Lasso model driven by StochasticGradientDescent, wrapped in
# CrossValidationModel.
lasso_model = MSE_Gradient_Lasso_FixedDegree_Model()
myModel = CrossValidationModel(StochasticGradientDescent(lasso_model))

# Only 'degree' holds more than one value; 'max_iters' and 'gamma' are
# one-element arrays and the remaining entries are plain scalars.
hs = {
    'degree': np.arange(4, 8),
    'lambda': 1e-7,
    'k_fold': 4,
    'seed': 0,
    'batch_size': 1,
    'max_iters': np.array([1000]),
    'num_batches': 1,
    'gamma': np.array([1e-7]),
}

res = myModel.evaluate(
    x, y, hs,
    CACHE_DIR + 'MSE_Lasso_Gradient_FixedDegree_CrossValidation',
)

# NOTE(review): 'lambda' is a scalar in hs here, yet it is used as a heatmap
# axis -- confirm plot_heatmap handles a non-swept hyper-parameter.
plot_heatmap(res, hs, 'avg_mse_te', 'degree', 'lambda')

# Keep the hyper-parameters with the smallest 'avg_mse_te' and display them.
best_h = find_arg_min(res, 'avg_mse_te')
best_h