In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from string import ascii_letters
from sklearn import linear_model
import statsmodels.api as sm
from scipy import stats
%matplotlib inline
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

  from pandas.core import datetools


In [2]:
def run_lm(x_train = None, y_train = None, x_test = None, y_test = None):
    """Fit a linear regression, print diagnostics and return error metrics.

    A scikit-learn ``LinearRegression`` is fitted on the training data and
    used for every returned metric. A statsmodels OLS model is fitted on the
    same data (with a constant column added through ``sm.add_constant``)
    only so that its summary table can be printed.

    Parameters
    ----------
    x_train, y_train
        Training features and targets.
    x_test, y_test
        Held-out features and targets.

    Returns
    -------
    list
        ``[train_mae, test_mae, test_r2, test_mae_relative]`` where
        ``test_mae_relative`` is the test MAE divided by ``np.mean(y_train)``
        (the training mean, not the test mean).

    Notes
    -----
    Re-seeds NumPy's global random generator with 1337 on every call.
    """
    np.random.seed(1337)
    print("Starting linear regression model.")

    regressor = linear_model.LinearRegression()
    regressor.fit(x_train, y_train)

    # The statsmodels fit exists purely for its printed summary table.
    print(sm.OLS(y_train, sm.add_constant(x_train)).fit().summary())

    mae_train = mean_absolute_error(y_train, regressor.predict(x_train))
    print("The mean absolute error of linear regression on training set is: ", mae_train)

    mae_test = mean_absolute_error(y_test, regressor.predict(x_test))
    mae_test_relative = mae_test / np.mean(y_train)
    test_r2 = regressor.score(x_test, y_test)
    print("The mean absolute error of linear regression test set is: ", mae_test)

    return [mae_train, mae_test, test_r2, mae_test_relative]

In [3]:
def run_lm_quick(x_train = None, y_train = None, x_test = None, y_test = None):
    """Silent linear-regression evaluation.

    Fits a scikit-learn ``LinearRegression`` on the training data and
    returns the same four metrics as ``run_lm``, without printing anything
    and without the statsmodels summary.

    Parameters
    ----------
    x_train, y_train
        Training features and targets.
    x_test, y_test
        Held-out features and targets.

    Returns
    -------
    list
        ``[train_mae, test_mae, test_r2, test_mae_relative]`` where
        ``test_mae_relative`` is the test MAE divided by ``np.mean(y_train)``.

    Notes
    -----
    Re-seeds NumPy's global random generator with 1337 on every call.
    """
    np.random.seed(1337)

    regressor = linear_model.LinearRegression()
    regressor.fit(x_train, y_train)

    mae_train = mean_absolute_error(y_train, regressor.predict(x_train))
    mae_test = mean_absolute_error(y_test, regressor.predict(x_test))

    # Normalised by the mean of the *training* targets.
    mae_test_relative = mae_test / np.mean(y_train)
    test_r2 = regressor.score(x_test, y_test)

    return [mae_train, mae_test, test_r2, mae_test_relative]

In [9]:
from sklearn.linear_model import Ridge
def run_ridge(x_train, y_train, x_test, y_test, alpha=0.5):
    """Fit a ridge regression and return its error metrics.

    Parameters
    ----------
    x_train, y_train
        Training features and targets.
    x_test, y_test
        Held-out features and targets.
    alpha : float, default 0.5
        Regularisation strength forwarded to ``Ridge``.

    Returns
    -------
    list
        ``[train_mae, test_mae, test_r2]``.

    Notes
    -----
    Re-seeds NumPy's global random generator with 1337 on every call.
    """
    np.random.seed(1337)

    # NOTE(review): the `normalize` argument was deprecated in scikit-learn
    # 1.0 and removed in 1.2; this call needs an older release - confirm the
    # pinned version before upgrading.
    model = Ridge(alpha=alpha, normalize=True)
    model.fit(x_train, y_train)

    mae_train = mean_absolute_error(y_train, model.predict(x_train))
    mae_test = mean_absolute_error(y_test, model.predict(x_test))
    test_r2 = model.score(x_test, y_test)

    return [mae_train, mae_test, test_r2]

In [10]:
from sklearn.linear_model import Lasso
def run_lasso(x_train, y_train, x_test, y_test, alpha=0.5):
    """Fit a lasso regression and return its error metrics.

    Parameters
    ----------
    x_train, y_train
        Training features and targets.
    x_test, y_test
        Held-out features and targets.
    alpha : float, default 0.5
        Regularisation strength forwarded to ``Lasso``.

    Returns
    -------
    list
        ``[train_mae, test_mae, test_r2]``.

    Notes
    -----
    Re-seeds NumPy's global random generator with 1337 on every call.
    """
    np.random.seed(1337)

    # `max_iter` is an iteration count and scikit-learn documents it as an
    # int; the float literal 1e5 is rejected by releases that validate
    # estimator parameters, so pass a real integer.
    # NOTE(review): the `normalize` argument was deprecated in scikit-learn
    # 1.0 and removed in 1.2; this call needs an older release - confirm the
    # pinned version before upgrading.
    lassoreg = Lasso(alpha=alpha, normalize=True, max_iter=100000)
    lassoreg.fit(x_train, y_train)

    y_train_pred = lassoreg.predict(x_train)
    train_error = mean_absolute_error(y_train, y_train_pred)

    y_test_pred = lassoreg.predict(x_test)
    test_error = mean_absolute_error(y_test, y_test_pred)
    r_squared = lassoreg.score(x_test, y_test)

    return [train_error, test_error, r_squared]

In [6]:
def run_ridge_2(x_train, y_train, x_test, y_test, alpha):
    """Fit a ridge regression, print its errors and return RSS plus coefficients.

    Parameters
    ----------
    x_train, y_train
        Training features and targets.
    x_test, y_test
        Held-out features and targets.
    alpha : float
        Regularisation strength forwarded to ``Ridge``.

    Returns
    -------
    list
        ``[rss, intercept, coef_0, coef_1, ...]`` where ``rss`` is the
        residual sum of squares on the test set, ``intercept`` is the
        fitted ``intercept_`` and the remaining entries are the items of
        the fitted ``coef_``.
    """
    # NOTE(review): the `normalize` argument was deprecated in scikit-learn
    # 1.0 and removed in 1.2; this call needs an older release - confirm the
    # pinned version before upgrading.
    ridgereg = Ridge(alpha = alpha, normalize = True)
    ridgereg.fit(x_train, y_train)

    y_train_pred = ridgereg.predict(x_train)
    train_error = mean_absolute_error(y_train, y_train_pred)
    print("The mean absolute error of ridge regression on training set is: ", train_error)

    y_test_pred = np.asarray(ridgereg.predict(x_test))
    test_error = mean_absolute_error(y_test, y_test_pred)
    print("The mean absolute error of ridge regression test set is: ", test_error)

    # Residual sum of squares over the sample axis. Work on plain arrays:
    # the builtin sum() iterates a DataFrame's column labels rather than its
    # values, and an (n, 1) target against (n,) predictions would silently
    # broadcast to (n, n). Aligning y_test to the prediction shape avoids both.
    residuals = y_test_pred - np.asarray(y_test).reshape(y_test_pred.shape)
    rss = np.sum(residuals ** 2, axis=0)

    # Return the result in the pre-defined flat format.
    ret = [rss, ridgereg.intercept_]
    ret.extend(ridgereg.coef_)
    return ret