In [1]:
# import modules
import numpy as np
import pandas as pd
import time
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV
from sklearn.metrics import mean_squared_error
import gsblr

In [2]:
# Load the train/test splits from comma-delimited text files.
# The paths are listed in the same order as the names they are assigned to.
X_train, y_train, X_test, y_test = (
    np.loadtxt(split_path, delimiter=',')
    for split_path in (
        'data/large_model/X_train.txt',
        'data/large_model/y_train.txt',
        'data/large_model/X_test.txt',
        'data/large_model/y_test.txt',
    )
)

### Linear Regression

In [5]:
# Time the linear regression fit.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed durations; time.time() is wall-clock and can jump if the system
# clock is adjusted while the fit is running.
start = time.perf_counter()

# fit linear regression model on the training split
linreg_model = LinearRegression().fit(X_train, y_train)

# elapsed fit time, in seconds
linreg_time = time.perf_counter() - start

In [6]:
# Evaluate the fitted linear regression model on the held-out test split:
# predictions first, then their mean squared error against the true responses.
linreg_pred = linreg_model.predict(X_test)
linreg_mse = mean_squared_error(y_true=y_test, y_pred=linreg_pred)

# Keep the fitted coefficients around for later inspection.
linreg_coef = linreg_model.coef_

### Ridge

In [7]:
# Time the ridge regression fit (including its built-in cross validation).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed durations; time.time() is wall-clock and can jump if the system
# clock is adjusted while the fit is running.
start = time.perf_counter()

# fit ridge regression model with cross validation
ridge_model = RidgeCV().fit(X_train, y_train)

# elapsed fit time, in seconds
ridge_time = time.perf_counter() - start

In [8]:
# ridge "alpha" parameter (lambda) held by the fitted RidgeCV model;
# left as the last expression so the notebook displays it as the cell output
ridge_model.alpha_

1.0

In [9]:
# Evaluate the fitted ridge model on the held-out test split:
# predictions first, then their mean squared error against the true responses.
ridge_pred = ridge_model.predict(X_test)
ridge_mse = mean_squared_error(y_true=y_test, y_pred=ridge_pred)

# Keep the fitted coefficients around for later inspection.
ridge_coef = ridge_model.coef_

### LASSO

In [10]:
# Time the lasso fit (including its built-in cross validation).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed durations; time.time() is wall-clock and can jump if the system
# clock is adjusted while the fit is running.
start = time.perf_counter()

# fit lasso regression model with cross validation; random_state is pinned
# so repeated runs use the same seed
lasso_model = LassoCV(random_state=141).fit(X_train, y_train)

# elapsed fit time, in seconds
lasso_time = time.perf_counter() - start

In [11]:
# lasso "alpha" parameter (lambda) held by the fitted LassoCV model;
# left as the last expression so the notebook displays it as the cell output
lasso_model.alpha_

19.48740023977807

In [12]:
# Evaluate the fitted lasso model on the held-out test split:
# predictions first, then their mean squared error against the true responses.
lasso_pred = lasso_model.predict(X_test)
lasso_mse = mean_squared_error(y_true=y_test, y_pred=lasso_pred)

# Keep the fitted coefficients around for later inspection.
lasso_coef = lasso_model.coef_

### Gibbs

#### Iterations: 5000, Burn proportion: .5

In [13]:
# Time the Gibbs sampler run (construction + fit).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed durations; time.time() is wall-clock and can jump if the system
# clock is adjusted during this long-running fit.
start = time.perf_counter()

# initialize gibbs sampler with a fixed seed
# NOTE(review): the section header says 5000 iterations / 0.5 burn proportion,
# but neither is passed here, so this relies on gsblr's defaults -- confirm
# they match the header.
gibbs = gsblr.Gsblr(rseed=141)

# fit gibbs sampler on the training split
gibbs.fit(X_train, y_train)

# elapsed time, in seconds
gibbs_time = time.perf_counter() - start

In [14]:
# Evaluate the fitted Gibbs sampler on the held-out test split:
# predictions first, then their mean squared error against the true responses.
gibbs_pred = gibbs.predict(X_test)
gibbs_mse = mean_squared_error(y_true=y_test, y_pred=gibbs_pred)

# Keep the sampler's coefficients (as the underlying array) for later inspection.
gibbs_coef = gibbs.get_coef().values

#### Iterations: 100, Burn proportion: .3

In [3]:
# Time the shorter Gibbs sampler run (construction + fit).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed durations; time.time() is wall-clock and can jump if the system
# clock is adjusted while the fit is running.
start = time.perf_counter()

# initialize gibbs sampler with a fixed seed and a 0.3 burn proportion
gibbs_2 = gsblr.Gsblr(rseed=141, burn_prop=0.3)

# fit gibbs_2 sampler for 100 iterations
gibbs_2.fit(X_train, y_train, niter=100)

# elapsed time, in seconds
gibbs_time_2 = time.perf_counter() - start

In [5]:
# Score the shorter Gibbs run on the held-out test split:
# predictions, then their mean squared error against the true responses.
gibbs_2_pred = gibbs_2.predict(X_test)
gibbs_2_mse = mean_squared_error(y_true=y_test, y_pred=gibbs_2_pred)

In [7]:
# Show runtime and test MSE of the shorter Gibbs run as a labelled Series.
pd.Series(
    [gibbs_time_2, gibbs_2_mse],
    index=['Runtime', 'MSE'],
)

Runtime    6.110638e+01
MSE        2.766680e+07
dtype: float64

### Data Summary

In [15]:
# Build the summary table: one row per method, holding its test MSE and
# the time its fit took.
lrg_results = pd.DataFrame(
    [
        ('Linreg', linreg_mse, linreg_time),
        ('Ridge', ridge_mse, ridge_time),
        ('LASSO', lasso_mse, lasso_time),
        ('Gibbs', gibbs_mse, gibbs_time),
    ],
    columns=['Method', 'MSE', 'runtime'],
)

In [16]:
# view results: display the summary table as the cell output
# NOTE(review): the output saved with this cell shows beta1-beta6 columns and
# an "Unnamed: 0" column, neither of which the lrg_results construction in this
# notebook produces -- the saved output looks stale; re-run from a fresh kernel
# and confirm.
lrg_results

Unnamed: 0,Method,beta1,beta2,beta3,beta4,beta5,beta6,MSE,runtime
0,Linreg,6.124909,-0.001875,-1.65016,-0.117208,-12.022863,0.027118,27914400.0,0.480088
1,Ridge,6.125189,-0.001857,-1.649308,-0.116445,-12.023024,0.027249,27913820.0,0.603109
2,LASSO,6.032212,-0.024099,-1.455686,-0.0,-11.872269,-0.0,27699160.0,5.262211
3,Gibbs,6.125067,-0.00255,-1.650963,-0.117381,-12.022238,0.026746,27914000.0,3162.128275


In [17]:
# Save the summary table to CSV.
# index=False: the frame only has a default 0..n-1 index, and writing it adds
# a nameless first column that comes back as "Unnamed: 0" when the file is
# re-read (as seen in the saved results output in this notebook).
lrg_results.to_csv('lrg_model_results.csv', index=False)