In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_log_error as mse
from sklearn.metrics import make_scorer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
import pickle

# Seaborn theme applied to every figure drawn after this point.
sns.set(context="notebook", style="darkgrid")

# Seed NumPy's global random generator so stochastic steps repeat between runs.
rand_seed = 1009
np.random.seed(rand_seed)

# Two float constants — presumably a default figure width and height (TODO confirm;
# they are not referenced in the cells visible here).
xsize = 12.0
ysize = 8.0

In [2]:
# Feature tables for both splits. index_col=False tells pandas not to use the
# first CSV column as the row index.
X_train, X_test = (
    pd.read_csv(csv_path, index_col=False)
    for csv_path in ("../data/X_train.csv", "../data/X_test.csv")
)

# Training targets as a float array — presumably one value per row of X_train
# (TODO confirm against the preprocessing step that wrote these files).
y_train = np.genfromtxt("../data/y_train.txt")

# Identifier arrays for each split; genfromtxt parses floats by default, hence
# the cast to int.
train_id, test_id = (
    np.genfromtxt(id_path).astype(int)
    for id_path in ("../data/train_id.txt", "../data/test_id.txt")
)

In [3]:
def log_rmse(y_true, y_pred):
    """Return the root mean squared logarithmic error of ``y_pred`` against ``y_true``.

    Negative predictions are floored at 0.0 before scoring. The flooring is
    applied to a copy, so the caller's ``y_pred`` is never modified, and plain
    sequences (lists, tuples) are accepted as well as arrays.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values; may contain negatives.

    Returns
    -------
    float
        Square root of ``mse(y_true, floored_pred)``, where ``mse`` is this
        notebook's import alias for ``sklearn.metrics.mean_squared_log_error``.
    """
    # np.clip builds a new array, so the predictions handed in by the caller
    # (or by a scikit-learn scorer) are not altered as a side effect.
    floored_pred = np.clip(np.asarray(y_pred), 0.0, None)
    return np.sqrt(mse(y_true, floored_pred))

# Scorer wrapper around log_rmse for scikit-learn model selection.
# greater_is_better=False marks the metric as a loss (lower is better).
log_rmse_scorer = make_scorer(
    score_func=log_rmse,
    greater_is_better=False,
)

In [4]:
# Ordinary least squares baseline fitted on the full training set.
# `normalize` is deliberately not passed: it was deprecated in scikit-learn 1.0
# and removed in 1.2, where supplying it raises TypeError. Its previous value
# (False) was the default, so the fitted model is unchanged on older releases.
lin_reg = LinearRegression(fit_intercept=True, copy_X=True, n_jobs=None)
lin_reg.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [5]:
# In-sample check: predictions are made on the same rows the model was fitted on.
# The printed label reads "RMSE", but log_rmse is built on
# mean_squared_log_error, so the figure is a log-scale error.
y_pred = lin_reg.predict(X_train)
print("RMSE: {:.5f}".format(log_rmse(y_train, y_pred)))

RMSE: 0.11098


In [6]:
# Persist the fitted linear model. The context manager closes the file even if
# serialisation raises, and pickle.dump writes straight to the open handle.
with open("../models/linear_regression.pkl", "wb") as model_file:
    pickle.dump(lin_reg, model_file)

In [7]:
# Ridge regression whose penalty strength is chosen by 4-fold cross-validation
# over 500 log-spaced candidates between 1e-4 and 1e4, ranked by log_rmse_scorer.
# Only arguments accepted by both old and current scikit-learn are passed:
# `normalize` (removed in 1.2) and `store_cv_values` (deprecated in 1.5 in
# favour of `store_cv_results`) were both set to their defaults, so omitting
# them leaves the fit unchanged while avoiding a TypeError on newer releases.
ridge_reg = RidgeCV(alphas=np.geomspace(1e-4, 1e4, 500), fit_intercept=True,
                    scoring=log_rmse_scorer, cv=4, gcv_mode=None)
ridge_reg.fit(X_train, y_train)

RidgeCV(alphas=array([1.00000e-04, 1.03761e-04, ..., 9.63758e+03, 1.00000e+04]),
    cv=4, fit_intercept=True, gcv_mode=None, normalize=False,
    scoring=make_scorer(log_rmse, greater_is_better=False),
    store_cv_values=False)

In [8]:
# Penalty strength selected by RidgeCV's cross-validated search during the fit above.
ridge_reg.alpha_

22.63129568399525

In [9]:
# In-sample check for the ridge model: scored on the rows it was fitted on.
# The printed label reads "RMSE", but log_rmse is built on
# mean_squared_log_error, so the figure is a log-scale error.
y_pred = ridge_reg.predict(X_train)
print("RMSE: {:.5f}".format(log_rmse(y_train, y_pred)))

RMSE: 0.12754


In [10]:
# Persist the fitted ridge model. The context manager closes the file even if
# serialisation raises, and pickle.dump writes straight to the open handle.
with open("../models/ridge_regression.pkl", "wb") as model_file:
    pickle.dump(ridge_reg, model_file)