In [1]:
import numpy as np  # linear algebra
import pandas as pd  #
import matplotlib.pyplot as plt  # Matlab-style plotting
import seaborn as sns
color = sns.color_palette()
sns.set_style('darkgrid')
from datetime import datetime
import warnings
def ignore_warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accepts any arguments and does nothing."""
    return None

# Swap the stdlib entry point for the no-op so that later calls to
# warnings.warn(...) are silenced (the original note names sklearn and seaborn
# as the noisy sources).
warnings.warn = ignore_warn

from sklearn.linear_model import LassoCV, RidgeCV, Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import learning_curve
from sklearn.kernel_ridge import KernelRidge
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn import linear_model

In [2]:
# Read the train/test tables from CSV files in the working directory.
train = pd.read_csv('preprocessed_train.csv')
test = pd.read_csv('preprocessed_test.csv')

# Split the training table into the target column and the remaining predictors.
y_train = train["SalePrice"]
X_train = train.drop(columns=["SalePrice"])

In [3]:
# Imported before first use so the cell reads top-down.
from sklearn.model_selection import cross_validate

# Shuffled 10-fold splitter with a fixed seed, shared by every evaluate() call
# so that all models are scored on the same folds.
kfolds = KFold(n_splits=10, shuffle=True, random_state=42)
# Default metrics (scikit-learn scorer names; the "neg_" ones are negated losses).
scores = ["neg_mean_squared_error","neg_mean_absolute_error","r2"]
def evaluate(model,scoring=scores):
    """Cross-validate ``model`` on (X_train, y_train) and print mean scores.

    Parameters
    ----------
    model : estimator
        Any object accepted by ``sklearn.model_selection.cross_validate``.
    scoring : str or sequence of str, optional
        Scorer name(s) to compute. Defaults to the module-level ``scores``.
        A single string is treated as a one-element list.

    Returns
    -------
    None
        For every scorer, the mean over the ``kfolds`` folds is printed for
        the train split and then for the test split.
    """
    # A bare string would switch cross_validate to single-metric mode (keys
    # 'train_score'/'test_score') and the loop below would iterate characters.
    if isinstance(scoring, str):
        scoring = [scoring]
    results = cross_validate(
        model,
        X_train,
        y_train,
        scoring=scoring,  # honour the argument instead of the global default
        cv=kfolds,
        return_train_score=True,  # needed for the 'train_*' keys read below
    )
    for scorer in scoring:
        for sample in ('train','test'):
            sample_score_mean = np.mean(results['%s_%s' % (sample, scorer)])
            print(sample,"_",scorer,":",sample_score_mean)

### 1. Ordinary Least Squares Regression

In [6]:
# Plain least-squares baseline: no scaling and no regularisation.
# NOTE(review): in the recorded run below, mean train R^2 is ~0.96 but mean
# test R^2 is about -2.8e8, so this model does not generalise on these
# features as-is. The cause is not established here; collinear or unscaled
# columns are a plausible suspect, to be confirmed.
ols = linear_model.LinearRegression()
evaluate(model=ols)

train _ neg_mean_squared_error : -0.0060405969440286205
test _ neg_mean_squared_error : -40301366.22383566
train _ neg_mean_absolute_error : -0.05655340698676685
test _ neg_mean_absolute_error : -585.2459208985307
train _ r2 : 0.9615862709873031
test _ r2 : -277220278.62092805


### 2. Lasso Regression

In [4]:
# Robust-scaled Lasso. The step names 'rs' and 'ls' are kept so that any
# parameter references such as 'ls__alpha' keep working.
# alpha is presumably the result of an earlier hyperparameter search -- TODO confirm.
lasso = Pipeline([
    ('rs',RobustScaler()),
    # max_iter must be an integer: recent scikit-learn releases validate
    # constructor parameters at fit time and reject the float 1e5.
    ('ls',Lasso(max_iter=100000,alpha=0.0004378391959798995))
])
evaluate(lasso)

train _ neg_mean_squared_error : -0.007759183950449574
test _ neg_mean_squared_error : -0.009881712865149248
train _ neg_mean_absolute_error : -0.0635066809186571
test _ neg_mean_absolute_error : -0.07058320494146876
train _ r2 : 0.9506528763490613
test _ r2 : 0.9366805640996627


### 3. Ridge Regression

#### 3.1 Linear Ridge

In [7]:
# Kernel ridge with a linear kernel, fitted on the features as loaded
# (unlike the other kernel models in this notebook, no scaler is applied).
# alpha is presumably the result of an earlier hyperparameter search -- TODO confirm.
kr_linear = KernelRidge(
    alpha=9.115577889447236,
    kernel="linear",
)
evaluate(model=kr_linear)

train _ neg_mean_squared_error : -0.007135290333751466
test _ neg_mean_squared_error : -0.010299395355000178
train _ neg_mean_absolute_error : -0.06083003155738396
test _ neg_mean_absolute_error : -0.07159895320228812
train _ r2 : 0.9546244073860496
test _ r2 : 0.9340491575799266


#### 3.2 RBF Kernel Ridge

In [7]:
# RBF-kernel ridge on robust-scaled features.
# gamma/alpha are presumably the result of an earlier hyperparameter search -- TODO confirm.
kr_rbf = make_pipeline(
    RobustScaler(),
    KernelRidge(
        alpha=0.006122448979591837,
        kernel="rbf",
        gamma=0.000505050505050505,
    ),
)
evaluate(model=kr_rbf)

train _ neg_mean_squared_error : -0.004718194401709781
test _ neg_mean_squared_error : -0.013038441067275416
train _ neg_mean_absolute_error : -0.05073873897428318
test _ neg_mean_absolute_error : -0.07897995133527586
train _ r2 : 0.9699900878172654
test _ r2 : 0.9173944829898476


#### 3.3 Polynomial Kernel Ridge

In [9]:
# Degree-2 polynomial-kernel ridge on robust-scaled features.
# gamma/alpha are presumably the result of an earlier hyperparameter search -- TODO confirm.
kr_poly = make_pipeline(
    RobustScaler(),
    KernelRidge(
        alpha=0.15142857142857144,
        kernel="polynomial",
        gamma=0.00202020202020202,
        degree=2,
    ),
)
evaluate(model=kr_poly)

train _ neg_mean_squared_error : -0.006022251875366477
test _ neg_mean_squared_error : -0.010633828235980894
train _ neg_mean_absolute_error : -0.05611683065044746
test _ neg_mean_absolute_error : -0.0725701879002778
train _ r2 : 0.9616945590050113
test _ r2 : 0.9319289407290807


#### 3.4 Sigmoid Kernel Ridge

In [10]:
# Sigmoid-kernel ridge on robust-scaled features.
# gamma/alpha are presumably the result of an earlier hyperparameter search -- TODO confirm.
kr_sigmoid = make_pipeline(
    RobustScaler(),
    KernelRidge(
        kernel="sigmoid",
        gamma=0.0010050251256281408,
        alpha=0.5858585858585859,
    ),
)
evaluate(model=kr_sigmoid)

train _ neg_mean_squared_error : -0.021206648545322785
test _ neg_mean_squared_error : -0.021921074764629924
train _ neg_mean_absolute_error : -0.10319695708392948
test _ neg_mean_absolute_error : -0.1046544956966031
train _ r2 : 0.8651268507558889
test _ r2 : 0.8599174595201658
