In [15]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.linear_model import LinearRegression,Lasso, ElasticNet, BayesianRidge
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score, KFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.kernel_ridge import KernelRidge
import xgboost as xgb
import lightgbm as lgb



path = 'out/train_final.csv'

In [2]:
def load_data(path):
    df = pd.read_csv(path,index_col=0)
    return df

In [3]:
def split_data(df):
    y = df.Item_Outlet_Sales
    X = df.drop('Item_Outlet_Sales',axis=1)
    
    return X, y

In [4]:
def cross_val_testing(model, X, y, cv=5):
    kf = KFold(cv, shuffle=True, random_state=42).get_n_splits(X)
    rmse = np.sqrt(-cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=kf))
    return rmse

In [5]:
df = load_data(path)
X, y = split_data(df)

In [6]:
lasso = make_pipeline(RobustScaler(), Lasso(alpha =0.0005, random_state=1))

In [10]:
cross_val_testing(lasso, X, y)

array([ 0.593875  ,  0.59329215,  0.58312327,  0.58504512,  0.58755474])

In [11]:
ENet = make_pipeline(RobustScaler(), ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))

In [13]:
cross_val_testing(ENet, X, y)



array([ 0.59387192,  0.59334367,  0.58314001,  0.58511978,  0.58759283])

In [16]:
KRR = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)

In [17]:
cross_val_testing(KRR, X, y)

array([ 0.57642886,  0.57763149,  0.5671415 ,  0.56866954,  0.56782577])