In [None]:
import os
import numpy as np 
import pandas as pd
from sklearn import model_selection
from sklearn.linear_model import RidgeCV, Ridge, Lasso, SGDRegressor
from sklearn.metrics import mean_squared_log_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR

In [None]:
# Notebook-wide settings used by the cells below.
data_dir = 'input_data'  # directory (relative to the working dir) holding the CSV inputs
seed = 0  # random seed passed to the train/test split and the CV splitter
scoring = 'neg_mean_squared_error'  # scikit-learn scorer name used for cross-validation

In [None]:
# Training data: read with pandas' default integer index.
train_path = os.path.join(data_dir, 'train_preprocessed.csv')
train_df = pd.read_csv(train_path)

# Test data: the 'ID' column is used as the index.
test_path = os.path.join(data_dir, 'test.csv')
test_df = pd.read_csv(test_path, index_col='ID')

In [None]:
# Positional split of the training frame: column 1 is the target and
# columns 2 onward are the features (column 0 is used for neither).
y = train_df.iloc[:, 1]
X = train_df.iloc[:, 2:]


In [None]:
# Hold out part of the training data for local evaluation. The split is seeded
# for reproducibility; no test_size is passed, so scikit-learn's default applies.
# Note: X_test / y_test are this hold-out split, not the contents of test_df.
split_parts = model_selection.train_test_split(X, y, random_state=seed)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Candidate regressors to compare, as (short label, unfitted estimator) pairs.
# Estimators that are stochastic with their default settings receive
# random_state=seed so that repeated runs of the notebook give the same scores.
models = [
    # NOTE(review): the label says 'RCV' but this is plain Ridge, not RidgeCV —
    # confirm which one was intended.
    ('RCV', Ridge()),
    ('SVM', SVR()),
    ('RF', RandomForestRegressor(random_state=seed)),
    # Previously labelled 'GBM', which misattributed AdaBoost results to
    # gradient boosting (that model is 'GBR' below).
    ('ADA', AdaBoostRegressor(random_state=seed)),
    ('GBR', GradientBoostingRegressor(random_state=seed)),
    ('SGD', SGDRegressor(random_state=seed)),
    ('LSO', Lasso()),
]

In [None]:
# Cross-validate every candidate model on the training split and collect the
# per-fold scores. `results[i]` holds the fold scores for the model `names[i]`.
results = []
names = []

# One splitter shared by all models so every model is scored on the same folds.
# shuffle=True is required for random_state to take effect; recent scikit-learn
# versions raise a ValueError when random_state is set while shuffle is False.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)

for name, model in models:
    cv_results = model_selection.cross_val_score(
        model, X_train, y_train, cv=kfold, scoring=scoring
    )
    results.append(cv_results)
    names.append(name)
    # With the 'neg_mean_squared_error' scorer the values are negated MSE,
    # so scores closer to zero are better.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)