In [1]:
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

## Data Processing

In [2]:
df = pd.read_csv('./trips_BGMM.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 262800 entries, 0 to 262799
Data columns (total 27 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   Cluster              262800 non-null  int64  
 1   Year                 262800 non-null  int64  
 2   Month                262800 non-null  int64  
 3   Day                  262800 non-null  int64  
 4   Time                 262800 non-null  object 
 5   Weekend              262800 non-null  int64  
 6   Holiday              262800 non-null  int64  
 7   Checkin per Hour     262800 non-null  float64
 8   Checkout per Hour    262800 non-null  float64
 9   Temp (°C)            262800 non-null  float64
 10  Dew Point Temp (°C)  262800 non-null  float64
 11  Rel Hum (%)          262800 non-null  float64
 12  Wind Spd (km/h)      262800 non-null  float64
 13  Visibility (km)      262800 non-null  float64
 14  Stn Press (kPa)      262800 non-null  float64
 15  Blowing Snow     

In [4]:
# get df per cluster
res = pd.DataFrame(columns=['Cluster'])
dfs = {}
for c in range(df['Cluster'].max()+1):
    # cluster c
    res.loc[c, 'Cluster'] = c
    df_cur = df[df['Cluster'] == c].drop(columns=['Cluster', 'Year', 'Month', 'Day'])
    # get dummies for time
    dummies = pd.get_dummies(df_cur['Time'], prefix='Time')
    df_cur.drop(columns=['Time'], inplace=True)
    df_cur = pd.concat([df_cur, dummies], axis=1)
    dfs[c] = df_cur

## Ridge Regression 

In [5]:
from sklearn.linear_model import Ridge

In [6]:
# set pipeline
pipeline = Pipeline([('scaler', StandardScaler()),
                     ('ridge', Ridge())])

In [7]:
# set grid search
params = {'ridge__alpha': [0, 1, 5, 10, 50, 100, 500, 1000]}
ridge_search = GridSearchCV(pipeline, params, cv=3, verbose=2, n_jobs=-1)

In [8]:
res['Ridge Regression'] = 0
for c in dfs:
    print('For cluster {}:'.format(c))
    # train test split
    df_train, df_val = train_test_split(dfs[c], test_size=0.2, random_state=1207)
    # get X Y
    X_train = df_train.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_train = df_train['Checkin per Hour'] - df_train['Checkout per Hour']
    X_val = df_val.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_val = df_val['Checkin per Hour'] - df_val['Checkout per Hour']
    # shuffle
    X_train, Y_train = shuffle(X_train, Y_train, random_state=1207)
    # train model
    ridge_search.fit(X_train, Y_train)
    print('Best params: {}'.format(ridge_search.best_params_))
    # predict
    Y_pred = ridge_search.predict(X_val)
    mse = mean_squared_error(Y_val, Y_pred)
    print('MSE: {}'.format(mse))
    r2 = ridge_search.score(X_val, Y_val)
    print('R^2: {}'.format(r2))
    res.loc[c, 'Ridge Regression'] = r2
    print()

For cluster 0:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    9.0s remaining:    0.7s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    9.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 58.29027628943926
R^2: 0.3477972435907811

For cluster 1:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 60.98337538366479
R^2: 0.20373643564951682

For cluster 2:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 10}
MSE: 10.38059707744449
R^2: 0.40824622777553055

For cluster 3:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 1000}
MSE: 1.4281245051648237
R^2: 0.022912894213363066

For cluster 4:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 637.0526634183014
R^2: 0.48409622474495173

For cluster 5:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 32.775983048689454
R^2: 0.2922902245683986

For cluster 6:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 32.63747732376697
R^2: 0.3557573377982688

For cluster 7:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 500}
MSE: 11.644776339953024
R^2: 0.16806932710871403

For cluster 8:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 50}
MSE: 45.29059729758992
R^2: 0.40034291750271855

For cluster 9:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 41.67811735611069
R^2: 0.4929070713315782

For cluster 10:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 500}
MSE: 1.0074745029903598
R^2: 0.01216660512438883

For cluster 11:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 3.0286230358172865
R^2: 0.32033815609349625

For cluster 12:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 38.47800578294703
R^2: 0.3819609056942812

For cluster 13:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 26.33486881482093
R^2: 0.43174932652902964

For cluster 14:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 500}
MSE: 21.556940651674445
R^2: 0.12346015400101795

For cluster 15:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 5.249908365279252
R^2: 0.3258055455855827

For cluster 16:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 28.753823912127956
R^2: 0.17050133664286649

For cluster 17:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 50}
MSE: 59.33119737551475
R^2: 0.46348385859069197

For cluster 18:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 50}
MSE: 59.47201620650574
R^2: 0.4993819687678803

For cluster 19:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 16.009043583301146
R^2: 0.40612861860866045

For cluster 20:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 2.6339428176619286
R^2: 0.27181108545518995

For cluster 21:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 500}
MSE: 0.3761063490135959
R^2: 0.07569778013843909

For cluster 22:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 49.52809929465605
R^2: 0.1368243023878405

For cluster 23:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 67.17068119495768
R^2: 0.2979408298354893

For cluster 24:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 7.614472950561307
R^2: 0.3806321048388911

For cluster 25:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 5.032495726992568
R^2: 0.2580329857677739

For cluster 26:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 52.388685999549
R^2: 0.20822489641825215

For cluster 27:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 30.8249112186073
R^2: 0.3274020524290112

For cluster 28:
Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'ridge__alpha': 100}
MSE: 21.98757596961035
R^2: 0.16462982441565222

For cluster 29:
Fitting 3 folds for each of 8 candidates, totalling 24 fits
Best params: {'ridge__alpha': 1000}
MSE: 2.6873664773208845
R^2: 0.005124585942789084



[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.4s finished


In [9]:
# save csv
res.to_csv('./pred_res.csv', index=False)

## LASSO Regression

In [10]:
from sklearn.linear_model import Lasso

In [11]:
# set pipeline
pipeline = Pipeline([('scaler', StandardScaler()),
                     ('lasso', Lasso())])

In [12]:
# set grid search
params = {'lasso__alpha': [1, 5, 10, 50, 100, 500, 1000]}
lasso_search = GridSearchCV(pipeline, params, cv=3, verbose=2, n_jobs=-1)

In [13]:
res['LASSO Regression'] = 0
for c in dfs:
    print('For cluster {}:'.format(c))
    # train test split
    df_train, df_val = train_test_split(dfs[c], test_size=0.2, random_state=1207)
    # get X Y
    X_train = df_train.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_train = df_train['Checkin per Hour'] - df_train['Checkout per Hour']
    X_val = df_val.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_val = df_val['Checkin per Hour'] - df_val['Checkout per Hour']
    # shuffle
    X_train, Y_train = shuffle(X_train, Y_train, random_state=1207)
    # train model
    lasso_search.fit(X_train, Y_train)
    print('Best params: {}'.format(lasso_search.best_params_))
    # predict
    Y_pred = lasso_search.predict(X_val)
    mse = mean_squared_error(Y_val, Y_pred)
    print('MSE: {}'.format(mse))
    r2 = lasso_search.score(X_val, Y_val)
    print('R^2: {}'.format(r2))
    res.loc[c, 'LASSO Regression'] = r2
    print()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


For cluster 0:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 65.1725775137879
R^2: 0.27079201879701986

For cluster 1:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 64.54256553327122
R^2: 0.15726387789263874

For cluster 2:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 15.470393598712498
R^2: 0.11809853503252288

For cluster 3:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 1.4618179354058507
R^2: -0.00013930895181513137

For cluster 4:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 645.7070175758228
R^2: 0.4770876770398025

For cluster 5:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 37.55999031559709
R^2: 0.18899237066431174

For cluster 6:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 37.29862341120476
R^2: 0.2637493331815838

For cluster 7:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 13.999350790876962
R^2: -0.00014710317247801363

For cluster 8:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 51.57694360261877
R^2: 0.3171103635120527

For cluster 9:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished


Best params: {'lasso__alpha': 1}
MSE: 48.7079605805406
R^2: 0.40737576581944546

For cluster 10:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 1.0198870078032152
R^2: -3.913069171801453e-06

For cluster 11:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 4.456668752508548
R^2: -0.0001336139190601937

For cluster 12:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 48.08685666506499
R^2: 0.22762220295582414

For cluster 13:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 32.49421900833649
R^2: 0.2988435991369499

For cluster 14:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 23.894808065670116
R^2: 0.02839870830971447

For cluster 15:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 7.547651935954023
R^2: 0.030728781339448896

For cluster 16:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 34.133883951116275
R^2: 0.015295802076049636

For cluster 17:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 70.83885384853258
R^2: 0.3594232004433253

For cluster 18:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 68.89844473094833
R^2: 0.4200330515717533

For cluster 19:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 20.083898917161047
R^2: 0.25496781043811745

For cluster 20:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 3.6176893726152506
R^2: -0.0001588795854865488

For cluster 21:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.5s finished


Best params: {'lasso__alpha': 1}
MSE: 0.40702294654120014
R^2: -0.00028147360262975596

For cluster 22:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 54.66749149648848
R^2: 0.047254976847462715

For cluster 23:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 73.12281414491719
R^2: 0.23572991511469144

For cluster 24:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 11.355714030704675
R^2: 0.07631628046816352

For cluster 25:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 6.782704632409666
R^2: -9.414322056144897e-06

For cluster 26:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 57.261990467713645
R^2: 0.13457233047873474

For cluster 27:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 37.844848313530555
R^2: 0.17422739286104905

For cluster 28:
Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'lasso__alpha': 1}
MSE: 26.20611916094877
R^2: 0.00435544167654478

For cluster 29:
Fitting 3 folds for each of 7 candidates, totalling 21 fits
Best params: {'lasso__alpha': 1}
MSE: 2.703674925981527
R^2: -0.0009128766626320672



[Parallel(n_jobs=-1)]: Done  17 out of  21 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.3s finished


In [14]:
# save csv
res.to_csv('./pred_res.csv', index=False)

## AdaBoost Regression

In [15]:
from sklearn.ensemble import AdaBoostRegressor

In [16]:
# set pipeline
pipeline = Pipeline([('scaler', StandardScaler()),
                     ('ada', AdaBoostRegressor())])

In [17]:
# set grid search
params = {'ada__n_estimators': [10, 50, 100],
          'ada__learning_rate' : [0.01, 0.05, 0.1, 0.5],
          'ada__loss' : ['linear', 'square', 'exponential']}
ada_search = GridSearchCV(pipeline, params, cv=3, verbose=2, n_jobs=-1)

In [18]:
res['AdaBoost Regression'] = 0
for c in dfs:
    print('For cluster {}:'.format(c))
    # train test split
    df_train, df_val = train_test_split(dfs[c], test_size=0.2, random_state=1207)
    # get X Y
    X_train = df_train.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_train = df_train['Checkin per Hour'] - df_train['Checkout per Hour']
    X_val = df_val.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_val = df_val['Checkin per Hour'] - df_val['Checkout per Hour']
    # shuffle
    X_train, Y_train = shuffle(X_train, Y_train, random_state=1207)
    # train model
    ada_search.fit(X_train, Y_train)
    print('Best params: {}'.format(ada_search.best_params_))
    # predict
    Y_pred = ada_search.predict(X_val)
    mse = mean_squared_error(Y_val, Y_pred)
    print('MSE: {}'.format(mse))
    r2 = ada_search.score(X_val, Y_val)
    print('R^2: {}'.format(r2))
    res.loc[c, 'AdaBoost Regression'] = r2
    print()

For cluster 0:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   30.9s finished


Best params: {'ada__learning_rate': 0.5, 'ada__loss': 'linear', 'ada__n_estimators': 100}
MSE: 47.796395972847264
R^2: 0.4652119841544613

For cluster 1:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   28.4s finished


Best params: {'ada__learning_rate': 0.01, 'ada__loss': 'exponential', 'ada__n_estimators': 50}
MSE: 49.89889009189856
R^2: 0.34846722025899746

For cluster 2:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   31.2s finished


Best params: {'ada__learning_rate': 0.5, 'ada__loss': 'linear', 'ada__n_estimators': 50}
MSE: 10.325503651912086
R^2: 0.41138687008544217

For cluster 3:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.6s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   29.1s finished


Best params: {'ada__learning_rate': 0.01, 'ada__loss': 'exponential', 'ada__n_estimators': 10}
MSE: 1.4193373996846006
R^2: 0.028924812243522724

For cluster 4:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.6s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   27.7s finished


Best params: {'ada__learning_rate': 0.5, 'ada__loss': 'linear', 'ada__n_estimators': 10}
MSE: 460.92882209739497
R^2: 0.6267264339372713

For cluster 5:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   28.1s finished


Best params: {'ada__learning_rate': 0.1, 'ada__loss': 'exponential', 'ada__n_estimators': 100}
MSE: 30.67338887757287
R^2: 0.33769012749287275

For cluster 6:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   32.8s finished


Best params: {'ada__learning_rate': 0.05, 'ada__loss': 'square', 'ada__n_estimators': 100}
MSE: 22.281103338769118
R^2: 0.5601854521609292

For cluster 7:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   29.5s finished


Best params: {'ada__learning_rate': 0.01, 'ada__loss': 'exponential', 'ada__n_estimators': 100}
MSE: 11.752243300452758
R^2: 0.1603916304186216

For cluster 8:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   31.3s finished


Best params: {'ada__learning_rate': 0.05, 'ada__loss': 'square', 'ada__n_estimators': 100}
MSE: 35.19568641000064
R^2: 0.5340016716839968

For cluster 9:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   31.2s finished


Best params: {'ada__learning_rate': 0.5, 'ada__loss': 'linear', 'ada__n_estimators': 100}
MSE: 34.92607951070651
R^2: 0.5750583502929352

For cluster 10:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   31.9s finished


Best params: {'ada__learning_rate': 0.1, 'ada__loss': 'exponential', 'ada__n_estimators': 50}
MSE: 1.0390409122458784
R^2: -0.018784404679191402

For cluster 11:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.4s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   31.1s finished


Best params: {'ada__learning_rate': 0.5, 'ada__loss': 'linear', 'ada__n_estimators': 50}
MSE: 2.908217794682077
R^2: 0.34735863610641415

For cluster 12:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.6s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   32.0s finished


Best params: {'ada__learning_rate': 0.5, 'ada__loss': 'exponential', 'ada__n_estimators': 50}
MSE: 42.65256960771316
R^2: 0.31490847943461986

For cluster 13:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   32.0s finished


Best params: {'ada__learning_rate': 0.1, 'ada__loss': 'exponential', 'ada__n_estimators': 100}
MSE: 22.746536375281714
R^2: 0.509177937992563

For cluster 14:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   32.3s finished


Best params: {'ada__learning_rate': 0.05, 'ada__loss': 'exponential', 'ada__n_estimators': 50}
MSE: 21.474242960465464
R^2: 0.12682277500963324

For cluster 15:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   31.9s finished


Best params: {'ada__learning_rate': 0.1, 'ada__loss': 'square', 'ada__n_estimators': 50}
MSE: 5.169154380064571
R^2: 0.33617599116586594

For cluster 16:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.4s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   31.4s finished


Best params: {'ada__learning_rate': 0.01, 'ada__loss': 'exponential', 'ada__n_estimators': 10}
MSE: 26.951419937064077
R^2: 0.22249760999816315

For cluster 17:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   30.8s finished


Best params: {'ada__learning_rate': 0.5, 'ada__loss': 'exponential', 'ada__n_estimators': 10}
MSE: 53.9884827807517
R^2: 0.5117965970121217

For cluster 18:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   31.9s finished


Best params: {'ada__learning_rate': 0.5, 'ada__loss': 'linear', 'ada__n_estimators': 10}
MSE: 54.51300480738128
R^2: 0.5411254757454654

For cluster 19:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   29.6s finished


Best params: {'ada__learning_rate': 0.05, 'ada__loss': 'square', 'ada__n_estimators': 100}
MSE: 13.633064880684076
R^2: 0.4942679098122541

For cluster 20:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   29.7s finished


Best params: {'ada__learning_rate': 0.05, 'ada__loss': 'exponential', 'ada__n_estimators': 100}
MSE: 2.3334224148349323
R^2: 0.354893992367882

For cluster 21:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.6s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   30.1s finished


Best params: {'ada__learning_rate': 0.1, 'ada__loss': 'exponential', 'ada__n_estimators': 10}
MSE: 0.38738146994560896
R^2: 0.04798854488091231

For cluster 22:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   29.2s finished


Best params: {'ada__learning_rate': 0.05, 'ada__loss': 'exponential', 'ada__n_estimators': 100}
MSE: 49.50446022252421
R^2: 0.13723628412889044

For cluster 23:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   28.7s finished


Best params: {'ada__learning_rate': 0.5, 'ada__loss': 'exponential', 'ada__n_estimators': 10}
MSE: 54.05639444449186
R^2: 0.4350096388685383

For cluster 24:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   31.6s finished


Best params: {'ada__learning_rate': 0.5, 'ada__loss': 'linear', 'ada__n_estimators': 100}
MSE: 7.044146112121688
R^2: 0.42702298911568104

For cluster 25:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.9s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   30.8s finished


Best params: {'ada__learning_rate': 0.05, 'ada__loss': 'exponential', 'ada__n_estimators': 100}
MSE: 4.917820661324148
R^2: 0.27494012701471326

For cluster 26:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   29.5s finished


Best params: {'ada__learning_rate': 0.1, 'ada__loss': 'exponential', 'ada__n_estimators': 50}
MSE: 47.95734091019143
R^2: 0.2751979202723486

For cluster 27:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   30.0s finished


Best params: {'ada__learning_rate': 0.05, 'ada__loss': 'exponential', 'ada__n_estimators': 100}
MSE: 29.844730111952707
R^2: 0.34878955281492563

For cluster 28:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.6s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   31.5s finished


Best params: {'ada__learning_rate': 0.05, 'ada__loss': 'linear', 'ada__n_estimators': 50}
MSE: 21.53750114568413
R^2: 0.18172943945320774

For cluster 29:
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   28.8s finished


Best params: {'ada__learning_rate': 0.05, 'ada__loss': 'linear', 'ada__n_estimators': 10}
MSE: 2.698749594775771
R^2: 0.0009105035737622114



In [19]:
# save csv
res.to_csv('./pred_res.csv', index=False)

## KNN

In [20]:
from sklearn.neighbors import KNeighborsRegressor

In [21]:
# set pipeline
pipeline = Pipeline([('scaler', StandardScaler()),
                     ('knn', KNeighborsRegressor())])

In [22]:
# set grid search
params = {'knn__n_neighbors': [3, 5, 11, 19, 23, 29],
          'knn__weights': ['uniform', 'distance'],
          'knn__metric': ['euclidean', 'manhattan']}
knn_search = GridSearchCV(pipeline, params, cv=3, verbose=2, n_jobs=-1)

In [23]:
res['KNN'] = 0
for c in dfs:
    print('For cluster {}:'.format(c))
    # train test split
    df_train, df_val = train_test_split(dfs[c], test_size=0.2, random_state=1207)
    # get X Y
    X_train = df_train.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_train = df_train['Checkin per Hour'] - df_train['Checkout per Hour']
    X_val = df_val.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_val = df_val['Checkin per Hour'] - df_val['Checkout per Hour']
    # shuffle
    X_train, Y_train = shuffle(X_train, Y_train, random_state=1207)
    # train model
    knn_search.fit(X_train, Y_train)
    print('Best params: {}'.format(knn_search.best_params_))
    # predict
    Y_pred = knn_search.predict(X_val)
    mse = mean_squared_error(Y_val, Y_pred)
    print('MSE: {}'.format(mse))
    r2 = knn_search.score(X_val, Y_val)
    print('R^2: {}'.format(r2))
    res.loc[c, 'KNN'] = r2
    print()

For cluster 0:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.9s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 37.39444612918919
R^2: 0.5815981259249603

For cluster 1:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.4s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 23, 'knn__weights': 'distance'}
MSE: 45.162115993807326
R^2: 0.41031556176420503

For cluster 2:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   23.7s finished


Best params: {'knn__metric': 'manhattan', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 6.620136438327258
R^2: 0.6226141251033671

For cluster 3:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   23.9s finished


Best params: {'knn__metric': 'manhattan', 'knn__n_neighbors': 29, 'knn__weights': 'uniform'}
MSE: 1.3801023732347335
R^2: 0.05576843708205759

For cluster 4:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.7s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.2s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 11, 'knn__weights': 'distance'}
MSE: 154.43880647457502
R^2: 0.8749309627267023

For cluster 5:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.4s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 23, 'knn__weights': 'distance'}
MSE: 24.186632641596628
R^2: 0.4777542955827402

For cluster 6:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   20.9s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 18.51303993134467
R^2: 0.634564583147756

For cluster 7:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.8s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   24.4s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 29, 'knn__weights': 'distance'}
MSE: 9.666494813016262
R^2: 0.3094024909090537

For cluster 8:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    9.3s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   24.2s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 26.699167199497328
R^2: 0.6464973821661291

For cluster 9:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.8s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 21.382450789041922
R^2: 0.7398421454578026

For cluster 10:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.0s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   21.9s finished


Best params: {'knn__metric': 'manhattan', 'knn__n_neighbors': 29, 'knn__weights': 'uniform'}
MSE: 0.9988068672324206
R^2: 0.020665261944880386

For cluster 11:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.9s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 29, 'knn__weights': 'distance'}
MSE: 2.4136105964570054
R^2: 0.4583548335134534

For cluster 12:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.9s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   24.2s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 23, 'knn__weights': 'distance'}
MSE: 30.707482273940286
R^2: 0.5067721378272163

For cluster 13:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    9.6s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.9s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 16.917488641219016
R^2: 0.6349564381197937

For cluster 14:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   23.6s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 23, 'knn__weights': 'distance'}
MSE: 20.559514356613956
R^2: 0.16401710988797247

For cluster 15:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.6s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 4.161532107720521
R^2: 0.4655750779483411

For cluster 16:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.8s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   23.7s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 23, 'knn__weights': 'uniform'}
MSE: 23.906705595981045
R^2: 0.3103324136037947

For cluster 17:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   24.0s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 33.1715516002307
R^2: 0.7000385352671112

For cluster 18:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.0s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 29.097150078096124
R^2: 0.7550687043132642

For cluster 19:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   21.6s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 10.30981967648629
R^2: 0.6175469932784083

For cluster 20:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.2s finished


Best params: {'knn__metric': 'manhattan', 'knn__n_neighbors': 29, 'knn__weights': 'distance'}
MSE: 1.9454252033459039
R^2: 0.46216103946779075

For cluster 21:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    9.0s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   24.2s finished


Best params: {'knn__metric': 'manhattan', 'knn__n_neighbors': 29, 'knn__weights': 'uniform'}
MSE: 0.3821791572329093
R^2: 0.060773517007475575

For cluster 22:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   23.3s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 23, 'knn__weights': 'distance'}
MSE: 45.22896400280278
R^2: 0.21174963078773112

For cluster 23:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.0s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.3s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 37.6409752117167
R^2: 0.6065814526337647

For cluster 24:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.7s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.1s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 19, 'knn__weights': 'distance'}
MSE: 4.559999914761956
R^2: 0.6290856153172424

For cluster 25:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.0s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.3s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 29, 'knn__weights': 'distance'}
MSE: 4.069540497593874
R^2: 0.40000648264806493

For cluster 26:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   25.2s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 23, 'knn__weights': 'distance'}
MSE: 43.009493126104154
R^2: 0.3499770947640728

For cluster 27:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   21.9s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 23, 'knn__weights': 'distance'}
MSE: 20.22060723138457
R^2: 0.5587874097668584

For cluster 28:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.6s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   22.6s finished


Best params: {'knn__metric': 'euclidean', 'knn__n_neighbors': 29, 'knn__weights': 'distance'}
MSE: 18.699575264618257
R^2: 0.2895502672169412

For cluster 29:
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   26.8s finished


Best params: {'knn__metric': 'manhattan', 'knn__n_neighbors': 29, 'knn__weights': 'uniform'}
MSE: 2.7299488541038883
R^2: -0.010639605540229136



In [24]:
# save csv
res.to_csv('./pred_res.csv', index=False)

## Decision Tree

In [25]:
from sklearn.tree import DecisionTreeRegressor

In [26]:
# set pipeline
pipeline = Pipeline([('scaler', StandardScaler()),
                     ('tree', DecisionTreeRegressor())])

In [27]:
# set grid search
params = {'tree__max_depth': [3, 4, 5, 6, 7, 8, 9, 10, 11, 12]}
tree_search = GridSearchCV(pipeline, params, cv=3, verbose=2, n_jobs=-1)

In [28]:
res['Decision Tree'] = 0
for c in dfs:
    print('For cluster {}:'.format(c))
    # train test split
    df_train, df_val = train_test_split(dfs[c], test_size=0.2, random_state=1207)
    # get X Y
    X_train = df_train.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_train = df_train['Checkin per Hour'] - df_train['Checkout per Hour']
    X_val = df_val.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_val = df_val['Checkin per Hour'] - df_val['Checkout per Hour']
    # shuffle
    X_train, Y_train = shuffle(X_train, Y_train, random_state=1207)
    # train model
    tree_search.fit(X_train, Y_train)
    print('Best params: {}'.format(tree_search.best_params_))
    # predict
    Y_pred = tree_search.predict(X_val)
    mse = mean_squared_error(Y_val, Y_pred)
    print('MSE: {}'.format(mse))
    r2 = tree_search.score(X_val, Y_val)
    print('R^2: {}'.format(r2))
    res.loc[c, 'Decision Tree'] = r2
    print()

For cluster 0:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 6}
MSE: 42.243773594812325
R^2: 0.5273395953236335

For cluster 1:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 6}
MSE: 47.2724816821951
R^2: 0.3827603912846004

For cluster 2:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 7}
MSE: 8.12323917187866
R^2: 0.5369286191556761

For cluster 3:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 3}
MSE: 1.4385051342779993
R^2: 0.015810727126560953

For cluster 4:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 8}
MSE: 167.04888316089773
R^2: 0.864718955867141

For cluster 5:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 6}
MSE: 26.708465685765553
R^2: 0.42330204941476

For cluster 6:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 6}
MSE: 18.66224561201911
R^2: 0.6316193596557638

For cluster 7:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 3}
MSE: 11.964747091305027
R^2: 0.1452098513483684

For cluster 8:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 7}
MSE: 27.71451686304216
R^2: 0.6330539379793532

For cluster 9:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 9}
MSE: 25.268301519211917
R^2: 0.6925634401771927

For cluster 10:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 3}
MSE: 1.0237643480039187
R^2: -0.0038056630114085

For cluster 11:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 5}
MSE: 2.7039746863505396
R^2: 0.3931934085333988

For cluster 12:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 6}
MSE: 37.342598117411015
R^2: 0.4001979819407493

For cluster 13:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 7}
MSE: 17.45300312626279
R^2: 0.62340117012437

For cluster 14:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 3}
MSE: 22.038928787783032
R^2: 0.10386174189678743

For cluster 15:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 6}
MSE: 4.735568584125094
R^2: 0.39185718001642744

For cluster 16:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 3}
MSE: 27.13137055612232
R^2: 0.21730634227546763

For cluster 17:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 7}
MSE: 35.487351440951024
R^2: 0.6790973769932296

For cluster 18:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 7}
MSE: 37.659236861505455
R^2: 0.6829955629913791

For cluster 19:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 6}
MSE: 11.309303552693237
R^2: 0.5804701456108481

For cluster 20:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 5}
MSE: 2.1238177722656024
R^2: 0.41284201467597437

For cluster 21:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 3}
MSE: 0.39074796072892726
R^2: 0.03971520700101361

For cluster 22:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 5}
MSE: 47.13213692292182
R^2: 0.17858113378510998

For cluster 23:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 6}
MSE: 40.44075890585766
R^2: 0.5773184798310371

For cluster 24:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 7}
MSE: 5.287383684128724
R^2: 0.5699195827983616

For cluster 25:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 5}
MSE: 4.585763697713712
R^2: 0.32389701187078623

For cluster 26:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 5}
MSE: 43.78223906247851
R^2: 0.338298218263516

For cluster 27:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 7}
MSE: 24.448229017132856
R^2: 0.4665409239283448

For cluster 28:
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best params: {'tree__max_depth': 5}
MSE: 20.94292323534716
R^2: 0.20431913529067802

For cluster 29:
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best params: {'tree__max_depth': 3}
MSE: 2.730812440894102
R^2: -0.010959309336767253



[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.6s finished


In [29]:
# save csv
res.to_csv('./pred_res.csv', index=False)

## Random Forest 

In [30]:
from sklearn.ensemble import RandomForestRegressor

In [31]:
# set pipeline
pipeline = Pipeline([('scaler', StandardScaler()),
                     ('rf', RandomForestRegressor())])

In [32]:
# set grid search
params = {'rf__n_estimators': [10, 50, 100, 500],
          'rf__max_features': ['auto', 'log2', 'sqrt']}
rf_search = GridSearchCV(pipeline, params, cv=3, verbose=2, n_jobs=-1)

In [33]:
res['Random Forest'] = 0
for c in dfs:
    print('For cluster {}:'.format(c))
    # train test split
    df_train, df_val = train_test_split(dfs[c], test_size=0.2, random_state=1207)
    # get X Y
    X_train = df_train.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_train = df_train['Checkin per Hour'] - df_train['Checkout per Hour']
    X_val = df_val.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_val = df_val['Checkin per Hour'] - df_val['Checkout per Hour']
    # shuffle
    X_train, Y_train = shuffle(X_train, Y_train, random_state=1207)
    # train model
    rf_search.fit(X_train, Y_train)
    print('Best params: {}'.format(rf_search.best_params_))
    # predict
    Y_pred = rf_search.predict(X_val)
    mse = mean_squared_error(Y_val, Y_pred)
    print('MSE: {}'.format(mse))
    r2 = rf_search.score(X_val, Y_val)
    print('R^2: {}'.format(r2))
    res.loc[c, 'Random Forest'] = r2
    print()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


For cluster 0:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   41.6s finished


Best params: {'rf__max_features': 'sqrt', 'rf__n_estimators': 500}
MSE: 38.970470063926946
R^2: 0.5639641873020087

For cluster 1:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   43.6s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 47.98397705479452
R^2: 0.3734703538302838

For cluster 2:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   41.0s finished


Best params: {'rf__max_features': 'sqrt', 'rf__n_estimators': 500}
MSE: 6.886357981735158
R^2: 0.6074379650632715

For cluster 3:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   40.1s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 100}
MSE: 1.4579281392694066
R^2: 0.002521992381026239

For cluster 4:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   40.8s finished


Best params: {'rf__max_features': 'auto', 'rf__n_estimators': 100}
MSE: 154.37521461187214
R^2: 0.8749824612666628

For cluster 5:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   44.0s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 24.89513806849315
R^2: 0.46245601403871617

For cluster 6:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   42.8s finished


Best params: {'rf__max_features': 'auto', 'rf__n_estimators': 500}
MSE: 18.229938910958904
R^2: 0.640152813918047

For cluster 7:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   46.4s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 10.059544171232877
R^2: 0.281322104689978

For cluster 8:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   50.8s finished


Best params: {'rf__max_features': 'auto', 'rf__n_estimators': 500}
MSE: 26.5479175913242
R^2: 0.6484999589519905

For cluster 9:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   46.8s finished


Best params: {'rf__max_features': 'auto', 'rf__n_estimators': 500}
MSE: 22.20741460502283
R^2: 0.7298049042379829

For cluster 10:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   49.7s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 1.0209798607305935
R^2: -0.0010754604027745085

For cluster 11:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   45.6s finished


Best params: {'rf__max_features': 'sqrt', 'rf__n_estimators': 500}
MSE: 2.4503744178082187
R^2: 0.4501045605963182

For cluster 12:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   42.2s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 31.394692141552515
R^2: 0.49573407710824724

For cluster 13:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   47.2s finished


Best params: {'rf__max_features': 'auto', 'rf__n_estimators': 500}
MSE: 17.294807748858446
R^2: 0.6268146912010126

For cluster 14:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   45.1s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 20.756086184931505
R^2: 0.15602418348411284

For cluster 15:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   48.8s finished


Best params: {'rf__max_features': 'sqrt', 'rf__n_estimators': 500}
MSE: 4.351441143835617
R^2: 0.44118691532082555

For cluster 16:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   48.9s finished


Best params: {'rf__max_features': 'sqrt', 'rf__n_estimators': 500}
MSE: 24.606136328767118
R^2: 0.29015503268465237

For cluster 17:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   47.9s finished


Best params: {'rf__max_features': 'sqrt', 'rf__n_estimators': 500}
MSE: 33.72051692465753
R^2: 0.6950743887361575

For cluster 18:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   46.4s finished


Best params: {'rf__max_features': 'auto', 'rf__n_estimators': 500}
MSE: 29.74056046575343
R^2: 0.7496526639284073

For cluster 19:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   44.3s finished


Best params: {'rf__max_features': 'auto', 'rf__n_estimators': 100}
MSE: 10.564399600456621
R^2: 0.6081030979990887

For cluster 20:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   40.7s finished


Best params: {'rf__max_features': 'sqrt', 'rf__n_estimators': 500}
MSE: 2.1219635525114153
R^2: 0.4133546386635093

For cluster 21:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   42.8s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 0.3836809178082191
R^2: 0.05708285707285887

For cluster 22:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   50.2s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 46.20356697488584
R^2: 0.19476425052272583

For cluster 23:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   47.8s finished


Best params: {'rf__max_features': 'sqrt', 'rf__n_estimators': 500}
MSE: 36.99720843835617
R^2: 0.6133100187081983

For cluster 24:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   41.5s finished


Best params: {'rf__max_features': 'auto', 'rf__n_estimators': 500}
MSE: 4.849598605022831
R^2: 0.6055294043499331

For cluster 25:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   40.4s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 4.1101500205479455
R^2: 0.3940192095076198

For cluster 26:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   43.4s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 44.19490183333333
R^2: 0.3320614497341293

For cluster 27:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   47.8s finished


Best params: {'rf__max_features': 'sqrt', 'rf__n_estimators': 500}
MSE: 21.554706283105023
R^2: 0.5296774383648346

For cluster 28:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   42.6s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 19.138498495433787
R^2: 0.27287433273006567

For cluster 29:
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   47.0s finished


Best params: {'rf__max_features': 'log2', 'rf__n_estimators': 500}
MSE: 2.7892946187214616
R^2: -0.032609680200575086



In [34]:
# save csv
res.to_csv('./pred_res.csv', index=False)

## Xgboost 

In [35]:
import xgboost as xgb

In [36]:
# set pipeline
pipeline = Pipeline([('scaler', StandardScaler()),
                     ('xgb', xgb.XGBRegressor())])

In [37]:
# set grid search
params = {'xgb__min_child_weight': [1, 5, 10],
          'xgb__gamma': [0.5, 1, 1.5],
          'xgb__subsample': [0.6, 0.8, 1.0],
          'xgb__colsample_bytree': [0.6, 0.8, 1.0],
          'xgb__max_depth': [3, 4, 5]}
xgb_search = GridSearchCV(pipeline, params, cv=3, verbose=2, n_jobs=-1)

In [38]:
res['Xgboost'] = 0
for c in dfs:
    print('For cluster {}:'.format(c))
    # train test split
    df_train, df_val = train_test_split(dfs[c], test_size=0.2, random_state=1207)
    # get X Y
    X_train = df_train.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_train = df_train['Checkin per Hour'] - df_train['Checkout per Hour']
    X_val = df_val.drop(columns=['Checkin per Hour', 'Checkout per Hour'])
    Y_val = df_val['Checkin per Hour'] - df_val['Checkout per Hour']
    # shuffle
    X_train, Y_train = shuffle(X_train, Y_train, random_state=1207)
    # train model
    xgb_search.fit(X_train, Y_train)
    print('Best params: {}'.format(xgb_search.best_params_))
    # predict
    Y_pred = xgb_search.predict(X_val)
    mse = mean_squared_error(Y_val, Y_pred)
    print('MSE: {}'.format(mse))
    r2 = xgb_search.score(X_val, Y_val)
    print('R^2: {}'.format(r2))
    res.loc[c, 'Xgboost'] = r2
    print()

For cluster 0:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   35.7s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  4.1min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 1, 'xgb__max_depth': 4, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 37.339908019494736
R^2: 0.5822083461492642

For cluster 1:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.3s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   36.4s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.8min finished


Best params: {'xgb__colsample_bytree': 1.0, 'xgb__gamma': 1.5, 'xgb__max_depth': 4, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 45.79301523298527
R^2: 0.4020778728240091

For cluster 2:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   36.5s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.6min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 1.5, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 6.751943950340278
R^2: 0.6151003383857703

For cluster 3:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   35.7s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.7min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 0.5, 'xgb__max_depth': 3, 'xgb__min_child_weight': 5, 'xgb__subsample': 1.0}
MSE: 1.378277574009286
R^2: 0.05701691912083351

For cluster 4:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   41.0s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.7min finished


Best params: {'xgb__colsample_bytree': 1.0, 'xgb__gamma': 0.5, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 0.8}
MSE: 143.92580789165433
R^2: 0.8834446947454608

For cluster 5:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   40.3s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  4.2min finished


Best params: {'xgb__colsample_bytree': 0.6, 'xgb__gamma': 0.5, 'xgb__max_depth': 5, 'xgb__min_child_weight': 5, 'xgb__subsample': 1.0}
MSE: 24.36516521045004
R^2: 0.4738993618032473

For cluster 6:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.4s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   44.5s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  4.4min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  5.0min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 1, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 17.611826058271443
R^2: 0.6523539612618245

For cluster 7:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   38.0s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.9min finished


Best params: {'xgb__colsample_bytree': 1.0, 'xgb__gamma': 0.5, 'xgb__max_depth': 3, 'xgb__min_child_weight': 1, 'xgb__subsample': 0.8}
MSE: 10.617053486550695
R^2: 0.24149230579172076

For cluster 8:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   35.7s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.7min finished


Best params: {'xgb__colsample_bytree': 1.0, 'xgb__gamma': 0.5, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 0.8}
MSE: 25.68050944213167
R^2: 0.6599846262144123

For cluster 9:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   39.8s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.8min finished


Best params: {'xgb__colsample_bytree': 0.6, 'xgb__gamma': 1.5, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 20.372993960336565
R^2: 0.7521240922467808

For cluster 10:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   37.9s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.6min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 1.5, 'xgb__max_depth': 3, 'xgb__min_child_weight': 10, 'xgb__subsample': 1.0}
MSE: 1.0078259857322804
R^2: 0.011821974675517932

For cluster 11:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   39.4s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.7min finished


Best params: {'xgb__colsample_bytree': 1.0, 'xgb__gamma': 1, 'xgb__max_depth': 4, 'xgb__min_child_weight': 1, 'xgb__subsample': 0.6}
MSE: 2.3626795772055558
R^2: 0.4697844072989993

For cluster 12:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   38.9s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.4min finished


Best params: {'xgb__colsample_bytree': 1.0, 'xgb__gamma': 0.5, 'xgb__max_depth': 4, 'xgb__min_child_weight': 1, 'xgb__subsample': 0.8}
MSE: 31.311082524818033
R^2: 0.4970770264308634

For cluster 13:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   31.2s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.2min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 1, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 15.756755644869338
R^2: 0.660002596941913

For cluster 14:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   30.3s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.1min finished


Best params: {'xgb__colsample_bytree': 1.0, 'xgb__gamma': 1.5, 'xgb__max_depth': 3, 'xgb__min_child_weight': 10, 'xgb__subsample': 1.0}
MSE: 20.424515457981162
R^2: 0.16950638203144008

For cluster 15:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   33.6s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.3min finished


Best params: {'xgb__colsample_bytree': 0.6, 'xgb__gamma': 1.5, 'xgb__max_depth': 4, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 4.177430506953755
R^2: 0.46353340181776304

For cluster 16:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   32.0s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.3min finished


Best params: {'xgb__colsample_bytree': 0.6, 'xgb__gamma': 1.5, 'xgb__max_depth': 3, 'xgb__min_child_weight': 5, 'xgb__subsample': 1.0}
MSE: 24.54773488040667
R^2: 0.29183981462882647

For cluster 17:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   36.6s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.3min finished


Best params: {'xgb__colsample_bytree': 0.6, 'xgb__gamma': 1.5, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 31.427519533932546
R^2: 0.7158093505564473

For cluster 18:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   34.9s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.5min finished


Best params: {'xgb__colsample_bytree': 1.0, 'xgb__gamma': 1, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 27.641960579517185
R^2: 0.767318063731628

For cluster 19:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   39.2s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.6min finished


Best params: {'xgb__colsample_bytree': 1.0, 'xgb__gamma': 1.5, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 9.618406376141305
R^2: 0.6431956567761157

For cluster 20:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   35.3s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.3min finished


Best params: {'xgb__colsample_bytree': 0.6, 'xgb__gamma': 0.5, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 1.9812874304418082
R^2: 0.4522464444936648

For cluster 21:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   38.3s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.3min finished


Best params: {'xgb__colsample_bytree': 0.6, 'xgb__gamma': 1.5, 'xgb__max_depth': 3, 'xgb__min_child_weight': 10, 'xgb__subsample': 0.8}
MSE: 0.3818743388866634
R^2: 0.061522625005329235

For cluster 22:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   31.9s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.3min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 0.5, 'xgb__max_depth': 4, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 44.439916106816256
R^2: 0.22550115724982311

For cluster 23:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   30.5s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.2min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 1.5, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 34.5773851596139
R^2: 0.6386016949692706

For cluster 24:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   36.8s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.3min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 1, 'xgb__max_depth': 5, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 4.6449686797767145
R^2: 0.6221741815931591

For cluster 25:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   30.5s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.3min finished


Best params: {'xgb__colsample_bytree': 0.6, 'xgb__gamma': 0.5, 'xgb__max_depth': 4, 'xgb__min_child_weight': 10, 'xgb__subsample': 1.0}
MSE: 4.161414116359272
R^2: 0.38646107728657664

For cluster 26:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   31.6s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.3min finished


Best params: {'xgb__colsample_bytree': 0.6, 'xgb__gamma': 0.5, 'xgb__max_depth': 4, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 41.646231772486324
R^2: 0.3705807113441688

For cluster 27:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   42.8s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  4.2min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 1, 'xgb__max_depth': 4, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 21.42245664717054
R^2: 0.5325631184910712

For cluster 28:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   57.8s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.9min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  4.6min finished


Best params: {'xgb__colsample_bytree': 0.8, 'xgb__gamma': 0.5, 'xgb__max_depth': 4, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 19.445878211359254
R^2: 0.26119610828101225

For cluster 29:
Fitting 3 folds for each of 243 candidates, totalling 729 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   41.6s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 729 out of 729 | elapsed:  3.9min finished


Best params: {'xgb__colsample_bytree': 0.6, 'xgb__gamma': 0.5, 'xgb__max_depth': 3, 'xgb__min_child_weight': 1, 'xgb__subsample': 1.0}
MSE: 2.7412253484794986
R^2: -0.014814215555545518



In [39]:
# save csv
res.to_csv('./pred_res.csv', index=False)