In [1]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

import os 
import warnings
warnings.filterwarnings('ignore')

In [2]:
data_root = './data/'
train_file = 'train.csv'
train_file2 = 'train_2.csv'  # former train data
test_file = 'test.csv'

In [3]:
trainset = pd.read_csv(os.path.join(data_root, train_file))
trainset.append(pd.read_csv(os.path.join(data_root, train_file2)))
testset = pd.read_csv(os.path.join(data_root, test_file))


In [35]:
# testset['n_jobs'].value_counts()

## Data processing

In [4]:
trainset.head()

Unnamed: 0,id,penalty,l1_ratio,alpha,max_iter,random_state,n_jobs,n_samples,n_features,n_classes,n_clusters_per_class,n_informative,flip_y,scale,time
0,0,none,0.304083,0.0001,417,475,-1,1089,327,4,3,7,0.074798,24.242009,0.409987
1,1,l1,0.727744,0.001,578,569,1,790,373,4,5,7,0.077781,54.626302,3.950953
2,2,none,0.745885,0.01,588,529,2,428,1198,2,5,6,0.030196,17.999964,0.368702
3,3,none,0.474605,0.001,829,103,4,877,313,6,5,7,0.057261,82.257222,1.004559
4,4,elasticnet,0.395049,0.001,167,418,2,216,644,8,5,11,0.073728,95.515601,0.8028


#### general preprocessing
drop features have very low correlations with time


In [5]:
def data_preprocess(data, is_test=False):
    data.set_index('id', inplace=True)
    data.loc[data['n_jobs']==-1, 'n_jobs'] = data['n_jobs'].max()
    data['1/n_jobs'] = 1 / data['n_jobs']
    one_hot = pd.get_dummies(data['penalty'])
    if not is_test:
        time = data['time']
        data = data.drop(['penalty','time'], axis=1).join(one_hot)
    else:
        time = None
        data = data.drop(['penalty'], axis=1).join(one_hot)
    data = data.drop(['l1_ratio','alpha','random_state','scale'], axis=1)
    return data,time

In [6]:
train_x, train_y = data_preprocess(trainset)
test_x, _ =  data_preprocess(testset, True)

In [7]:
train_x.corrwith(train_y).sort_values()

n_jobs                 -0.307182
l2                     -0.223753
none                   -0.222936
n_clusters_per_class   -0.060997
n_informative           0.109154
flip_y                  0.120277
n_classes               0.166696
l1                      0.197349
n_features              0.220124
elasticnet              0.237999
max_iter                0.264227
n_samples               0.296451
1/n_jobs                0.351546
dtype: float64

In [8]:
test_x.head()

Unnamed: 0_level_0,max_iter,n_jobs,n_samples,n_features,n_classes,n_clusters_per_class,n_informative,flip_y,1/n_jobs,elasticnet,l1,l2,none
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,602,4,1376,1078,3,4,8,0.049072,0.25,0,0,1,0
1,522,4,598,1528,10,3,11,0.041393,0.25,1,0,0,0
2,398,16,553,1008,8,5,8,0.005987,0.0625,0,0,1,0
3,439,16,1977,1319,3,4,10,0.002964,0.0625,0,0,0,1
4,278,4,1826,1530,7,5,8,0.050126,0.25,0,0,0,1


#### generate new features

In [9]:
def minmax(col):
    
    col = (col-col.min()) / (col.max()-col.min())
    return col

def new_features(data):
    
    data['f1'] = data['n_samples']*data['elasticnet']*data['max_iter']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    data['f2'] = data['n_samples']*data['l1']*data['max_iter']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    data['f3'] = data['n_samples']*data['none']*data['max_iter']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    data['f4'] = data['n_samples']*data['l2']*data['max_iter']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    
    data['f5'] = data['n_samples']*data['elasticnet']*data['max_iter']*data['flip_y']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    data['f6'] = data['n_samples']*data['l1']*data['max_iter']*data['flip_y']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    data['f7'] = data['n_samples']*data['none']*data['max_iter']*data['flip_y']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    data['f8'] = data['n_samples']*data['l2']*data['max_iter']*data['flip_y']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    
    data['f9'] = data['n_samples']*data['elasticnet']*data['max_iter']*data['flip_y']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    data['f10'] = data['n_samples']*data['l1']*data['max_iter']*data['flip_y']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    data['f11'] = data['n_samples']*data['none']*data['max_iter']*data['flip_y']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    data['f12'] = data['n_samples']*data['l2']*data['max_iter']*data['flip_y']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']
    
    data['f13'] = data['n_samples']*data['elasticnet']*data['max_iter']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']*data['n_clusters_per_class']
    data['f14'] = data['n_samples']*data['l1']*data['max_iter']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']*data['n_clusters_per_class']
    data['f15'] = data['n_samples']*data['none']*data['max_iter']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']*data['n_clusters_per_class']
    data['f16'] = data['n_samples']*data['l2']*data['max_iter']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']*data['n_clusters_per_class']
    
    data['f17'] = data['n_samples']*data['elasticnet']*data['max_iter']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']*data['flip_y']
    data['f18'] = data['n_samples']*data['l1']*data['max_iter']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']*data['flip_y']
    data['f19'] = data['n_samples']*data['none']*data['max_iter']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']*data['flip_y']
    data['f20'] = data['n_samples']*data['l2']*data['max_iter']*data['n_informative']\
                *data['n_classes']*data['n_features']*data['1/n_jobs']*data['flip_y']
    
    return data

In [10]:
train_x = new_features(train_x)
test_x = new_features(test_x)

# train_x['n_clusters'] = train_x['n_clusters_per_class'] * train_x['n_classes']
# test_x['n_clusters'] = test_x['n_clusters_per_class'] * test_x['n_classes']

In [11]:
train_x.corrwith(train_y).sort_values()

n_jobs                 -0.307182
l2                     -0.223753
none                   -0.222936
f8                     -0.075477
f20                    -0.069635
f12                    -0.069635
n_clusters_per_class   -0.060997
f16                    -0.028879
f4                     -0.009103
f3                      0.026209
f15                     0.046339
f7                      0.051800
f19                     0.059547
f11                     0.059547
n_informative           0.109154
flip_y                  0.120277
n_classes               0.166696
l1                      0.197349
n_features              0.220124
elasticnet              0.237999
max_iter                0.264227
n_samples               0.296451
1/n_jobs                0.351546
f9                      0.550064
f17                     0.550064
f5                      0.550381
f13                     0.567259
f1                      0.622755
f10                     0.645936
f18                     0.645936
f6        

#### drop unimportant features
it seems do drop could have better result here

In [80]:
def drop_features(data, cols):
    data.drop(cols, axis=1, inplace=True)
    return data

In [81]:
# drop_cols = ['n_jobs', 'f12','n_clusters_per_class','f4','f3','f7','f11','f8','f16','f15','f19']
# train_x = drop_features(train_x, drop_cols)
# test_x = drop_features(test_x, drop_cols)

In [15]:
# train_x.head()

Unnamed: 0_level_0,max_iter,n_jobs,n_samples,n_features,n_classes,n_clusters_per_class,n_informative,flip_y,1/n_jobs,elasticnet,...,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,417,8,1089,327,4,3,7,0.074798,0.125,0,...,5553559.0,0.0,0.0,0.0,38874910.0,0.0,0.0,0.0,1559197000.0,0.0
1,578,1,790,373,4,5,7,0.077781,1.0,0,...,0.0,0.0,0.0,370932200.0,0.0,0.0,0.0,23844700000.0,0.0,0.0
2,588,2,428,1198,2,5,6,0.030196,0.5,0,...,9103970.0,0.0,0.0,0.0,54623820.0,0.0,0.0,0.0,9044804000.0,0.0
3,829,4,877,313,6,5,7,0.057261,0.25,0,...,19545620.0,0.0,0.0,0.0,136819400.0,0.0,0.0,0.0,11946970000.0,0.0
4,167,2,216,644,8,5,11,0.073728,0.5,1,...,0.0,0.0,75359750.0,0.0,0.0,0.0,5110681000.0,0.0,0.0,0.0


In [16]:
# penalty_map = {
#     'none': 0,
#     'l1': 1,
#     'l2': 2,
#     'elasticnet': 3
# }
# # train_x['penalty'].value_counts()
# trainset['penalty'] = trainset['penalty'].map(penalty_map)
# testset['penalty'] = testset['penalty'].map(penalty_map)
# trainset.set_index('id', inplace=True)
# testset.set_index('id', inplace=True)
# # trainset.head(10)
# print()

In [17]:
# train_x = trainset[trainset.columns.tolist()[:-1]]
# train_y = trainset['time']


In [18]:
# trainset.describe()

In [19]:
# Here goes real feature engneering

# n_jobs should be modified as level
# baesed on raw n_jobs generate new features, sth divided by n_jobs
# classes and clusters relations 
# Do not only focus on intra features, try to analysis characteristics of each feature itself
# Normalize some features may be helpful
# Use median to replace values in predictions

In [20]:
# jobs_map = {
#     1: 1,
#     2: 2,
#     4: 4,
#     8: 8,
#     16: 16,
#     -1: 16
# }

# train_x['n_jobs'] = train_x['n_jobs'].map(jobs_map)
# testset['n_jobs'] = testset['n_jobs'].map(jobs_map)

# train_x['n_samples_per_job'] = train_x['n_samples'] / train_x['n_jobs']
# testset['n_samples_per_job'] = testset['n_samples'] / testset['n_jobs']

# jobs_map = {
#     1: 1,
#     2: 2,
#     4: 3,
#     8: 4,
#     16: 5,
#     -1: 5
# }

# train_x['lvl_jobs'] = train_x['n_jobs'].map(jobs_map)
# testset['lvl_jobs'] = testset['n_jobs'].map(jobs_map)

In [21]:
# train_x['n_clusters'] = train_x['n_classes'] * train_x['n_clusters_per_class']
# testset['n_clusters'] = testset['n_classes'] * testset['n_clusters_per_class']
# # train_x['n_combine_info_feature'] = train_x['n_classes'] * train_x['n_clusters_per_class'] * train_x['n_informative']
# # testset['n_combine_info_feature'] = testset['n_classes'] * testset['n_clusters_per_class'] * testset['n_informative']
# train_x['informative_ratio'] = train_x['n_informative'] / train_x['n_features']
# testset['informative_ratio'] = testset['n_informative'] / train_x['n_features']


In [22]:
# Maybe it's not good, this is max_iter, not n_iter, can't simply multiple them
# train_x['n_sample_total_run'] = train_x['n_samples'] * train_x['max_iter']
# testset['n_sample_total_run'] = testset['n_samples'] * testset['max_iter']

In [23]:
# train_x['scaled_features'] = train_x['scale'] * train_x['n_features']
# testset['scaled_features'] = testset['scale'] * testset['n_features']

In [24]:
# from sklearn.preprocessing import OneHotEncoder
# from sklearn.preprocessing import Binarizer

In [25]:
# # penalty one hot

# train_ohe = OneHotEncoder()
# test_ohe = OneHotEncoder()

# # one hot encode trainset
# penalty_oh = train_ohe.fit_transform(train_x.penalty.values.reshape(-1,1)).toarray()
# df_pnt_oh = pd.DataFrame(penalty_oh, columns=['pnt_none', 'pnt_l1', 'pnt_l2', 'pnt_elasticnet'])
# train_x = pd.concat([train_x, df_pnt_oh], axis=1)

# # onr hot encode testset
# penalty_oh = test_ohe.fit_transform(testset.penalty.values.reshape(-1,1)).toarray()
# df_pnt_oh = pd.DataFrame(penalty_oh, columns=['pnt_none', 'pnt_l1', 'pnt_l2', 'pnt_elasticnet'])
# testset = pd.concat([testset, df_pnt_oh], axis=1)

# del train_ohe
# del test_ohe

In [26]:
# train_ohe = OneHotEncoder()
# test_ohe = OneHotEncoder()

# train_x['li_ratio_bin'] = Binarizer(threshold=0.5).fit_transform(train_x.l1_ratio.values.reshape(-1,1))
# l1_ratio_oh = train_ohe.fit_transform(train_x.li_ratio_bin.values.reshape(-1,1)).toarray()
# df_pnt_oh = pd.DataFrame(l1_ratio_oh, columns=['l1_ratio_l2', 'l1_ratio_l1',])
# train_x = pd.concat([train_x, df_pnt_oh], axis=1)


In [27]:
# from sklearn.preprocessing import Normalizer, MinMaxScaler

# mmscaler_train = MinMaxScaler(copy=True, feature_range=(0, 1))
# mmscaler_test = MinMaxScaler(copy=True, feature_range=(0, 1))
# mmscaler_train.fit(train_x.max_iter.values.reshape(-1,1))
# train_x['max_iter'] = mmscaler_train.transform(train_x.max_iter.values.reshape(-1,1))
# testset['max_iter'] = mmscaler_test.fit_transform(testset.max_iter.values.reshape(-1,1))
# # normalize(train_x['n_classes'], axis=1, copy=False)
# # normalize(testset['n_classes'], axis=1, copy=False)
# # normalize(train_x['n_sample_total_run'], axis=1, copy=False)
# # normalize(testset['n_sample_total_run'], axis=1, copy=False)
# # normalize(train_x['n_samples_per_job'], axis=1, copy=False)
# # normalize(testset['n_samples_per_job'], axis=1, copy=False)

In [28]:
# # drop useless columns here

# train_x.drop(['penalty'], axis=1, inplace=True)
# testset.drop(['penalty'], axis=1, inplace=True)
# train_x.drop(['scale'], axis=1, inplace=True)
# testset.drop(['scale'], axis=1, inplace=True)
# train_x.drop(['alpha'], axis=1, inplace=True)
# testset.drop(['alpha'], axis=1, inplace=True)
# train_x.drop(['l1_ratio'], axis=1, inplace=True)
# testset.drop(['l1_ratio'], axis=1, inplace=True)
# train_x.drop(['n_clusters_per_class'], axis=1, inplace=True)
# testset.drop(['n_clusters_per_class'], axis=1, inplace=True)
# train_x.drop(['n_jobs'], axis=1 ,inplace=True)
# testset.drop(['n_jobs'], axis=1, inplace=True)
# train_x.drop(['random_state'], axis=1 ,inplace=True)
# testset.drop(['random_state'], axis=1, inplace=True)
# train_x.drop(['n_informative'], axis=1 ,inplace=True)
# testset.drop(['n_informative'], axis=1, inplace=True)
# train_x.drop(['n_features'], axis=1, inplace=True)
# testset.drop(['n_features'], axis=1, inplace=True)
# train_x.drop(['n_samples'], axis=1, inplace=True)
# testset.drop(['n_samples'], axis=1, inplace=True)

In [29]:
# train_x.head(20)

In [30]:
# pd.concat([train_x, train_y], axis=1).corr()[['time']].sort_values(by='time')

In [31]:
# train_x.head(10)

In [32]:
# len(train_x.columns)

## Modeling

In [12]:
# from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso, Ridge
from xgboost import XGBRegressor

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

from sklearn.neural_network import MLPRegressor

  from numpy.core.umath_tests import inner1d


In [13]:
train_x.head()

Unnamed: 0_level_0,max_iter,n_jobs,n_samples,n_features,n_classes,n_clusters_per_class,n_informative,flip_y,1/n_jobs,elasticnet,...,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,417,8,1089,327,4,3,7,0.074798,0.125,0,...,38874910.0,0.0,0.0,0.0,1559197000.0,0.0,0.0,0.0,38874910.0,0.0
1,578,1,790,373,4,5,7,0.077781,1.0,0,...,0.0,0.0,0.0,23844700000.0,0.0,0.0,0.0,370932200.0,0.0,0.0
2,588,2,428,1198,2,5,6,0.030196,0.5,0,...,54623820.0,0.0,0.0,0.0,9044804000.0,0.0,0.0,0.0,54623820.0,0.0
3,829,4,877,313,6,5,7,0.057261,0.25,0,...,136819400.0,0.0,0.0,0.0,11946970000.0,0.0,0.0,0.0,136819400.0,0.0
4,167,2,216,644,8,5,11,0.073728,0.5,1,...,0.0,0.0,5110681000.0,0.0,0.0,0.0,75359750.0,0.0,0.0,0.0


In [24]:
x_train, x_validate, y_train, y_validate = train_test_split(train_x, train_y, test_size=0.3)


In [25]:
# gbr = GradientBoostingRegressor(n_estimators=1500, learning_rate=0.1, max_depth=1, loss='ls')
# gbr.fit(x_train, y_train)

# # rfr = RandomForestRegressor(max_depth=2, random_state=0, n_estimators=1000)
# # rfr.fit(x_train, y_train)

# lsr= Lasso(alpha=0.01, normalize=False)
# lsr.fit(x_train, y_train)

# rgr= Ridge(alpha=0.01)
# rgr.fit(x_train, y_train)

# xgb = XGBRegressor(booster='gblinear', n_estimators=1100, learning_rate=0.1, max_depth=3)
# xgb.fit(x_train, y_train)

# xgb2 = XGBRegressor(booster='gblinear', n_estimators=1300, learning_rate=0.1, max_depth=3)
# xgb2.fit(x_train, y_train)

# xgb3 = XGBRegressor(booster='gblinear', n_estimators=1500, learning_rate=0.1, max_depth=3)
# xgb3.fit(x_train, y_train)

# xgb4 = XGBRegressor(booster='gblinear', n_estimators=700, learning_rate=0.1, max_depth=3)
# xgb4.fit(x_train, y_train)
# xgb5 = XGBRegressor(booster='gblinear', n_estimators=900, learning_rate=0.1, max_depth=3)
# xgb5.fit(x_train, y_train)

# print('GBDT:', mean_squared_error(y_validate, gbr.predict(x_validate)))
# # print('Random Forests:', mean_squared_error(y_validate, rfr.predict(x_validate)))
# print('LASSO:', mean_squared_error(y_validate, lsr.predict(x_validate)))
# print('Ridge Regression: ', mean_squared_error(y_validate, rgr.predict(x_validate)))
# print('XGBoost', mean_squared_error(y_validate, xgb.predict(x_validate)))
# print('XGBoost', mean_squared_error(y_validate, xgb2.predict(x_validate)))
# print('XGBoost', mean_squared_error(y_validate, xgb3.predict(x_validate)))

# print(mean_squared_error(y_validate, (1*gbr.predict(x_validate) + 1*lsr.predict(x_validate) + 1*rgr.predict(x_validate) + xgb2.predict(x_validate)+ xgb3.predict(x_validate))/5))
# print(mean_squared_error(y_validate, (1*gbr.predict(x_validate) + 1*lsr.predict(x_validate) + 1*rgr.predict(x_validate))/3))
# print(mean_squared_error(y_validate, (1*xgb.predict(x_validate) + 1*xgb2.predict(x_validate) + 1*xgb3.predict(x_validate) + 1*xgb4.predict(x_validate) + 1*xgb5.predict(x_validate))/5))
# print(mean_squared_error(y_validate, (rgr.predict(x_validate)+lsr.predict(x_validate))/2))
                         

GBDT: 2.531196966447541
LASSO: 0.6393070318001428
Ridge Regression:  0.6637223377459497
XGBoost 0.689680729996766
XGBoost 0.6983091286238777
XGBoost 0.7039132813269746
0.5645817414455705
0.5941372636311134
0.6789242576698143
0.6473105733275659


In [37]:
# mlpr = MLPRegressor(hidden_layer_sizes=(4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000),
#                     alpha=0.0001,
#                     learning_rate='adaptive',
#                     learning_rate_init=0.01,
#                     max_iter = 500,
#                     verbose = True,
#                     early_stopping = False,
#                    )
# mlpr.fit(x_train, y_train)
# print('MLP:', mean_squared_error(y_validate, mlpr.predict(x_validate)))

Iteration 1, loss = 108757015119906370128420911575990272.00000000
Iteration 2, loss = 24833896801988027468480512.00000000
MLP: 4.2669045803667755e+27


## Output result

In [113]:
# gbr = GradientBoostingRegressor(n_estimators=500, learning_rate=0.1, max_depth=1, random_state=0, loss='ls')
# gbr.fit(train_x, train_y)
# rfr = RandomForestRegressor(max_depth=2, random_state=0, n_estimators=1000)
# rfr.fit(train_x, train_y)
# lsr= Lasso(alpha=0.01, normalize=False)
# lsr.fit(x_train, y_train)
# rgr= Ridge(alpha=0.01)
# rgr.fit(x_train, y_train)
xgb = XGBRegressor(booster='gblinear', n_estimators=1100, learning_rate=0.1, max_depth=3)
xgb.fit(x_train, y_train)
xgb2 = XGBRegressor(booster='gblinear', n_estimators=1300, learning_rate=0.1, max_depth=2)
xgb2.fit(x_train, y_train)
xgb3 = XGBRegressor(booster='gblinear', n_estimators=1500, learning_rate=0.1, max_depth=1)
xgb3.fit(x_train, y_train)
xgb4 = XGBRegressor(booster='gblinear', n_estimators=700, learning_rate=0.1, max_depth=2)
xgb4.fit(x_train, y_train)
xgb5 = XGBRegressor(booster='gblinear', n_estimators=900, learning_rate=0.1, max_depth=3)
xgb5.fit(x_train, y_train)

# # predict = (1*gbr.predict(testset) + 1*lsr.predict(testset) + 1*rgr.predict(testset))/3
# predict = gbr.predict(testset)
# predicts = rgr.predict(test_x)
predicts = ((1*xgb.predict(test_x) + 1*xgb2.predict(test_x) + 1*xgb3.predict(test_x) + 1*xgb4.predict(test_x) +1*xgb5.predict(test_x))/5)
# predicts = (1*lsr.predict(test_x) + 1*rgr.predict(test_x))/2

In [114]:
# nn_rgs = MLPRegressor(hidden_layer_sizes=(2000,2000,2000,2000), activation='relu')
# nn_rgs.fit(x_train, y_train)
# y_predict = nn_rgs.predict(x_validate)
# score = mean_squared_error(y_validate, y_predict)

# score = cross_val_score(nn_rgs, train_x, train_y, cv=8, scoring='mean_squared_error')
# print(score)
# np.mean(score)
# predicts

In [115]:
# nn_rgs.fit(train_x, train_y)
# predict = nn_rgs.predict(testset)
predicts = predicts.tolist()
predicts = list(map(lambda x: x if x>0 else 0, predicts))

In [116]:
p = np.array(predicts)

In [117]:
result = pd.DataFrame()
result['Id'] = testset.index
result['time'] = pd.Series(p)
result.to_csv('./submition_20527456_6st.csv', index=False)