In [73]:
import os
import sys
import operator
import numpy as np
import pandas as pd
from scipy import sparse
import xgboost as xgb
from sklearn import model_selection, preprocessing, ensemble
from sklearn.metrics import log_loss
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.neural_network import MLPRegressor

In [69]:
def runXGB(train_X, train_y, test_X, test_y=None, feature_names=None, num_rounds=1000):
    """Train an XGBoost regression model and predict on ``test_X``.

    Parameters
    ----------
    train_X, train_y
        Training features and target; wrapped in an ``xgb.DMatrix``.
    test_X
        Features to predict on.
    test_y : optional
        Target values for ``test_X``. When provided, ``(test_X, test_y)`` is
        added to the watchlist and training uses ``early_stopping_rounds=20``.
        When omitted, exactly ``num_rounds`` boosting rounds are requested and
        no evaluation is printed.
    feature_names : optional
        Feature names forwarded to both ``xgb.DMatrix`` objects. ``None``
        (the default) leaves name handling to xgboost.
    num_rounds : int
        Maximum number of boosting rounds.

    Returns
    -------
    tuple
        ``(pred_test_y, model)``: predictions for ``test_X`` and the trained
        booster returned by ``xgb.train``.
    """
    # NOTE(review): 'reg:linear' and 'silent' are the spellings this notebook
    # was run with; newer xgboost releases may warn about or rename them --
    # confirm against the installed version before changing.
    param = {
        'objective': 'reg:linear',
        'eta': 0.1,
        'max_depth': 10,
        'silent': 1,
        'min_child_weight': 1,
        'subsample': 0.9,
    }
    plst = list(param.items())

    xgtrain = xgb.DMatrix(train_X, label=train_y, feature_names=feature_names)

    if test_y is not None:
        # Validation mode: monitor both sets. The log captured in this
        # notebook shows early stopping keyed on the 'test' entry.
        xgtest = xgb.DMatrix(test_X, label=test_y, feature_names=feature_names)
        watchlist = [(xgtrain, 'train'), (xgtest, 'test')]
        model = xgb.train(plst, xgtrain, num_rounds, watchlist, early_stopping_rounds=20)
    else:
        # Prediction mode: no labels available, so no early stopping.
        xgtest = xgb.DMatrix(test_X, feature_names=feature_names)
        model = xgb.train(plst, xgtrain, num_rounds)

    pred_test_y = model.predict(xgtest)
    return pred_test_y, model

In [6]:
# Directory holding the prepared CSVs; "" means the current working directory.
data_path = ""
# os.path.join inserts the separator when needed, so data_path may be given
# with or without a trailing slash.
train_file = os.path.join(data_path, "save_train.csv")
test_file = os.path.join(data_path, "save_test.csv")
train_df = pd.read_csv(train_file)
test_df = pd.read_csv(test_file)
# Sanity check of the loaded frames (rows, columns).
print(train_df.shape)
print(test_df.shape)

(25000, 386)
(25000, 385)


In [8]:
# Regression target.
train_y = train_df["reference"]
# Drop the target by name instead of by position, so it cannot leak into the
# features if the column order of the CSV ever changes.
train_X = train_df.drop("reference", axis=1)
# The test frame is used as-is (it has no target column).
test_X = test_df

In [77]:
# Fixed seed so that weight initialisation, and therefore `preds`, is
# repeatable across kernel restarts.
# NOTE(review): features are fed to the network unscaled -- confirm whether
# standardisation was intended.
clf = MLPRegressor(random_state=0)
clf.fit(train_X, train_y)
preds = clf.predict(test_X)

In [68]:
# Validate on the first of 5 folds only (KFold is left at its default,
# unshuffled setting); the remaining folds are not evaluated.
kf = model_selection.KFold(n_splits=5)
dev_index, val_index = next(kf.split(train_X))
dev_X, val_X = train_X.iloc[dev_index, :], train_X.iloc[val_index, :]
# The fold indices are row positions, not index labels, so select with .iloc.
dev_y, val_y = train_y.iloc[dev_index], train_y.iloc[val_index]
# Passing val_y switches runXGB into validation mode (watchlist + early stopping).
# Note: this overwrites `preds` with predictions for the validation fold.
preds, model = runXGB(dev_X, dev_y, val_X, val_y, num_rounds=1000)

[0]	train-rmse:49.0562	test-rmse:48.8822
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 20 rounds.
[1]	train-rmse:46.6447	test-rmse:46.4813
[2]	train-rmse:44.3519	test-rmse:44.201
[3]	train-rmse:42.1739	test-rmse:42.0354
[4]	train-rmse:40.1081	test-rmse:39.9895
[5]	train-rmse:38.1435	test-rmse:38.0333
[6]	train-rmse:36.2749	test-rmse:36.1794
[7]	train-rmse:34.4993	test-rmse:34.407
[8]	train-rmse:32.8111	test-rmse:32.7282
[9]	train-rmse:31.2062	test-rmse:31.1306
[10]	train-rmse:29.6849	test-rmse:29.6237
[11]	train-rmse:28.2366	test-rmse:28.1831
[12]	train-rmse:26.8593	test-rmse:26.8181
[13]	train-rmse:25.5519	test-rmse:25.5208
[14]	train-rmse:24.309	test-rmse:24.2897
[15]	train-rmse:23.1331	test-rmse:23.129
[16]	train-rmse:22.009	test-rmse:22.0158
[17]	train-rmse:20.9455	test-rmse:20.9646
[18]	train-rmse:19.9373	test-rmse:19.9696
[19]	train-rmse:18.9726	test-rmse:19.01
[20]	train-rmse:18.0553	test-rmse:

[190]	train-rmse:0.653747	test-rmse:1.89524
[191]	train-rmse:0.650581	test-rmse:1.89399
[192]	train-rmse:0.645376	test-rmse:1.89114
[193]	train-rmse:0.638019	test-rmse:1.88916
[194]	train-rmse:0.635666	test-rmse:1.88821
[195]	train-rmse:0.634439	test-rmse:1.8877
[196]	train-rmse:0.632702	test-rmse:1.88739
[197]	train-rmse:0.629047	test-rmse:1.88643
[198]	train-rmse:0.622785	test-rmse:1.88455
[199]	train-rmse:0.618769	test-rmse:1.8826
[200]	train-rmse:0.61499	test-rmse:1.88145
[201]	train-rmse:0.612325	test-rmse:1.88017
[202]	train-rmse:0.608738	test-rmse:1.8788
[203]	train-rmse:0.6057	test-rmse:1.8777
[204]	train-rmse:0.604506	test-rmse:1.87741
[205]	train-rmse:0.603844	test-rmse:1.8773
[206]	train-rmse:0.601567	test-rmse:1.87592
[207]	train-rmse:0.600043	test-rmse:1.8757
[208]	train-rmse:0.596589	test-rmse:1.87505
[209]	train-rmse:0.59177	test-rmse:1.87325
[210]	train-rmse:0.590656	test-rmse:1.87309
[211]	train-rmse:0.589617	test-rmse:1.8726
[212]	train-rmse:0.587706	test-rmse:1.8715


[378]	train-rmse:0.319964	test-rmse:1.78356
[379]	train-rmse:0.317499	test-rmse:1.78287
[380]	train-rmse:0.316862	test-rmse:1.78254
[381]	train-rmse:0.315881	test-rmse:1.78228
[382]	train-rmse:0.314047	test-rmse:1.78162
[383]	train-rmse:0.313852	test-rmse:1.7816
[384]	train-rmse:0.313023	test-rmse:1.78139
[385]	train-rmse:0.312044	test-rmse:1.78122
[386]	train-rmse:0.311208	test-rmse:1.78111
[387]	train-rmse:0.310325	test-rmse:1.78104
[388]	train-rmse:0.309886	test-rmse:1.78095
[389]	train-rmse:0.309004	test-rmse:1.78093
[390]	train-rmse:0.308134	test-rmse:1.78075
[391]	train-rmse:0.307077	test-rmse:1.78049
[392]	train-rmse:0.305968	test-rmse:1.78023
[393]	train-rmse:0.305396	test-rmse:1.78011
[394]	train-rmse:0.305186	test-rmse:1.78006
[395]	train-rmse:0.304746	test-rmse:1.78006
[396]	train-rmse:0.303103	test-rmse:1.77972
[397]	train-rmse:0.302661	test-rmse:1.77967
[398]	train-rmse:0.302042	test-rmse:1.77948
[399]	train-rmse:0.301301	test-rmse:1.77941
[400]	train-rmse:0.301088	test-rm

[565]	train-rmse:0.198384	test-rmse:1.75825
[566]	train-rmse:0.198225	test-rmse:1.75823
[567]	train-rmse:0.197936	test-rmse:1.75817
[568]	train-rmse:0.197702	test-rmse:1.75819
[569]	train-rmse:0.197016	test-rmse:1.75802
[570]	train-rmse:0.19666	test-rmse:1.75799
[571]	train-rmse:0.1962	test-rmse:1.75791
[572]	train-rmse:0.195624	test-rmse:1.75783
[573]	train-rmse:0.195302	test-rmse:1.7578
[574]	train-rmse:0.195018	test-rmse:1.75778
[575]	train-rmse:0.194419	test-rmse:1.75774
[576]	train-rmse:0.194325	test-rmse:1.75773
[577]	train-rmse:0.193871	test-rmse:1.75769
[578]	train-rmse:0.193727	test-rmse:1.75769
[579]	train-rmse:0.193061	test-rmse:1.75748
[580]	train-rmse:0.192529	test-rmse:1.75741
[581]	train-rmse:0.192344	test-rmse:1.75739
[582]	train-rmse:0.191656	test-rmse:1.75733
[583]	train-rmse:0.191404	test-rmse:1.75727
[584]	train-rmse:0.190779	test-rmse:1.75719
[585]	train-rmse:0.190358	test-rmse:1.75718
[586]	train-rmse:0.190026	test-rmse:1.75708
[587]	train-rmse:0.189632	test-rmse:

[752]	train-rmse:0.130804	test-rmse:1.74792
[753]	train-rmse:0.130564	test-rmse:1.74788
[754]	train-rmse:0.130329	test-rmse:1.74786
[755]	train-rmse:0.130173	test-rmse:1.74787
[756]	train-rmse:0.129664	test-rmse:1.7478
[757]	train-rmse:0.129314	test-rmse:1.74778
[758]	train-rmse:0.129034	test-rmse:1.74772
[759]	train-rmse:0.128552	test-rmse:1.74758
[760]	train-rmse:0.128521	test-rmse:1.74758
[761]	train-rmse:0.12818	test-rmse:1.74753
[762]	train-rmse:0.128067	test-rmse:1.74751
[763]	train-rmse:0.12781	test-rmse:1.74743
[764]	train-rmse:0.12763	test-rmse:1.74745
[765]	train-rmse:0.12717	test-rmse:1.74732
[766]	train-rmse:0.12702	test-rmse:1.74733
[767]	train-rmse:0.126682	test-rmse:1.74726
[768]	train-rmse:0.126403	test-rmse:1.74722
[769]	train-rmse:0.125947	test-rmse:1.74715
[770]	train-rmse:0.125647	test-rmse:1.74718
[771]	train-rmse:0.125404	test-rmse:1.74715
[772]	train-rmse:0.125316	test-rmse:1.74714
[773]	train-rmse:0.125028	test-rmse:1.74713
[774]	train-rmse:0.124885	test-rmse:1.

[940]	train-rmse:0.08911	test-rmse:1.74324
[941]	train-rmse:0.088954	test-rmse:1.74324
[942]	train-rmse:0.088681	test-rmse:1.74318
[943]	train-rmse:0.088571	test-rmse:1.74314
[944]	train-rmse:0.088499	test-rmse:1.74313
[945]	train-rmse:0.088368	test-rmse:1.74311
[946]	train-rmse:0.088159	test-rmse:1.74301
[947]	train-rmse:0.088086	test-rmse:1.74301
[948]	train-rmse:0.088011	test-rmse:1.743
[949]	train-rmse:0.087985	test-rmse:1.743
[950]	train-rmse:0.087901	test-rmse:1.74301
[951]	train-rmse:0.08781	test-rmse:1.743
[952]	train-rmse:0.087518	test-rmse:1.74291
[953]	train-rmse:0.087098	test-rmse:1.74284
[954]	train-rmse:0.087045	test-rmse:1.74284
[955]	train-rmse:0.086882	test-rmse:1.74282
[956]	train-rmse:0.086727	test-rmse:1.7428
[957]	train-rmse:0.086676	test-rmse:1.74279
[958]	train-rmse:0.086618	test-rmse:1.74279
[959]	train-rmse:0.086479	test-rmse:1.74279
[960]	train-rmse:0.086304	test-rmse:1.7428
[961]	train-rmse:0.086054	test-rmse:1.7428
[962]	train-rmse:0.085506	test-rmse:1.74274

In [38]:
# Refit on the complete training set and predict the test set. No test_y is
# passed, so runXGB requests all 3000 rounds without early stopping.
preds, model = runXGB(
    train_X=train_X,
    train_y=train_y,
    test_X=test_X,
    num_rounds=3000,
)

In [78]:
# `preds` is reassigned by several cells in this notebook (MLP, CV fold,
# full XGB fit). Refuse to write a submission unless it holds exactly one
# prediction per test row; otherwise a stale validation-fold array would be
# written out silently truncated.
# NOTE(review): the file name suggests the MLP predictions -- confirm `preds`
# comes from the intended model cell.
pred_values = np.ravel(preds)
if len(pred_values) != len(test_df):
    raise ValueError(
        "preds has %d rows but test_df has %d; re-run the prediction cell "
        "for the test set before writing the submission"
        % (len(pred_values), len(test_df))
    )
# Use the raw id values so pairing with predictions is positional rather than
# dependent on index alignment.
out_df = pd.DataFrame(
    {"id": test_df["id"].values, "reference": pred_values},
    columns=["id", "reference"],
)
out_df.to_csv("sklearnNN_pro1.csv", index=False)