In [None]:
import os
import sys
import operator
import numpy as np
import pandas as pd
from scipy import sparse
import xgboost as xgb
from sklearn import model_selection, preprocessing, ensemble
from sklearn.metrics import log_loss
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.neural_network import MLPRegressor

In [None]:
def runXGB(train_X, train_y, test_X, test_y=None, feature_names=None, num_rounds=1000):
    """Train an XGBoost regression booster and predict on ``test_X``.

    Parameters
    ----------
    train_X, train_y
        Training features and targets; wrapped in an ``xgb.DMatrix``.
    test_X
        Features to predict on.
    test_y : optional
        Targets for ``test_X``. When given, ``(test_X, test_y)`` is added to
        the evaluation list and training stops after 20 rounds without
        improvement on it. When omitted, all ``num_rounds`` rounds are run.
    feature_names : optional
        Column names forwarded to every ``xgb.DMatrix`` built here. ``None``
        (the default) leaves naming to ``xgb.DMatrix``.
    num_rounds : int
        Upper bound on the number of boosting rounds.

    Returns
    -------
    tuple
        ``(pred_test_y, model)``: predictions for ``test_X`` and the trained
        booster.
    """
    # NOTE(review): 'reg:linear' and 'silent' are the spellings this notebook
    # was written against; recent xgboost releases may warn about them.
    # Confirm against the installed version before renaming.
    params = {
        'objective': 'reg:linear',
        'eta': 0.1,
        'max_depth': 10,
        'silent': 1,
        'min_child_weight': 1,
        'subsample': 0.9,
    }

    xgtrain = xgb.DMatrix(train_X, label=train_y, feature_names=feature_names)

    if test_y is not None:
        # Labelled hold-out available: monitor it and stop early.
        xgtest = xgb.DMatrix(test_X, label=test_y, feature_names=feature_names)
        watchlist = [(xgtrain, 'train'), (xgtest, 'test')]
        model = xgb.train(
            params,
            xgtrain,
            num_boost_round=num_rounds,
            evals=watchlist,
            early_stopping_rounds=20,
        )
    else:
        xgtest = xgb.DMatrix(test_X, feature_names=feature_names)
        model = xgb.train(params, xgtrain, num_boost_round=num_rounds)

    # NOTE(review): after early stopping, whether predict() uses the best
    # iteration or every trained tree depends on the xgboost version - confirm.
    pred_test_y = model.predict(xgtest)
    return pred_test_y, model

In [None]:
# Directory that holds the input CSVs; the empty string means the current
# working directory.
data_path = ""
# os.path.join adds the separator when needed, so data_path works with or
# without a trailing slash (plain concatenation would glue the names together).
train_file = os.path.join(data_path, "save_train.csv")
test_file = os.path.join(data_path, "save_test.csv")
train_df = pd.read_csv(train_file)
test_df = pd.read_csv(test_file)
# Sanity check: (rows, columns) of each frame.
print(train_df.shape)
print(test_df.shape)

In [None]:
# Regression target.
train_y = train_df["reference"]
# Drop the target by name rather than by position, so the split stays correct
# even if "reference" is not the last column of the CSV.
train_X = train_df.drop("reference", axis=1)
# NOTE(review): test_df is used as-is; this assumes its columns match train_X
# (same set, same order) - confirm against the CSVs.
test_X = test_df

In [None]:
# Baseline: scikit-learn multi-layer perceptron with default hyper-parameters.
# random_state is pinned so weight initialisation (and therefore the
# predictions) is the same on every Restart & Run All.
clf = MLPRegressor(random_state=0)
clf.fit(train_X, train_y)
preds = clf.predict(test_X)

In [None]:
# Single hold-out check: build a 5-fold split but train/evaluate on the first
# fold only (the original loop broke out after one iteration).
kf = model_selection.KFold(n_splits=5)
dev_index, val_index = next(iter(kf.split(range(train_X.shape[0]))))
dev_X, val_X = train_X.iloc[dev_index, :], train_X.iloc[val_index, :]
# KFold yields row positions, so the target is indexed positionally too;
# label-based train_y[...] is only equivalent for a default RangeIndex.
dev_y, val_y = train_y.iloc[dev_index], train_y.iloc[val_index]
# Passing val_y makes runXGB evaluate on the hold-out fold.
preds, model = runXGB(dev_X, dev_y, val_X, val_y, num_rounds=1000)

In [None]:
# Refit XGBoost on the full training set and predict the test set.
# No test_y is supplied, so runXGB runs all 3000 rounds without an
# evaluation set.
preds, model = runXGB(
    train_X=train_X,
    train_y=train_y,
    test_X=test_X,
    num_rounds=3000,
)

In [None]:
# Build the two-column output (id, reference) and write it to disk.
# NOTE(review): when the notebook is run top to bottom, `preds` at this point
# comes from the preceding runXGB cell, not from the MLPRegressor cell, even
# though the file name says "sklearnNN" - confirm which model is intended.
out_df = pd.DataFrame(preds, columns=["reference"])
out_df.insert(loc=0, column="id", value=test_df["id"])
out_df.to_csv("sklearnNN_pro1.csv", index=False)