In [14]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
from scipy.stats import mode
import csv
import matplotlib.dates
from datetime import *
from sklearn.preprocessing import *
from sklearn import ensemble
import xgboost as xgb
from sklearn import metrics
from xgboost.sklearn import XGBClassifier  
from sklearn.model_selection import GridSearchCV,cross_val_score  
import matplotlib.pylab as plt  

from sklearn.preprocessing import *
import xgboost as xgb
from sklearn import metrics
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, StratifiedKFold
from sklearn.externals import joblib


In [15]:
class XgbModel:
    def __init__(self, feaNames=None, params={}):
        self.feaNames = feaNames
        self.params = {
            'objective': 'binary:logistic',
            'eval_metric':'logloss',
            'silent': True,
            'eta': 0.1,
            'max_depth': 4,
            'gamma': 0.5,
            'subsample': 0.95,
            'colsample_bytree': 1,
            'min_child_weight': 8,
            'max_delta_step': 5,
            'lambda': 100,
        }
        for k,v in params.items():
            self.params[k] = v
        self.clf = None

    def train(self, X, y, train_size=1, test_size=0.1, verbose=True, num_boost_round=1000, early_stopping_rounds=3):
        X = X.astype(float)
        if train_size==1:
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
            X_train, y_train = X, y
        else:
            X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size)
        dtrain = xgb.DMatrix(X_train, label=y_train, feature_names=self.feaNames)
        dval = xgb.DMatrix(X_test, label=y_test, feature_names=self.feaNames)
        watchlist = [(dtrain,'train'),(dval,'val')]
        clf = xgb.train(
            self.params, dtrain, 
            num_boost_round = num_boost_round, 
            evals = watchlist, 
            early_stopping_rounds = early_stopping_rounds,
            verbose_eval=verbose
        )
        self.clf = clf

    def trainCV(self, X, y, nFold=3, verbose=True, num_boost_round=1500, early_stopping_rounds=10):
        X = X.astype(float)
        dtrain = xgb.DMatrix(X, label=y, feature_names=self.feaNames)
        cvResult = xgb.cv(
            self.params, dtrain, 
            num_boost_round = num_boost_round, 
            nfold = nFold,
            early_stopping_rounds = early_stopping_rounds,
            verbose_eval=verbose
        )
        clf = xgb.train(
            self.params, dtrain, 
            num_boost_round = cvResult.shape[0], 
        )
        self.clf = clf

    def gridSearch(self, X, y, nFold=3, verbose=1, num_boost_round=130):
        paramsGrids = {
            # 'n_estimators': [50+5*i for i in range(0,30)],
            'gamma': [0,0.01,0.05,0.1,0.5,1,5,10,50,100],
            # 'max_depth': list(range(3,10)),
            'min_child_weight': list(range(0,10)),
            'subsample': [1-0.05*i for i in range(0,8)],
            'colsample_bytree': [1-0.05*i for i in range(0,10)],
            # 'reg_alpha': [0+2*i for i in range(0,10)],
            'reg_lambda': [0+50*i for i in range(0,10)],            
            'max_delta_step': [0+1*i for i in range(0,8)],
        }
        for k,v in paramsGrids.items():
            gsearch = GridSearchCV(
                estimator = xgb.XGBClassifier(
                    max_depth = self.params['max_depth'], 
                    gamma = self.params['gamma'],
                    learning_rate = self.params['eta'],
                    max_delta_step = self.params['max_delta_step'],
                    min_child_weight = self.params['min_child_weight'],
                    subsample = self.params['subsample'],
                    colsample_bytree = self.params['colsample_bytree'],
                    silent = self.params['silent'],
                    reg_lambda = self.params['lambda'],
                    n_estimators = num_boost_round
                ),
                # param_grid = paramsGrids,
                param_grid = {k:v},
                scoring = 'neg_log_loss',
                cv = nFold,
                verbose = verbose,
                n_jobs = 4
            )
            gsearch.fit(X, y)
            print(pd.DataFrame(gsearch.cv_results_))
            print(gsearch.best_params_)
        exit()

    def predict(self, X):
        X = X.astype(float)
        return self.clf.predict(xgb.DMatrix(X, feature_names=self.feaNames))

    def getFeaScore(self, show=False):
        fscore = self.clf.get_score()
        feaNames = fscore.keys()
        scoreDf = pd.DataFrame(index=feaNames, columns=['importance'])
        for k,v in fscore.items():
            scoreDf.loc[k, 'importance'] = v
        if show:
            print(scoreDf.sort_index(by=['importance'], ascending=False))
        return scoreDf

# 划分训练集和测试集
def trainTestSplit(df, splitDate=pd.to_datetime('2018-09-23'), trainPeriod=3, testPeriod=1):
    trainDf = df[(df.context_timestamp<splitDate)&(df.context_timestamp>=splitDate-timedelta(days=trainPeriod))]
    testDf = df[(df.context_timestamp>=splitDate)&(df.context_timestamp<splitDate+timedelta(days=testPeriod))]
    return (trainDf, testDf)


# 统计预测误差
def countDeltaY(predictSeries, labelSeries, show=True, title='', subplot=None):
    deltaSeries = predictSeries - labelSeries
    if subplot!=None:
        plt.subplot(subplot[0], subplot[1], subplot[2])
    deltaSeries.plot(style='b-')
    plt.title(title)
    if show:
        plt.show()
    return deltaSeries

# 获取stacking下一层数据集
def getOof(clf, trainX, trainY, testX, nFold=5, stratify=False):
    oofTrain = np.zeros(trainX.shape[0])
    oofTest = np.zeros(testX.shape[0])
    oofTestSkf = np.zeros((testX.shape[0], nFold))
    if stratify:
        kf = StratifiedKFold(n_splits=nFold, shuffle=True)
    else:
        kf = KFold(n_splits=nFold, shuffle=True)
    for i, (trainIdx, testIdx) in enumerate(kf.split(trainX, trainY)):
        kfTrainX = trainX[trainIdx]
        kfTrainY = trainY[trainIdx]
        kfTestX = trainX[testIdx]
        clf.trainCV(kfTrainX, kfTrainY, verbose=False)
        oofTrain[testIdx] = clf.predict(kfTestX)
        oofTestSkf[:,i] = clf.predict(testX)
    oofTest[:] = oofTestSkf.mean(axis=1)
    return oofTrain, oofTest


In [16]:
#首先导入滑窗训练集数据进行线下数据集划分
train_df = pd.read_csv('~/kengkeng/alimama/data/fusai_a_train_df_weilai.csv')
test_df = pd.read_csv('~/kengkeng/alimama/data/fusai_a_test_df_weilai.csv')

print(train_df.info())
print(test_df.info())

train_df['is_special'] = 0
train_df['is_special'][train_df.day == 7] = 1
test_df['is_special'] = 1

print(train_df.columns.values)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1629767 entries, 0 to 1629766
Data columns (total 89 columns):
instance_id                                1629767 non-null int64
item_id                                    1629767 non-null int64
item_brand_id                              1629767 non-null int64
item_city_id                               1629767 non-null int64
item_price_level                           1629767 non-null int64
item_sales_level                           1629767 non-null int64
item_collected_level                       1629767 non-null int64
item_pv_level                              1629767 non-null int64
user_id                                    1629767 non-null int64
user_gender_id                             1629767 non-null int64
user_age_level                             1629767 non-null int64
user_occupation_id                         1629767 non-null int64
user_star_level                            1629767 non-null int64
context_id                   

['instance_id' 'item_id' 'item_brand_id' 'item_city_id' 'item_price_level'
 'item_sales_level' 'item_collected_level' 'item_pv_level' 'user_id'
 'user_gender_id' 'user_age_level' 'user_occupation_id' 'user_star_level'
 'context_id' 'context_timestamp' 'context_page_id' 'shop_id'
 'shop_review_num_level' 'shop_review_positive_rate' 'shop_star_level'
 'shop_score_service' 'shop_score_delivery' 'shop_score_description'
 'is_trade' 'date' 'weekday' 'day' 'hour' 'match_category_proportion'
 'match_property_proportion' 'predict_category_number'
 'predict_property_number' 'isFirstCategoryIn' 'isLastCategoryIn'
 'category_number' 'property_number' 'all_item_brand_id_click_number'
 'all_item_brand_id_buy_number' 'history_item_brand_id_rate'
 'history_item_brand_id_smooth_rate' 'all_shop_id_click_number'
 'all_shop_id_buy_number' 'history_shop_id_rate'
 'history_shop_id_smooth_rate' 'all_item_id_click_number'
 'all_item_id_buy_number' 'history_item_id_rate'
 'history_item_id_smooth_rate' 'lastOn

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [17]:
fea = [
       
     'item_brand_id', 'item_city_id', 'item_price_level', 'item_sales_level',
     'item_collected_level', 'item_pv_level', 'user_gender_id',
     'user_age_level', 'user_occupation_id', 'user_star_level',
     'context_page_id', 
     'shop_review_num_level', 'shop_review_positive_rate',
     'shop_star_level', 'shop_score_service', 'shop_score_delivery',
     'shop_score_description',
     'match_category_proportion', 'match_property_proportion', 'predict_category_number',
     'predict_property_number', 'isFirstCategoryIn', 'isLastCategoryIn',
     'category_number', 'property_number',
     'all_item_brand_id_click_number',
     'all_item_brand_id_buy_number',
     'history_item_brand_id_smooth_rate', 'all_shop_id_click_number',
     'all_shop_id_buy_number',
     'history_shop_id_smooth_rate', 'all_item_id_click_number',
     'all_item_id_buy_number',
     'history_item_id_smooth_rate', 'lastOneHour_sameItem_count',
     'lastOneHour_sameFirstCategory_count', 'lastOneHour_sameLastCategory_count',
     'lastOneHour_sameBrand_count', 'lastOneHour_sameShop_count',
     'isLastOneHour_firstClickItem',
     'is_special', 'hour',
     'userItem_lastClickDeltaTime',
     'userBrand_lastClickDeltaTime', 'userShop_lastClickDeltaTime',
     'userFirstCategory_lastClickDeltaTime',
     'userLastCategory_lastClickDeltaTime', 'hour_rate', 
     'user_id_converse_smooth_rate', 'user_id_total_number', 'user_id_buy_number',
     'item_brand_id_converse_smooth_rate', 'item_brand_id_total_number',
     'item_brand_id_buy_number', 'item_id_converse_smooth_rate',
     'item_id_total_number', 'item_id_buy_number', 'shop_id_converse_smooth_rate',
     'shop_id_total_number', 'shop_id_buy_number',
     'real_last_category_converse_smooth_rate',
     'real_last_category_total_number', 'real_last_category_buy_number',
     'is_later_clickSameItem', 'is_later_clickSameLastCategory', 
     'later_clickSameItem_count', 'later_clickSameLastCategory_count',
     'later_clickSameItem_deltaTime', 'later_clickSameLastCategory_deltaTime',
#      'shop_item_classNumber', 'brand_item_classNumber', 'city_item_classNumber',
#      'shop_user_classNumber', 'brand_user_classNumber', 'city_user_classNumber'
    
      ]


In [18]:
xgbModel = XgbModel(feaNames=fea)
modelName = "xgb_fusai_a"

# 正式模型
startTime = datetime.now()
xgbModel.trainCV(train_df[fea].values, train_df['is_trade'].values)
xgbModel.getFeaScore(show=True)
print('training time: ', datetime.now()-startTime)


[0]	train-logloss:0.61071+2.73537e-05	test-logloss:0.610713+1.93276e-05
[1]	train-logloss:0.543257+6.45824e-05	test-logloss:0.543262+2.34141e-05
[2]	train-logloss:0.487194+8.92051e-05	test-logloss:0.487204+3.49698e-05
[3]	train-logloss:0.440031+9.09078e-05	test-logloss:0.440044+6.88686e-05
[4]	train-logloss:0.399977+0.000107757	test-logloss:0.399995+8.36341e-05
[5]	train-logloss:0.365739+0.000118136	test-logloss:0.365761+0.000104337
[6]	train-logloss:0.336301+0.000124328	test-logloss:0.336327+0.000129869
[7]	train-logloss:0.310881+0.000137494	test-logloss:0.310912+0.000141989
[8]	train-logloss:0.288845+0.000147901	test-logloss:0.288882+0.000159162
[9]	train-logloss:0.269686+0.000155401	test-logloss:0.269723+0.000176954
[10]	train-logloss:0.253+0.000165	test-logloss:0.253041+0.000194622
[11]	train-logloss:0.238431+0.00017134	test-logloss:0.238474+0.000213433
[12]	train-logloss:0.225697+0.000178405	test-logloss:0.225742+0.000231301
[13]	train-logloss:0.214551+0.000183773	test-logloss:0.2

[112]	train-logloss:0.132111+0.000284274	test-logloss:0.132794+0.0005176
[113]	train-logloss:0.132091+0.000285156	test-logloss:0.13278+0.00052127
[114]	train-logloss:0.132065+0.000293769	test-logloss:0.132762+0.000511722
[115]	train-logloss:0.132044+0.000294058	test-logloss:0.132749+0.000512266
[116]	train-logloss:0.132028+0.000293391	test-logloss:0.132736+0.000512648
[117]	train-logloss:0.132002+0.000292671	test-logloss:0.132715+0.000514044
[118]	train-logloss:0.131984+0.00029039	test-logloss:0.132703+0.000515564
[119]	train-logloss:0.131965+0.000289927	test-logloss:0.132689+0.000517243
[120]	train-logloss:0.131945+0.000292233	test-logloss:0.132672+0.000513142
[121]	train-logloss:0.131928+0.000292858	test-logloss:0.13266+0.000512086
[122]	train-logloss:0.131913+0.000295527	test-logloss:0.132651+0.000508365
[123]	train-logloss:0.131894+0.0003013	test-logloss:0.132639+0.000505613
[124]	train-logloss:0.131877+0.000299262	test-logloss:0.132625+0.00050766
[125]	train-logloss:0.131854+0.000

[222]	train-logloss:0.130735+0.00030074	test-logloss:0.13199+0.000529979
[223]	train-logloss:0.130726+0.000300892	test-logloss:0.131986+0.000527578
[224]	train-logloss:0.13072+0.000298579	test-logloss:0.131982+0.000529271
[225]	train-logloss:0.130704+0.000300436	test-logloss:0.131971+0.00052624
[226]	train-logloss:0.130691+0.000297497	test-logloss:0.131964+0.000528299
[227]	train-logloss:0.130685+0.00029796	test-logloss:0.131961+0.0005291
[228]	train-logloss:0.130676+0.000295493	test-logloss:0.131958+0.000530272
[229]	train-logloss:0.130669+0.000297355	test-logloss:0.131957+0.000531333
[230]	train-logloss:0.130659+0.000296428	test-logloss:0.131952+0.000530702
[231]	train-logloss:0.130653+0.000296892	test-logloss:0.131949+0.000530347
[232]	train-logloss:0.130645+0.000296727	test-logloss:0.131945+0.000529809
[233]	train-logloss:0.130632+0.000294259	test-logloss:0.131938+0.00053258
[234]	train-logloss:0.130625+0.000295494	test-logloss:0.131936+0.000531411
[235]	train-logloss:0.130618+0.00

[332]	train-logloss:0.129905+0.000274657	test-logloss:0.131672+0.00054036
[333]	train-logloss:0.129898+0.0002743	test-logloss:0.13167+0.000539706
[334]	train-logloss:0.129892+0.000275197	test-logloss:0.131668+0.000539404
[335]	train-logloss:0.129885+0.000272632	test-logloss:0.131666+0.000539853
[336]	train-logloss:0.129876+0.000272632	test-logloss:0.131662+0.000540912
[337]	train-logloss:0.129872+0.000270748	test-logloss:0.131661+0.000540912
[338]	train-logloss:0.129863+0.000269316	test-logloss:0.131661+0.000540605
[339]	train-logloss:0.129854+0.000268961	test-logloss:0.131659+0.000542853
[340]	train-logloss:0.129842+0.000270231	test-logloss:0.131654+0.000541205
[341]	train-logloss:0.129837+0.000271028	test-logloss:0.131652+0.00054181
[342]	train-logloss:0.129831+0.000269884	test-logloss:0.13165+0.000541362
[343]	train-logloss:0.129823+0.000271099	test-logloss:0.131648+0.000540979
[344]	train-logloss:0.129818+0.000269192	test-logloss:0.131646+0.000541618
[345]	train-logloss:0.129812+0.

[442]	train-logloss:0.129236+0.000278555	test-logloss:0.1315+0.000544768
[443]	train-logloss:0.12923+0.00027572	test-logloss:0.131499+0.00054575
[444]	train-logloss:0.129225+0.000277331	test-logloss:0.131498+0.000544775
[445]	train-logloss:0.129221+0.00027787	test-logloss:0.131497+0.000544238
[446]	train-logloss:0.129215+0.000276734	test-logloss:0.131498+0.000544684
[447]	train-logloss:0.12921+0.000278555	test-logloss:0.131495+0.000543437
[448]	train-logloss:0.129205+0.000279824	test-logloss:0.131494+0.000542281
[449]	train-logloss:0.129198+0.000282402	test-logloss:0.131493+0.000541835
[450]	train-logloss:0.129192+0.000282059	test-logloss:0.131491+0.000541389
[451]	train-logloss:0.129184+0.000281717	test-logloss:0.13149+0.000541926
[452]	train-logloss:0.129179+0.000282163	test-logloss:0.131488+0.000542373
[453]	train-logloss:0.129173+0.000281261	test-logloss:0.131487+0.0005429
[454]	train-logloss:0.129166+0.000279678	test-logloss:0.131486+0.000544948
[455]	train-logloss:0.129159+0.0002

[553]	train-logloss:0.128651+0.000266014	test-logloss:0.131394+0.000549948
[554]	train-logloss:0.128646+0.000268778	test-logloss:0.131393+0.000549687
[555]	train-logloss:0.128642+0.000268435	test-logloss:0.131393+0.000549147
[556]	train-logloss:0.128636+0.000268795	test-logloss:0.131392+0.00054924
[557]	train-logloss:0.128631+0.000268795	test-logloss:0.131391+0.000549333
[558]	train-logloss:0.128627+0.000271206	test-logloss:0.131391+0.00054924
[559]	train-logloss:0.12862+0.000272177	test-logloss:0.131389+0.00054924
[560]	train-logloss:0.128616+0.000273175	test-logloss:0.131389+0.000548886
[561]	train-logloss:0.128612+0.000273296	test-logloss:0.131388+0.00054898
[562]	train-logloss:0.128608+0.000273738	test-logloss:0.131388+0.000547826
[563]	train-logloss:0.128604+0.000273013	test-logloss:0.131387+0.000547732
[564]	train-logloss:0.128599+0.000272289	test-logloss:0.131386+0.000548086
[565]	train-logloss:0.128593+0.00027334	test-logloss:0.131386+0.000548347
[566]	train-logloss:0.128589+0.

[663]	train-logloss:0.128138+0.000283394	test-logloss:0.131335+0.000552349
[664]	train-logloss:0.128135+0.000283804	test-logloss:0.131334+0.000552262
[665]	train-logloss:0.128132+0.000283701	test-logloss:0.131333+0.000551991
[666]	train-logloss:0.128126+0.000284705	test-logloss:0.131333+0.000550657
[667]	train-logloss:0.128123+0.000284255	test-logloss:0.131333+0.000551101
[668]	train-logloss:0.128117+0.000285501	test-logloss:0.131331+0.000550657
[669]	train-logloss:0.128112+0.000287338	test-logloss:0.131331+0.000549496
[670]	train-logloss:0.128106+0.000286786	test-logloss:0.13133+0.000548423
[671]	train-logloss:0.128101+0.000287338	test-logloss:0.13133+0.000547533
[672]	train-logloss:0.128097+0.000288729	test-logloss:0.131331+0.000547002
[673]	train-logloss:0.128094+0.00028928	test-logloss:0.131332+0.000547533
[674]	train-logloss:0.128091+0.000289876	test-logloss:0.131332+0.000546819
[675]	train-logloss:0.128086+0.000290077	test-logloss:0.131332+0.000547709
[676]	train-logloss:0.128082

[773]	train-logloss:0.127643+0.000283402	test-logloss:0.131294+0.000558257
[774]	train-logloss:0.127638+0.000284284	test-logloss:0.131294+0.000558607
[775]	train-logloss:0.127633+0.000283998	test-logloss:0.131293+0.000559254
[776]	train-logloss:0.127628+0.000284077	test-logloss:0.131292+0.000559603
[777]	train-logloss:0.127624+0.000283635	test-logloss:0.131292+0.000560052
[778]	train-logloss:0.12762+0.000285324	test-logloss:0.131291+0.000559353
[779]	train-logloss:0.127614+0.000282596	test-logloss:0.131289+0.0005618
[780]	train-logloss:0.127609+0.000281063	test-logloss:0.131289+0.000561003
[781]	train-logloss:0.127602+0.000279011	test-logloss:0.131289+0.000562148
[782]	train-logloss:0.127596+0.000279011	test-logloss:0.131288+0.000562497
[783]	train-logloss:0.127591+0.000279973	test-logloss:0.131289+0.000562745
[784]	train-logloss:0.127588+0.000279973	test-logloss:0.131289+0.000562745
[785]	train-logloss:0.127584+0.000281583	test-logloss:0.131289+0.000562396
[786]	train-logloss:0.12758+



In [19]:
# 开始预测
test_df.loc[:,'predicted_score'] = xgbModel.predict(test_df[fea].values)
print('predicting time: ', datetime.now()-startTime)
print("预测结果：\n",test_df[['instance_id','predicted_score']].head())
print('预测均值：', test_df['predicted_score'].mean())


predicting time:  0:24:02.682356
预测结果：
        instance_id  predicted_score
0   93294255633855         0.050628
1  558322259509454         0.006850
2  594573634113186         0.029492
3  667327653735176         0.040187
4  697732672924394         0.027147
预测均值： 0.047942910343408585


In [20]:
# 生成stacking数据集
train_df['predicted_score'] = np.nan
test_df['predicted_score'] = np.nan
train_df.loc[:,'predicted_score'], test_df.loc[:,'predicted_score'] = getOof(xgbModel, train_df[fea].values, train_df['is_trade'].values, test_df[fea].values, stratify=True)
print('oof training time: ', datetime.now()-startTime)
xgbModel.getFeaScore(show=True)
cost = metrics.log_loss(train_df['is_trade'].values, train_df['predicted_score'].values)
print('train loss: ', cost)



oof training time:  2:06:51.905442
                                        importance
history_item_id_smooth_rate                    539
later_clickSameLastCategory_deltaTime          382
item_sales_level                               303
item_id_converse_smooth_rate                   265
shop_score_description                         242
shop_id_converse_smooth_rate                   238
hour_rate                                      236
property_number                                229
userFirstCategory_lastClickDeltaTime           221
history_shop_id_smooth_rate                    216
shop_score_delivery                            209
all_item_id_click_number                       197
item_brand_id                                  194
shop_review_positive_rate                      194
user_star_level                                193
shop_score_service                             193
item_id_total_number                           181
item_collected_level                           



train loss:  0.131245255175


In [21]:
print('7th train loss', metrics.log_loss(train_df.loc[train_df.is_special == 1,'is_trade'].values, train_df.loc[train_df.is_special == 1,'predicted_score'].values))
print('train predict: \n',train_df[['instance_id','predicted_score']].head())
print('train predict aver:', train_df['predicted_score'].mean())
print('7th train predict aver:', train_df.loc[train_df.is_special == 1,'predicted_score'].mean())
print('test predict: \n',test_df[['instance_id','predicted_score']].head())
print('test predict aver:', test_df['predicted_score'].mean())


7th train loss 0.170825609474
train predict: 
        instance_id  predicted_score
0  135318204268396         0.020234
1  467064713704441         0.020830
2  479649319359792         0.078572
3  535627308908402         0.058819
4  561801501778309         0.012778
train predict aver: 0.034565499365660994
7th train predict aver: 0.04610285899991572
test predict: 
        instance_id  predicted_score
0   93294255633855         0.051177
1  558322259509454         0.008283
2  594573634113186         0.026079
3  667327653735176         0.041750
4  697732672924394         0.026691
test predict aver: 0.047779947151931464


In [22]:
# 导出预测结果
def exportResult(df, fileName):
    df.to_csv('~/kengkeng/alimama/result/%s.txt' % fileName, sep=' ', header=True, index=False)

exportResult(test_df[['instance_id', 'predicted_score']], 'fusai_xgb_5_6_wen')
