In [1]:
import numpy as np
import pandas as pd
import time
import xgboost as xgb
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import OneHotEncoder



In [2]:
import matplotlib as plt
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['font.serif'] = ['SimHei']  #中文字符显示规范
import seaborn as sns
%matplotlib inline
p = sns.color_palette()
sns.set_style("darkgrid",{"font.sans-serif":['simhei', 'Arial']})

## 用户信息

In [3]:
train = pd.read_csv("D:/rong360DC/creditData/feature/train_20190303_A.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径
test = pd.read_csv("D:/rong360DC/creditData/feature/test_20190303_A.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径
# test表中的用户标签为NAN,这里用-1来填充
test=test.fillna(-1)

# retset_index可以还原索引，重新变为默认的整型索引,添加了新的index行,若不添加则不会出现index索引
dataset=pd.concat([train, test], axis=0).reset_index()
# dataset.shape =(2000, 8)
label=dataset[['用户标识', '标签']]
dataset=dataset.drop(['index', '用户标识'], axis=1)
# (2000, 6)


dataset=pd.get_dummies(dataset,columns=['用户性别','用户职业','用户教育程度','用户婚姻状态','用户户口类型']).drop(['标签'],axis=1)
# 哑变量处理
# dataset.shape =(2000, 24)
# 特征工程扩展
from sklearn.preprocessing import PolynomialFeatures
poly=PolynomialFeatures(interaction_only=True)
# interaction_only=True 表示变量自身和自身不会做运算
dataset=poly.fit_transform(dataset)
df = pd.DataFrame(dataset.reshape(2000,301))
df.interpolate(axis=1).values.reshape(dataset.shape)
# interpolate是作插值处理的意思

trains=pd.concat([label, df], axis=1)
# trains.shape (2000, 303)

# 用户浏览行为训练表

In [4]:
trains1 = trains
train = pd.read_csv("D:/rong360DC/creditData/feature/userbrowsetest_20190303_A.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径
test = pd.read_csv("D:/rong360DC/creditData/feature/userbrowsetrain_20190303_A.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径
dataset=pd.concat([train, test], axis=0)

temp = dataset[:]
temp.drop('放款时间', axis=1, inplace=True)
print(temp.shape)
dataset=dataset.fillna(-1)
temp['用户浏览行为缺失统计']=(dataset==-1).sum(axis=1)

# 用均值填充缺失值
temp.fillna(0, inplace=True)
print(temp.shape)

trains=pd.merge(trains, temp, how='left', on='用户标识')
print(trains.shape)

(2000, 39)
(2000, 40)
(2000, 342)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


## 用户银行流水记录表

In [5]:
trains2 = trains
train = pd.read_csv("D:/rong360DC/creditData/feature/bankloantrain_20190303_A.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径
test = pd.read_csv("D:/rong360DC/creditData/feature/bankloantest_20190303_A.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径

dataset = pd.concat([train, test], axis=0)
temp=dataset[:]

temp.drop('放款时间', axis=1, inplace=True)
print(temp.shape)

dataset=dataset.fillna(-1)
temp['用户银行流水记录缺失统计']=(dataset==-1).sum(axis=1)

temp.fillna(0, inplace=True)
print(temp.shape)
trains=pd.merge(trains, temp, how='left', on='用户标识')
print(trains.shape)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


(2000, 25)
(2000, 26)
(2000, 367)


## 用户账单表初级特征

In [6]:
trains3 = trains

train = pd.read_csv("D:/rong360DC/creditData/feature/chujiueserbilltrain_20190303_A.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径
test = pd.read_csv("D:/rong360DC/creditData/feature/chujiueserbilltest_20190303_A.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径

dataset = pd.concat([train, test], axis=0)

temp=dataset[:]

temp.drop('放款时间', axis=1, inplace=True)

print(temp.shape)

# 统计行和列的缺失值数
dataset=dataset.fillna(-1)

temp['用户账单表初级特征缺失统计']=(dataset==-1).sum(axis=1)

# 特征轮：构造放款后和放款前差值特征
feature_name=temp.columns
for name in feature_name:
    if name.find("款后")>0:
        x1=name.replace('款后', '款前')
        if x1 in feature_name:
            temp[name + '与' + x1 + '差值'] = temp[name]-temp[x1]

temp.fillna(0, inplace=True)

print(temp.shape)

trains=pd.merge(trains, temp, how='left', on='用户标识')
print(trains.shape)

(2000, 56)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


(2000, 79)
(2000, 445)


## 用户账单表特征

In [7]:
trains4 = trains[:]

train = pd.read_csv("D:/rong360DC/creditData/feature/userbilltrainkonwtime_20190303_A.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径
test = pd.read_csv("D:/rong360DC/creditData/feature/userbilltestkonwtime_20190303_A.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径

dataset = pd.concat([train, test], axis=0)
temp=dataset[:]
temp.drop("放款时间", axis=1, inplace=True)
print(temp.shape)

dataset=dataset.fillna(-1)

temp['用户账单表特征_时间已知缺失统计']=(dataset==-1).sum(axis=1)

feature_name = temp.columns

for name in feature_name:
    if name.find('款后') > 0:
        x1 = name.replace('款后', '款前')
        if x1 in feature_name:
            temp[name + '与' + x1 + '差值']=temp[name] - temp[x1]

# 填充缺失值
temp.fillna(0, inplace=True)
print(temp.shape)

trains=pd.merge(trains, temp, how='left', on='用户标识')

print(trains.shape)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


(2000, 473)
(2000, 706)
(2000, 1150)


## 用户账单表特征训练表

In [8]:
trains5 = trains[:]

train = pd.read_csv("D:/rong360DC/creditData/feature/userloantrain_20170203_unknow.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径
test = pd.read_csv("D:/rong360DC/creditData/feature/userloantest_20170203_unknow.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径

print(train.shape)

# 在时间未知上期金额账单金额sum为空的地方的值为True
train已知=train[train['时间未知上期账单金额sum'].isnull().values==True]

print(train已知.shape)

# 在时间未知上期金额账单金额sum为空的地方的值为False
train未知=train[train['时间未知上期账单金额sum'].isnull().values==False]

print(train未知.shape)

# test_size:样本占比，如果是整数的话就是样本的数量，random_state:是随机数的种子
t1, t2=train_test_split(train未知, test_size=0.5, random_state=36)

print(t1.shape)

train = pd.concat([train已知, t1], axis=0)
print(train.shape)

(1000, 241)
(488, 241)
(512, 241)
(256, 241)
(744, 241)


In [9]:
dataset = pd.concat([train, test], axis=0)

temp=dataset[:]

print(temp.shape)

dataset=dataset.fillna(-1)

temp['用户账单表特征_时间未知缺失统计']=(dataset==-1).sum(axis=1)

temp.fillna(0, inplace=True)

print(temp.shape)


trains=pd.merge(trains, temp, how='inner', on='用户标识')
print(trains.shape)

(1744, 241)
(1744, 242)
(1744, 1391)


In [10]:
train未知.to_csv("D:/rong360DC/creditData/feature/billtimefeatureunknow_20190303.csv",index=None,encoding="gb2312")

## 用户账单表特征训练表

In [11]:
trains6 = trains[:]


train = pd.read_csv("D:/rong360DC/creditData/feature/userloanfeaturetrain_20190303_all.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径
test = pd.read_csv("D:/rong360DC/creditData/feature/userloanfeaturetest_20190303_all.csv",encoding="gb2312", nrows=1000) # 注意自己数据路径

dataset = pd.concat([train, test], axis=0)

temp = dataset[:]

print(temp.shape)

dataset = dataset.fillna(-1)

temp['用户账单表特征_时间未知缺失统计']=(dataset==-1).sum(axis=1)

temp.fillna(0, inplace=True)

print(temp.shape)

trains=pd.merge(trains, temp, how='left', on='用户标识')
print(trains.shape)



(2000, 241)
(2000, 242)
(1744, 1632)


# 疑问：在变量中加入一个时间未知缺失统计的用处？？

In [12]:
trains.to_csv("D:/rong360DC/creditData/feature/featturesum_20190303_A.csv",index=None,encoding="gb2312")

dataset= pd.read_csv("D:/rong360DC/creditData/feature/featturesum_20190303_A.csv",encoding="gb2312") # 注意自己数据路径

beifen = 0
trains = 0

from sklearn.preprocessing import MinMaxScaler

names = dataset.columns.tolist()

print(len(names))

print(len(set(names)))

myset = set(names)

for item in myset:
    if names.count(item)>1:
        print(item)
        dataset=dataset.drop(item, axis=1)

print(dataset.shape)

1632
1632
(1744, 1632)


In [13]:
tests = dataset[(dataset['标签']<0)]

trains = dataset[(dataset['标签']>=0)]

#时间未知= pd.read_csv("D:/rong360DC/creditData/feature/billtimefeatureunknow_20190303.csv",encoding="gb2312", nrows=1000)[['用户标识','时间未知上期账单金额sum']]# 注意自己数据路径

#t1,t2=train_test_split(时间未知, test_size = 0.5,random_state=36)#

#trains=pd.merge(trains,t1,how='left', on = "用户标识")#


trains.shape

(744, 1632)

In [14]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['font.serif'] = ['SimHei']
import seaborn as sns
%matplotlib inline
p = sns.color_palette()
sns.set_style("darkgrid",{"font.sans-serif":['simhei', 'Arial']})

In [15]:
#t1=trains.columns
#t2=tests.columns
from sklearn import metrics
def ks(y_predicted, y_true):
    label=y_true
    #label = y_true.get_label()
    fpr,tpr,thres = metrics.roc_curve(label,y_predicted,pos_label=1)
    return 'ks',abs(fpr - tpr).max()

In [16]:
online_test=tests[:]
 
train,val = train_test_split(trains, test_size = 0.25,random_state=36)#41697/13886
offline_test=val[:]

print(tests.shape)
print(train.shape)
print(val.shape)

(1000, 1632)
(558, 1632)
(186, 1632)


In [17]:
start_time = time.time()
offline=0
online=0
params={'booster':'gbtree',
    'objective': 'rank:pairwise',
    'eval_metric':'auc',
    'gamma':0.1,
    'min_child_weight':1.1,
    'max_depth':7,
    'lambda':10,
    'subsample':0.7,
    'colsample_bytree':0.7,
    'colsample_bylevel':0.7,
    'eta': 0.01,
    'tree_method':'exact',
    'seed':1000,
    'nthread':12
    }

params1={
'booster':'gbtree',
'objective': 'binary:logistic',
'scale_pos_weight': 1/7.5,
#7183正样本
#55596条总样本
#差不多1:7.7这样子
'gamma':0.2,  # 用于控制是否后剪枝的参数,越大越保守，一般0.1、0.2这样子。
'max_depth':8, # 构建树的深度，越大越容易过拟合
'lambda':3,  # 控制模型复杂度的权重值的L2正则化项参数，参数越大，模型越不容易过拟合。
'subsample':0.7, # 随机采样训练样本
#'colsample_bytree':0.7, # 生成树时进行的列采样
'min_child_weight':3, 
# 这个参数默认是 1，是每个叶子里面 h 的和至少是多少，对正负样本不均衡时的 0-1 分类而言
#，假设 h 在 0.01 附近，min_child_weight 为 1 意味着叶子节点中最少需要包含 100 个样本。
#这个参数非常影响结果，控制叶子节点中二阶导的和的最小值，该参数值越小，越容易 overfitting。 
'silent':0 ,#设置成1则没有运行信息输出，最好是设置为0.
'eta': 0.03, # 如同学习率
'seed':1000,
'nthread':12,# cpu 线程数
'eval_metric': 'auc'
}

plst = list(params.items())
num_rounds = 5000 # 迭代次数

y = train.标签
X = train.drop(['标签','用户标识'],axis=1)
#X=train[feature_list]

val_y = val.标签
val_X = val.drop(['标签','用户标识'],axis=1)
#val_X = val[feature_list]

offline_test_X=offline_test.drop(['标签','用户标识'],axis=1)
online_test_X=online_test.drop(['标签','用户标识'],axis=1)
#offline_test_X=offline_test[feature_list]
#online_test_X=online_test[feature_list]

xgb_train = xgb.DMatrix(X, label=y)
xgb_val = xgb.DMatrix(val_X,label=val_y)

xgb_offline_test = xgb.DMatrix(offline_test_X)
xgb_online_test = xgb.DMatrix(online_test_X)

# return 训练和验证的错误率
watchlist = [(xgb_train, 'train'),(xgb_val, 'val')]

print ("跑到这里了xgb.train")

# training model 
# early_stopping_rounds 当设置的迭代次数较大时，early_stopping_rounds 可在一定的迭代次数内准确率没有提升就停止训练
model = xgb.train(plst, xgb_train,num_boost_round=7000,evals=watchlist,early_stopping_rounds=500)
print ("跑到这里了save_model")


model.save_model('D:/rong360DC/creditData/model20190310_unkowndata.model') # 用于存储训练出的模型

print ("best best_ntree_limit",model.best_ntree_limit)   #did not save the best,why?
print ("best best_iteration",model.best_iteration) #get it?

print ("跑到这里了model.predict")

跑到这里了xgb.train
[08:49:34] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 0 pruned nodes, max_depth=7
[0]	train-auc:0.852954	val-auc:0.715949
Multiple eval metrics have been passed: 'val-auc' will be used for early stopping.

Will train until val-auc hasn't improved in 500 rounds.
[08:49:34] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 42 extra nodes, 0 pruned nodes, max_depth=7
[1]	train-auc:0.911889	val-auc:0.729641
[08:49:34] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 0 pruned nodes, max_depth=7
[2]	train-auc:0.913207	val-auc:0.714972
[08:49:34] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 0 pruned nodes, max_depth=7
[3]	train-auc:0.915197	val-auc:0.712927
[08:49:34] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: t

[08:49:35] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 38 extra nodes, 0 pruned nodes, max_depth=7
[42]	train-auc:0.966024	val-auc:0.713193
[08:49:35] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 0 pruned nodes, max_depth=7
[43]	train-auc:0.967318	val-auc:0.714616
[08:49:35] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 0 pruned nodes, max_depth=7
[44]	train-auc:0.967541	val-auc:0.712127
[08:49:35] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 0 pruned nodes, max_depth=7
[45]	train-auc:0.967417	val-auc:0.713016
[08:49:35] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 0 pruned nodes, max_depth=7
[46]	train-auc:0.966895	val-auc:0.714616
[08:49:35] C:\Users\Administrator\Deskto

[08:49:37] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 0 pruned nodes, max_depth=7
[84]	train-auc:0.973536	val-auc:0.713727
[08:49:37] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 38 extra nodes, 0 pruned nodes, max_depth=7
[85]	train-auc:0.973635	val-auc:0.715683
[08:49:37] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 0 pruned nodes, max_depth=7
[86]	train-auc:0.974182	val-auc:0.715505
[08:49:37] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 0 pruned nodes, max_depth=7
[87]	train-auc:0.974655	val-auc:0.715327
[08:49:37] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 0 pruned nodes, max_depth=7
[88]	train-auc:0.974531	val-auc:0.714794
[08:49:37] C:\Users\Administrator\Deskto

[08:49:38] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 0 pruned nodes, max_depth=7
[127]	train-auc:0.978137	val-auc:0.717105
[08:49:38] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 42 extra nodes, 0 pruned nodes, max_depth=7
[128]	train-auc:0.978535	val-auc:0.719417
[08:49:38] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 0 pruned nodes, max_depth=7
[129]	train-auc:0.978336	val-auc:0.719061
[08:49:38] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 0 pruned nodes, max_depth=7
[130]	train-auc:0.978212	val-auc:0.717461
[08:49:38] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 38 extra nodes, 0 pruned nodes, max_depth=7
[131]	train-auc:0.978361	val-auc:0.717816
[08:49:38] C:\Users\Administrator\D

[169]	train-auc:0.981395	val-auc:0.717461
[08:49:40] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 0 pruned nodes, max_depth=7
[170]	train-auc:0.981545	val-auc:0.717639
[08:49:40] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 0 pruned nodes, max_depth=7
[171]	train-auc:0.981445	val-auc:0.717994
[08:49:40] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 0 pruned nodes, max_depth=7
[172]	train-auc:0.981545	val-auc:0.717816
[08:49:40] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 42 extra nodes, 0 pruned nodes, max_depth=7
[173]	train-auc:0.981594	val-auc:0.717105
[08:49:40] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 0 pruned nodes, max_depth=7
[174]	train-auc:0.981619	val-auc:0.

[212]	train-auc:0.9853	val-auc:0.718172
[08:49:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 4 pruned nodes, max_depth=7
[213]	train-auc:0.985275	val-auc:0.717816
[08:49:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 4 pruned nodes, max_depth=7
[214]	train-auc:0.9853	val-auc:0.717639
[08:49:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 2 pruned nodes, max_depth=7
[215]	train-auc:0.985126	val-auc:0.718528
[08:49:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 0 pruned nodes, max_depth=7
[216]	train-auc:0.985375	val-auc:0.717283
[08:49:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 0 pruned nodes, max_depth=7
[217]	train-auc:0.985524	val-auc:0.7171

[255]	train-auc:0.989156	val-auc:0.718883
[08:49:43] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 38 extra nodes, 2 pruned nodes, max_depth=7
[256]	train-auc:0.989106	val-auc:0.718706
[08:49:43] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 40 extra nodes, 4 pruned nodes, max_depth=7
[257]	train-auc:0.989255	val-auc:0.719239
[08:49:43] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 42 extra nodes, 6 pruned nodes, max_depth=7
[258]	train-auc:0.98923	val-auc:0.718883
[08:49:43] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 2 pruned nodes, max_depth=7
[259]	train-auc:0.989429	val-auc:0.719417
[08:49:43] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 40 extra nodes, 0 pruned nodes, max_depth=7
[260]	train-auc:0.989255	val-auc:0.7

[298]	train-auc:0.99122	val-auc:0.718172
[08:49:44] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 2 pruned nodes, max_depth=7
[299]	train-auc:0.991344	val-auc:0.718528
[08:49:44] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 0 pruned nodes, max_depth=7
[300]	train-auc:0.991369	val-auc:0.718883
[08:49:44] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 0 pruned nodes, max_depth=7
[301]	train-auc:0.991419	val-auc:0.719239
[08:49:44] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 4 pruned nodes, max_depth=7
[302]	train-auc:0.991419	val-auc:0.718706
[08:49:44] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 0 pruned nodes, max_depth=7
[303]	train-auc:0.991344	val-auc:0.7

[08:49:46] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 4 pruned nodes, max_depth=7
[342]	train-auc:0.993434	val-auc:0.716038
[08:49:46] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 6 pruned nodes, max_depth=7
[343]	train-auc:0.993459	val-auc:0.716216
[08:49:46] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 2 pruned nodes, max_depth=7
[344]	train-auc:0.993508	val-auc:0.71675
[08:49:46] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 2 pruned nodes, max_depth=7
[345]	train-auc:0.993658	val-auc:0.71675
[08:49:46] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 0 pruned nodes, max_depth=7
[346]	train-auc:0.993757	val-auc:0.716216
[08:49:46] C:\Users\Administrator\Des

[08:49:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 2 pruned nodes, max_depth=7
[385]	train-auc:0.995647	val-auc:0.714794
[08:49:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 2 pruned nodes, max_depth=7
[386]	train-auc:0.995722	val-auc:0.714438
[08:49:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 2 pruned nodes, max_depth=7
[387]	train-auc:0.995821	val-auc:0.714438
[08:49:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 38 extra nodes, 0 pruned nodes, max_depth=7
[388]	train-auc:0.995821	val-auc:0.714794
[08:49:47] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 0 pruned nodes, max_depth=7
[389]	train-auc:0.995821	val-auc:0.714616
[08:49:47] C:\Users\Administrator\D

[08:49:49] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 0 pruned nodes, max_depth=7
[428]	train-auc:0.996592	val-auc:0.713905
[08:49:49] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 38 extra nodes, 2 pruned nodes, max_depth=7
[429]	train-auc:0.996543	val-auc:0.714438
[08:49:49] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 4 pruned nodes, max_depth=7
[430]	train-auc:0.996543	val-auc:0.713905
[08:49:49] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 42 extra nodes, 0 pruned nodes, max_depth=7
[431]	train-auc:0.996642	val-auc:0.714082
[08:49:49] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 0 pruned nodes, max_depth=7
[432]	train-auc:0.996667	val-auc:0.714082
[08:49:49] C:\Users\Administrator\D

[470]	train-auc:0.997239	val-auc:0.715505
[08:49:50] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 4 pruned nodes, max_depth=7
[471]	train-auc:0.997214	val-auc:0.715683
[08:49:50] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 42 extra nodes, 0 pruned nodes, max_depth=7
[472]	train-auc:0.997264	val-auc:0.715861
[08:49:50] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 38 extra nodes, 6 pruned nodes, max_depth=7
[473]	train-auc:0.997289	val-auc:0.716038
[08:49:50] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 2 pruned nodes, max_depth=7
[474]	train-auc:0.997339	val-auc:0.716216
[08:49:50] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 2 pruned nodes, max_depth=7
[475]	train-auc:0.997364	val-auc:0.

In [18]:
preds_offline = model.predict(xgb_offline_test,ntree_limit=model.best_iteration)#

preds_online = model.predict(xgb_online_test,ntree_limit=model.best_iteration)#

offline=offline_test[['用户标识','标签']]
online=online_test[['用户标识']]

offline['预测']=preds_offline
online['预测']=preds_online

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [19]:
print("线下得分;")
print(ks(offline.预测,offline.标签))

print ("跑到这里了,输出结果")

线下得分;
('ks', 0.371977240398293)
跑到这里了,输出结果


In [20]:
a = np.array(online.预测)

In [21]:
from sklearn.preprocessing import MinMaxScaler
online.预测 = MinMaxScaler().fit_transform(np.array(online.预测).reshape(-1, 1))
output=online[['用户标识','预测']].rename(index=str, columns={"用户标识": "userid", "预测": "probability"})
output.to_csv("D:/rong360DC/creditData/model/20170204_D_最好成绩特征训练集删去部分时间未知样本.csv",index=None,encoding='utf-8')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [22]:

#save feature score and feature information:  feature,score,min,max,n_null,n_gt1w
feature_score = model.get_fscore()
feature_score = sorted(feature_score.items(), key=lambda x:x[1],reverse=True)
fs = []
for (key,value) in feature_score:
    fs.append("{0},{1}\n".format(key,value))
    
with open('D:/rong360DC/creditData/20170204_D_最好成绩特征训练集删去部分时间未知样本.csv','w') as f:
        f.writelines("feature,score\n")
        f.writelines(fs)
        
cost_time = time.time()-start_time
print ("",'\n',"cost time:",cost_time,"(s)")
print ("跑到这里了,结束！")

 
 cost time: 18.655977487564087 (s)
跑到这里了,结束！
