In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import datetime
import time
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.feature_selection import SelectPercentile,f_regression



In [2]:
train_1 = pd.read_csv('./data/train_1.csv')
train_2 = pd.read_csv('./data/train_2.csv')
train_3 = pd.read_csv('./data/train_3.csv')
train_4 = pd.read_csv('./data/train_4.csv')

test_1 = pd.read_csv('./data/test_1.csv')
test_2 = pd.read_csv('./data/test_2.csv')
test_3 = pd.read_csv('./data/test_3.csv')
test_4 = pd.read_csv('./data/test_4.csv')

In [3]:
data_1 = pd.concat([train_1,test_1],axis=0,ignore_index=True,sort=False)
data_2 = pd.concat([train_2,test_2],axis=0,ignore_index=True,sort=False)
data_3 = pd.concat([train_3,test_3],axis=0,ignore_index=True,sort=False)
data_4 = pd.concat([train_4,test_4],axis=0,ignore_index=True,sort=False)

In [4]:
# 返回时间戳
def get_timestamp(x):
    try:
        t = datetime.datetime.strptime(x,"%Y-%m-%d %H:%M:%S.%f")
        t = int(round((t.timestamp())))  # 四舍五入，去掉毫秒级  
    except:
        t = datetime.datetime.strptime(x,"%Y-%m-%d %H:%M:%S")
        t = int(t.timestamp())
    return t   

In [5]:
data_1['时间戳'] = data_1['时间'].map(lambda x: get_timestamp(x))
data_2['时间戳'] = data_2['时间'].map(lambda x: get_timestamp(x))
data_3['时间戳'] = data_3['时间'].map(lambda x: get_timestamp(x))
data_4['时间戳'] = data_4['时间'].map(lambda x: get_timestamp(x))

In [6]:
# 将带有毫秒级时间四舍五入为秒级时间
# 输入字符串时间，"%Y-%m-%d %H:%M:%S.%f"
# 输出datetime格式的时间，年月日时分秒

def get_new_time(x):
    try:
        t = datetime.datetime.strptime(x,"%Y-%m-%d %H:%M:%S.%f")
        t = round(t.timestamp())  # 四舍五入，去掉毫秒级
        t = datetime.datetime.fromtimestamp(t)  # 转换为datetime格式
    except:
        t = datetime.datetime.strptime(x,"%Y-%m-%d %H:%M:%S")  # 转换为datetime格式
    return t

In [7]:

data_1['month'] = data_1['时间'].map(lambda x: get_new_time(x).month)
data_1['day'] = data_1['时间'].map(lambda x: get_new_time(x).day)
data_1['hour'] = data_1['时间'].map(lambda x: get_new_time(x).hour)
data_1['minute'] = data_1['时间'].map(lambda x: get_new_time(x).minute)

data_2['month'] = data_2['时间'].map(lambda x: get_new_time(x).month)
data_2['day'] = data_2['时间'].map(lambda x: get_new_time(x).day)
data_2['hour'] = data_2['时间'].map(lambda x: get_new_time(x).hour)
data_2['minute'] = data_2['时间'].map(lambda x: get_new_time(x).minute)

data_3['month'] = data_3['时间'].map(lambda x: get_new_time(x).month)
data_3['day'] = data_3['时间'].map(lambda x: get_new_time(x).day)
data_3['hour'] = data_3['时间'].map(lambda x: get_new_time(x).hour)
data_3['minute'] = data_3['时间'].map(lambda x: get_new_time(x).minute)

data_4['month'] = data_4['时间'].map(lambda x: get_new_time(x).month)
data_4['day'] = data_4['时间'].map(lambda x: get_new_time(x).day)
data_4['hour'] = data_4['时间'].map(lambda x: get_new_time(x).hour)
data_4['minute'] = data_4['时间'].map(lambda x: get_new_time(x).minute)

In [8]:
data_1['风向_count']= data_1['风向'].map(data_1['风向'].value_counts()).astype(int)
data_1['风向_count'] = (data_1['风向_count']-data_1['风向_count'].min()) / (data_1['风向_count'].max()-data_1['风向_count'].min())

data_2['风向_count']= data_2['风向'].map(data_2['风向'].value_counts()).astype(int)
data_2['风向_count'] = (data_2['风向_count']-data_2['风向_count'].min()) / (data_2['风向_count'].max()-data_2['风向_count'].min())

data_3['风向_count']= data_3['风向'].map(data_3['风向'].value_counts()).astype(int)
data_3['风向_count'] = (data_3['风向_count']-data_3['风向_count'].min()) / (data_3['风向_count'].max()-data_3['风向_count'].min())

data_4['风向_count']= data_4['风向'].map(data_4['风向'].value_counts()).astype(int)
data_4['风向_count'] = (data_4['风向_count']-data_4['风向_count'].min()) / (data_4['风向_count'].max()-data_4['风向_count'].min())

In [9]:
# 距离中心13点30分(13.5)的时间距离
data_1['dis2peak'] = data_1['时间'].map(lambda x: (13.5-abs(13.5-get_new_time(x).hour))/13.5)
data_2['dis2peak'] = data_2['时间'].map(lambda x: (13.5-abs(13.5-get_new_time(x).hour))/13.5)
data_3['dis2peak'] = data_3['hour'].map(lambda x: (13.5-abs(13.5-x))/13.5)
data_4['dis2peak'] = data_4['时间'].map(lambda x: (13.5-abs(13.5-get_new_time(x).hour))/13.5)

In [10]:
def map_hour(h):
    if h < 7:
        return 1
    elif h < 10:
        return 2
    elif h < 13:
        return 3
    elif h < 16:
        return 4
    elif h < 19:
        return 5
    else:
        return 1

In [11]:
data_1['maphour'] = data_1['时间'].map(lambda x: map_hour(get_new_time(x).hour))
data_2['maphour'] = data_2['时间'].map(lambda x:map_hour(get_new_time(x).hour))
data_3['maphour'] = data_3['hour'].map(map_hour)
data_4['maphour'] = data_4['时间'].map(lambda x:map_hour(get_new_time(x).hour))

In [12]:
yuanshi_feature = ['时间', '辐照度', '风速', '风向', '温度', '压强', '湿度', '实发辐照度', '实际功率', 'id','时间戳',
       'month', 'day', 'hour', '风向_count','dis2peak']

萌鸡、新添特征

In [13]:
data_1.columns

Index(['时间', '辐照度', '风速', '风向', '温度', '压强', '湿度', '实发辐照度', '实际功率', 'id', '时间戳',
       'month', 'day', 'hour', 'minute', '风向_count', 'dis2peak', 'maphour'],
      dtype='object')

In [14]:
for data in [data_1,data_2,data_3,data_4]:
    data['time_2_hours_flag'] = data['时间戳']/120000000
    data['time_2_hours_flag'] = data['time_2_hours_flag'].round()
    
    data['time_1flag'] = data['时间戳']/10000000
    data['time_1flag'] = data['time_1flag'].round()
    data['time_5flag'] = data['时间戳']/5000000
    data['time_5flag'] = data['time_5flag'].round()
    data['time_10flag'] = data['时间戳']/1000000
    data['time_10flag'] = data['time_10flag'].round()
for i in ['time_2_hours_flag','time_1flag', 'time_5flag', 'time_10flag']:
    for j in ['辐照度','风速','温度','压强','湿度','风向']:
        temp1 = data_1.groupby(i, as_index = False)[j].agg({i+'_'+j+'_mean':'mean', i+'_'+j+'_max':'max',i+'_'+j+'_min':'min',i+'_'+j+'_var':'var',i+'_'+j+'_median':'median'})
        temp2 = data_2.groupby(i, as_index = False)[j].agg({i+'_'+j+'_mean':'mean', i+'_'+j+'_max':'max',i+'_'+j+'_min':'min',i+'_'+j+'_var':'var',i+'_'+j+'_median':'median'})
        temp3 = data_3.groupby(i, as_index = False)[j].agg({i+'_'+j+'_mean':'mean', i+'_'+j+'_max':'max',i+'_'+j+'_min':'min',i+'_'+j+'_var':'var',i+'_'+j+'_median':'median'})
        temp4 = data_4.groupby(i, as_index = False)[j].agg({i+'_'+j+'_mean':'mean', i+'_'+j+'_max':'max',i+'_'+j+'_min':'min',i+'_'+j+'_var':'var',i+'_'+j+'_median':'median'})
        data_1 = pd.merge(data_1, temp1, on=i, how='left')
        data_2 = pd.merge(data_2, temp2, on=i, how='left')
        data_3 = pd.merge(data_3, temp3, on=i, how='left')
        data_4 = pd.merge(data_4, temp4, on=i, how='left')
        
for i in ['辐照度','风速','温度','压强','湿度','风向']:
#     col = str('time_1flag_'+i+'_mean')
    data_1[i+'-mean_2_hours'] = data_1[i] - data_1['time_2_hours_flag_'+i+'_mean']   
    data_2[i+'-mean_2_hours'] = data_2[i] - data_2['time_2_hours_flag_'+i+'_mean'] 
    data_3[i+'-mean_2_hours'] = data_3[i] - data_3['time_2_hours_flag_'+i+'_mean'] 
    data_4[i+'-mean_2_hours'] = data_4[i] - data_4['time_2_hours_flag_'+i+'_mean'] 
    
for i in ['辐照度','风速','温度','压强','湿度','风向']:
#     col = str('time_1flag_'+i+'_mean')
    data_1[i+'-mean_everyday'] = data_1[i] - data_1['time_1flag_'+i+'_mean']   
    data_2[i+'-mean_everyday'] = data_2[i] - data_2['time_1flag_'+i+'_mean'] 
    data_3[i+'-mean_everyday'] = data_3[i] - data_3['time_1flag_'+i+'_mean'] 
    data_4[i+'-mean_everyday'] = data_4[i] - data_4['time_1flag_'+i+'_mean'] 
for i in ['辐照度','风速','温度','压强','湿度','风向']:
#     col = str('time_1flag_'+i+'_mean')
    data_1[i+'-mean_5day'] = data_1[i] - data_1['time_5flag_'+i+'_mean']   
    data_2[i+'-mean_5day'] = data_2[i] - data_2['time_5flag_'+i+'_mean'] 
    data_3[i+'-mean_5day'] = data_3[i] - data_3['time_5flag_'+i+'_mean'] 
    data_4[i+'-mean_5day'] = data_4[i] - data_4['time_5flag_'+i+'_mean'] 

for i in ['辐照度','风速','温度','压强','湿度','风向']:
#     col = str('time_1flag_'+i+'_mean')
    data_1[i+'_1f-5f_everyweek'] = data_1['time_1flag_'+i+'_mean'] - data_1['time_5flag_'+i+'_mean']   
    data_2[i+'_1f-5f_everyweek'] = data_2['time_1flag_'+i+'_mean'] - data_2['time_5flag_'+i+'_mean'] 
    data_3[i+'_1f-5f_everyweek'] = data_3['time_1flag_'+i+'_mean'] - data_3['time_5flag_'+i+'_mean'] 
    data_4[i+'_1f-5f_everyweek'] = data_4['time_1flag_'+i+'_mean'] - data_4['time_5flag_'+i+'_mean']     
    
for i in ['辐照度','风速','温度','压强','湿度','风向']:
#     col = str('time_1flag_'+i+'_mean')
    data_1[i+'-mean_everyweek'] = data_1['time_1flag_'+i+'_mean'] - data_1['time_10flag_'+i+'_mean']   
    data_2[i+'-mean_everyweek'] = data_2['time_1flag_'+i+'_mean'] - data_2['time_10flag_'+i+'_mean'] 
    data_3[i+'-mean_everyweek'] = data_3['time_1flag_'+i+'_mean'] - data_3['time_10flag_'+i+'_mean'] 
    data_4[i+'-mean_everyweek'] = data_4['time_1flag_'+i+'_mean'] - data_4['time_10flag_'+i+'_mean'] 

In [15]:
def math(data,i,j):
    data[items[i]+'+'+items[j]] = data_1[items[i]] + data_1[items[j]]
    data[items[i]+'*'+items[j]] = data_1[items[i]] + data_1[items[j]]
    data[items[i]+'方+'+items[j]] = data_1[items[i]]*data_1[items[i]] + data_1[items[j]]*data_1[items[j]]
    return data
   
items = ['辐照度','风速','温度','压强','湿度','风向']
for i in range(len(items)):
    for j in range(i+1, len(items)):
        data_1 = math(data_1,i,j)
        data_2 = math(data_2,i,j)
        data_3 = math(data_3,i,j)
        data_4 = math(data_4,i,j)

In [16]:
def cha1(data):    
    for i in ['辐照度','风速','温度','压强','湿度','风向','温度*压强','风速*温度']:
        data[i+'_1'] = data[i].shift(1)
        data[i+'_1'].fillna(-1,inplace =True)
        data[i+'_差1'] = data[i] - data[i+'_1']
    return data
def cha3(data):    
    for i in ['辐照度','风速','温度','压强','湿度','风向','温度*压强','风速*温度']:
        data[i+'_3'] = data[i].shift(3)
        data[i+'_3'].fillna(-1,inplace =True)
        data[i+'_差3'] = data[i] - data[i+'_3']
    return data
for i in [data_1,data_2,data_3,data_4]:
    cha1(i)
    cha3(i)
    

In [17]:
def get_ab_month(x):
    return abs(x-6)   
data_1['ab_month'] = data_1['month'].map(lambda x: get_ab_month(x))
data_2['ab_month'] = data_2['month'].map(lambda x: get_ab_month(x))
data_3['ab_month'] = data_3['month'].map(lambda x: get_ab_month(x))
data_4['ab_month'] = data_4['month'].map(lambda x: get_ab_month(x))

def get_ab_hour(x):
    return abs(x-12)   
data_1['abs_hour'] = data_1['hour'].map(lambda x: get_ab_hour(x))
data_2['abs_hour'] = data_2['hour'].map(lambda x: get_ab_hour(x))
data_3['abs_hour'] = data_3['hour'].map(lambda x: get_ab_hour(x))
data_4['abs_hour'] = data_4['hour'].map(lambda x: get_ab_hour(x))

data_1['abs_时间戳'] = data_1['时间戳']%31536000
data_2['abs_时间戳'] = data_2['时间戳']%31536000
data_3['abs_时间戳'] = data_3['时间戳']%31536000
data_4['abs_时间戳'] = data_4['时间戳']%31536000

In [18]:
a = data_1.columns.tolist()
a

['时间',
 '辐照度',
 '风速',
 '风向',
 '温度',
 '压强',
 '湿度',
 '实发辐照度',
 '实际功率',
 'id',
 '时间戳',
 'month',
 'day',
 'hour',
 'minute',
 '风向_count',
 'dis2peak',
 'maphour',
 'time_2_hours_flag',
 'time_1flag',
 'time_5flag',
 'time_10flag',
 'time_2_hours_flag_辐照度_mean',
 'time_2_hours_flag_辐照度_max',
 'time_2_hours_flag_辐照度_min',
 'time_2_hours_flag_辐照度_var',
 'time_2_hours_flag_辐照度_median',
 'time_2_hours_flag_风速_mean',
 'time_2_hours_flag_风速_max',
 'time_2_hours_flag_风速_min',
 'time_2_hours_flag_风速_var',
 'time_2_hours_flag_风速_median',
 'time_2_hours_flag_温度_mean',
 'time_2_hours_flag_温度_max',
 'time_2_hours_flag_温度_min',
 'time_2_hours_flag_温度_var',
 'time_2_hours_flag_温度_median',
 'time_2_hours_flag_压强_mean',
 'time_2_hours_flag_压强_max',
 'time_2_hours_flag_压强_min',
 'time_2_hours_flag_压强_var',
 'time_2_hours_flag_压强_median',
 'time_2_hours_flag_湿度_mean',
 'time_2_hours_flag_湿度_max',
 'time_2_hours_flag_湿度_min',
 'time_2_hours_flag_湿度_var',
 'time_2_hours_flag_湿度_median',
 'time_2_hours_flag_风向

In [19]:
# # 这组特征没卵用
# for i in ['辐照度','风速','温度','压强','湿度','风向']:
# #     col = str('time_1flag_'+i+'_mean')
#     data_1[i+'_diff_everyday'] = data_1['time_1flag_'+i+'_max'] - data_1['time_1flag_'+i+'_min']   
#     data_2[i+'_diff_everyday'] = data_2['time_1flag_'+i+'_max'] - data_2['time_1flag_'+i+'_min'] 
#     data_3[i+'_diff_everyday'] = data_3['time_1flag_'+i+'_max'] - data_3['time_1flag_'+i+'_min'] 
#     data_4[i+'_diff_everyday'] = data_4['time_1flag_'+i+'_max'] - data_4['time_1flag_'+i+'_min'] 

In [20]:
# new_feature_cols = ['辐照度-mean_everyday','湿度-mean_everyday','风向-mean_everyday','压强-mean_everyday','温度-mean_everyday',
#                     '风速-mean_everyday','辐照度-mean_everyweek','time_10flag_辐照度_mean',]

new_feature_cols = ['辐照度-mean_everyday','湿度-mean_everyday','风向-mean_everyday','压强-mean_everyday','温度-mean_everyday',
                    '风速-mean_everyday','辐照度-mean_everyweek','time_10flag_辐照度_mean','风速*温度','温度*压强','ab_month'
                   ,'abs_hour', 
'辐照度_差1',
 '风速_差1',
 '温度_差1',
 '压强_差1',
 '湿度_差1',
 '风向_差1']
# '辐照度_差3',
#  '风速_差3',
#  '温度_差3',
#  '压强_差3',
#  '湿度_差3',
#  '风向_差3'




In [21]:

#  '辐照度_差1',
#  '风速_差1',
#  '温度_差1',
#  '压强_差1',
#  '湿度_差1',
#  '风向_差1',
#  '辐照度_差3',
#  '风速_差3',
#  '温度_差3',
#  '压强_差3',
#  '湿度_差3',
#  '风向_差3',

In [22]:
# X = data_1
# import ssl
# def count_corr(df):
#     '''
#     输入dataframe
#     输出相关系数dataframe:col_1,col_2,cor(不包含同一特征且已去重复)
#     '''
# #     计算列之间的相关系数，abs求绝对值，unstack变成堆叠形式的，sort_values降序排列，然后用reset_index重新赋值索引
#     x = df.corr().abs().unstack().sort_values(ascending=False).reset_index()
# #     找到除了对角线之外的行
#     x = x.loc[x.level_0!=x.level_1]
# #
#     x2 = pd.DataFrame([sorted(i) for i in x[['level_0','level_1']].values])
#     x2['cor'] = x[0].values
#     x2.columns = ['col_1','col_2','cor']
# #     删除重复的行
#     return x2.drop_duplicates()
# v = X.var()
# constant_feature = v[v==0].index.values.tolist()

# for i in constant_feature:
#     _ = X.pop(i)

# train_corr_col = count_corr(X)
# corr1_col = train_corr_col[train_corr_col.cor>0.9].col_2.values.tolist()
# corr_col = list(set(corr1_col))
# print(corr_col)
# # 删除掉相关性大于0.99的列
# for i in corr_col:
#     if i not in['时间','实发辐照度','实际功率','id']:
#         _ = X.pop(i)



In [23]:
# a = X.columns.tolist()
# a

In [24]:
col= yuanshi_feature + new_feature_cols
# col = a
data_1 = data_1[col]
data_2 = data_2[col]
data_3 = data_3[col]
data_4 = data_4[col]


萌鸡、至此

In [25]:
X_train_1 = data_1[data_1['实际功率'].notnull()].drop(['时间','实发辐照度','实际功率','id'],axis=1)
y_train_1 = data_1[data_1['实际功率'].notnull()]['实际功率']
X_test_1 = data_1[data_1['实际功率'].isnull()].drop(['时间','实发辐照度','实际功率','id'],axis=1)

In [26]:
train_1_features = X_train_1.columns
train_1_features

Index(['辐照度', '风速', '风向', '温度', '压强', '湿度', '时间戳', 'month', 'day', 'hour',
       '风向_count', 'dis2peak', '辐照度-mean_everyday', '湿度-mean_everyday',
       '风向-mean_everyday', '压强-mean_everyday', '温度-mean_everyday',
       '风速-mean_everyday', '辐照度-mean_everyweek', 'time_10flag_辐照度_mean',
       '风速*温度', '温度*压强', 'ab_month', 'abs_hour', '辐照度_差1', '风速_差1', '温度_差1',
       '湿度_差1', '风向_差1', '温度*压强_差1', '风速*温度_差1'],
      dtype='object')

In [27]:
prediction_1 = test_1[['id']]

X_train_1_loc = X_train_1.values
X_test_1_loc = X_test_1.values
y_train_1_loc = y_train_1.values

# SKB = SelectPercentile(f_regression,percentile=95).fit(X_train_1_loc,y_train_1_loc)
# X_train_1_loc = SKB.transform(X_train_1_loc)
# X_test_1_loc = SKB.transform(X_test_1_loc)

In [28]:
from sklearn.metrics import mean_absolute_error
def my_metric_1(y_true,y_pred): 
    y_true_1 = y_true[y_true>=10*0.03]
    y_pred_1 = y_pred[y_true>=10*0.03]
    score = mean_absolute_error(y_true_1,y_pred_1)/10.0
    return 'my_metric_1',score,False  # False表示score不是越大越好(is_bigger_better)

In [29]:
model_1 = lgb.LGBMRegressor('gbdt',num_leaves=51,max_depth=-1,learning_rate=0.05,n_estimators=100,max_bin=255,subsample_for_bin=200000,
                         objective='regression',min_split_gain=0,min_child_weight=0.001,min_child_samples=20,subsample=1,subsample_freq=1,
                         colsample_bytree=1, reg_alpha=0, reg_lambda=0, random_state=2018,n_jobs=-1)


skf=list(StratifiedKFold(y_train_1_loc, n_folds=10, shuffle=True, random_state=2018))
baseloss = []
loss = 0
for i, (train_index, test_index) in enumerate(skf):
    print("Fold", i)
    lgb_model = model_1.fit(X_train_1_loc[train_index], y_train_1_loc[train_index],
                          eval_names =['train','valid'],
                          eval_set=[(X_train_1_loc[train_index], y_train_1_loc[train_index]), 
                                    (X_train_1_loc[test_index], y_train_1_loc[test_index])],
                          eval_metric=my_metric_1,
                          early_stopping_rounds=50)
    baseloss.append(lgb_model.best_score_['valid']['my_metric_1'])
    loss += lgb_model.best_score_['valid']['my_metric_1']
    test_pred= lgb_model.predict(X_test_1_loc, num_iteration=lgb_model.best_iteration_)
#     print('test mean:', test_pred.mean())
    prediction_1['predict_%s' % str(i)] = test_pred
print('my_metric_1:', baseloss, loss/10)

p = prediction_1.drop(['id'],axis=1)
prediction_1['prediction']=p.mean(axis=1)




Fold 0
[1]	train's l2: 8.17936	train's my_metric_1: 0.294849	valid's l2: 8.0235	valid's my_metric_1: 0.292212
Training until validation scores don't improve for 50 rounds.
[2]	train's l2: 7.47853	train's my_metric_1: 0.282267	valid's l2: 7.33989	valid's my_metric_1: 0.279914
[3]	train's l2: 6.84593	train's my_metric_1: 0.270497	valid's l2: 6.7233	valid's my_metric_1: 0.268414
[4]	train's l2: 6.27512	train's my_metric_1: 0.259485	valid's l2: 6.16889	valid's my_metric_1: 0.257718
[5]	train's l2: 5.75828	train's my_metric_1: 0.249115	valid's l2: 5.665	valid's my_metric_1: 0.247597
[6]	train's l2: 5.29023	train's my_metric_1: 0.239447	valid's l2: 5.21086	valid's my_metric_1: 0.238118
[7]	train's l2: 4.86746	train's my_metric_1: 0.230314	valid's l2: 4.79926	valid's my_metric_1: 0.229108
[8]	train's l2: 4.48547	train's my_metric_1: 0.221758	valid's l2: 4.42577	valid's my_metric_1: 0.220649
[9]	train's l2: 4.13993	train's my_metric_1: 0.213747	valid's l2: 4.08963	valid's my_metric_1: 0.212739

[83]	train's l2: 0.606986	train's my_metric_1: 0.0868193	valid's l2: 0.69722	valid's my_metric_1: 0.0921775
[84]	train's l2: 0.603211	train's my_metric_1: 0.0865246	valid's l2: 0.69435	valid's my_metric_1: 0.0919368
[85]	train's l2: 0.599469	train's my_metric_1: 0.0862295	valid's l2: 0.691262	valid's my_metric_1: 0.0916917
[86]	train's l2: 0.595753	train's my_metric_1: 0.0859136	valid's l2: 0.68742	valid's my_metric_1: 0.0913916
[87]	train's l2: 0.593039	train's my_metric_1: 0.0857003	valid's l2: 0.685479	valid's my_metric_1: 0.0912388
[88]	train's l2: 0.590037	train's my_metric_1: 0.0854589	valid's l2: 0.683476	valid's my_metric_1: 0.0910423
[89]	train's l2: 0.587431	train's my_metric_1: 0.0852062	valid's l2: 0.681941	valid's my_metric_1: 0.0908471
[90]	train's l2: 0.585385	train's my_metric_1: 0.0849868	valid's l2: 0.680293	valid's my_metric_1: 0.0906663
[91]	train's l2: 0.582825	train's my_metric_1: 0.0847316	valid's l2: 0.678076	valid's my_metric_1: 0.0904709
[92]	train's l2: 0.579

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 1
[1]	train's l2: 8.16217	train's my_metric_1: 0.294358	valid's l2: 8.14716	valid's my_metric_1: 0.298177
Training until validation scores don't improve for 50 rounds.
[2]	train's l2: 7.46336	train's my_metric_1: 0.281832	valid's l2: 7.44802	valid's my_metric_1: 0.285427
[3]	train's l2: 6.83233	train's my_metric_1: 0.270094	valid's l2: 6.81767	valid's my_metric_1: 0.273467
[4]	train's l2: 6.26147	train's my_metric_1: 0.259089	valid's l2: 6.24469	valid's my_metric_1: 0.262155
[5]	train's l2: 5.74559	train's my_metric_1: 0.248753	valid's l2: 5.73121	valid's my_metric_1: 0.25166
[6]	train's l2: 5.27978	train's my_metric_1: 0.239074	valid's l2: 5.26283	valid's my_metric_1: 0.241792
[7]	train's l2: 4.85819	train's my_metric_1: 0.229952	valid's l2: 4.84405	valid's my_metric_1: 0.232616
[8]	train's l2: 4.47761	train's my_metric_1: 0.221487	valid's l2: 4.463	valid's my_metric_1: 0.223982
[9]	train's l2: 4.13344	train's my_metric_1: 0.213549	valid's l2: 4.11827	valid's my_metric_1: 0.21596

[86]	train's l2: 0.595544	train's my_metric_1: 0.0858633	valid's l2: 0.670308	valid's my_metric_1: 0.0902384
[87]	train's l2: 0.592248	train's my_metric_1: 0.0855695	valid's l2: 0.667753	valid's my_metric_1: 0.0900101
[88]	train's l2: 0.58978	train's my_metric_1: 0.0853463	valid's l2: 0.665916	valid's my_metric_1: 0.089834
[89]	train's l2: 0.587315	train's my_metric_1: 0.0851072	valid's l2: 0.663737	valid's my_metric_1: 0.0896493
[90]	train's l2: 0.584517	train's my_metric_1: 0.0848736	valid's l2: 0.661671	valid's my_metric_1: 0.0894589
[91]	train's l2: 0.582026	train's my_metric_1: 0.084652	valid's l2: 0.659574	valid's my_metric_1: 0.0892808
[92]	train's l2: 0.579579	train's my_metric_1: 0.084418	valid's l2: 0.658034	valid's my_metric_1: 0.0891113
[93]	train's l2: 0.577186	train's my_metric_1: 0.0841952	valid's l2: 0.656255	valid's my_metric_1: 0.0889652
[94]	train's l2: 0.574911	train's my_metric_1: 0.0839986	valid's l2: 0.654399	valid's my_metric_1: 0.0887819
[95]	train's l2: 0.5717

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	train's l2: 8.17582	train's my_metric_1: 0.295138	valid's l2: 8.0313	valid's my_metric_1: 0.289914
Training until validation scores don't improve for 50 rounds.
[2]	train's l2: 7.47557	train's my_metric_1: 0.282567	valid's l2: 7.34867	valid's my_metric_1: 0.277692
[3]	train's l2: 6.84329	train's my_metric_1: 0.270788	valid's l2: 6.73333	valid's my_metric_1: 0.266297
[4]	train's l2: 6.27193	train's my_metric_1: 0.259765	valid's l2: 6.17521	valid's my_metric_1: 0.255594
[5]	train's l2: 5.75554	train's my_metric_1: 0.249419	valid's l2: 5.67061	valid's my_metric_1: 0.245519
[6]	train's l2: 5.28784	train's my_metric_1: 0.239676	valid's l2: 5.21455	valid's my_metric_1: 0.236108
[7]	train's l2: 4.86555	train's my_metric_1: 0.23055	valid's l2: 4.80336	valid's my_metric_1: 0.227365
[8]	train's l2: 4.48428	train's my_metric_1: 0.222016	valid's l2: 4.43216	valid's my_metric_1: 0.219194
[9]	train's l2: 4.13957	train's my_metric_1: 0.214029	valid's l2: 4.09642	valid's my_metric_1: 0.211541
[10]

[82]	train's l2: 0.606268	train's my_metric_1: 0.0869832	valid's l2: 0.709203	valid's my_metric_1: 0.092777
[83]	train's l2: 0.603394	train's my_metric_1: 0.0867361	valid's l2: 0.706816	valid's my_metric_1: 0.092575
[84]	train's l2: 0.600373	train's my_metric_1: 0.0864831	valid's l2: 0.704257	valid's my_metric_1: 0.0923682
[85]	train's l2: 0.597316	train's my_metric_1: 0.0862108	valid's l2: 0.701883	valid's my_metric_1: 0.0921648
[86]	train's l2: 0.594185	train's my_metric_1: 0.085961	valid's l2: 0.69986	valid's my_metric_1: 0.091998
[87]	train's l2: 0.591262	train's my_metric_1: 0.0856983	valid's l2: 0.697126	valid's my_metric_1: 0.0917686
[88]	train's l2: 0.587795	train's my_metric_1: 0.0853951	valid's l2: 0.69435	valid's my_metric_1: 0.091524
[89]	train's l2: 0.585738	train's my_metric_1: 0.0852086	valid's l2: 0.693078	valid's my_metric_1: 0.0913847
[90]	train's l2: 0.582389	train's my_metric_1: 0.0849103	valid's l2: 0.690076	valid's my_metric_1: 0.0911234
[91]	train's l2: 0.579795	

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 3
[1]	train's l2: 8.16177	train's my_metric_1: 0.294515	valid's l2: 8.1558	valid's my_metric_1: 0.295707
Training until validation scores don't improve for 50 rounds.
[2]	train's l2: 7.46395	train's my_metric_1: 0.281997	valid's l2: 7.46242	valid's my_metric_1: 0.283251
[3]	train's l2: 6.83275	train's my_metric_1: 0.270269	valid's l2: 6.83439	valid's my_metric_1: 0.271548
[4]	train's l2: 6.26242	train's my_metric_1: 0.259269	valid's l2: 6.26724	valid's my_metric_1: 0.260581
[5]	train's l2: 5.74721	train's my_metric_1: 0.248967	valid's l2: 5.75514	valid's my_metric_1: 0.250349
[6]	train's l2: 5.2799	train's my_metric_1: 0.239233	valid's l2: 5.29162	valid's my_metric_1: 0.240702
[7]	train's l2: 4.85879	train's my_metric_1: 0.230179	valid's l2: 4.87192	valid's my_metric_1: 0.231659
[8]	train's l2: 4.47769	train's my_metric_1: 0.221655	valid's l2: 4.49523	valid's my_metric_1: 0.223141
[9]	train's l2: 4.13395	train's my_metric_1: 0.213692	valid's l2: 4.15404	valid's my_metric_1: 0.2152

[84]	train's l2: 0.602704	train's my_metric_1: 0.0864015	valid's l2: 0.679305	valid's my_metric_1: 0.0930247
[85]	train's l2: 0.599417	train's my_metric_1: 0.0861261	valid's l2: 0.676759	valid's my_metric_1: 0.0928354
[86]	train's l2: 0.596402	train's my_metric_1: 0.0858555	valid's l2: 0.674197	valid's my_metric_1: 0.0926125
[87]	train's l2: 0.593141	train's my_metric_1: 0.0855586	valid's l2: 0.671037	valid's my_metric_1: 0.0923364
[88]	train's l2: 0.590811	train's my_metric_1: 0.0853583	valid's l2: 0.669746	valid's my_metric_1: 0.0922135
[89]	train's l2: 0.588191	train's my_metric_1: 0.0851086	valid's l2: 0.667417	valid's my_metric_1: 0.0920228
[90]	train's l2: 0.586053	train's my_metric_1: 0.0849264	valid's l2: 0.666094	valid's my_metric_1: 0.0919035
[91]	train's l2: 0.583158	train's my_metric_1: 0.0846745	valid's l2: 0.663452	valid's my_metric_1: 0.0916771
[92]	train's l2: 0.580157	train's my_metric_1: 0.0844235	valid's l2: 0.661464	valid's my_metric_1: 0.0914953
[93]	train's l2: 0.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	train's l2: 8.158	train's my_metric_1: 0.294526	valid's l2: 8.18898	valid's my_metric_1: 0.29639
Training until validation scores don't improve for 50 rounds.
[2]	train's l2: 7.45988	train's my_metric_1: 0.28196	valid's l2: 7.48896	valid's my_metric_1: 0.283797
[3]	train's l2: 6.82917	train's my_metric_1: 0.270207	valid's l2: 6.85709	valid's my_metric_1: 0.27206
[4]	train's l2: 6.25903	train's my_metric_1: 0.259225	valid's l2: 6.28752	valid's my_metric_1: 0.26107
[5]	train's l2: 5.7441	train's my_metric_1: 0.248898	valid's l2: 5.7728	valid's my_metric_1: 0.250758
[6]	train's l2: 5.27842	train's my_metric_1: 0.239163	valid's l2: 5.30487	valid's my_metric_1: 0.240996
[7]	train's l2: 4.85672	train's my_metric_1: 0.230078	valid's l2: 4.88176	valid's my_metric_1: 0.231866
[8]	train's l2: 4.4756	train's my_metric_1: 0.221603	valid's l2: 4.49867	valid's my_metric_1: 0.223225
[9]	train's l2: 4.13106	train's my_metric_1: 0.213629	valid's l2: 4.15373	valid's my_metric_1: 0.21519
[10]	train's

[80]	train's l2: 0.621969	train's my_metric_1: 0.0880641	valid's l2: 0.687767	valid's my_metric_1: 0.0925197
[81]	train's l2: 0.619249	train's my_metric_1: 0.0878112	valid's l2: 0.685113	valid's my_metric_1: 0.0922822
[82]	train's l2: 0.615659	train's my_metric_1: 0.0875339	valid's l2: 0.682649	valid's my_metric_1: 0.0920685
[83]	train's l2: 0.612444	train's my_metric_1: 0.0872429	valid's l2: 0.679837	valid's my_metric_1: 0.0917767
[84]	train's l2: 0.608818	train's my_metric_1: 0.0869391	valid's l2: 0.676639	valid's my_metric_1: 0.0915012
[85]	train's l2: 0.605175	train's my_metric_1: 0.0866344	valid's l2: 0.673712	valid's my_metric_1: 0.0912734
[86]	train's l2: 0.601994	train's my_metric_1: 0.0863718	valid's l2: 0.670922	valid's my_metric_1: 0.0910227
[87]	train's l2: 0.598878	train's my_metric_1: 0.0860936	valid's l2: 0.668175	valid's my_metric_1: 0.090787
[88]	train's l2: 0.595916	train's my_metric_1: 0.085847	valid's l2: 0.665773	valid's my_metric_1: 0.090569
[89]	train's l2: 0.593

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 5
[1]	train's l2: 8.17485	train's my_metric_1: 0.294808	valid's l2: 8.04587	valid's my_metric_1: 0.291491
Training until validation scores don't improve for 50 rounds.
[2]	train's l2: 7.47495	train's my_metric_1: 0.282287	valid's l2: 7.36372	valid's my_metric_1: 0.279216
[3]	train's l2: 6.84344	train's my_metric_1: 0.270512	valid's l2: 6.74827	valid's my_metric_1: 0.267785
[4]	train's l2: 6.2725	train's my_metric_1: 0.259505	valid's l2: 6.19042	valid's my_metric_1: 0.257007
[5]	train's l2: 5.75553	train's my_metric_1: 0.24916	valid's l2: 5.68535	valid's my_metric_1: 0.24694
[6]	train's l2: 5.2882	train's my_metric_1: 0.239426	valid's l2: 5.22891	valid's my_metric_1: 0.237518
[7]	train's l2: 4.86624	train's my_metric_1: 0.230347	valid's l2: 4.81608	valid's my_metric_1: 0.228767
[8]	train's l2: 4.48487	train's my_metric_1: 0.221794	valid's l2: 4.44318	valid's my_metric_1: 0.220593
[9]	train's l2: 4.13906	train's my_metric_1: 0.213789	valid's l2: 4.10545	valid's my_metric_1: 0.212978

[88]	train's l2: 0.591576	train's my_metric_1: 0.0854208	valid's l2: 0.684854	valid's my_metric_1: 0.0914817
[89]	train's l2: 0.588765	train's my_metric_1: 0.0851926	valid's l2: 0.681433	valid's my_metric_1: 0.0912466
[90]	train's l2: 0.586069	train's my_metric_1: 0.0849479	valid's l2: 0.67901	valid's my_metric_1: 0.0910468
[91]	train's l2: 0.583301	train's my_metric_1: 0.0847129	valid's l2: 0.676441	valid's my_metric_1: 0.0908358
[92]	train's l2: 0.581102	train's my_metric_1: 0.0845049	valid's l2: 0.674745	valid's my_metric_1: 0.0906619
[93]	train's l2: 0.578456	train's my_metric_1: 0.0842657	valid's l2: 0.67249	valid's my_metric_1: 0.090492
[94]	train's l2: 0.576121	train's my_metric_1: 0.0840506	valid's l2: 0.67085	valid's my_metric_1: 0.0903418
[95]	train's l2: 0.57361	train's my_metric_1: 0.0838293	valid's l2: 0.669363	valid's my_metric_1: 0.0902008
[96]	train's l2: 0.571375	train's my_metric_1: 0.083642	valid's l2: 0.667747	valid's my_metric_1: 0.0900569
[97]	train's l2: 0.568817

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	train's l2: 8.12284	train's my_metric_1: 0.29399	valid's l2: 8.50582	valid's my_metric_1: 0.30535
Training until validation scores don't improve for 50 rounds.
[2]	train's l2: 7.42794	train's my_metric_1: 0.281446	valid's l2: 7.77817	valid's my_metric_1: 0.292348
[3]	train's l2: 6.80006	train's my_metric_1: 0.2697	valid's l2: 7.12329	valid's my_metric_1: 0.280219
[4]	train's l2: 6.23318	train's my_metric_1: 0.258685	valid's l2: 6.52628	valid's my_metric_1: 0.268709
[5]	train's l2: 5.71996	train's my_metric_1: 0.248351	valid's l2: 5.98825	valid's my_metric_1: 0.257933
[6]	train's l2: 5.25665	train's my_metric_1: 0.238696	valid's l2: 5.50573	valid's my_metric_1: 0.247904
[7]	train's l2: 4.83701	train's my_metric_1: 0.2296	valid's l2: 5.06579	valid's my_metric_1: 0.238403
[8]	train's l2: 4.45767	train's my_metric_1: 0.221117	valid's l2: 4.67101	valid's my_metric_1: 0.229517
[9]	train's l2: 4.11439	train's my_metric_1: 0.213178	valid's l2: 4.30994	valid's my_metric_1: 0.221097
[10]	tra

[81]	train's l2: 0.610154	train's my_metric_1: 0.0871264	valid's l2: 0.709065	valid's my_metric_1: 0.0930562
[82]	train's l2: 0.605938	train's my_metric_1: 0.0867729	valid's l2: 0.705403	valid's my_metric_1: 0.0927763
[83]	train's l2: 0.602588	train's my_metric_1: 0.086496	valid's l2: 0.702416	valid's my_metric_1: 0.0925378
[84]	train's l2: 0.599693	train's my_metric_1: 0.086239	valid's l2: 0.700555	valid's my_metric_1: 0.092343
[85]	train's l2: 0.596617	train's my_metric_1: 0.0859573	valid's l2: 0.698166	valid's my_metric_1: 0.0921145
[86]	train's l2: 0.593916	train's my_metric_1: 0.0857252	valid's l2: 0.696098	valid's my_metric_1: 0.0919393
[87]	train's l2: 0.591173	train's my_metric_1: 0.0854906	valid's l2: 0.694289	valid's my_metric_1: 0.0917411
[88]	train's l2: 0.588099	train's my_metric_1: 0.0852388	valid's l2: 0.69205	valid's my_metric_1: 0.0915481
[89]	train's l2: 0.585069	train's my_metric_1: 0.0849825	valid's l2: 0.688923	valid's my_metric_1: 0.0912933
[90]	train's l2: 0.5814

[66]	train's l2: 0.672662	train's my_metric_1: 0.0926418	valid's l2: 0.771238	valid's my_metric_1: 0.096956
[67]	train's l2: 0.668004	train's my_metric_1: 0.0922275	valid's l2: 0.766855	valid's my_metric_1: 0.0965988
[68]	train's l2: 0.663479	train's my_metric_1: 0.0918202	valid's l2: 0.763283	valid's my_metric_1: 0.0962521
[69]	train's l2: 0.658132	train's my_metric_1: 0.0913667	valid's l2: 0.757569	valid's my_metric_1: 0.0958165
[70]	train's l2: 0.65365	train's my_metric_1: 0.0910149	valid's l2: 0.753131	valid's my_metric_1: 0.0954953
[71]	train's l2: 0.648975	train's my_metric_1: 0.0906095	valid's l2: 0.748275	valid's my_metric_1: 0.0951063
[72]	train's l2: 0.644559	train's my_metric_1: 0.090239	valid's l2: 0.744274	valid's my_metric_1: 0.0947966
[73]	train's l2: 0.640615	train's my_metric_1: 0.0899204	valid's l2: 0.740605	valid's my_metric_1: 0.0945437
[74]	train's l2: 0.637332	train's my_metric_1: 0.0896015	valid's l2: 0.737506	valid's my_metric_1: 0.0942597
[75]	train's l2: 0.632

[58]	train's l2: 0.718929	train's my_metric_1: 0.0962595	valid's l2: 0.786595	valid's my_metric_1: 0.100807
[59]	train's l2: 0.712118	train's my_metric_1: 0.0956918	valid's l2: 0.780464	valid's my_metric_1: 0.100302
[60]	train's l2: 0.705714	train's my_metric_1: 0.0951868	valid's l2: 0.775606	valid's my_metric_1: 0.0999073
[61]	train's l2: 0.699182	train's my_metric_1: 0.094653	valid's l2: 0.770045	valid's my_metric_1: 0.0994333
[62]	train's l2: 0.693414	train's my_metric_1: 0.0941699	valid's l2: 0.764875	valid's my_metric_1: 0.0990265
[63]	train's l2: 0.687121	train's my_metric_1: 0.0936691	valid's l2: 0.758447	valid's my_metric_1: 0.0985206
[64]	train's l2: 0.681744	train's my_metric_1: 0.0932122	valid's l2: 0.753377	valid's my_metric_1: 0.098097
[65]	train's l2: 0.676596	train's my_metric_1: 0.092785	valid's l2: 0.749491	valid's my_metric_1: 0.0977484
[66]	train's l2: 0.672048	train's my_metric_1: 0.0923903	valid's l2: 0.745532	valid's my_metric_1: 0.0974166
[67]	train's l2: 0.66775

[45]	train's l2: 0.837748	train's my_metric_1: 0.104887	valid's l2: 0.931345	valid's my_metric_1: 0.110104
[46]	train's l2: 0.825217	train's my_metric_1: 0.104038	valid's l2: 0.919802	valid's my_metric_1: 0.109307
[47]	train's l2: 0.813027	train's my_metric_1: 0.103195	valid's l2: 0.908863	valid's my_metric_1: 0.108536
[48]	train's l2: 0.802424	train's my_metric_1: 0.102415	valid's l2: 0.899206	valid's my_metric_1: 0.107795
[49]	train's l2: 0.79191	train's my_metric_1: 0.101674	valid's l2: 0.889211	valid's my_metric_1: 0.107071
[50]	train's l2: 0.781873	train's my_metric_1: 0.100971	valid's l2: 0.878677	valid's my_metric_1: 0.106357
[51]	train's l2: 0.772717	train's my_metric_1: 0.100275	valid's l2: 0.870717	valid's my_metric_1: 0.105741
[52]	train's l2: 0.763964	train's my_metric_1: 0.099625	valid's l2: 0.863271	valid's my_metric_1: 0.105128
[53]	train's l2: 0.755009	train's my_metric_1: 0.0989791	valid's l2: 0.856111	valid's my_metric_1: 0.104574
[54]	train's l2: 0.74647	train's my_m

In [30]:
train_features = X_train_1.columns
model_1.feature_importances_
pd.DataFrame({'features':train_features, 'imp':model_1.feature_importances_}).sort_values('imp',ascending=False)

Unnamed: 0,features,imp
6,时间戳,466
8,day,376
12,辐照度-mean_everyday,327
5,湿度,271
21,温度*压强,240
18,辐照度-mean_everyweek,238
19,time_10flag_辐照度_mean,230
13,湿度-mean_everyday,230
2,风向,212
3,温度,209


In [31]:
train_features = X_train_1.columns
model_1.feature_importances_
pd.DataFrame({'features':train_features, 'imp':model_1.feature_importances_}).sort_values('imp',ascending=False)

Unnamed: 0,features,imp
6,时间戳,466
8,day,376
12,辐照度-mean_everyday,327
5,湿度,271
21,温度*压强,240
18,辐照度-mean_everyweek,238
19,time_10flag_辐照度_mean,230
13,湿度-mean_everyday,230
2,风向,212
3,温度,209


In [32]:
X_train_2 = data_2[:len(train_2)].drop(['时间','实发辐照度','实际功率','id'],axis=1)

y_train_2 = data_2[:len(train_2)]['实际功率']

X_test_2 = data_2[len(train_2):].drop(['时间','实发辐照度','实际功率','id'],axis=1)

In [33]:
from sklearn.metrics import mean_absolute_error
def my_metric_2(y_true,y_pred): 
    y_true_2 = y_true[y_true>=10*0.03]
    y_pred_2 = y_pred[y_true>=10*0.03]
    score = mean_absolute_error(y_true_2,y_pred_2)/10.0
    return 'my_metric_2',score,False  # False表示score不是越大越好(is_bigger_better)

In [34]:
prediction_2 = test_2[['id']]

X_train_2_loc = X_train_2.values
X_test_2_loc = X_test_2.values
y_train_2_loc = y_train_2.values

model_2 = lgb.LGBMRegressor('gbdt',num_leaves=41,max_depth=-1,learning_rate=0.05,n_estimators=200,max_bin=255,subsample_for_bin=200000,
                         objective='regression',min_split_gain=0,min_child_weight=0.01,min_child_samples=20,subsample=1,subsample_freq=1,
                         colsample_bytree=1, reg_alpha=0, reg_lambda=0, random_state=2018,n_jobs=-1)
# 五折交叉训练，构造五个模型
skf=list(StratifiedKFold(y_train_2_loc, n_folds=10, shuffle=True, random_state=2018))
baseloss = []
loss = 0
for i, (train_index, test_index) in enumerate(skf):
    print("Fold", i)
    lgb_model = model_2.fit(X_train_2_loc[train_index], y_train_2_loc[train_index],
                          eval_names =['train','valid'],
                          eval_set=[(X_train_2_loc[train_index], y_train_2_loc[train_index]), 
                                    (X_train_2_loc[test_index], y_train_2_loc[test_index])],
                          eval_metric=my_metric_2,
                          early_stopping_rounds=100)
    baseloss.append(lgb_model.best_score_['valid']['my_metric_2'])
    loss += lgb_model.best_score_['valid']['my_metric_2']
    test_pred= lgb_model.predict(X_test_2_loc, num_iteration=lgb_model.best_iteration_)
#     print('test mean:', test_pred.mean())
    prediction_2['predict_%s' % str(i)] = test_pred
print('mae:', baseloss, loss/10)

p = prediction_2.drop(['id'],axis=1)
prediction_2['prediction']=p.mean(axis=1)



Fold 0
[1]	train's l2: 7.38162	train's my_metric_2: 0.294946	valid's l2: 7.71642	valid's my_metric_2: 0.293716
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 6.78275	train's my_metric_2: 0.283273	valid's l2: 7.09347	valid's my_metric_2: 0.282188
[3]	train's l2: 6.24027	train's my_metric_2: 0.272368	valid's l2: 6.5314	valid's my_metric_2: 0.271468
[4]	train's l2: 5.75036	train's my_metric_2: 0.262138	valid's l2: 6.02373	valid's my_metric_2: 0.261466
[5]	train's l2: 5.30576	train's my_metric_2: 0.252481	valid's l2: 5.56508	valid's my_metric_2: 0.252114
[6]	train's l2: 4.90486	train's my_metric_2: 0.243446	valid's l2: 5.15125	valid's my_metric_2: 0.243373
[7]	train's l2: 4.54076	train's my_metric_2: 0.234883	valid's l2: 4.77405	valid's my_metric_2: 0.23504
[8]	train's l2: 4.20985	train's my_metric_2: 0.226886	valid's l2: 4.43065	valid's my_metric_2: 0.227368
[9]	train's l2: 3.91297	train's my_metric_2: 0.219494	valid's l2: 4.12507	valid's my_metric_2: 0.220

[94]	train's l2: 0.590278	train's my_metric_2: 0.0878148	valid's l2: 0.753805	valid's my_metric_2: 0.094446
[95]	train's l2: 0.587655	train's my_metric_2: 0.0875754	valid's l2: 0.751797	valid's my_metric_2: 0.0942869
[96]	train's l2: 0.584556	train's my_metric_2: 0.0873059	valid's l2: 0.749414	valid's my_metric_2: 0.094107
[97]	train's l2: 0.582309	train's my_metric_2: 0.0870931	valid's l2: 0.747473	valid's my_metric_2: 0.0939436
[98]	train's l2: 0.579657	train's my_metric_2: 0.0868499	valid's l2: 0.744921	valid's my_metric_2: 0.093726
[99]	train's l2: 0.577424	train's my_metric_2: 0.0866329	valid's l2: 0.743218	valid's my_metric_2: 0.0935774
[100]	train's l2: 0.573728	train's my_metric_2: 0.0863498	valid's l2: 0.739985	valid's my_metric_2: 0.093378
[101]	train's l2: 0.56992	train's my_metric_2: 0.0860662	valid's l2: 0.735035	valid's my_metric_2: 0.0931154
[102]	train's l2: 0.567607	train's my_metric_2: 0.0858528	valid's l2: 0.732553	valid's my_metric_2: 0.0929348
[103]	train's l2: 0.5

[186]	train's l2: 0.424027	train's my_metric_2: 0.0730864	valid's l2: 0.601195	valid's my_metric_2: 0.0830639
[187]	train's l2: 0.42263	train's my_metric_2: 0.0729845	valid's l2: 0.600217	valid's my_metric_2: 0.0830027
[188]	train's l2: 0.420756	train's my_metric_2: 0.0728294	valid's l2: 0.598163	valid's my_metric_2: 0.0828781
[189]	train's l2: 0.419592	train's my_metric_2: 0.0727208	valid's l2: 0.597413	valid's my_metric_2: 0.0828254
[190]	train's l2: 0.418047	train's my_metric_2: 0.0725839	valid's l2: 0.596143	valid's my_metric_2: 0.0826961
[191]	train's l2: 0.416995	train's my_metric_2: 0.0724917	valid's l2: 0.59532	valid's my_metric_2: 0.0826162
[192]	train's l2: 0.415762	train's my_metric_2: 0.0723844	valid's l2: 0.594027	valid's my_metric_2: 0.0825398
[193]	train's l2: 0.41498	train's my_metric_2: 0.0723151	valid's l2: 0.593761	valid's my_metric_2: 0.0825208
[194]	train's l2: 0.414087	train's my_metric_2: 0.0722271	valid's l2: 0.593305	valid's my_metric_2: 0.082478
[195]	train's 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	train's l2: 7.38584	train's my_metric_2: 0.294163	valid's l2: 7.67108	valid's my_metric_2: 0.295888
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 6.78814	train's my_metric_2: 0.282634	valid's l2: 7.05342	valid's my_metric_2: 0.284315
[3]	train's l2: 6.24481	train's my_metric_2: 0.271753	valid's l2: 6.48801	valid's my_metric_2: 0.273269
[4]	train's l2: 5.75337	train's my_metric_2: 0.261479	valid's l2: 5.98265	valid's my_metric_2: 0.263056
[5]	train's l2: 5.3098	train's my_metric_2: 0.251903	valid's l2: 5.52488	valid's my_metric_2: 0.253464
[6]	train's l2: 4.90839	train's my_metric_2: 0.242943	valid's l2: 5.1119	valid's my_metric_2: 0.244552
[7]	train's l2: 4.54472	train's my_metric_2: 0.234487	valid's l2: 4.73796	valid's my_metric_2: 0.236253
[8]	train's l2: 4.21484	train's my_metric_2: 0.22649	valid's l2: 4.40233	valid's my_metric_2: 0.228489
[9]	train's l2: 3.91748	train's my_metric_2: 0.219153	valid's l2: 4.09717	valid's my_metric_2: 0.221285
[10]

[80]	train's l2: 0.648663	train's my_metric_2: 0.092299	valid's l2: 0.804629	valid's my_metric_2: 0.0993059
[81]	train's l2: 0.644262	train's my_metric_2: 0.0919196	valid's l2: 0.801501	valid's my_metric_2: 0.0990195
[82]	train's l2: 0.639451	train's my_metric_2: 0.0915617	valid's l2: 0.797105	valid's my_metric_2: 0.0987422
[83]	train's l2: 0.634358	train's my_metric_2: 0.0911594	valid's l2: 0.792557	valid's my_metric_2: 0.0984022
[84]	train's l2: 0.630777	train's my_metric_2: 0.0908639	valid's l2: 0.788738	valid's my_metric_2: 0.0981052
[85]	train's l2: 0.626603	train's my_metric_2: 0.0904836	valid's l2: 0.784417	valid's my_metric_2: 0.0977384
[86]	train's l2: 0.621848	train's my_metric_2: 0.0901194	valid's l2: 0.780299	valid's my_metric_2: 0.0974472
[87]	train's l2: 0.61852	train's my_metric_2: 0.0898446	valid's l2: 0.778056	valid's my_metric_2: 0.0972829
[88]	train's l2: 0.613718	train's my_metric_2: 0.089479	valid's l2: 0.773811	valid's my_metric_2: 0.0969453
[89]	train's l2: 0.608

[172]	train's l2: 0.45182	train's my_metric_2: 0.0753775	valid's l2: 0.624266	valid's my_metric_2: 0.0854896
[173]	train's l2: 0.450681	train's my_metric_2: 0.0752824	valid's l2: 0.623416	valid's my_metric_2: 0.0854258
[174]	train's l2: 0.44944	train's my_metric_2: 0.0751538	valid's l2: 0.622498	valid's my_metric_2: 0.0853313
[175]	train's l2: 0.447601	train's my_metric_2: 0.0750101	valid's l2: 0.620476	valid's my_metric_2: 0.0851954
[176]	train's l2: 0.446701	train's my_metric_2: 0.0749317	valid's l2: 0.619842	valid's my_metric_2: 0.0851708
[177]	train's l2: 0.44567	train's my_metric_2: 0.0748309	valid's l2: 0.619081	valid's my_metric_2: 0.0851132
[178]	train's l2: 0.444556	train's my_metric_2: 0.0747269	valid's l2: 0.618645	valid's my_metric_2: 0.0850741
[179]	train's l2: 0.443551	train's my_metric_2: 0.0746325	valid's l2: 0.618045	valid's my_metric_2: 0.0850214
[180]	train's l2: 0.442515	train's my_metric_2: 0.0745435	valid's l2: 0.617393	valid's my_metric_2: 0.0849754
[181]	train's

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 2
[1]	train's l2: 7.39768	train's my_metric_2: 0.294106	valid's l2: 7.60202	valid's my_metric_2: 0.296221
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 6.79849	train's my_metric_2: 0.282526	valid's l2: 6.9991	valid's my_metric_2: 0.284902
[3]	train's l2: 6.25712	train's my_metric_2: 0.271818	valid's l2: 6.45724	valid's my_metric_2: 0.274538
[4]	train's l2: 5.76663	train's my_metric_2: 0.261635	valid's l2: 5.96251	valid's my_metric_2: 0.264526
[5]	train's l2: 5.3229	train's my_metric_2: 0.25209	valid's l2: 5.5178	valid's my_metric_2: 0.255161
[6]	train's l2: 4.92056	train's my_metric_2: 0.243113	valid's l2: 5.11246	valid's my_metric_2: 0.246251
[7]	train's l2: 4.5549	train's my_metric_2: 0.234632	valid's l2: 4.74572	valid's my_metric_2: 0.237929
[8]	train's l2: 4.22446	train's my_metric_2: 0.226705	valid's l2: 4.41124	valid's my_metric_2: 0.230077
[9]	train's l2: 3.92629	train's my_metric_2: 0.219352	valid's l2: 4.11266	valid's my_metric_2: 0.222935

[85]	train's l2: 0.62006	train's my_metric_2: 0.0901074	valid's l2: 0.785506	valid's my_metric_2: 0.0983885
[86]	train's l2: 0.616898	train's my_metric_2: 0.0898632	valid's l2: 0.782692	valid's my_metric_2: 0.0981731
[87]	train's l2: 0.611487	train's my_metric_2: 0.0894358	valid's l2: 0.777234	valid's my_metric_2: 0.0977775
[88]	train's l2: 0.608275	train's my_metric_2: 0.0891763	valid's l2: 0.774371	valid's my_metric_2: 0.0975634
[89]	train's l2: 0.604461	train's my_metric_2: 0.0888271	valid's l2: 0.770178	valid's my_metric_2: 0.097205
[90]	train's l2: 0.601275	train's my_metric_2: 0.0885484	valid's l2: 0.768598	valid's my_metric_2: 0.0970566
[91]	train's l2: 0.598207	train's my_metric_2: 0.0882728	valid's l2: 0.76431	valid's my_metric_2: 0.0967558
[92]	train's l2: 0.595455	train's my_metric_2: 0.088029	valid's l2: 0.762698	valid's my_metric_2: 0.0966252
[93]	train's l2: 0.592091	train's my_metric_2: 0.087748	valid's l2: 0.759637	valid's my_metric_2: 0.096369
[94]	train's l2: 0.589195

[177]	train's l2: 0.439247	train's my_metric_2: 0.0745893	valid's l2: 0.629166	valid's my_metric_2: 0.0859895
[178]	train's l2: 0.437962	train's my_metric_2: 0.0744794	valid's l2: 0.628127	valid's my_metric_2: 0.0859328
[179]	train's l2: 0.436599	train's my_metric_2: 0.0743699	valid's l2: 0.62753	valid's my_metric_2: 0.0858649
[180]	train's l2: 0.435589	train's my_metric_2: 0.0742515	valid's l2: 0.62634	valid's my_metric_2: 0.0857397
[181]	train's l2: 0.434442	train's my_metric_2: 0.0741461	valid's l2: 0.625216	valid's my_metric_2: 0.085638
[182]	train's l2: 0.433129	train's my_metric_2: 0.0740257	valid's l2: 0.62426	valid's my_metric_2: 0.085559
[183]	train's l2: 0.431284	train's my_metric_2: 0.073884	valid's l2: 0.622811	valid's my_metric_2: 0.0854548
[184]	train's l2: 0.429977	train's my_metric_2: 0.0737683	valid's l2: 0.622211	valid's my_metric_2: 0.0853915
[185]	train's l2: 0.42903	train's my_metric_2: 0.073666	valid's l2: 0.62126	valid's my_metric_2: 0.0852871
[186]	train's l2: 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 3
[1]	train's l2: 7.40574	train's my_metric_2: 0.293914	valid's l2: 7.52763	valid's my_metric_2: 0.295074
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 6.80496	train's my_metric_2: 0.282357	valid's l2: 6.92197	valid's my_metric_2: 0.283478
[3]	train's l2: 6.25968	train's my_metric_2: 0.271386	valid's l2: 6.37312	valid's my_metric_2: 0.272571
[4]	train's l2: 5.76877	train's my_metric_2: 0.261215	valid's l2: 5.87908	valid's my_metric_2: 0.262386
[5]	train's l2: 5.32295	train's my_metric_2: 0.251625	valid's l2: 5.43409	valid's my_metric_2: 0.252953
[6]	train's l2: 4.91963	train's my_metric_2: 0.242607	valid's l2: 5.03186	valid's my_metric_2: 0.244099
[7]	train's l2: 4.55517	train's my_metric_2: 0.234274	valid's l2: 4.66306	valid's my_metric_2: 0.235676
[8]	train's l2: 4.2243	train's my_metric_2: 0.226313	valid's l2: 4.33404	valid's my_metric_2: 0.227834
[9]	train's l2: 3.92516	train's my_metric_2: 0.218946	valid's l2: 4.03579	valid's my_metric_2: 0.22

[93]	train's l2: 0.595452	train's my_metric_2: 0.0879193	valid's l2: 0.76037	valid's my_metric_2: 0.0946095
[94]	train's l2: 0.591227	train's my_metric_2: 0.0876176	valid's l2: 0.756831	valid's my_metric_2: 0.0943704
[95]	train's l2: 0.588198	train's my_metric_2: 0.087357	valid's l2: 0.754205	valid's my_metric_2: 0.0941476
[96]	train's l2: 0.584941	train's my_metric_2: 0.087071	valid's l2: 0.751246	valid's my_metric_2: 0.093934
[97]	train's l2: 0.582334	train's my_metric_2: 0.086834	valid's l2: 0.749042	valid's my_metric_2: 0.0937153
[98]	train's l2: 0.578651	train's my_metric_2: 0.0865344	valid's l2: 0.74602	valid's my_metric_2: 0.0935076
[99]	train's l2: 0.575712	train's my_metric_2: 0.0863	valid's l2: 0.743171	valid's my_metric_2: 0.093322
[100]	train's l2: 0.573083	train's my_metric_2: 0.0860891	valid's l2: 0.740677	valid's my_metric_2: 0.0931332
[101]	train's l2: 0.570212	train's my_metric_2: 0.0858558	valid's l2: 0.73723	valid's my_metric_2: 0.0929179
[102]	train's l2: 0.566979	t

[184]	train's l2: 0.429265	train's my_metric_2: 0.0734109	valid's l2: 0.614991	valid's my_metric_2: 0.0835958
[185]	train's l2: 0.428413	train's my_metric_2: 0.0733419	valid's l2: 0.614788	valid's my_metric_2: 0.0835751
[186]	train's l2: 0.427265	train's my_metric_2: 0.0732398	valid's l2: 0.613425	valid's my_metric_2: 0.0834861
[187]	train's l2: 0.425804	train's my_metric_2: 0.0730872	valid's l2: 0.611824	valid's my_metric_2: 0.0833277
[188]	train's l2: 0.424922	train's my_metric_2: 0.0730029	valid's l2: 0.611412	valid's my_metric_2: 0.0832913
[189]	train's l2: 0.423912	train's my_metric_2: 0.072919	valid's l2: 0.610669	valid's my_metric_2: 0.0832194
[190]	train's l2: 0.422807	train's my_metric_2: 0.072797	valid's l2: 0.609628	valid's my_metric_2: 0.0831302
[191]	train's l2: 0.422059	train's my_metric_2: 0.07272	valid's l2: 0.609078	valid's my_metric_2: 0.0830866
[192]	train's l2: 0.420905	train's my_metric_2: 0.0726102	valid's l2: 0.607641	valid's my_metric_2: 0.0829784
[193]	train's 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	train's l2: 7.41397	train's my_metric_2: 0.29368	valid's l2: 7.46295	valid's my_metric_2: 0.295476
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 6.81198	train's my_metric_2: 0.282028	valid's l2: 6.86209	valid's my_metric_2: 0.284073
[3]	train's l2: 6.26886	train's my_metric_2: 0.271129	valid's l2: 6.32482	valid's my_metric_2: 0.273648
[4]	train's l2: 5.77917	train's my_metric_2: 0.260976	valid's l2: 5.84069	valid's my_metric_2: 0.263993
[5]	train's l2: 5.33681	train's my_metric_2: 0.251483	valid's l2: 5.39847	valid's my_metric_2: 0.254783
[6]	train's l2: 4.93253	train's my_metric_2: 0.242376	valid's l2: 4.99814	valid's my_metric_2: 0.246059
[7]	train's l2: 4.56698	train's my_metric_2: 0.233947	valid's l2: 4.63486	valid's my_metric_2: 0.237876
[8]	train's l2: 4.23584	train's my_metric_2: 0.225968	valid's l2: 4.3055	valid's my_metric_2: 0.230105
[9]	train's l2: 3.93714	train's my_metric_2: 0.218549	valid's l2: 4.00664	valid's my_metric_2: 0.222749
[10

[80]	train's l2: 0.645027	train's my_metric_2: 0.091871	valid's l2: 0.772087	valid's my_metric_2: 0.101285
[81]	train's l2: 0.640882	train's my_metric_2: 0.0915379	valid's l2: 0.768918	valid's my_metric_2: 0.101047
[82]	train's l2: 0.636193	train's my_metric_2: 0.0911257	valid's l2: 0.764222	valid's my_metric_2: 0.100684
[83]	train's l2: 0.631572	train's my_metric_2: 0.0907257	valid's l2: 0.759686	valid's my_metric_2: 0.100323
[84]	train's l2: 0.627577	train's my_metric_2: 0.0903985	valid's l2: 0.75639	valid's my_metric_2: 0.100073
[85]	train's l2: 0.622692	train's my_metric_2: 0.0900512	valid's l2: 0.752127	valid's my_metric_2: 0.0997721
[86]	train's l2: 0.618451	train's my_metric_2: 0.0896787	valid's l2: 0.74804	valid's my_metric_2: 0.09943
[87]	train's l2: 0.61492	train's my_metric_2: 0.0893906	valid's l2: 0.745418	valid's my_metric_2: 0.099232
[88]	train's l2: 0.610356	train's my_metric_2: 0.0890471	valid's l2: 0.740752	valid's my_metric_2: 0.0988989
[89]	train's l2: 0.607019	train

[169]	train's l2: 0.450674	train's my_metric_2: 0.0750781	valid's l2: 0.610864	valid's my_metric_2: 0.0878919
[170]	train's l2: 0.448593	train's my_metric_2: 0.0749066	valid's l2: 0.608782	valid's my_metric_2: 0.0877393
[171]	train's l2: 0.447331	train's my_metric_2: 0.0747796	valid's l2: 0.607907	valid's my_metric_2: 0.0876494
[172]	train's l2: 0.445926	train's my_metric_2: 0.0746621	valid's l2: 0.607221	valid's my_metric_2: 0.0875924
[173]	train's l2: 0.444642	train's my_metric_2: 0.0745518	valid's l2: 0.605443	valid's my_metric_2: 0.0874766
[174]	train's l2: 0.442867	train's my_metric_2: 0.0743991	valid's l2: 0.603463	valid's my_metric_2: 0.0873219
[175]	train's l2: 0.441666	train's my_metric_2: 0.0743052	valid's l2: 0.602846	valid's my_metric_2: 0.087279
[176]	train's l2: 0.440237	train's my_metric_2: 0.0742062	valid's l2: 0.601551	valid's my_metric_2: 0.0872087
[177]	train's l2: 0.438936	train's my_metric_2: 0.0740856	valid's l2: 0.601159	valid's my_metric_2: 0.0871731
[178]	train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 5
[1]	train's l2: 7.42051	train's my_metric_2: 0.293761	valid's l2: 7.40572	valid's my_metric_2: 0.293748
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 6.81916	train's my_metric_2: 0.282164	valid's l2: 6.8015	valid's my_metric_2: 0.281786
[3]	train's l2: 6.27666	train's my_metric_2: 0.271482	valid's l2: 6.25609	valid's my_metric_2: 0.270713
[4]	train's l2: 5.78517	train's my_metric_2: 0.261333	valid's l2: 5.76524	valid's my_metric_2: 0.260386
[5]	train's l2: 5.34118	train's my_metric_2: 0.251889	valid's l2: 5.32187	valid's my_metric_2: 0.250767
[6]	train's l2: 4.93716	train's my_metric_2: 0.24286	valid's l2: 4.91485	valid's my_metric_2: 0.241443
[7]	train's l2: 4.57201	train's my_metric_2: 0.234447	valid's l2: 4.5483	valid's my_metric_2: 0.232885
[8]	train's l2: 4.24259	train's my_metric_2: 0.226656	valid's l2: 4.22076	valid's my_metric_2: 0.225039
[9]	train's l2: 3.94309	train's my_metric_2: 0.219234	valid's l2: 3.92317	valid's my_metric_2: 0.2177

[95]	train's l2: 0.598608	train's my_metric_2: 0.087882	valid's l2: 0.653964	valid's my_metric_2: 0.0911173
[96]	train's l2: 0.595869	train's my_metric_2: 0.0876387	valid's l2: 0.652444	valid's my_metric_2: 0.0909485
[97]	train's l2: 0.593053	train's my_metric_2: 0.0874147	valid's l2: 0.64957	valid's my_metric_2: 0.0907627
[98]	train's l2: 0.590799	train's my_metric_2: 0.0872078	valid's l2: 0.647991	valid's my_metric_2: 0.0906002
[99]	train's l2: 0.587861	train's my_metric_2: 0.0869539	valid's l2: 0.646168	valid's my_metric_2: 0.0904114
[100]	train's l2: 0.585419	train's my_metric_2: 0.0867447	valid's l2: 0.644168	valid's my_metric_2: 0.0902654
[101]	train's l2: 0.582267	train's my_metric_2: 0.0864608	valid's l2: 0.641308	valid's my_metric_2: 0.0900661
[102]	train's l2: 0.579309	train's my_metric_2: 0.0862336	valid's l2: 0.63845	valid's my_metric_2: 0.0898891
[103]	train's l2: 0.577187	train's my_metric_2: 0.0860394	valid's l2: 0.636677	valid's my_metric_2: 0.0897509
[104]	train's l2: 

[188]	train's l2: 0.437613	train's my_metric_2: 0.0737218	valid's l2: 0.539213	valid's my_metric_2: 0.0813717
[189]	train's l2: 0.436472	train's my_metric_2: 0.0736138	valid's l2: 0.53775	valid's my_metric_2: 0.0812546
[190]	train's l2: 0.435111	train's my_metric_2: 0.0735059	valid's l2: 0.536543	valid's my_metric_2: 0.0811854
[191]	train's l2: 0.43373	train's my_metric_2: 0.0733673	valid's l2: 0.535895	valid's my_metric_2: 0.0811221
[192]	train's l2: 0.432181	train's my_metric_2: 0.0732264	valid's l2: 0.535049	valid's my_metric_2: 0.0810457
[193]	train's l2: 0.431292	train's my_metric_2: 0.0731425	valid's l2: 0.534811	valid's my_metric_2: 0.0810037
[194]	train's l2: 0.430604	train's my_metric_2: 0.0730691	valid's l2: 0.534251	valid's my_metric_2: 0.0809627
[195]	train's l2: 0.429083	train's my_metric_2: 0.0729248	valid's l2: 0.53257	valid's my_metric_2: 0.0808293
[196]	train's l2: 0.42792	train's my_metric_2: 0.0728213	valid's l2: 0.53144	valid's my_metric_2: 0.0807391
[197]	train's l

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	train's l2: 7.43749	train's my_metric_2: 0.29357	valid's l2: 7.23648	valid's my_metric_2: 0.291548
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 6.83706	train's my_metric_2: 0.282078	valid's l2: 6.6482	valid's my_metric_2: 0.280063
[3]	train's l2: 6.2916	train's my_metric_2: 0.271235	valid's l2: 6.11015	valid's my_metric_2: 0.268995
[4]	train's l2: 5.79909	train's my_metric_2: 0.261191	valid's l2: 5.62778	valid's my_metric_2: 0.258819
[5]	train's l2: 5.3528	train's my_metric_2: 0.251718	valid's l2: 5.18846	valid's my_metric_2: 0.249184
[6]	train's l2: 4.94864	train's my_metric_2: 0.242714	valid's l2: 4.79073	valid's my_metric_2: 0.240024
[7]	train's l2: 4.58173	train's my_metric_2: 0.234349	valid's l2: 4.43057	valid's my_metric_2: 0.231607
[8]	train's l2: 4.24925	train's my_metric_2: 0.226429	valid's l2: 4.10288	valid's my_metric_2: 0.223543
[9]	train's l2: 3.94774	train's my_metric_2: 0.219093	valid's l2: 3.80772	valid's my_metric_2: 0.216219
[10]	

[83]	train's l2: 0.636623	train's my_metric_2: 0.0909825	valid's l2: 0.684025	valid's my_metric_2: 0.0980154
[84]	train's l2: 0.633136	train's my_metric_2: 0.0906675	valid's l2: 0.681566	valid's my_metric_2: 0.0977381
[85]	train's l2: 0.629368	train's my_metric_2: 0.0903455	valid's l2: 0.67905	valid's my_metric_2: 0.097484
[86]	train's l2: 0.625252	train's my_metric_2: 0.0899903	valid's l2: 0.675909	valid's my_metric_2: 0.0971904
[87]	train's l2: 0.621021	train's my_metric_2: 0.0896206	valid's l2: 0.67217	valid's my_metric_2: 0.096879
[88]	train's l2: 0.617723	train's my_metric_2: 0.0893387	valid's l2: 0.669144	valid's my_metric_2: 0.0966006
[89]	train's l2: 0.614792	train's my_metric_2: 0.089085	valid's l2: 0.667769	valid's my_metric_2: 0.0964536
[90]	train's l2: 0.610043	train's my_metric_2: 0.0887062	valid's l2: 0.663	valid's my_metric_2: 0.0960744
[91]	train's l2: 0.606454	train's my_metric_2: 0.0883853	valid's l2: 0.660861	valid's my_metric_2: 0.0958656
[92]	train's l2: 0.603499	t

[173]	train's l2: 0.448548	train's my_metric_2: 0.0747974	valid's l2: 0.542342	valid's my_metric_2: 0.0858076
[174]	train's l2: 0.447774	train's my_metric_2: 0.0747155	valid's l2: 0.541695	valid's my_metric_2: 0.0857205
[175]	train's l2: 0.446966	train's my_metric_2: 0.0746472	valid's l2: 0.541278	valid's my_metric_2: 0.0856788
[176]	train's l2: 0.44564	train's my_metric_2: 0.0745213	valid's l2: 0.540223	valid's my_metric_2: 0.0855769
[177]	train's l2: 0.444053	train's my_metric_2: 0.0743799	valid's l2: 0.539213	valid's my_metric_2: 0.0854823
[178]	train's l2: 0.442132	train's my_metric_2: 0.0742294	valid's l2: 0.53761	valid's my_metric_2: 0.0853804
[179]	train's l2: 0.440887	train's my_metric_2: 0.0741175	valid's l2: 0.537006	valid's my_metric_2: 0.0853192
[180]	train's l2: 0.439417	train's my_metric_2: 0.0739736	valid's l2: 0.535749	valid's my_metric_2: 0.0852052
[181]	train's l2: 0.437914	train's my_metric_2: 0.0738702	valid's l2: 0.534504	valid's my_metric_2: 0.0850965
[182]	train'

[71]	train's l2: 0.6953	train's my_metric_2: 0.0954842	valid's l2: 0.756757	valid's my_metric_2: 0.103118
[72]	train's l2: 0.689707	train's my_metric_2: 0.0950536	valid's l2: 0.751244	valid's my_metric_2: 0.10267
[73]	train's l2: 0.684005	train's my_metric_2: 0.094606	valid's l2: 0.746531	valid's my_metric_2: 0.102218
[74]	train's l2: 0.678897	train's my_metric_2: 0.0941967	valid's l2: 0.742388	valid's my_metric_2: 0.101874
[75]	train's l2: 0.673629	train's my_metric_2: 0.0937156	valid's l2: 0.737476	valid's my_metric_2: 0.101418
[76]	train's l2: 0.668089	train's my_metric_2: 0.0932858	valid's l2: 0.732747	valid's my_metric_2: 0.101018
[77]	train's l2: 0.662519	train's my_metric_2: 0.0928739	valid's l2: 0.728204	valid's my_metric_2: 0.100668
[78]	train's l2: 0.656638	train's my_metric_2: 0.0924043	valid's l2: 0.722796	valid's my_metric_2: 0.100211
[79]	train's l2: 0.651799	train's my_metric_2: 0.0920035	valid's l2: 0.71791	valid's my_metric_2: 0.0998145
[80]	train's l2: 0.647173	train'

[164]	train's l2: 0.462917	train's my_metric_2: 0.0759904	valid's l2: 0.564681	valid's my_metric_2: 0.0865348
[165]	train's l2: 0.461952	train's my_metric_2: 0.0759091	valid's l2: 0.564284	valid's my_metric_2: 0.0865103
[166]	train's l2: 0.460637	train's my_metric_2: 0.0758013	valid's l2: 0.563097	valid's my_metric_2: 0.0864237
[167]	train's l2: 0.459213	train's my_metric_2: 0.07567	valid's l2: 0.561316	valid's my_metric_2: 0.0862689
[168]	train's l2: 0.457792	train's my_metric_2: 0.0755424	valid's l2: 0.560232	valid's my_metric_2: 0.0861861
[169]	train's l2: 0.455866	train's my_metric_2: 0.0753915	valid's l2: 0.55834	valid's my_metric_2: 0.0860149
[170]	train's l2: 0.45449	train's my_metric_2: 0.075277	valid's l2: 0.557054	valid's my_metric_2: 0.0859194
[171]	train's l2: 0.453286	train's my_metric_2: 0.0751753	valid's l2: 0.556203	valid's my_metric_2: 0.0858608
[172]	train's l2: 0.452139	train's my_metric_2: 0.0750735	valid's l2: 0.555781	valid's my_metric_2: 0.0858317
[173]	train's l

[53]	train's l2: 0.827043	train's my_metric_2: 0.106169	valid's l2: 0.848139	valid's my_metric_2: 0.110054
[54]	train's l2: 0.816675	train's my_metric_2: 0.105333	valid's l2: 0.838457	valid's my_metric_2: 0.109218
[55]	train's l2: 0.80658	train's my_metric_2: 0.104586	valid's l2: 0.829545	valid's my_metric_2: 0.108505
[56]	train's l2: 0.795768	train's my_metric_2: 0.103789	valid's l2: 0.817858	valid's my_metric_2: 0.107728
[57]	train's l2: 0.786604	train's my_metric_2: 0.103124	valid's l2: 0.809743	valid's my_metric_2: 0.107078
[58]	train's l2: 0.778867	train's my_metric_2: 0.102504	valid's l2: 0.803786	valid's my_metric_2: 0.106557
[59]	train's l2: 0.770311	train's my_metric_2: 0.101851	valid's l2: 0.796165	valid's my_metric_2: 0.105998
[60]	train's l2: 0.762328	train's my_metric_2: 0.101214	valid's l2: 0.788166	valid's my_metric_2: 0.105327
[61]	train's l2: 0.754889	train's my_metric_2: 0.100652	valid's l2: 0.781688	valid's my_metric_2: 0.104805
[62]	train's l2: 0.746704	train's my_m

[135]	train's l2: 0.503728	train's my_metric_2: 0.0796423	valid's l2: 0.574105	valid's my_metric_2: 0.0872488
[136]	train's l2: 0.501894	train's my_metric_2: 0.0794788	valid's l2: 0.572326	valid's my_metric_2: 0.0871122
[137]	train's l2: 0.500779	train's my_metric_2: 0.0793617	valid's l2: 0.571925	valid's my_metric_2: 0.0870643
[138]	train's l2: 0.498102	train's my_metric_2: 0.0791297	valid's l2: 0.569271	valid's my_metric_2: 0.0868337
[139]	train's l2: 0.496892	train's my_metric_2: 0.0790204	valid's l2: 0.568427	valid's my_metric_2: 0.0867699
[140]	train's l2: 0.495689	train's my_metric_2: 0.078905	valid's l2: 0.567727	valid's my_metric_2: 0.0866913
[141]	train's l2: 0.493451	train's my_metric_2: 0.0786984	valid's l2: 0.565733	valid's my_metric_2: 0.0865722
[142]	train's l2: 0.492368	train's my_metric_2: 0.0785746	valid's l2: 0.565255	valid's my_metric_2: 0.0864962
[143]	train's l2: 0.491388	train's my_metric_2: 0.0784876	valid's l2: 0.564538	valid's my_metric_2: 0.0864302
[144]	train

[28]	train's l2: 1.41399	train's my_metric_2: 0.140008	valid's l2: 1.35387	valid's my_metric_2: 0.140468
[29]	train's l2: 1.36456	train's my_metric_2: 0.137732	valid's l2: 1.30608	valid's my_metric_2: 0.138182
[30]	train's l2: 1.32136	train's my_metric_2: 0.135664	valid's l2: 1.26468	valid's my_metric_2: 0.136073
[31]	train's l2: 1.27963	train's my_metric_2: 0.133666	valid's l2: 1.22592	valid's my_metric_2: 0.134091
[32]	train's l2: 1.24258	train's my_metric_2: 0.131828	valid's l2: 1.19246	valid's my_metric_2: 0.132389
[33]	train's l2: 1.20751	train's my_metric_2: 0.13005	valid's l2: 1.16181	valid's my_metric_2: 0.130767
[34]	train's l2: 1.17583	train's my_metric_2: 0.128406	valid's l2: 1.13333	valid's my_metric_2: 0.129176
[35]	train's l2: 1.14527	train's my_metric_2: 0.126764	valid's l2: 1.10593	valid's my_metric_2: 0.127618
[36]	train's l2: 1.11718	train's my_metric_2: 0.125217	valid's l2: 1.0815	valid's my_metric_2: 0.126226
[37]	train's l2: 1.09081	train's my_metric_2: 0.12371	val

[111]	train's l2: 0.549492	train's my_metric_2: 0.083775	valid's l2: 0.597216	valid's my_metric_2: 0.0893157
[112]	train's l2: 0.547415	train's my_metric_2: 0.083588	valid's l2: 0.595661	valid's my_metric_2: 0.0891964
[113]	train's l2: 0.54492	train's my_metric_2: 0.0833608	valid's l2: 0.593697	valid's my_metric_2: 0.089015
[114]	train's l2: 0.543197	train's my_metric_2: 0.0832052	valid's l2: 0.592479	valid's my_metric_2: 0.0889095
[115]	train's l2: 0.540993	train's my_metric_2: 0.0830055	valid's l2: 0.590744	valid's my_metric_2: 0.0887687
[116]	train's l2: 0.539053	train's my_metric_2: 0.0828474	valid's l2: 0.589213	valid's my_metric_2: 0.0886454
[117]	train's l2: 0.536531	train's my_metric_2: 0.0826332	valid's l2: 0.587551	valid's my_metric_2: 0.0884968
[118]	train's l2: 0.534988	train's my_metric_2: 0.0824777	valid's l2: 0.586512	valid's my_metric_2: 0.0883891
[119]	train's l2: 0.533497	train's my_metric_2: 0.0823319	valid's l2: 0.585742	valid's my_metric_2: 0.0882976
[120]	train's 

mae: [0.08204059345161227, 0.08339344742602993, 0.08411866100764022, 0.08246350375570262, 0.08548353518186443, 0.08044556934389208, 0.08370410809043002, 0.08366111658674363, 0.08179221735554246, 0.08163378928935164] 0.08287365414888093


In [35]:
train_features = X_train_2.columns
model_2.feature_importances_
pd.DataFrame({'features':train_features, 'imp':model_2.feature_importances_}).sort_values('imp',ascending=False)

Unnamed: 0,features,imp
6,时间戳,829
8,day,605
21,温度*压强,570
24,辐照度_差1,435
5,湿度,423
20,风速*温度,352
15,压强-mean_everyday,349
3,温度,331
1,风速,326
0,辐照度,325


In [36]:
from sklearn.metrics import mean_absolute_error
def my_metric_3(y_true,y_pred): 
    y_true_3 = y_true[y_true>=40*0.03]
    y_pred_3 = y_pred[y_true>=40*0.03]
    score = mean_absolute_error(y_true_3,y_pred_3)/40.0
    return 'my_metric_3',score,False  # False表示score不是越大越好(is_bigger_better)

In [37]:
X_train_3 = data_3[:len(train_3)].drop(['时间','实发辐照度','实际功率','id'],axis=1)

y_train_3 = data_3[:len(train_3)]['实际功率']

X_test_3 = data_3[len(train_3):].drop(['时间','实发辐照度','实际功率','id'],axis=1)


prediction_3 = test_3[['id']]

X_train_3_loc = X_train_3.values
X_test_3_loc = X_test_3.values
y_train_3_loc = y_train_3.values

model_3 = lgb.LGBMRegressor('gbdt',num_leaves=51,max_depth=-1,learning_rate=0.05,n_estimators=200,max_bin=255,subsample_for_bin=200000,
                         objective='regression',min_split_gain=0,min_child_weight=0.01,min_child_samples=20,subsample=1,subsample_freq=1,
                         colsample_bytree=1, reg_alpha=0, reg_lambda=0, random_state=2018,n_jobs=-1)
# 五折交叉训练，构造五个模型
skf=list(StratifiedKFold(y_train_3_loc, n_folds=10, shuffle=True, random_state=2018))
baseloss = []
loss = 0
for i, (train_index, test_index) in enumerate(skf):
    print("Fold", i)
    lgb_model = model_3.fit(X_train_3_loc[train_index], y_train_3_loc[train_index],
                          eval_names =['train','valid'],
                          eval_set=[(X_train_3_loc[train_index], y_train_3_loc[train_index]), 
                                    (X_train_3_loc[test_index], y_train_3_loc[test_index])],
                          eval_metric=my_metric_3,
                          early_stopping_rounds=100)
    baseloss.append(lgb_model.best_score_['valid']['my_metric_3'])
    loss += lgb_model.best_score_['valid']['my_metric_3']
    test_pred= lgb_model.predict(X_test_3_loc, num_iteration=lgb_model.best_iteration_)
#     print('test mean:', test_pred.mean())
    prediction_3['predict_%s' % str(i)] = test_pred
print('mae:', baseloss, loss/10)

p = prediction_3.drop(['id'],axis=1)
prediction_3['prediction']=p.mean(axis=1)



Fold 0
[1]	train's l2: 109.174	train's my_metric_3: 0.273658	valid's l2: 113.228	valid's my_metric_3: 0.278053
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 100.043	train's my_metric_3: 0.262395	valid's l2: 103.821	valid's my_metric_3: 0.266679
[3]	train's l2: 91.7989	train's my_metric_3: 0.251781	valid's l2: 95.3325	valid's my_metric_3: 0.256023
[4]	train's l2: 84.3506	train's my_metric_3: 0.24185	valid's l2: 87.6091	valid's my_metric_3: 0.245944
[5]	train's l2: 77.5673	train's my_metric_3: 0.232464	valid's l2: 80.5695	valid's my_metric_3: 0.236463
[6]	train's l2: 71.4519	train's my_metric_3: 0.223702	valid's l2: 74.2617	valid's my_metric_3: 0.227777
[7]	train's l2: 65.9293	train's my_metric_3: 0.215441	valid's l2: 68.5691	valid's my_metric_3: 0.219572
[8]	train's l2: 60.8943	train's my_metric_3: 0.207636	valid's l2: 63.409	valid's my_metric_3: 0.2118
[9]	train's l2: 56.3127	train's my_metric_3: 0.200315	valid's l2: 58.6844	valid's my_metric_3: 0.20438

[80]	train's l2: 6.61425	train's my_metric_3: 0.0701571	valid's l2: 8.07987	valid's my_metric_3: 0.0750092
[81]	train's l2: 6.54295	train's my_metric_3: 0.0697845	valid's l2: 8.03156	valid's my_metric_3: 0.0747526
[82]	train's l2: 6.49451	train's my_metric_3: 0.0694786	valid's l2: 7.99366	valid's my_metric_3: 0.0745491
[83]	train's l2: 6.43433	train's my_metric_3: 0.0691419	valid's l2: 7.95547	valid's my_metric_3: 0.0743587
[84]	train's l2: 6.36678	train's my_metric_3: 0.0687412	valid's l2: 7.89468	valid's my_metric_3: 0.0740004
[85]	train's l2: 6.31198	train's my_metric_3: 0.0683857	valid's l2: 7.84803	valid's my_metric_3: 0.0737143
[86]	train's l2: 6.25137	train's my_metric_3: 0.0679924	valid's l2: 7.79234	valid's my_metric_3: 0.0733815
[87]	train's l2: 6.18668	train's my_metric_3: 0.0675853	valid's l2: 7.74409	valid's my_metric_3: 0.0730885
[88]	train's l2: 6.12969	train's my_metric_3: 0.0672445	valid's l2: 7.69392	valid's my_metric_3: 0.0728059
[89]	train's l2: 6.08072	train's my_m

[177]	train's l2: 3.81974	train's my_metric_3: 0.0518309	valid's l2: 5.91815	valid's my_metric_3: 0.0626022
[178]	train's l2: 3.80531	train's my_metric_3: 0.0517308	valid's l2: 5.9081	valid's my_metric_3: 0.0625475
[179]	train's l2: 3.78523	train's my_metric_3: 0.0515986	valid's l2: 5.89247	valid's my_metric_3: 0.0624694
[180]	train's l2: 3.77443	train's my_metric_3: 0.0515098	valid's l2: 5.88544	valid's my_metric_3: 0.0624325
[181]	train's l2: 3.76094	train's my_metric_3: 0.0514152	valid's l2: 5.87634	valid's my_metric_3: 0.0623798
[182]	train's l2: 3.74638	train's my_metric_3: 0.0513004	valid's l2: 5.86922	valid's my_metric_3: 0.0623258
[183]	train's l2: 3.73002	train's my_metric_3: 0.0511816	valid's l2: 5.85526	valid's my_metric_3: 0.0622545
[184]	train's l2: 3.72064	train's my_metric_3: 0.0510956	valid's l2: 5.85104	valid's my_metric_3: 0.062214
[185]	train's l2: 3.70596	train's my_metric_3: 0.0509862	valid's l2: 5.83871	valid's my_metric_3: 0.0621336
[186]	train's l2: 3.69147	trai

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	train's l2: 109.371	train's my_metric_3: 0.272949	valid's l2: 111.986	valid's my_metric_3: 0.282186
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 100.284	train's my_metric_3: 0.261779	valid's l2: 102.682	valid's my_metric_3: 0.270494
[3]	train's l2: 92.0402	train's my_metric_3: 0.251185	valid's l2: 94.3019	valid's my_metric_3: 0.259634
[4]	train's l2: 84.5736	train's my_metric_3: 0.241244	valid's l2: 86.6467	valid's my_metric_3: 0.249362
[5]	train's l2: 77.7978	train's my_metric_3: 0.23191	valid's l2: 79.7171	valid's my_metric_3: 0.239725
[6]	train's l2: 71.676	train's my_metric_3: 0.223113	valid's l2: 73.4638	valid's my_metric_3: 0.230633
[7]	train's l2: 66.1625	train's my_metric_3: 0.214958	valid's l2: 67.8667	valid's my_metric_3: 0.222298
[8]	train's l2: 61.1016	train's my_metric_3: 0.207198	valid's l2: 62.7594	valid's my_metric_3: 0.214376
[9]	train's l2: 56.5513	train's my_metric_3: 0.199912	valid's l2: 58.1052	valid's my_metric_3: 0.206865
[10

[92]	train's l2: 5.93953	train's my_metric_3: 0.0656871	valid's l2: 7.10278	valid's my_metric_3: 0.0749883
[93]	train's l2: 5.88843	train's my_metric_3: 0.0653725	valid's l2: 7.05382	valid's my_metric_3: 0.0746908
[94]	train's l2: 5.84286	train's my_metric_3: 0.0650943	valid's l2: 7.01814	valid's my_metric_3: 0.0744329
[95]	train's l2: 5.79526	train's my_metric_3: 0.0648033	valid's l2: 6.97059	valid's my_metric_3: 0.0741733
[96]	train's l2: 5.74026	train's my_metric_3: 0.0644516	valid's l2: 6.92565	valid's my_metric_3: 0.0739067
[97]	train's l2: 5.6984	train's my_metric_3: 0.0641941	valid's l2: 6.90655	valid's my_metric_3: 0.0737441
[98]	train's l2: 5.65165	train's my_metric_3: 0.0639019	valid's l2: 6.86337	valid's my_metric_3: 0.0735304
[99]	train's l2: 5.60528	train's my_metric_3: 0.0636081	valid's l2: 6.81958	valid's my_metric_3: 0.0732663
[100]	train's l2: 5.55789	train's my_metric_3: 0.0633133	valid's l2: 6.77547	valid's my_metric_3: 0.0729863
[101]	train's l2: 5.50832	train's my_

[194]	train's l2: 3.59255	train's my_metric_3: 0.0497503	valid's l2: 5.1908	valid's my_metric_3: 0.0622893
[195]	train's l2: 3.57872	train's my_metric_3: 0.0496617	valid's l2: 5.17807	valid's my_metric_3: 0.0622044
[196]	train's l2: 3.56301	train's my_metric_3: 0.0495463	valid's l2: 5.16722	valid's my_metric_3: 0.0621208
[197]	train's l2: 3.54984	train's my_metric_3: 0.0494436	valid's l2: 5.15662	valid's my_metric_3: 0.0620439
[198]	train's l2: 3.53415	train's my_metric_3: 0.0493159	valid's l2: 5.14115	valid's my_metric_3: 0.0619313
[199]	train's l2: 3.52534	train's my_metric_3: 0.049242	valid's l2: 5.13658	valid's my_metric_3: 0.0618992
[200]	train's l2: 3.51495	train's my_metric_3: 0.0491497	valid's l2: 5.1283	valid's my_metric_3: 0.0618334
Did not meet early stopping. Best iteration is:
[200]	train's l2: 3.51495	train's my_metric_3: 0.0491497	valid's l2: 5.1283	valid's my_metric_3: 0.0618334
Fold 2
[1]	train's l2: 109.852	train's my_metric_3: 0.273973	valid's l2: 107.254	valid's my_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[4]	train's l2: 84.7674	train's my_metric_3: 0.241825	valid's l2: 83.3304	valid's my_metric_3: 0.24102
[5]	train's l2: 77.9499	train's my_metric_3: 0.232392	valid's l2: 76.785	valid's my_metric_3: 0.231893
[6]	train's l2: 71.7888	train's my_metric_3: 0.223535	valid's l2: 70.8646	valid's my_metric_3: 0.223287
[7]	train's l2: 66.1942	train's my_metric_3: 0.215274	valid's l2: 65.5975	valid's my_metric_3: 0.215487
[8]	train's l2: 61.1008	train's my_metric_3: 0.207446	valid's l2: 60.7813	valid's my_metric_3: 0.208136
[9]	train's l2: 56.5201	train's my_metric_3: 0.200075	valid's l2: 56.3672	valid's my_metric_3: 0.201057
[10]	train's l2: 52.3897	train's my_metric_3: 0.193204	valid's l2: 52.4665	valid's my_metric_3: 0.194599
[11]	train's l2: 48.5748	train's my_metric_3: 0.186618	valid's l2: 48.8379	valid's my_metric_3: 0.188496
[12]	train's l2: 45.152	train's my_metric_3: 0.18049	valid's l2: 45.5041	valid's my_metric_3: 0.182618
[13]	train's l2: 42.016	train's my_metric_3: 0.174713	valid's l2:

[95]	train's l2: 5.64963	train's my_metric_3: 0.064342	valid's l2: 8.20899	valid's my_metric_3: 0.0765827
[96]	train's l2: 5.60616	train's my_metric_3: 0.0640722	valid's l2: 8.15714	valid's my_metric_3: 0.0763133
[97]	train's l2: 5.5591	train's my_metric_3: 0.0637925	valid's l2: 8.11527	valid's my_metric_3: 0.0761021
[98]	train's l2: 5.51168	train's my_metric_3: 0.0635017	valid's l2: 8.06679	valid's my_metric_3: 0.0758284
[99]	train's l2: 5.47458	train's my_metric_3: 0.0632777	valid's l2: 8.03666	valid's my_metric_3: 0.0756618
[100]	train's l2: 5.43767	train's my_metric_3: 0.0630598	valid's l2: 8.00216	valid's my_metric_3: 0.0754855
[101]	train's l2: 5.39983	train's my_metric_3: 0.0628154	valid's l2: 7.95554	valid's my_metric_3: 0.075252
[102]	train's l2: 5.35466	train's my_metric_3: 0.0625468	valid's l2: 7.90302	valid's my_metric_3: 0.0749722
[103]	train's l2: 5.32338	train's my_metric_3: 0.0623559	valid's l2: 7.86871	valid's my_metric_3: 0.074809
[104]	train's l2: 5.28581	train's my_

[196]	train's l2: 3.43818	train's my_metric_3: 0.0491931	valid's l2: 6.07908	valid's my_metric_3: 0.0645983
[197]	train's l2: 3.42752	train's my_metric_3: 0.0491113	valid's l2: 6.07147	valid's my_metric_3: 0.0645643
[198]	train's l2: 3.4152	train's my_metric_3: 0.0490142	valid's l2: 6.06337	valid's my_metric_3: 0.0645066
[199]	train's l2: 3.40536	train's my_metric_3: 0.0489233	valid's l2: 6.0524	valid's my_metric_3: 0.0644138
[200]	train's l2: 3.38944	train's my_metric_3: 0.0488013	valid's l2: 6.03577	valid's my_metric_3: 0.0643152
Did not meet early stopping. Best iteration is:
[200]	train's l2: 3.38944	train's my_metric_3: 0.0488013	valid's l2: 6.03577	valid's my_metric_3: 0.0643152
Fold 3
[1]	train's l2: 109.761	train's my_metric_3: 0.273075	valid's l2: 108.101	valid's my_metric_3: 0.279736
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 100.599	train's my_metric_3: 0.26183	valid's l2: 99.1681	valid's my_metric_3: 0.268517
[3]	train's l2: 92.2739	train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[5]	train's l2: 77.9677	train's my_metric_3: 0.23192	valid's l2: 77.0749	valid's my_metric_3: 0.238563
[6]	train's l2: 71.8356	train's my_metric_3: 0.223182	valid's l2: 71.0853	valid's my_metric_3: 0.229657
[7]	train's l2: 66.228	train's my_metric_3: 0.21495	valid's l2: 65.6295	valid's my_metric_3: 0.221384
[8]	train's l2: 61.1619	train's my_metric_3: 0.207155	valid's l2: 60.6997	valid's my_metric_3: 0.213636
[9]	train's l2: 56.5582	train's my_metric_3: 0.199783	valid's l2: 56.1876	valid's my_metric_3: 0.206275
[10]	train's l2: 52.429	train's my_metric_3: 0.192958	valid's l2: 52.1447	valid's my_metric_3: 0.199429
[11]	train's l2: 48.6422	train's my_metric_3: 0.186421	valid's l2: 48.4665	valid's my_metric_3: 0.192896
[12]	train's l2: 45.228	train's my_metric_3: 0.180292	valid's l2: 45.1082	valid's my_metric_3: 0.186662
[13]	train's l2: 42.101	train's my_metric_3: 0.174543	valid's l2: 42.0759	valid's my_metric_3: 0.180904
[14]	train's l2: 39.2946	train's my_metric_3: 0.169212	valid's l2:

[96]	train's l2: 5.74136	train's my_metric_3: 0.0642539	valid's l2: 6.73327	valid's my_metric_3: 0.0716329
[97]	train's l2: 5.68797	train's my_metric_3: 0.0639706	valid's l2: 6.70472	valid's my_metric_3: 0.0714332
[98]	train's l2: 5.64421	train's my_metric_3: 0.0636939	valid's l2: 6.68269	valid's my_metric_3: 0.071257
[99]	train's l2: 5.60043	train's my_metric_3: 0.0634172	valid's l2: 6.66826	valid's my_metric_3: 0.0711419
[100]	train's l2: 5.55031	train's my_metric_3: 0.0631238	valid's l2: 6.6333	valid's my_metric_3: 0.070929
[101]	train's l2: 5.51189	train's my_metric_3: 0.0628921	valid's l2: 6.60638	valid's my_metric_3: 0.0707523
[102]	train's l2: 5.46478	train's my_metric_3: 0.0625943	valid's l2: 6.57058	valid's my_metric_3: 0.0705359
[103]	train's l2: 5.42561	train's my_metric_3: 0.0623242	valid's l2: 6.5457	valid's my_metric_3: 0.0703271
[104]	train's l2: 5.38795	train's my_metric_3: 0.0621108	valid's l2: 6.51732	valid's my_metric_3: 0.0701164
[105]	train's l2: 5.3359	train's my_

[196]	train's l2: 3.55906	train's my_metric_3: 0.049553	valid's l2: 5.2764	valid's my_metric_3: 0.0619983
[197]	train's l2: 3.54539	train's my_metric_3: 0.0494461	valid's l2: 5.26464	valid's my_metric_3: 0.061933
[198]	train's l2: 3.53731	train's my_metric_3: 0.049377	valid's l2: 5.25912	valid's my_metric_3: 0.0618953
[199]	train's l2: 3.52657	train's my_metric_3: 0.0492873	valid's l2: 5.25712	valid's my_metric_3: 0.0618572
[200]	train's l2: 3.50894	train's my_metric_3: 0.0491553	valid's l2: 5.24914	valid's my_metric_3: 0.0618251
Did not meet early stopping. Best iteration is:
[200]	train's l2: 3.50894	train's my_metric_3: 0.0491553	valid's l2: 5.24914	valid's my_metric_3: 0.0618251
Fold 4
[1]	train's l2: 109.881	train's my_metric_3: 0.273847	valid's l2: 107.133	valid's my_metric_3: 0.271711
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 100.708	train's my_metric_3: 0.262573	valid's l2: 98.2218	valid's my_metric_3: 0.260378
[3]	train's l2: 92.3653	train'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[4]	train's l2: 84.8434	train's my_metric_3: 0.241919	valid's l2: 82.8442	valid's my_metric_3: 0.239857
[5]	train's l2: 78.046	train's my_metric_3: 0.232529	valid's l2: 76.234	valid's my_metric_3: 0.23038
[6]	train's l2: 71.8991	train's my_metric_3: 0.223751	valid's l2: 70.3394	valid's my_metric_3: 0.221845
[7]	train's l2: 66.3313	train's my_metric_3: 0.215503	valid's l2: 64.9826	valid's my_metric_3: 0.213734
[8]	train's l2: 61.2629	train's my_metric_3: 0.207774	valid's l2: 60.0587	valid's my_metric_3: 0.205955
[9]	train's l2: 56.7051	train's my_metric_3: 0.200531	valid's l2: 55.6952	valid's my_metric_3: 0.19886
[10]	train's l2: 52.5642	train's my_metric_3: 0.193704	valid's l2: 51.7467	valid's my_metric_3: 0.192195
[11]	train's l2: 48.7759	train's my_metric_3: 0.187176	valid's l2: 48.0408	valid's my_metric_3: 0.185667
[12]	train's l2: 45.3163	train's my_metric_3: 0.180964	valid's l2: 44.6887	valid's my_metric_3: 0.179521
[13]	train's l2: 42.2021	train's my_metric_3: 0.175155	valid's l2

[94]	train's l2: 5.69217	train's my_metric_3: 0.0642894	valid's l2: 7.2695	valid's my_metric_3: 0.0744589
[95]	train's l2: 5.63667	train's my_metric_3: 0.0639638	valid's l2: 7.21802	valid's my_metric_3: 0.0741866
[96]	train's l2: 5.58763	train's my_metric_3: 0.063693	valid's l2: 7.17996	valid's my_metric_3: 0.0739918
[97]	train's l2: 5.54697	train's my_metric_3: 0.0634487	valid's l2: 7.14645	valid's my_metric_3: 0.0737911
[98]	train's l2: 5.49981	train's my_metric_3: 0.0631589	valid's l2: 7.10515	valid's my_metric_3: 0.073553
[99]	train's l2: 5.45139	train's my_metric_3: 0.0628809	valid's l2: 7.06249	valid's my_metric_3: 0.0733484
[100]	train's l2: 5.4143	train's my_metric_3: 0.0626565	valid's l2: 7.02574	valid's my_metric_3: 0.0731271
[101]	train's l2: 5.37519	train's my_metric_3: 0.0624149	valid's l2: 6.98718	valid's my_metric_3: 0.0728839
[102]	train's l2: 5.32555	train's my_metric_3: 0.0621199	valid's l2: 6.94551	valid's my_metric_3: 0.0726196
[103]	train's l2: 5.28434	train's my_m

[196]	train's l2: 3.44342	train's my_metric_3: 0.0489796	valid's l2: 5.52912	valid's my_metric_3: 0.063279
[197]	train's l2: 3.43186	train's my_metric_3: 0.0489001	valid's l2: 5.51524	valid's my_metric_3: 0.0631865
[198]	train's l2: 3.41864	train's my_metric_3: 0.0487941	valid's l2: 5.50847	valid's my_metric_3: 0.063123
[199]	train's l2: 3.40863	train's my_metric_3: 0.0487078	valid's l2: 5.50167	valid's my_metric_3: 0.0630604
[200]	train's l2: 3.39594	train's my_metric_3: 0.0486055	valid's l2: 5.49395	valid's my_metric_3: 0.0630124
Did not meet early stopping. Best iteration is:
[200]	train's l2: 3.39594	train's my_metric_3: 0.0486055	valid's l2: 5.49395	valid's my_metric_3: 0.0630124
Fold 5
[1]	train's l2: 109.921	train's my_metric_3: 0.274517	valid's l2: 106.433	valid's my_metric_3: 0.266403
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 100.704	train's my_metric_3: 0.263073	valid's l2: 97.5944	valid's my_metric_3: 0.255518
[3]	train's l2: 92.3563	trai

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[5]	train's l2: 77.9719	train's my_metric_3: 0.232713	valid's l2: 75.8588	valid's my_metric_3: 0.227011
[6]	train's l2: 71.7854	train's my_metric_3: 0.223769	valid's l2: 69.9539	valid's my_metric_3: 0.21857
[7]	train's l2: 66.1602	train's my_metric_3: 0.215373	valid's l2: 64.6112	valid's my_metric_3: 0.210778
[8]	train's l2: 61.1013	train's my_metric_3: 0.207576	valid's l2: 59.7787	valid's my_metric_3: 0.203431
[9]	train's l2: 56.5068	train's my_metric_3: 0.200225	valid's l2: 55.4182	valid's my_metric_3: 0.196668
[10]	train's l2: 52.3385	train's my_metric_3: 0.193254	valid's l2: 51.4792	valid's my_metric_3: 0.190303
[11]	train's l2: 48.5261	train's my_metric_3: 0.186688	valid's l2: 47.8852	valid's my_metric_3: 0.184365
[12]	train's l2: 45.0912	train's my_metric_3: 0.180496	valid's l2: 44.6522	valid's my_metric_3: 0.178697
[13]	train's l2: 41.9884	train's my_metric_3: 0.17477	valid's l2: 41.7849	valid's my_metric_3: 0.173682
[14]	train's l2: 39.1613	train's my_metric_3: 0.169262	valid's

[97]	train's l2: 5.67696	train's my_metric_3: 0.064376	valid's l2: 8.31703	valid's my_metric_3: 0.0744732
[98]	train's l2: 5.63861	train's my_metric_3: 0.0641374	valid's l2: 8.29059	valid's my_metric_3: 0.0743593
[99]	train's l2: 5.59445	train's my_metric_3: 0.0638561	valid's l2: 8.23962	valid's my_metric_3: 0.0740695
[100]	train's l2: 5.54966	train's my_metric_3: 0.0635813	valid's l2: 8.19368	valid's my_metric_3: 0.0738537
[101]	train's l2: 5.50593	train's my_metric_3: 0.0633	valid's l2: 8.14502	valid's my_metric_3: 0.0736463
[102]	train's l2: 5.47486	train's my_metric_3: 0.0631005	valid's l2: 8.11214	valid's my_metric_3: 0.0734809
[103]	train's l2: 5.44513	train's my_metric_3: 0.0629001	valid's l2: 8.0921	valid's my_metric_3: 0.0733791
[104]	train's l2: 5.40743	train's my_metric_3: 0.0626599	valid's l2: 8.06892	valid's my_metric_3: 0.0732698
[105]	train's l2: 5.36293	train's my_metric_3: 0.0623499	valid's l2: 8.02095	valid's my_metric_3: 0.0730137
[106]	train's l2: 5.32888	train's my

[200]	train's l2: 3.49087	train's my_metric_3: 0.0491759	valid's l2: 6.36089	valid's my_metric_3: 0.0635627
Did not meet early stopping. Best iteration is:
[200]	train's l2: 3.49087	train's my_metric_3: 0.0491759	valid's l2: 6.36089	valid's my_metric_3: 0.0635627
Fold 6
[1]	train's l2: 109.738	train's my_metric_3: 0.274083	valid's l2: 108.901	valid's my_metric_3: 0.271985
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 100.561	train's my_metric_3: 0.262727	valid's l2: 99.8759	valid's my_metric_3: 0.261209
[3]	train's l2: 92.2555	train's my_metric_3: 0.252019	valid's l2: 91.7378	valid's my_metric_3: 0.251054
[4]	train's l2: 84.724	train's my_metric_3: 0.241948	valid's l2: 84.3198	valid's my_metric_3: 0.241537
[5]	train's l2: 77.9102	train's my_metric_3: 0.232494	valid's l2: 77.655	valid's my_metric_3: 0.232713
[6]	train's l2: 71.7522	train's my_metric_3: 0.223663	valid's l2: 71.6187	valid's my_metric_3: 0.224326
[7]	train's l2: 66.1741	train's my_metric_3:

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



[8]	train's l2: 61.1125	train's my_metric_3: 0.207602	valid's l2: 61.2508	valid's my_metric_3: 0.209081
[9]	train's l2: 56.5677	train's my_metric_3: 0.200304	valid's l2: 56.7974	valid's my_metric_3: 0.20206
[10]	train's l2: 52.4071	train's my_metric_3: 0.193373	valid's l2: 52.7915	valid's my_metric_3: 0.195512
[11]	train's l2: 48.6058	train's my_metric_3: 0.186886	valid's l2: 49.1209	valid's my_metric_3: 0.189392
[12]	train's l2: 45.1502	train's my_metric_3: 0.180683	valid's l2: 45.8064	valid's my_metric_3: 0.183539
[13]	train's l2: 42.0503	train's my_metric_3: 0.174979	valid's l2: 42.7631	valid's my_metric_3: 0.177991
[14]	train's l2: 39.2056	train's my_metric_3: 0.16945	valid's l2: 40.0133	valid's my_metric_3: 0.172677
[15]	train's l2: 36.6223	train's my_metric_3: 0.164318	valid's l2: 37.5347	valid's my_metric_3: 0.167723
[16]	train's l2: 34.2865	train's my_metric_3: 0.159458	valid's l2: 35.3258	valid's my_metric_3: 0.163205
[17]	train's l2: 32.1582	train's my_metric_3: 0.154893	val

[100]	train's l2: 5.47588	train's my_metric_3: 0.0632788	valid's l2: 7.80721	valid's my_metric_3: 0.0735521
[101]	train's l2: 5.44521	train's my_metric_3: 0.0630891	valid's l2: 7.77966	valid's my_metric_3: 0.0734102
[102]	train's l2: 5.40434	train's my_metric_3: 0.0628157	valid's l2: 7.73535	valid's my_metric_3: 0.0731858
[103]	train's l2: 5.37316	train's my_metric_3: 0.0626076	valid's l2: 7.70751	valid's my_metric_3: 0.0730534
[104]	train's l2: 5.34366	train's my_metric_3: 0.0624147	valid's l2: 7.69429	valid's my_metric_3: 0.0729537
[105]	train's l2: 5.30463	train's my_metric_3: 0.0621813	valid's l2: 7.65243	valid's my_metric_3: 0.0727567
[106]	train's l2: 5.27296	train's my_metric_3: 0.0619659	valid's l2: 7.63216	valid's my_metric_3: 0.0726161
[107]	train's l2: 5.23009	train's my_metric_3: 0.0616964	valid's l2: 7.58121	valid's my_metric_3: 0.0723598
[108]	train's l2: 5.19613	train's my_metric_3: 0.0614744	valid's l2: 7.55057	valid's my_metric_3: 0.0721989
[109]	train's l2: 5.1649	tra

Fold 7
[1]	train's l2: 109.888	train's my_metric_3: 0.275051	valid's l2: 106.347	valid's my_metric_3: 0.264636
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 100.677	train's my_metric_3: 0.263683	valid's l2: 97.4115	valid's my_metric_3: 0.253479
[3]	train's l2: 92.3304	train's my_metric_3: 0.25289	valid's l2: 89.336	valid's my_metric_3: 0.242917
[4]	train's l2: 84.7631	train's my_metric_3: 0.242814	valid's l2: 81.9821	valid's my_metric_3: 0.232964
[5]	train's l2: 77.905	train's my_metric_3: 0.23329	valid's l2: 75.3788	valid's my_metric_3: 0.223695
[6]	train's l2: 71.7125	train's my_metric_3: 0.224349	valid's l2: 69.397	valid's my_metric_3: 0.215086
[7]	train's l2: 66.1174	train's my_metric_3: 0.21595	valid's l2: 64.084	valid's my_metric_3: 0.207293
[8]	train's l2: 60.9943	train's my_metric_3: 0.208031	valid's l2: 59.1312	valid's my_metric_3: 0.199804
[9]	train's l2: 56.3979	train's my_metric_3: 0.200704	valid's l2: 54.777	valid's my_metric_3: 0.192969
[1

[79]	train's l2: 6.50109	train's my_metric_3: 0.0692373	valid's l2: 9.4946	valid's my_metric_3: 0.0807526
[80]	train's l2: 6.42812	train's my_metric_3: 0.0688029	valid's l2: 9.43014	valid's my_metric_3: 0.0804382
[81]	train's l2: 6.36697	train's my_metric_3: 0.0684548	valid's l2: 9.38136	valid's my_metric_3: 0.0801996
[82]	train's l2: 6.31407	train's my_metric_3: 0.06812	valid's l2: 9.32215	valid's my_metric_3: 0.0798953
[83]	train's l2: 6.24972	train's my_metric_3: 0.0677118	valid's l2: 9.2647	valid's my_metric_3: 0.0796143
[84]	train's l2: 6.19015	train's my_metric_3: 0.0673548	valid's l2: 9.21867	valid's my_metric_3: 0.079403
[85]	train's l2: 6.13616	train's my_metric_3: 0.0670178	valid's l2: 9.16917	valid's my_metric_3: 0.0791967
[86]	train's l2: 6.06394	train's my_metric_3: 0.0666124	valid's l2: 9.11169	valid's my_metric_3: 0.0789332
[87]	train's l2: 6.00186	train's my_metric_3: 0.066271	valid's l2: 9.0607	valid's my_metric_3: 0.0787386
[88]	train's l2: 5.94162	train's my_metric_3

[180]	train's l2: 3.66309	train's my_metric_3: 0.0506604	valid's l2: 6.89752	valid's my_metric_3: 0.067592
[181]	train's l2: 3.64545	train's my_metric_3: 0.0505492	valid's l2: 6.87255	valid's my_metric_3: 0.0674967
[182]	train's l2: 3.62864	train's my_metric_3: 0.0504273	valid's l2: 6.85491	valid's my_metric_3: 0.0674041
[183]	train's l2: 3.61675	train's my_metric_3: 0.0503357	valid's l2: 6.84602	valid's my_metric_3: 0.0673374
[184]	train's l2: 3.60132	train's my_metric_3: 0.05023	valid's l2: 6.82624	valid's my_metric_3: 0.0672298
[185]	train's l2: 3.58912	train's my_metric_3: 0.0501267	valid's l2: 6.81649	valid's my_metric_3: 0.0671516
[186]	train's l2: 3.57593	train's my_metric_3: 0.0500141	valid's l2: 6.79998	valid's my_metric_3: 0.0670657
[187]	train's l2: 3.56453	train's my_metric_3: 0.0499247	valid's l2: 6.78594	valid's my_metric_3: 0.067018
[188]	train's l2: 3.54854	train's my_metric_3: 0.0498065	valid's l2: 6.77465	valid's my_metric_3: 0.0669665
[189]	train's l2: 3.53552	train'

[63]	train's l2: 8.07224	train's my_metric_3: 0.078278	valid's l2: 9.4676	valid's my_metric_3: 0.0807228
[64]	train's l2: 7.94377	train's my_metric_3: 0.0775856	valid's l2: 9.36167	valid's my_metric_3: 0.0801649
[65]	train's l2: 7.85286	train's my_metric_3: 0.0770395	valid's l2: 9.26485	valid's my_metric_3: 0.0796435
[66]	train's l2: 7.74245	train's my_metric_3: 0.0764865	valid's l2: 9.14372	valid's my_metric_3: 0.0791702
[67]	train's l2: 7.6436	train's my_metric_3: 0.0759242	valid's l2: 9.06523	valid's my_metric_3: 0.0786914
[68]	train's l2: 7.54855	train's my_metric_3: 0.0754271	valid's l2: 8.96739	valid's my_metric_3: 0.0782691
[69]	train's l2: 7.44382	train's my_metric_3: 0.0748118	valid's l2: 8.86207	valid's my_metric_3: 0.0777492
[70]	train's l2: 7.34729	train's my_metric_3: 0.0742978	valid's l2: 8.77232	valid's my_metric_3: 0.0773475
[71]	train's l2: 7.27313	train's my_metric_3: 0.0738563	valid's l2: 8.71407	valid's my_metric_3: 0.0770116
[72]	train's l2: 7.18458	train's my_metr

[160]	train's l2: 3.98618	train's my_metric_3: 0.0531603	valid's l2: 5.90996	valid's my_metric_3: 0.0622404
[161]	train's l2: 3.96656	train's my_metric_3: 0.053053	valid's l2: 5.8978	valid's my_metric_3: 0.062176
[162]	train's l2: 3.95278	train's my_metric_3: 0.0529457	valid's l2: 5.88642	valid's my_metric_3: 0.0620874
[163]	train's l2: 3.93626	train's my_metric_3: 0.0528343	valid's l2: 5.87369	valid's my_metric_3: 0.0619929
[164]	train's l2: 3.91995	train's my_metric_3: 0.0527072	valid's l2: 5.86246	valid's my_metric_3: 0.0619188
[165]	train's l2: 3.90403	train's my_metric_3: 0.0525994	valid's l2: 5.85465	valid's my_metric_3: 0.0618979
[166]	train's l2: 3.89081	train's my_metric_3: 0.0524988	valid's l2: 5.8427	valid's my_metric_3: 0.0618208
[167]	train's l2: 3.87846	train's my_metric_3: 0.0523974	valid's l2: 5.83211	valid's my_metric_3: 0.0617357
[168]	train's l2: 3.86433	train's my_metric_3: 0.0522841	valid's l2: 5.81981	valid's my_metric_3: 0.0616622
[169]	train's l2: 3.84568	train'

[36]	train's l2: 13.533	train's my_metric_3: 0.103652	valid's l2: 16.6503	valid's my_metric_3: 0.113148
[37]	train's l2: 13.1359	train's my_metric_3: 0.102109	valid's l2: 16.2807	valid's my_metric_3: 0.111814
[38]	train's l2: 12.7692	train's my_metric_3: 0.100664	valid's l2: 15.9096	valid's my_metric_3: 0.110466
[39]	train's l2: 12.4196	train's my_metric_3: 0.0991949	valid's l2: 15.5647	valid's my_metric_3: 0.109129
[40]	train's l2: 12.0874	train's my_metric_3: 0.0977908	valid's l2: 15.2179	valid's my_metric_3: 0.107769
[41]	train's l2: 11.7814	train's my_metric_3: 0.0964518	valid's l2: 14.9548	valid's my_metric_3: 0.106618
[42]	train's l2: 11.4963	train's my_metric_3: 0.0951914	valid's l2: 14.663	valid's my_metric_3: 0.105456
[43]	train's l2: 11.2116	train's my_metric_3: 0.0939464	valid's l2: 14.3855	valid's my_metric_3: 0.104279
[44]	train's l2: 10.9558	train's my_metric_3: 0.0927894	valid's l2: 14.1379	valid's my_metric_3: 0.103201
[45]	train's l2: 10.7202	train's my_metric_3: 0.091

[131]	train's l2: 4.53849	train's my_metric_3: 0.0570395	valid's l2: 8.06607	valid's my_metric_3: 0.0725092
[132]	train's l2: 4.51841	train's my_metric_3: 0.0568873	valid's l2: 8.04798	valid's my_metric_3: 0.0723924
[133]	train's l2: 4.48858	train's my_metric_3: 0.056693	valid's l2: 8.00702	valid's my_metric_3: 0.0721627
[134]	train's l2: 4.46755	train's my_metric_3: 0.0565525	valid's l2: 7.98168	valid's my_metric_3: 0.0720414
[135]	train's l2: 4.44425	train's my_metric_3: 0.0563934	valid's l2: 7.95996	valid's my_metric_3: 0.0719346
[136]	train's l2: 4.42667	train's my_metric_3: 0.0562639	valid's l2: 7.94701	valid's my_metric_3: 0.0718403
[137]	train's l2: 4.4075	train's my_metric_3: 0.0561278	valid's l2: 7.92533	valid's my_metric_3: 0.0717185
[138]	train's l2: 4.38304	train's my_metric_3: 0.0559654	valid's l2: 7.90316	valid's my_metric_3: 0.0715881
[139]	train's l2: 4.36171	train's my_metric_3: 0.0558265	valid's l2: 7.86972	valid's my_metric_3: 0.071414
[140]	train's l2: 4.33417	train

In [38]:
train_features = X_train_3.columns
model_3.feature_importances_
pd.DataFrame({'features':train_features, 'imp':model_3.feature_importances_}).sort_values('imp',ascending=False)

Unnamed: 0,features,imp
6,时间戳,837
8,day,750
21,温度*压强,650
20,风速*温度,620
3,温度,540
24,辐照度_差1,532
0,辐照度,480
5,湿度,460
13,湿度-mean_everyday,426
16,温度-mean_everyday,408


In [39]:
from sklearn.metrics import mean_absolute_error
def my_metric_4(y_true,y_pred): 
    y_true_4 = y_true[y_true>=50*0.03]
    y_pred_4 = y_pred[y_true>=50*0.03]
    score = mean_absolute_error(y_true_4,y_pred_4)/50.0
    return 'my_metric_4',score,False  # False表示score不是越大越好(is_bigger_better)

In [40]:
X_train_4 = data_4[:len(train_4)].drop(['时间','实发辐照度','实际功率','id'],axis=1)

y_train_4 = data_4[:len(train_4)]['实际功率']

X_test_4 = data_4[len(train_4):].drop(['时间','实发辐照度','实际功率','id'],axis=1)

prediction_4 = test_4[['id']]

X_train_4_loc = X_train_4.values
X_test_4_loc = X_test_4.values
y_train_4_loc = y_train_4.values

model_4 = lgb.LGBMRegressor('gbdt',num_leaves=51,max_depth=-1,learning_rate=0.05,n_estimators=200,max_bin=255,subsample_for_bin=200000,
                         objective='regression',min_split_gain=0,min_child_weight=0.01,min_child_samples=20,subsample=1,subsample_freq=1,
                         colsample_bytree=1, reg_alpha=0, reg_lambda=0, random_state=2018,n_jobs=-1)
# 五折交叉训练，构造五个模型
skf=list(StratifiedKFold(y_train_4_loc, n_folds=10, shuffle=True, random_state=2018))
baseloss = []
loss = 0
for i, (train_index, test_index) in enumerate(skf):
    print("Fold", i)
    lgb_model = model_4.fit(X_train_4_loc[train_index], y_train_4_loc[train_index],
                          eval_names =['train','valid'],
                          eval_set=[(X_train_4_loc[train_index], y_train_4_loc[train_index]), 
                                    (X_train_4_loc[test_index], y_train_4_loc[test_index])],
                          eval_metric=my_metric_4,
                          early_stopping_rounds=100)
    baseloss.append(lgb_model.best_score_['valid']['my_metric_4'])
    loss += lgb_model.best_score_['valid']['my_metric_4']
    test_pred= lgb_model.predict(X_test_4_loc, num_iteration=lgb_model.best_iteration_)
#     print('test mean:', test_pred.mean())
    prediction_4['predict_%s' % str(i)] = test_pred
print('mae:', baseloss, loss/10)

p = prediction_4.drop(['id'],axis=1)
prediction_4['prediction']=p.mean(axis=1)



Fold 0
[1]	train's l2: 206.217	train's my_metric_4: 0.298328	valid's l2: 206.906	valid's my_metric_4: 0.295207
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 188.746	train's my_metric_4: 0.285662	valid's l2: 189.573	valid's my_metric_4: 0.282825
[3]	train's l2: 172.951	train's my_metric_4: 0.273792	valid's l2: 173.965	valid's my_metric_4: 0.271334
[4]	train's l2: 158.641	train's my_metric_4: 0.262557	valid's l2: 159.847	valid's my_metric_4: 0.260442
[5]	train's l2: 145.672	train's my_metric_4: 0.252023	valid's l2: 147.139	valid's my_metric_4: 0.250364
[6]	train's l2: 133.905	train's my_metric_4: 0.242122	valid's l2: 135.635	valid's my_metric_4: 0.240945
[7]	train's l2: 123.266	train's my_metric_4: 0.232799	valid's l2: 125.145	valid's my_metric_4: 0.231907
[8]	train's l2: 113.671	train's my_metric_4: 0.224155	valid's l2: 115.734	valid's my_metric_4: 0.223583
[9]	train's l2: 104.936	train's my_metric_4: 0.216066	valid's l2: 107.159	valid's my_metric_4: 0.2

[84]	train's l2: 12.7902	train's my_metric_4: 0.0810976	valid's l2: 17.0376	valid's my_metric_4: 0.0909688
[85]	train's l2: 12.6999	train's my_metric_4: 0.0807607	valid's l2: 16.958	valid's my_metric_4: 0.0906967
[86]	train's l2: 12.6095	train's my_metric_4: 0.080401	valid's l2: 16.8786	valid's my_metric_4: 0.0903675
[87]	train's l2: 12.5351	train's my_metric_4: 0.0801729	valid's l2: 16.8056	valid's my_metric_4: 0.0901662
[88]	train's l2: 12.459	train's my_metric_4: 0.0798763	valid's l2: 16.7164	valid's my_metric_4: 0.0898748
[89]	train's l2: 12.3639	train's my_metric_4: 0.0795333	valid's l2: 16.6356	valid's my_metric_4: 0.0896368
[90]	train's l2: 12.286	train's my_metric_4: 0.0792468	valid's l2: 16.5804	valid's my_metric_4: 0.089447
[91]	train's l2: 12.2215	train's my_metric_4: 0.0790302	valid's l2: 16.5246	valid's my_metric_4: 0.0892808
[92]	train's l2: 12.1421	train's my_metric_4: 0.0787169	valid's l2: 16.4366	valid's my_metric_4: 0.0889718
[93]	train's l2: 12.0698	train's my_metric

[171]	train's l2: 8.99593	train's my_metric_4: 0.06657	valid's l2: 13.9007	valid's my_metric_4: 0.0803268
[172]	train's l2: 8.97452	train's my_metric_4: 0.0665001	valid's l2: 13.8862	valid's my_metric_4: 0.080291
[173]	train's l2: 8.93691	train's my_metric_4: 0.066341	valid's l2: 13.8537	valid's my_metric_4: 0.0801987
[174]	train's l2: 8.9173	train's my_metric_4: 0.0662522	valid's l2: 13.8364	valid's my_metric_4: 0.0801338
[175]	train's l2: 8.88663	train's my_metric_4: 0.0661063	valid's l2: 13.8053	valid's my_metric_4: 0.0800199
[176]	train's l2: 8.85531	train's my_metric_4: 0.065978	valid's l2: 13.7739	valid's my_metric_4: 0.0799179
[177]	train's l2: 8.83453	train's my_metric_4: 0.0658924	valid's l2: 13.7594	valid's my_metric_4: 0.0798695
[178]	train's l2: 8.81487	train's my_metric_4: 0.0658056	valid's l2: 13.7432	valid's my_metric_4: 0.0798243
[179]	train's l2: 8.7954	train's my_metric_4: 0.0657171	valid's l2: 13.7206	valid's my_metric_4: 0.0797494
[180]	train's l2: 8.7795	train's my

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	train's l2: 206.308	train's my_metric_4: 0.297554	valid's l2: 206.998	valid's my_metric_4: 0.301358
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 188.857	train's my_metric_4: 0.284916	valid's l2: 189.488	valid's my_metric_4: 0.288666
[3]	train's l2: 173.016	train's my_metric_4: 0.272976	valid's l2: 173.579	valid's my_metric_4: 0.276598
[4]	train's l2: 158.743	train's my_metric_4: 0.261854	valid's l2: 159.24	valid's my_metric_4: 0.265335
[5]	train's l2: 145.817	train's my_metric_4: 0.251485	valid's l2: 146.408	valid's my_metric_4: 0.254937
[6]	train's l2: 134.081	train's my_metric_4: 0.241648	valid's l2: 134.685	valid's my_metric_4: 0.245006
[7]	train's l2: 123.458	train's my_metric_4: 0.232443	valid's l2: 124.212	valid's my_metric_4: 0.235948
[8]	train's l2: 113.824	train's my_metric_4: 0.223768	valid's l2: 114.715	valid's my_metric_4: 0.227355
[9]	train's l2: 105.084	train's my_metric_4: 0.215679	valid's l2: 105.978	valid's my_metric_4: 0.219141
[1

[96]	train's l2: 11.9245	train's my_metric_4: 0.0778066	valid's l2: 14.7962	valid's my_metric_4: 0.0870392
[97]	train's l2: 11.8556	train's my_metric_4: 0.0775477	valid's l2: 14.7454	valid's my_metric_4: 0.0868399
[98]	train's l2: 11.7844	train's my_metric_4: 0.0772845	valid's l2: 14.6723	valid's my_metric_4: 0.0866153
[99]	train's l2: 11.7204	train's my_metric_4: 0.0770503	valid's l2: 14.6198	valid's my_metric_4: 0.0864146
[100]	train's l2: 11.6421	train's my_metric_4: 0.0767859	valid's l2: 14.5586	valid's my_metric_4: 0.0862049
[101]	train's l2: 11.5833	train's my_metric_4: 0.0765761	valid's l2: 14.5144	valid's my_metric_4: 0.0860518
[102]	train's l2: 11.4974	train's my_metric_4: 0.0762785	valid's l2: 14.4577	valid's my_metric_4: 0.0858854
[103]	train's l2: 11.4384	train's my_metric_4: 0.0760713	valid's l2: 14.4011	valid's my_metric_4: 0.0857092
[104]	train's l2: 11.3838	train's my_metric_4: 0.0758806	valid's l2: 14.3686	valid's my_metric_4: 0.0855793
[105]	train's l2: 11.3306	train'

[181]	train's l2: 8.78823	train's my_metric_4: 0.0657845	valid's l2: 12.5419	valid's my_metric_4: 0.0793634
[182]	train's l2: 8.74958	train's my_metric_4: 0.0656336	valid's l2: 12.5133	valid's my_metric_4: 0.0792535
[183]	train's l2: 8.73079	train's my_metric_4: 0.0655552	valid's l2: 12.5022	valid's my_metric_4: 0.0792097
[184]	train's l2: 8.69786	train's my_metric_4: 0.0653935	valid's l2: 12.4773	valid's my_metric_4: 0.079082
[185]	train's l2: 8.66363	train's my_metric_4: 0.0652616	valid's l2: 12.4582	valid's my_metric_4: 0.0790159
[186]	train's l2: 8.63885	train's my_metric_4: 0.0651714	valid's l2: 12.4485	valid's my_metric_4: 0.078977
[187]	train's l2: 8.62339	train's my_metric_4: 0.0651115	valid's l2: 12.4469	valid's my_metric_4: 0.0789691
[188]	train's l2: 8.6036	train's my_metric_4: 0.0650339	valid's l2: 12.4367	valid's my_metric_4: 0.0789207
[189]	train's l2: 8.57298	train's my_metric_4: 0.0649184	valid's l2: 12.4139	valid's my_metric_4: 0.0788502
[190]	train's l2: 8.55946	train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 2
[1]	train's l2: 206.654	train's my_metric_4: 0.298253	valid's l2: 203.142	valid's my_metric_4: 0.292344
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 189.184	train's my_metric_4: 0.285626	valid's l2: 186.161	valid's my_metric_4: 0.280218
[3]	train's l2: 173.354	train's my_metric_4: 0.273791	valid's l2: 170.626	valid's my_metric_4: 0.268787
[4]	train's l2: 159.02	train's my_metric_4: 0.262623	valid's l2: 156.669	valid's my_metric_4: 0.258065
[5]	train's l2: 146.035	train's my_metric_4: 0.252076	valid's l2: 144.059	valid's my_metric_4: 0.247919
[6]	train's l2: 134.285	train's my_metric_4: 0.242219	valid's l2: 132.652	valid's my_metric_4: 0.238413
[7]	train's l2: 123.669	train's my_metric_4: 0.233003	valid's l2: 122.207	valid's my_metric_4: 0.229323
[8]	train's l2: 114.028	train's my_metric_4: 0.224383	valid's l2: 112.809	valid's my_metric_4: 0.221038
[9]	train's l2: 105.277	train's my_metric_4: 0.216284	valid's l2: 104.227	valid's my_metric_4: 0.21

[92]	train's l2: 12.2593	train's my_metric_4: 0.0792221	valid's l2: 15.3882	valid's my_metric_4: 0.0856809
[93]	train's l2: 12.1819	train's my_metric_4: 0.0789244	valid's l2: 15.3401	valid's my_metric_4: 0.0854654
[94]	train's l2: 12.0997	train's my_metric_4: 0.0786196	valid's l2: 15.2956	valid's my_metric_4: 0.0852711
[95]	train's l2: 12.0424	train's my_metric_4: 0.0784008	valid's l2: 15.2606	valid's my_metric_4: 0.0851149
[96]	train's l2: 11.9658	train's my_metric_4: 0.0781118	valid's l2: 15.2113	valid's my_metric_4: 0.0848899
[97]	train's l2: 11.8815	train's my_metric_4: 0.0777632	valid's l2: 15.1484	valid's my_metric_4: 0.0846714
[98]	train's l2: 11.8206	train's my_metric_4: 0.0775379	valid's l2: 15.0956	valid's my_metric_4: 0.0844758
[99]	train's l2: 11.7555	train's my_metric_4: 0.0773183	valid's l2: 15.0697	valid's my_metric_4: 0.0843614
[100]	train's l2: 11.6759	train's my_metric_4: 0.0770684	valid's l2: 14.9969	valid's my_metric_4: 0.0841419
[101]	train's l2: 11.6167	train's my

[180]	train's l2: 8.90282	train's my_metric_4: 0.0662104	valid's l2: 13.1755	valid's my_metric_4: 0.0772089
[181]	train's l2: 8.87954	train's my_metric_4: 0.0661116	valid's l2: 13.1568	valid's my_metric_4: 0.0771191
[182]	train's l2: 8.84201	train's my_metric_4: 0.065971	valid's l2: 13.1225	valid's my_metric_4: 0.0769844
[183]	train's l2: 8.82195	train's my_metric_4: 0.0658924	valid's l2: 13.1208	valid's my_metric_4: 0.0769733
[184]	train's l2: 8.79988	train's my_metric_4: 0.0658107	valid's l2: 13.1061	valid's my_metric_4: 0.0769302
[185]	train's l2: 8.77266	train's my_metric_4: 0.0656928	valid's l2: 13.0932	valid's my_metric_4: 0.0768762
[186]	train's l2: 8.75102	train's my_metric_4: 0.0655967	valid's l2: 13.0805	valid's my_metric_4: 0.0768335
[187]	train's l2: 8.7181	train's my_metric_4: 0.0654568	valid's l2: 13.0522	valid's my_metric_4: 0.0767247
[188]	train's l2: 8.693	train's my_metric_4: 0.0653426	valid's l2: 13.0294	valid's my_metric_4: 0.0766395
[189]	train's l2: 8.67341	train'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 3
[1]	train's l2: 205.787	train's my_metric_4: 0.296916	valid's l2: 211.106	valid's my_metric_4: 0.307183
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 188.363	train's my_metric_4: 0.284262	valid's l2: 193.437	valid's my_metric_4: 0.294299
[3]	train's l2: 172.585	train's my_metric_4: 0.272438	valid's l2: 177.508	valid's my_metric_4: 0.282233
[4]	train's l2: 158.337	train's my_metric_4: 0.261348	valid's l2: 163.04	valid's my_metric_4: 0.270907
[5]	train's l2: 145.466	train's my_metric_4: 0.250923	valid's l2: 149.904	valid's my_metric_4: 0.260213
[6]	train's l2: 133.778	train's my_metric_4: 0.241108	valid's l2: 137.95	valid's my_metric_4: 0.250086
[7]	train's l2: 123.201	train's my_metric_4: 0.231916	valid's l2: 127.247	valid's my_metric_4: 0.240771
[8]	train's l2: 113.585	train's my_metric_4: 0.223267	valid's l2: 117.387	valid's my_metric_4: 0.231817
[9]	train's l2: 104.849	train's my_metric_4: 0.215177	valid's l2: 108.516	valid's my_metric_4: 0.223

[91]	train's l2: 12.2959	train's my_metric_4: 0.0789646	valid's l2: 15.6997	valid's my_metric_4: 0.0896734
[92]	train's l2: 12.2144	train's my_metric_4: 0.0786657	valid's l2: 15.6323	valid's my_metric_4: 0.0894228
[93]	train's l2: 12.1426	train's my_metric_4: 0.0784164	valid's l2: 15.589	valid's my_metric_4: 0.0892406
[94]	train's l2: 12.0584	train's my_metric_4: 0.0781185	valid's l2: 15.5024	valid's my_metric_4: 0.0889602
[95]	train's l2: 11.9932	train's my_metric_4: 0.0778729	valid's l2: 15.456	valid's my_metric_4: 0.0888048
[96]	train's l2: 11.9197	train's my_metric_4: 0.0776216	valid's l2: 15.4005	valid's my_metric_4: 0.0885918
[97]	train's l2: 11.8352	train's my_metric_4: 0.0772951	valid's l2: 15.3246	valid's my_metric_4: 0.0883415
[98]	train's l2: 11.7637	train's my_metric_4: 0.0770181	valid's l2: 15.2508	valid's my_metric_4: 0.0880889
[99]	train's l2: 11.7103	train's my_metric_4: 0.0768209	valid's l2: 15.2138	valid's my_metric_4: 0.0879654
[100]	train's l2: 11.6572	train's my_me

[177]	train's l2: 8.95068	train's my_metric_4: 0.0661243	valid's l2: 13.1743	valid's my_metric_4: 0.0807308
[178]	train's l2: 8.92134	train's my_metric_4: 0.0659989	valid's l2: 13.1487	valid's my_metric_4: 0.0806335
[179]	train's l2: 8.89875	train's my_metric_4: 0.0658957	valid's l2: 13.1394	valid's my_metric_4: 0.0806006
[180]	train's l2: 8.88315	train's my_metric_4: 0.0658231	valid's l2: 13.1313	valid's my_metric_4: 0.0805677
[181]	train's l2: 8.85512	train's my_metric_4: 0.065698	valid's l2: 13.1035	valid's my_metric_4: 0.0804774
[182]	train's l2: 8.83475	train's my_metric_4: 0.0656193	valid's l2: 13.0848	valid's my_metric_4: 0.0804243
[183]	train's l2: 8.80221	train's my_metric_4: 0.0654786	valid's l2: 13.0544	valid's my_metric_4: 0.0803114
[184]	train's l2: 8.78897	train's my_metric_4: 0.06542	valid's l2: 13.0495	valid's my_metric_4: 0.0802926
[185]	train's l2: 8.76119	train's my_metric_4: 0.0653134	valid's l2: 13.0298	valid's my_metric_4: 0.0802274
[186]	train's l2: 8.73463	train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 4
[1]	train's l2: 206.079	train's my_metric_4: 0.297331	valid's l2: 208.877	valid's my_metric_4: 0.303157
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 188.698	train's my_metric_4: 0.284747	valid's l2: 191.204	valid's my_metric_4: 0.290129
[3]	train's l2: 172.963	train's my_metric_4: 0.272937	valid's l2: 175.205	valid's my_metric_4: 0.277947
[4]	train's l2: 158.643	train's my_metric_4: 0.261798	valid's l2: 160.793	valid's my_metric_4: 0.266523
[5]	train's l2: 145.715	train's my_metric_4: 0.251362	valid's l2: 147.714	valid's my_metric_4: 0.255721
[6]	train's l2: 133.999	train's my_metric_4: 0.241524	valid's l2: 135.886	valid's my_metric_4: 0.245628
[7]	train's l2: 123.395	train's my_metric_4: 0.232336	valid's l2: 125.063	valid's my_metric_4: 0.236061
[8]	train's l2: 113.788	train's my_metric_4: 0.223721	valid's l2: 115.29	valid's my_metric_4: 0.227132
[9]	train's l2: 105.135	train's my_metric_4: 0.215717	valid's l2: 106.486	valid's my_metric_4: 0.21

[87]	train's l2: 12.8061	train's my_metric_4: 0.0807835	valid's l2: 14.8086	valid's my_metric_4: 0.0870481
[88]	train's l2: 12.7067	train's my_metric_4: 0.0804241	valid's l2: 14.7026	valid's my_metric_4: 0.0866942
[89]	train's l2: 12.6108	train's my_metric_4: 0.0800855	valid's l2: 14.6289	valid's my_metric_4: 0.0864557
[90]	train's l2: 12.5356	train's my_metric_4: 0.0798289	valid's l2: 14.582	valid's my_metric_4: 0.0862678
[91]	train's l2: 12.466	train's my_metric_4: 0.0795699	valid's l2: 14.5421	valid's my_metric_4: 0.0861155
[92]	train's l2: 12.3775	train's my_metric_4: 0.0792438	valid's l2: 14.4433	valid's my_metric_4: 0.0857647
[93]	train's l2: 12.2959	train's my_metric_4: 0.0789497	valid's l2: 14.3727	valid's my_metric_4: 0.0854711
[94]	train's l2: 12.2042	train's my_metric_4: 0.0786202	valid's l2: 14.2944	valid's my_metric_4: 0.0851947
[95]	train's l2: 12.1442	train's my_metric_4: 0.0784069	valid's l2: 14.2712	valid's my_metric_4: 0.0851006
[96]	train's l2: 12.0724	train's my_met

[167]	train's l2: 9.29432	train's my_metric_4: 0.0675928	valid's l2: 12.4766	valid's my_metric_4: 0.078298
[168]	train's l2: 9.266	train's my_metric_4: 0.0674684	valid's l2: 12.448	valid's my_metric_4: 0.0782027
[169]	train's l2: 9.24163	train's my_metric_4: 0.0673841	valid's l2: 12.4414	valid's my_metric_4: 0.0781896
[170]	train's l2: 9.20505	train's my_metric_4: 0.0672296	valid's l2: 12.414	valid's my_metric_4: 0.0780776
[171]	train's l2: 9.18293	train's my_metric_4: 0.0671318	valid's l2: 12.3961	valid's my_metric_4: 0.0779933
[172]	train's l2: 9.16417	train's my_metric_4: 0.0670547	valid's l2: 12.3914	valid's my_metric_4: 0.0779802
[173]	train's l2: 9.14042	train's my_metric_4: 0.0669548	valid's l2: 12.3718	valid's my_metric_4: 0.077934
[174]	train's l2: 9.11842	train's my_metric_4: 0.0668603	valid's l2: 12.3584	valid's my_metric_4: 0.0778821
[175]	train's l2: 9.09415	train's my_metric_4: 0.0667648	valid's l2: 12.3477	valid's my_metric_4: 0.077836
[176]	train's l2: 9.06052	train's m

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	train's l2: 207.144	train's my_metric_4: 0.298101	valid's l2: 198.242	valid's my_metric_4: 0.288396
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 189.527	train's my_metric_4: 0.285422	valid's l2: 181.447	valid's my_metric_4: 0.276159
[3]	train's l2: 173.663	train's my_metric_4: 0.273529	valid's l2: 166.246	valid's my_metric_4: 0.264564
[4]	train's l2: 159.259	train's my_metric_4: 0.262323	valid's l2: 152.5	valid's my_metric_4: 0.253764
[5]	train's l2: 146.287	train's my_metric_4: 0.251824	valid's l2: 140.137	valid's my_metric_4: 0.243713
[6]	train's l2: 134.522	train's my_metric_4: 0.241929	valid's l2: 128.816	valid's my_metric_4: 0.234189
[7]	train's l2: 123.82	train's my_metric_4: 0.232621	valid's l2: 118.678	valid's my_metric_4: 0.225488
[8]	train's l2: 114.154	train's my_metric_4: 0.22394	valid's l2: 109.464	valid's my_metric_4: 0.217268
[9]	train's l2: 105.343	train's my_metric_4: 0.21576	valid's l2: 101.065	valid's my_metric_4: 0.209484
[10]	t

[102]	train's l2: 11.5722	train's my_metric_4: 0.0764737	valid's l2: 14.1519	valid's my_metric_4: 0.0826633
[103]	train's l2: 11.5151	train's my_metric_4: 0.0762779	valid's l2: 14.0958	valid's my_metric_4: 0.0824892
[104]	train's l2: 11.4544	train's my_metric_4: 0.0760584	valid's l2: 14.0389	valid's my_metric_4: 0.0823378
[105]	train's l2: 11.4049	train's my_metric_4: 0.0758701	valid's l2: 13.9945	valid's my_metric_4: 0.0821634
[106]	train's l2: 11.3517	train's my_metric_4: 0.0756851	valid's l2: 13.9642	valid's my_metric_4: 0.082072
[107]	train's l2: 11.2901	train's my_metric_4: 0.0754599	valid's l2: 13.9106	valid's my_metric_4: 0.0818903
[108]	train's l2: 11.2321	train's my_metric_4: 0.0752299	valid's l2: 13.8478	valid's my_metric_4: 0.0816811
[109]	train's l2: 11.1835	train's my_metric_4: 0.0750502	valid's l2: 13.8169	valid's my_metric_4: 0.0815463
[110]	train's l2: 11.1362	train's my_metric_4: 0.0748823	valid's l2: 13.79	valid's my_metric_4: 0.0814578
[111]	train's l2: 11.0831	train

[189]	train's l2: 8.63446	train's my_metric_4: 0.065072	valid's l2: 11.9595	valid's my_metric_4: 0.0749296
[190]	train's l2: 8.60044	train's my_metric_4: 0.0649296	valid's l2: 11.9377	valid's my_metric_4: 0.0748515
[191]	train's l2: 8.58193	train's my_metric_4: 0.0648341	valid's l2: 11.9235	valid's my_metric_4: 0.0748017
[192]	train's l2: 8.56608	train's my_metric_4: 0.0647629	valid's l2: 11.9199	valid's my_metric_4: 0.074786
[193]	train's l2: 8.53384	train's my_metric_4: 0.0646271	valid's l2: 11.8886	valid's my_metric_4: 0.0747058
[194]	train's l2: 8.50912	train's my_metric_4: 0.0645155	valid's l2: 11.8615	valid's my_metric_4: 0.0746061
[195]	train's l2: 8.48822	train's my_metric_4: 0.0644318	valid's l2: 11.8464	valid's my_metric_4: 0.0745612
[196]	train's l2: 8.4669	train's my_metric_4: 0.0643418	valid's l2: 11.8289	valid's my_metric_4: 0.0744919
[197]	train's l2: 8.43745	train's my_metric_4: 0.0642218	valid's l2: 11.8015	valid's my_metric_4: 0.0744242
[198]	train's l2: 8.40946	train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	train's l2: 205.889	train's my_metric_4: 0.297249	valid's l2: 210.577	valid's my_metric_4: 0.304816
Training until validation scores don't improve for 100 rounds.
[2]	train's l2: 188.487	train's my_metric_4: 0.284666	valid's l2: 192.943	valid's my_metric_4: 0.292029
[3]	train's l2: 172.719	train's my_metric_4: 0.272831	valid's l2: 177.037	valid's my_metric_4: 0.280049
[4]	train's l2: 158.455	train's my_metric_4: 0.261705	valid's l2: 162.618	valid's my_metric_4: 0.268883
[5]	train's l2: 145.544	train's my_metric_4: 0.251231	valid's l2: 149.637	valid's my_metric_4: 0.258454
[6]	train's l2: 133.856	train's my_metric_4: 0.24143	valid's l2: 137.815	valid's my_metric_4: 0.248693
[7]	train's l2: 123.281	train's my_metric_4: 0.23219	valid's l2: 127.148	valid's my_metric_4: 0.239456
[8]	train's l2: 113.68	train's my_metric_4: 0.223494	valid's l2: 117.416	valid's my_metric_4: 0.230767
[9]	train's l2: 104.994	train's my_metric_4: 0.215409	valid's l2: 108.646	valid's my_metric_4: 0.222739
[10]

[101]	train's l2: 11.4665	train's my_metric_4: 0.0759942	valid's l2: 16.0369	valid's my_metric_4: 0.0886242
[102]	train's l2: 11.395	train's my_metric_4: 0.0757237	valid's l2: 15.9698	valid's my_metric_4: 0.0883736
[103]	train's l2: 11.3445	train's my_metric_4: 0.0755431	valid's l2: 15.9377	valid's my_metric_4: 0.0882776
[104]	train's l2: 11.2865	train's my_metric_4: 0.0753122	valid's l2: 15.884	valid's my_metric_4: 0.088123
[105]	train's l2: 11.2366	train's my_metric_4: 0.075109	valid's l2: 15.8438	valid's my_metric_4: 0.0879497
[106]	train's l2: 11.1844	train's my_metric_4: 0.0749156	valid's l2: 15.8116	valid's my_metric_4: 0.0878475
[107]	train's l2: 11.1292	train's my_metric_4: 0.0746954	valid's l2: 15.7575	valid's my_metric_4: 0.0876524
[108]	train's l2: 11.0734	train's my_metric_4: 0.0744666	valid's l2: 15.7083	valid's my_metric_4: 0.0874876
[109]	train's l2: 11.0254	train's my_metric_4: 0.0742988	valid's l2: 15.6742	valid's my_metric_4: 0.087378
[110]	train's l2: 10.9675	train's

[192]	train's l2: 8.42491	train's my_metric_4: 0.0641077	valid's l2: 13.6272	valid's my_metric_4: 0.0800705
[193]	train's l2: 8.3977	train's my_metric_4: 0.063983	valid's l2: 13.6046	valid's my_metric_4: 0.0799912
[194]	train's l2: 8.36572	train's my_metric_4: 0.0638565	valid's l2: 13.5833	valid's my_metric_4: 0.0799031
[195]	train's l2: 8.34734	train's my_metric_4: 0.0637771	valid's l2: 13.5753	valid's my_metric_4: 0.0798713
[196]	train's l2: 8.32491	train's my_metric_4: 0.063697	valid's l2: 13.5497	valid's my_metric_4: 0.0798022
[197]	train's l2: 8.30415	train's my_metric_4: 0.0636061	valid's l2: 13.5336	valid's my_metric_4: 0.0797345
[198]	train's l2: 8.26735	train's my_metric_4: 0.0634473	valid's l2: 13.509	valid's my_metric_4: 0.0796073
[199]	train's l2: 8.24257	train's my_metric_4: 0.0633512	valid's l2: 13.4927	valid's my_metric_4: 0.0795553
[200]	train's l2: 8.21438	train's my_metric_4: 0.0632361	valid's l2: 13.4663	valid's my_metric_4: 0.079457
Did not meet early stopping. Best

[77]	train's l2: 13.5197	train's my_metric_4: 0.0835379	valid's l2: 16.9014	valid's my_metric_4: 0.09046
[78]	train's l2: 13.3943	train's my_metric_4: 0.0831251	valid's l2: 16.8006	valid's my_metric_4: 0.0901288
[79]	train's l2: 13.3004	train's my_metric_4: 0.0828208	valid's l2: 16.7284	valid's my_metric_4: 0.0898978
[80]	train's l2: 13.1933	train's my_metric_4: 0.0824307	valid's l2: 16.6275	valid's my_metric_4: 0.0895212
[81]	train's l2: 13.106	train's my_metric_4: 0.0821172	valid's l2: 16.5843	valid's my_metric_4: 0.0893158
[82]	train's l2: 13.0172	train's my_metric_4: 0.0818194	valid's l2: 16.5036	valid's my_metric_4: 0.0890787
[83]	train's l2: 12.92	train's my_metric_4: 0.0814744	valid's l2: 16.4157	valid's my_metric_4: 0.0887531
[84]	train's l2: 12.8257	train's my_metric_4: 0.08116	valid's l2: 16.3494	valid's my_metric_4: 0.0885402
[85]	train's l2: 12.7268	train's my_metric_4: 0.0807975	valid's l2: 16.2797	valid's my_metric_4: 0.0882936
[86]	train's l2: 12.6391	train's my_metric_4

[160]	train's l2: 9.29733	train's my_metric_4: 0.0678502	valid's l2: 14	valid's my_metric_4: 0.0797746
[161]	train's l2: 9.27327	train's my_metric_4: 0.0677562	valid's l2: 13.9785	valid's my_metric_4: 0.0797072
[162]	train's l2: 9.24324	train's my_metric_4: 0.0676335	valid's l2: 13.9678	valid's my_metric_4: 0.0796422
[163]	train's l2: 9.20449	train's my_metric_4: 0.0674764	valid's l2: 13.9461	valid's my_metric_4: 0.0795765
[164]	train's l2: 9.17058	train's my_metric_4: 0.067342	valid's l2: 13.9118	valid's my_metric_4: 0.07949
[165]	train's l2: 9.14043	train's my_metric_4: 0.0672285	valid's l2: 13.8997	valid's my_metric_4: 0.0794404
[166]	train's l2: 9.1232	train's my_metric_4: 0.0671581	valid's l2: 13.895	valid's my_metric_4: 0.0794207
[167]	train's l2: 9.10139	train's my_metric_4: 0.0670731	valid's l2: 13.8952	valid's my_metric_4: 0.0793936
[168]	train's l2: 9.08103	train's my_metric_4: 0.0669844	valid's l2: 13.8826	valid's my_metric_4: 0.0793364
[169]	train's l2: 9.0472	train's my_me

[51]	train's l2: 18.0349	train's my_metric_4: 0.0983018	valid's l2: 20.1406	valid's my_metric_4: 0.105152
[52]	train's l2: 17.7558	train's my_metric_4: 0.0975027	valid's l2: 19.9017	valid's my_metric_4: 0.104474
[53]	train's l2: 17.4889	train's my_metric_4: 0.0967394	valid's l2: 19.6765	valid's my_metric_4: 0.103838
[54]	train's l2: 17.2464	train's my_metric_4: 0.0959742	valid's l2: 19.4805	valid's my_metric_4: 0.103221
[55]	train's l2: 17.0349	train's my_metric_4: 0.0953192	valid's l2: 19.2912	valid's my_metric_4: 0.102669
[56]	train's l2: 16.8111	train's my_metric_4: 0.0946284	valid's l2: 19.1278	valid's my_metric_4: 0.102152
[57]	train's l2: 16.5816	train's my_metric_4: 0.0939144	valid's l2: 18.9261	valid's my_metric_4: 0.101529
[58]	train's l2: 16.3817	train's my_metric_4: 0.0932753	valid's l2: 18.7634	valid's my_metric_4: 0.100995
[59]	train's l2: 16.1994	train's my_metric_4: 0.0926404	valid's l2: 18.5972	valid's my_metric_4: 0.10044
[60]	train's l2: 16.02	train's my_metric_4: 0.0

[135]	train's l2: 10.1914	train's my_metric_4: 0.0712434	valid's l2: 13.72	valid's my_metric_4: 0.0838893
[136]	train's l2: 10.1674	train's my_metric_4: 0.0711394	valid's l2: 13.7104	valid's my_metric_4: 0.0838503
[137]	train's l2: 10.1348	train's my_metric_4: 0.0710084	valid's l2: 13.6745	valid's my_metric_4: 0.083747
[138]	train's l2: 10.1083	train's my_metric_4: 0.0709006	valid's l2: 13.66	valid's my_metric_4: 0.0836771
[139]	train's l2: 10.0782	train's my_metric_4: 0.0707806	valid's l2: 13.6404	valid's my_metric_4: 0.0836037
[140]	train's l2: 10.0242	train's my_metric_4: 0.0705658	valid's l2: 13.6135	valid's my_metric_4: 0.0834916
[141]	train's l2: 9.99529	train's my_metric_4: 0.0704658	valid's l2: 13.5972	valid's my_metric_4: 0.0834108
[142]	train's l2: 9.95525	train's my_metric_4: 0.070303	valid's l2: 13.5604	valid's my_metric_4: 0.0832656
[143]	train's l2: 9.91992	train's my_metric_4: 0.0701841	valid's l2: 13.5422	valid's my_metric_4: 0.0832098
[144]	train's l2: 9.8832	train's m

[25]	train's l2: 37.7046	train's my_metric_4: 0.138284	valid's l2: 39.6201	valid's my_metric_4: 0.140152
[26]	train's l2: 35.9601	train's my_metric_4: 0.135545	valid's l2: 37.8376	valid's my_metric_4: 0.137547
[27]	train's l2: 34.3685	train's my_metric_4: 0.132981	valid's l2: 36.2188	valid's my_metric_4: 0.135139
[28]	train's l2: 32.9083	train's my_metric_4: 0.130564	valid's l2: 34.7218	valid's my_metric_4: 0.132927
[29]	train's l2: 31.5773	train's my_metric_4: 0.128241	valid's l2: 33.3816	valid's my_metric_4: 0.130773
[30]	train's l2: 30.35	train's my_metric_4: 0.126117	valid's l2: 32.1405	valid's my_metric_4: 0.128831
[31]	train's l2: 29.2075	train's my_metric_4: 0.124029	valid's l2: 30.9746	valid's my_metric_4: 0.12689
[32]	train's l2: 28.1611	train's my_metric_4: 0.122027	valid's l2: 29.9082	valid's my_metric_4: 0.125049
[33]	train's l2: 27.1911	train's my_metric_4: 0.120184	valid's l2: 28.8998	valid's my_metric_4: 0.123345
[34]	train's l2: 26.3069	train's my_metric_4: 0.118409	val

[130]	train's l2: 10.3216	train's my_metric_4: 0.0718246	valid's l2: 13.0952	valid's my_metric_4: 0.0810937
[131]	train's l2: 10.2785	train's my_metric_4: 0.0716519	valid's l2: 13.0599	valid's my_metric_4: 0.080974
[132]	train's l2: 10.2511	train's my_metric_4: 0.0715351	valid's l2: 13.0405	valid's my_metric_4: 0.0808963
[133]	train's l2: 10.2179	train's my_metric_4: 0.071404	valid's l2: 13.0199	valid's my_metric_4: 0.0808112
[134]	train's l2: 10.192	train's my_metric_4: 0.0713062	valid's l2: 12.9981	valid's my_metric_4: 0.0807359
[135]	train's l2: 10.159	train's my_metric_4: 0.0711726	valid's l2: 12.9681	valid's my_metric_4: 0.0806241
[136]	train's l2: 10.1267	train's my_metric_4: 0.071036	valid's l2: 12.9449	valid's my_metric_4: 0.0805373
[137]	train's l2: 10.1018	train's my_metric_4: 0.0709397	valid's l2: 12.9215	valid's my_metric_4: 0.0804549
[138]	train's l2: 10.0735	train's my_metric_4: 0.0708233	valid's l2: 12.8895	valid's my_metric_4: 0.080359
[139]	train's l2: 10.0382	train's 

In [41]:
train_features = X_train_4.columns
model_4.feature_importances_
pd.DataFrame({'features':train_features, 'imp':model_4.feature_importances_}).sort_values('imp',ascending=False)

Unnamed: 0,features,imp
8,day,762
6,时间戳,616
20,风速*温度,592
21,温度*压强,582
5,湿度,467
3,温度,459
24,辐照度_差1,453
16,温度-mean_everyday,445
2,风向,435
12,辐照度-mean_everyday,429


In [42]:
prediction = pd.concat([prediction_1,prediction_2],axis=0,sort=False)
prediction = pd.concat([prediction,prediction_3],axis=0,sort=False)
prediction = pd.concat([prediction,prediction_4],axis=0,sort=False)
prediction

Unnamed: 0,id,predict_0,predict_1,predict_2,predict_3,predict_4,predict_5,predict_6,predict_7,predict_8,predict_9,prediction
0,1,-0.005700,-0.005666,-0.005397,-0.005330,-0.005877,-0.005483,-0.006162,-0.004791,-0.005830,-0.006389,-0.005662
1,2,-0.005700,-0.005666,-0.005397,-0.005330,-0.005877,-0.005483,-0.006162,-0.004791,-0.005830,-0.006389,-0.005662
2,3,-0.005700,-0.005666,-0.005397,-0.005330,-0.005877,-0.005483,-0.006162,-0.004791,-0.005830,-0.006389,-0.005662
3,4,-0.005700,-0.005666,-0.005397,-0.005330,-0.005877,-0.005483,-0.006162,-0.004791,-0.005830,-0.006389,-0.005662
4,5,-0.005700,-0.005666,-0.005397,-0.005330,-0.005877,-0.005483,-0.006162,-0.004791,-0.005830,-0.006389,-0.005662
5,6,-0.005700,-0.005666,-0.005397,-0.005330,-0.005877,-0.005483,-0.006162,-0.004791,-0.005830,-0.006389,-0.005662
6,7,-0.005700,-0.005666,-0.005397,-0.005330,-0.005877,-0.005483,-0.006162,-0.004791,-0.005830,-0.006389,-0.005662
7,8,-0.005700,-0.005666,-0.005397,-0.005330,-0.005877,-0.005483,-0.006162,-0.004791,-0.005830,-0.006389,-0.005662
8,9,-0.005700,-0.005666,-0.005397,-0.005330,-0.005877,-0.005483,-0.006162,-0.004791,-0.005830,-0.006389,-0.005662
9,10,-0.005700,-0.005666,-0.005397,-0.005330,-0.005877,-0.005483,-0.006162,-0.004791,-0.005830,-0.006389,-0.005662


In [43]:
now = datetime.datetime.now()
now = now.strftime('%m-%d-%H-%M')

prediction.rename(columns={'prediction':'predicition'},inplace=True)
prediction[['id', 'predicition']].to_csv("./result/baseline_%s.csv" % now, index=False)
