In [7]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split,GridSearchCV,KFold
from sklearn.preprocessing import StandardScaler,PolynomialFeatures
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.externals import joblib 
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor

from sklearn.decomposition import PCA
from sklearn.cross_validation import cross_val_score
from sklearn.linear_model import Ridge,RidgeCV,LinearRegression,Lasso
from sklearn.svm import SVR
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import xgboost as xgb
from lightgbm import LGBMRegressor
import math
%matplotlib inline

import keras
from keras.models import Sequential
from keras import layers
# from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras import optimizers

pd.set_option('display.max_colwidth',1000)
pd.set_option('display.height',1000)
pd.set_option('display.max_rows',500)
pd.set_option('display.max_columns',500)
pd.set_option('display.width',1000)

In [2]:
# 生成数据
def generate_train_data(train_data, test_data, poly=False, select=False):
    y = train_data['发电量']
    X = train_data.drop(['发电量','ID'], axis=1)
    # 去除ID后的test_data
    sub_data = test_data.drop(['ID'], axis=1)
    
    polynm = None
    if poly:

        polynm = PolynomialFeatures(degree=2, interaction_only=False)
        X = polynm.fit_transform(X)
        sub_data = polynm.transform(sub_data)
        
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
    
    sm = None
    if select:
        sm = SelectFromModel(GradientBoostingRegressor(random_state=2))
        X_train = sm.fit_transform(X_train, y_train)
        X_test = sm.transform(X_test)
        sub_data = sm.transform(sub_data)
        
    return X_train, X_test, y_train, y_test, sub_data, sm, polynm

def cal_score(mse):
    if isinstance(mse, float):
        return 1 / (1 + math.sqrt(mse))
    else:
        return np.divide(1, 1 + np.sqrt(mse))

#  定义交叉验证函数
def cross_validation_test(models, train_X_data, train_y_data, cv=5):
    model_name, mse_avg, score_avg = [], [], []
    for i, model in enumerate(models):
        print(i + 1,'- Model:', str(model).split('(')[0])
        model_name.append(str(i + 1) + '.' + str(model).split('(')[0])
        nmse = cross_val_score(model, train_X_data[i], train_y_data[i], cv=cv, scoring='neg_mean_squared_error')
        avg_mse = np.average(-nmse)
        scores = cal_score(-nmse)
        avg_score = np.average(scores)
        mse_avg.append(avg_mse)
        score_avg.append(avg_score)
        print('MSE:', -nmse)
        print('Score:', scores)
        print('Average XGB - MSE:', avg_mse, ' - Score:', avg_score, '\n')
    res = pd.DataFrame()
    res['Model'] = model_name
    res['Avg MSE'] = mse_avg
    res['Avg Score'] = score_avg
    return res

def add_avg(df):
    array = np.array(df["平均功率"])
    newarray=[]
    num = 0
    len_array = len(array)
    for i in np.arange(len_array):
        try:
            if i<10:
                num = (array[i-1]+array[i]+array[i+1])/3
            else:
                num = (array[i-1]+array[i-2]+array[i-3]+array[i-4]+array[i]+array[i+1]+array[i+2]+array[i+3]+array[i+4])/9
        except:
            num = (array[i-1]+array[i-2]+array[i-3]+array[i-4]+array[i]+array[i+1-len_array]+array[i+2-len_array]+array[i+3-len_array]+array[i+4-len_array])/9
        newarray.append(num)
    df["old平均功率"] = newarray
    return df

In [3]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
submit = pd.DataFrame()
submit['ID'] = list(test_data['ID'])
submit['发电量'] = 0
# 缺失值数据
special_missing_ID = test_data[test_data[(test_data == 0) | (test_data == 0.)].count(axis=1) > 13]['ID']

In [4]:
all_data = pd.concat([train_data, test_data], axis=0).sort_values(by='ID').reset_index().drop(['index'], axis=1)
bad_feature = ['功率A', '功率B', '功率C', '平均功率', '现场温度', '电压A', '电压B', '电压C', '电流B', '电流C', '转换效率', '转换效率A', '转换效率B', '转换效率C']

bad_index1 = all_data[bad_feature][
    (all_data[bad_feature] > all_data[bad_feature].mean() + 2 * all_data[bad_feature].std()) | 
    (all_data[bad_feature] < all_data[bad_feature].mean() - 2 * all_data[bad_feature].std())
].dropna(how='all').index

bad_index2 = all_data[
    ((all_data['电压A']<500)&(all_data['电压A']!=0))|
    ((all_data['电压B']<500)&(all_data['电压B']!=0))|
    ((all_data['电压C']<500)&(all_data['电压C']!=0))].index
bad_index = pd.Int64Index(list(bad_index1)+list(bad_index2))
# all_data.loc[np.concatenate([bad_index -1,bad_index,bad_index+1])].sort_values(by='ID', ascending=True)

# 坏样本相邻的index同样提取出来
nn_bad_data = all_data.loc[np.concatenate([bad_index - 1, bad_index, bad_index + 1])].sort_values(by='ID', ascending=True).drop_duplicates()
bad_data = all_data.loc[bad_index].sort_values(by='ID', ascending=True).drop_duplicates()
# 上下记录均值替代异常值
for idx, line in bad_data.iterrows():
    ID = line['ID']
    col_index = line[bad_feature][ 
        (line[bad_feature] > all_data[bad_feature].mean() + 3 * all_data[bad_feature].std())| 
        (line[bad_feature] < all_data[bad_feature].mean() - 3 * all_data[bad_feature].std())
    ].index
    index = all_data[all_data['ID'] == ID].index
    
    before_offset = 1
    while (idx + before_offset)in bad_index:
        before_offset += 1

    after_offset = 1
    while (idx + after_offset) in bad_index:
        after_offset += 1
     
    replace_value = (all_data.loc[index - before_offset, col_index].values + all_data.loc[index + after_offset, col_index].values) / (before_offset+after_offset)
    all_data.loc[index, col_index] = replace_value[0]

In [8]:
#拆分数据
train_data = all_data.drop(all_data[all_data['ID'].isin(submit['ID'])].index).reset_index().drop(['index'], axis=1)
test_data = all_data[all_data['ID'].isin(submit['ID'])].drop(['发电量'], axis=1).reset_index().drop(['index'], axis=1)
len(train_data), len(test_data)
# 去除重复值
train_data = train_data.drop_duplicates(train_data.columns.drop('ID'), keep='first')
train_data = add_avg(train_data)
test_data = add_avg(test_data)

X_train, X_test, y_train, y_test, sub_data, sm, polynm = generate_train_data(train_data, test_data, poly=True, select=False)
all_X_train = np.concatenate([X_train, X_test])
all_y_train = np.concatenate([y_train, y_test])

In [15]:
def lgb_cv():
    rmse_score = 0
    cvFold = 10
    result = submit.copy()
    result['发电量']=0
    for i in range(cvFold):
        print('第'+str(i)+'次交叉验证')
        X_train, X_test, y_train, y_test = train_test_split(all_X_train, all_y_train, test_size=0.2, random_state=i)

        lgb_clf = LGBMRegressor(boosting_type='gbdt', num_leaves=31, max_depth=-1,
                            learning_rate=0.1, n_estimators=1500, max_bin=225,
                            min_child_weight=0.01, min_child_samples=20, subsample=1, subsample_freq=1,
                            reg_alpha=0, reg_lambda=0, random_state=100*i+500, n_jobs=-1,
                            )

        lgb_clf.fit(X_train, y_train, eval_metric='rmse', eval_set=(X_test, y_test), early_stopping_rounds=100)

        y_pred = lgb_clf.predict(X_test, num_iteration=lgb_clf.best_iteration_)
        score = mean_squared_error(y_test, y_pred)

        y_pred_test = lgb_clf.predict(sub_data, num_iteration=lgb_clf.best_iteration_)
        result['发电量'] += y_pred_test

        score += 1/(1 + math.sqrt(score))
        rmse_score += score

        print('测试集 RMSE：', score)

    result['发电量'] = result['发电量']/cvFold

    print(rmse_score/cvFold)
    return result

In [16]:
result1 = lgb_cv()

第0次交叉验证
[1]	valid_0's rmse: 3.11044
Training until validation scores don't improve for 100 rounds.
[2]	valid_0's rmse: 2.80096
[3]	valid_0's rmse: 2.52284
[4]	valid_0's rmse: 2.27318
[5]	valid_0's rmse: 2.04857
[6]	valid_0's rmse: 1.84622
[7]	valid_0's rmse: 1.6649
[8]	valid_0's rmse: 1.50212
[9]	valid_0's rmse: 1.35639
[10]	valid_0's rmse: 1.22528
[11]	valid_0's rmse: 1.10843
[12]	valid_0's rmse: 1.00317
[13]	valid_0's rmse: 0.908812
[14]	valid_0's rmse: 0.824986
[15]	valid_0's rmse: 0.750439
[16]	valid_0's rmse: 0.683722
[17]	valid_0's rmse: 0.624861
[18]	valid_0's rmse: 0.572448
[19]	valid_0's rmse: 0.526239
[20]	valid_0's rmse: 0.485077
[21]	valid_0's rmse: 0.449179
[22]	valid_0's rmse: 0.417876
[23]	valid_0's rmse: 0.390754
[24]	valid_0's rmse: 0.367199
[25]	valid_0's rmse: 0.346804
[26]	valid_0's rmse: 0.329373
[27]	valid_0's rmse: 0.314367
[28]	valid_0's rmse: 0.301756
[29]	valid_0's rmse: 0.291092
[30]	valid_0's rmse: 0.282064
[31]	valid_0's rmse: 0.274529
[32]	valid_0's rmse: 

[269]	valid_0's rmse: 0.232137
[270]	valid_0's rmse: 0.232148
[271]	valid_0's rmse: 0.232145
[272]	valid_0's rmse: 0.232153
[273]	valid_0's rmse: 0.232159
[274]	valid_0's rmse: 0.232159
[275]	valid_0's rmse: 0.232129
[276]	valid_0's rmse: 0.232132
[277]	valid_0's rmse: 0.232136
[278]	valid_0's rmse: 0.232141
[279]	valid_0's rmse: 0.232136
[280]	valid_0's rmse: 0.232123
[281]	valid_0's rmse: 0.232124
[282]	valid_0's rmse: 0.232127
[283]	valid_0's rmse: 0.232121
[284]	valid_0's rmse: 0.232118
[285]	valid_0's rmse: 0.232111
[286]	valid_0's rmse: 0.232125
[287]	valid_0's rmse: 0.232116
[288]	valid_0's rmse: 0.232099
[289]	valid_0's rmse: 0.23208
[290]	valid_0's rmse: 0.232058
[291]	valid_0's rmse: 0.232049
[292]	valid_0's rmse: 0.23206
[293]	valid_0's rmse: 0.232037
[294]	valid_0's rmse: 0.232046
[295]	valid_0's rmse: 0.232029
[296]	valid_0's rmse: 0.232031
[297]	valid_0's rmse: 0.232046
[298]	valid_0's rmse: 0.232046
[299]	valid_0's rmse: 0.232042
[300]	valid_0's rmse: 0.232033
[301]	vali

[230]	valid_0's rmse: 0.191874
[231]	valid_0's rmse: 0.191833
[232]	valid_0's rmse: 0.191844
[233]	valid_0's rmse: 0.191852
[234]	valid_0's rmse: 0.191828
[235]	valid_0's rmse: 0.191895
[236]	valid_0's rmse: 0.191861
[237]	valid_0's rmse: 0.191794
[238]	valid_0's rmse: 0.191808
[239]	valid_0's rmse: 0.191808
[240]	valid_0's rmse: 0.191824
[241]	valid_0's rmse: 0.191828
[242]	valid_0's rmse: 0.191831
[243]	valid_0's rmse: 0.19183
[244]	valid_0's rmse: 0.191844
[245]	valid_0's rmse: 0.191871
[246]	valid_0's rmse: 0.191867
[247]	valid_0's rmse: 0.191893
[248]	valid_0's rmse: 0.191865
[249]	valid_0's rmse: 0.191878
[250]	valid_0's rmse: 0.191842
[251]	valid_0's rmse: 0.191831
[252]	valid_0's rmse: 0.191823
[253]	valid_0's rmse: 0.191853
[254]	valid_0's rmse: 0.191815
[255]	valid_0's rmse: 0.191838
[256]	valid_0's rmse: 0.191815
[257]	valid_0's rmse: 0.191837
[258]	valid_0's rmse: 0.191856
[259]	valid_0's rmse: 0.191823
[260]	valid_0's rmse: 0.19184
[261]	valid_0's rmse: 0.191837
[262]	vali

[21]	valid_0's rmse: 0.420183
[22]	valid_0's rmse: 0.383997
[23]	valid_0's rmse: 0.351825
[24]	valid_0's rmse: 0.323332
[25]	valid_0's rmse: 0.29812
[26]	valid_0's rmse: 0.275464
[27]	valid_0's rmse: 0.25561
[28]	valid_0's rmse: 0.238346
[29]	valid_0's rmse: 0.223039
[30]	valid_0's rmse: 0.209537
[31]	valid_0's rmse: 0.198195
[32]	valid_0's rmse: 0.188181
[33]	valid_0's rmse: 0.179489
[34]	valid_0's rmse: 0.172017
[35]	valid_0's rmse: 0.165703
[36]	valid_0's rmse: 0.159873
[37]	valid_0's rmse: 0.155445
[38]	valid_0's rmse: 0.151273
[39]	valid_0's rmse: 0.148285
[40]	valid_0's rmse: 0.145306
[41]	valid_0's rmse: 0.143203
[42]	valid_0's rmse: 0.141204
[43]	valid_0's rmse: 0.13927
[44]	valid_0's rmse: 0.137426
[45]	valid_0's rmse: 0.136271
[46]	valid_0's rmse: 0.135183
[47]	valid_0's rmse: 0.134499
[48]	valid_0's rmse: 0.133498
[49]	valid_0's rmse: 0.132774
[50]	valid_0's rmse: 0.132044
[51]	valid_0's rmse: 0.131452
[52]	valid_0's rmse: 0.130851
[53]	valid_0's rmse: 0.130539
[54]	valid_0'

[290]	valid_0's rmse: 0.119967
[291]	valid_0's rmse: 0.119939
[292]	valid_0's rmse: 0.119908
[293]	valid_0's rmse: 0.119868
[294]	valid_0's rmse: 0.119907
[295]	valid_0's rmse: 0.119948
[296]	valid_0's rmse: 0.119976
[297]	valid_0's rmse: 0.120036
[298]	valid_0's rmse: 0.120007
[299]	valid_0's rmse: 0.120082
[300]	valid_0's rmse: 0.120058
[301]	valid_0's rmse: 0.120123
[302]	valid_0's rmse: 0.120106
[303]	valid_0's rmse: 0.120132
[304]	valid_0's rmse: 0.12011
[305]	valid_0's rmse: 0.120114
[306]	valid_0's rmse: 0.120147
[307]	valid_0's rmse: 0.120196
[308]	valid_0's rmse: 0.120208
[309]	valid_0's rmse: 0.120256
[310]	valid_0's rmse: 0.120256
[311]	valid_0's rmse: 0.120252
[312]	valid_0's rmse: 0.120309
[313]	valid_0's rmse: 0.120357
[314]	valid_0's rmse: 0.120365
[315]	valid_0's rmse: 0.120382
[316]	valid_0's rmse: 0.120409
[317]	valid_0's rmse: 0.120392
[318]	valid_0's rmse: 0.120368
[319]	valid_0's rmse: 0.120401
[320]	valid_0's rmse: 0.120366
[321]	valid_0's rmse: 0.120367
[322]	val

[215]	valid_0's rmse: 0.102988
[216]	valid_0's rmse: 0.103074
[217]	valid_0's rmse: 0.103002
[218]	valid_0's rmse: 0.102961
[219]	valid_0's rmse: 0.102954
[220]	valid_0's rmse: 0.102989
[221]	valid_0's rmse: 0.102965
[222]	valid_0's rmse: 0.102973
[223]	valid_0's rmse: 0.103021
[224]	valid_0's rmse: 0.103059
[225]	valid_0's rmse: 0.103153
[226]	valid_0's rmse: 0.103208
[227]	valid_0's rmse: 0.103325
[228]	valid_0's rmse: 0.103281
[229]	valid_0's rmse: 0.103321
[230]	valid_0's rmse: 0.103359
[231]	valid_0's rmse: 0.103394
[232]	valid_0's rmse: 0.103453
[233]	valid_0's rmse: 0.10344
[234]	valid_0's rmse: 0.103474
[235]	valid_0's rmse: 0.103488
[236]	valid_0's rmse: 0.103491
[237]	valid_0's rmse: 0.103626
[238]	valid_0's rmse: 0.103628
[239]	valid_0's rmse: 0.103661
[240]	valid_0's rmse: 0.103664
[241]	valid_0's rmse: 0.103692
[242]	valid_0's rmse: 0.103707
[243]	valid_0's rmse: 0.103717
[244]	valid_0's rmse: 0.103723
[245]	valid_0's rmse: 0.103799
[246]	valid_0's rmse: 0.103895
[247]	val

[214]	valid_0's rmse: 0.100425
[215]	valid_0's rmse: 0.100485
[216]	valid_0's rmse: 0.100538
[217]	valid_0's rmse: 0.100636
[218]	valid_0's rmse: 0.10067
[219]	valid_0's rmse: 0.100597
[220]	valid_0's rmse: 0.100635
[221]	valid_0's rmse: 0.100702
[222]	valid_0's rmse: 0.100635
[223]	valid_0's rmse: 0.10056
[224]	valid_0's rmse: 0.100551
[225]	valid_0's rmse: 0.100565
[226]	valid_0's rmse: 0.100539
[227]	valid_0's rmse: 0.100507
[228]	valid_0's rmse: 0.100491
[229]	valid_0's rmse: 0.100517
[230]	valid_0's rmse: 0.100581
[231]	valid_0's rmse: 0.10068
[232]	valid_0's rmse: 0.100674
Early stopping, best iteration is:
[132]	valid_0's rmse: 0.0996088
测试集 RMSE： 0.9193362495762213
第5次交叉验证
[1]	valid_0's rmse: 3.09074
Training until validation scores don't improve for 100 rounds.
[2]	valid_0's rmse: 2.78282
[3]	valid_0's rmse: 2.50616
[4]	valid_0's rmse: 2.25733
[5]	valid_0's rmse: 2.03388
[6]	valid_0's rmse: 1.83232
[7]	valid_0's rmse: 1.65174
[8]	valid_0's rmse: 1.48912
[9]	valid_0's rmse: 1.3

[56]	valid_0's rmse: 0.113125
[57]	valid_0's rmse: 0.11287
[58]	valid_0's rmse: 0.112796
[59]	valid_0's rmse: 0.112548
[60]	valid_0's rmse: 0.112255
[61]	valid_0's rmse: 0.112028
[62]	valid_0's rmse: 0.111872
[63]	valid_0's rmse: 0.111777
[64]	valid_0's rmse: 0.111979
[65]	valid_0's rmse: 0.111793
[66]	valid_0's rmse: 0.111565
[67]	valid_0's rmse: 0.111519
[68]	valid_0's rmse: 0.111279
[69]	valid_0's rmse: 0.11115
[70]	valid_0's rmse: 0.111004
[71]	valid_0's rmse: 0.110837
[72]	valid_0's rmse: 0.110879
[73]	valid_0's rmse: 0.110728
[74]	valid_0's rmse: 0.110534
[75]	valid_0's rmse: 0.110572
[76]	valid_0's rmse: 0.110451
[77]	valid_0's rmse: 0.110447
[78]	valid_0's rmse: 0.110492
[79]	valid_0's rmse: 0.110477
[80]	valid_0's rmse: 0.110386
[81]	valid_0's rmse: 0.110301
[82]	valid_0's rmse: 0.110274
[83]	valid_0's rmse: 0.110242
[84]	valid_0's rmse: 0.110104
[85]	valid_0's rmse: 0.109949
[86]	valid_0's rmse: 0.10994
[87]	valid_0's rmse: 0.109859
[88]	valid_0's rmse: 0.109905
[89]	valid_0'

[42]	valid_0's rmse: 0.127679
[43]	valid_0's rmse: 0.12576
[44]	valid_0's rmse: 0.124351
[45]	valid_0's rmse: 0.122858
[46]	valid_0's rmse: 0.121222
[47]	valid_0's rmse: 0.120319
[48]	valid_0's rmse: 0.119285
[49]	valid_0's rmse: 0.118341
[50]	valid_0's rmse: 0.117625
[51]	valid_0's rmse: 0.11695
[52]	valid_0's rmse: 0.116462
[53]	valid_0's rmse: 0.116174
[54]	valid_0's rmse: 0.115713
[55]	valid_0's rmse: 0.115061
[56]	valid_0's rmse: 0.114632
[57]	valid_0's rmse: 0.114328
[58]	valid_0's rmse: 0.113838
[59]	valid_0's rmse: 0.113705
[60]	valid_0's rmse: 0.113373
[61]	valid_0's rmse: 0.113052
[62]	valid_0's rmse: 0.112937
[63]	valid_0's rmse: 0.112604
[64]	valid_0's rmse: 0.112509
[65]	valid_0's rmse: 0.112261
[66]	valid_0's rmse: 0.112241
[67]	valid_0's rmse: 0.112053
[68]	valid_0's rmse: 0.111873
[69]	valid_0's rmse: 0.111788
[70]	valid_0's rmse: 0.111704
[71]	valid_0's rmse: 0.111773
[72]	valid_0's rmse: 0.111612
[73]	valid_0's rmse: 0.111273
[74]	valid_0's rmse: 0.111226
[75]	valid_0

[102]	valid_0's rmse: 0.125305
[103]	valid_0's rmse: 0.125234
[104]	valid_0's rmse: 0.125154
[105]	valid_0's rmse: 0.125182
[106]	valid_0's rmse: 0.125232
[107]	valid_0's rmse: 0.125102
[108]	valid_0's rmse: 0.125076
[109]	valid_0's rmse: 0.125105
[110]	valid_0's rmse: 0.125037
[111]	valid_0's rmse: 0.125131
[112]	valid_0's rmse: 0.125166
[113]	valid_0's rmse: 0.125159
[114]	valid_0's rmse: 0.12512
[115]	valid_0's rmse: 0.125124
[116]	valid_0's rmse: 0.125113
[117]	valid_0's rmse: 0.125051
[118]	valid_0's rmse: 0.125108
[119]	valid_0's rmse: 0.125056
[120]	valid_0's rmse: 0.125058
[121]	valid_0's rmse: 0.124992
[122]	valid_0's rmse: 0.125128
[123]	valid_0's rmse: 0.125101
[124]	valid_0's rmse: 0.125099
[125]	valid_0's rmse: 0.125033
[126]	valid_0's rmse: 0.125012
[127]	valid_0's rmse: 0.124947
[128]	valid_0's rmse: 0.124887
[129]	valid_0's rmse: 0.124894
[130]	valid_0's rmse: 0.124835
[131]	valid_0's rmse: 0.12479
[132]	valid_0's rmse: 0.124795
[133]	valid_0's rmse: 0.124752
[134]	vali

[69]	valid_0's rmse: 0.102119
[70]	valid_0's rmse: 0.101869
[71]	valid_0's rmse: 0.101609
[72]	valid_0's rmse: 0.101551
[73]	valid_0's rmse: 0.101381
[74]	valid_0's rmse: 0.101225
[75]	valid_0's rmse: 0.101051
[76]	valid_0's rmse: 0.101126
[77]	valid_0's rmse: 0.10104
[78]	valid_0's rmse: 0.101216
[79]	valid_0's rmse: 0.101095
[80]	valid_0's rmse: 0.100967
[81]	valid_0's rmse: 0.100922
[82]	valid_0's rmse: 0.100802
[83]	valid_0's rmse: 0.100734
[84]	valid_0's rmse: 0.100882
[85]	valid_0's rmse: 0.100697
[86]	valid_0's rmse: 0.100567
[87]	valid_0's rmse: 0.100606
[88]	valid_0's rmse: 0.100493
[89]	valid_0's rmse: 0.1008
[90]	valid_0's rmse: 0.100687
[91]	valid_0's rmse: 0.100661
[92]	valid_0's rmse: 0.100585
[93]	valid_0's rmse: 0.10053
[94]	valid_0's rmse: 0.100542
[95]	valid_0's rmse: 0.100377
[96]	valid_0's rmse: 0.100383
[97]	valid_0's rmse: 0.100283
[98]	valid_0's rmse: 0.100377
[99]	valid_0's rmse: 0.100185
[100]	valid_0's rmse: 0.100108
[101]	valid_0's rmse: 0.100001
[102]	valid_

In [43]:
def xgb_cv():
    rmse_score = 0
    cvFold = 10
    result = submit.copy()
    for i in range(cvFold):
        print('第'+str(i)+'次交叉验证')
        X_train, X_test, y_train, y_test = train_test_split(all_X_train, all_y_train, test_size=0.2, random_state=i)

        xgb_clf = xgb.XGBRegressor(max_depth=5,learning_rate=0.1, n_estimators=1500,random_state=100*i+500, n_jobs=8)

        xgb_clf.fit(X_train, y_train, eval_metric='rmse')

        y_pred = xgb_clf.predict(X_test)
        score = mean_squared_error(y_test, y_pred)

        y_pred_test = xgb_clf.predict(sub_data, num_iteration=lgb_clf.best_iteration_)
        submit['发电量'] += y_pred_test

        score += 1/(1 + math.sqrt(score))
        rmse_score += score

        print('测试集 RMSE：', score)

    result['发电量'] = result['发电量']/cvFold

    print(rmse_score/cvFold)
    return result

In [44]:
all_X_train.shape

(8918, 231)

In [37]:
def pip_all():
    pipelines = []
    pipelines.append(('ScaledLR', Pipeline([('Scaler', StandardScaler()),('LR',LinearRegression())])))
    pipelines.append(('ScaledLASSO', Pipeline([('Scaler', StandardScaler()),('LASSO', Lasso())])))

    pipelines.append(('ScaledKNN', Pipeline([('Scaler', StandardScaler()),('KNN', KNeighborsRegressor())])))
    pipelines.append(('ScaledCART', Pipeline([('Scaler', StandardScaler()),('CART', DecisionTreeRegressor())])))
    pipelines.append(('ScaledGBM', Pipeline([('Scaler', StandardScaler()),('GBM', GradientBoostingRegressor())])))
#     X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, test_size=0.2, random_state=i)
    results = []
    names = []
    for name, model in pipelines:
        kfold = KFold(n_splits=10, random_state=21)
        cv_results = cross_val_score(model, all_X_train, all_y_train, cv=kfold, scoring='neg_mean_squared_error')
        results.append(cv_results)
        names.append(name)
        msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
        print(msg)
# def pip_all():
#     result = submit.copy()
#     pipelines = []
#     pipelines.append(('ScaledCART', Pipeline([('Scaler', StandardScaler()),('CART', DecisionTreeRegressor())])))
#     pipelines.append(('ScaledGBM', Pipeline([('Scaler', StandardScaler()),('GBM', GradientBoostingRegressor())])))

#     results = []
#     names = []
#     for name, model in pipelines:
#         kfold = KFold(n_splits=10, random_state=21)
#         nmse = cross_val_score(model, all_X_train, all_y_train, cv=kfold, scoring='neg_mean_squared_error')

#         result['发电量'] += model.predict(test_data)
#         avg_mse = np.average(-nmse)
#         scores = cal_score(-nmse)
        
#         avg_score = np.average(scores)

#         print('MSE:', -nmse)
#         print('Score:', scores)
#         print('Average-MSE:', avg_mse, ' - Score:', avg_score, '\n')
#     result['发电量'] /= 6
#     return result

In [38]:
result3 = pip_all()

TypeError: 'KFold' object is not iterable

In [41]:
from keras import backend

def rmse(y_true, y_pred):
    return backend.sqrt(backend.mean(backend.square(y_pred - y_true), axis=-1))

def pred_nn():
    scaler = StandardScaler()
    train_x = all_X_train.copy()
    train_y = all_y_train.copy()
    train_x = scaler.fit_transform(train_x)
    
#     train_x = train_x.fillna(0)
#     test_x = test_x.fillna(0)
    test_x = sub_data.copy()
    test_x = scaler.transform(test_x)
    
    result = submit.copy()
    rmse_score = 0
    
    for i in range(10):
        X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, test_size=0.2, random_state=i)

        X_train = X_train

        row,col = X_train.shape

        model = Sequential()

        model.add(layers.Dense(32,activation='relu',input_shape=(col,)))
#         model.add(layers.normalization.BatchNormalization())
#         model.add(layers.core.Dropout(0.1))
        model.add(layers.Dense(32,kernel_initializer='normal', activation='relu'))
#         model.add(layers.core.Dropout(0.1))
        model.add(layers.Dense(16,kernel_initializer='normal', activation='relu'))
        model.add(layers.core.Dropout(0.1))
        model.add(layers.Dense(8,kernel_initializer='normal', activation='relu'))
        model.add(layers.core.Dropout(0.1))
        
        model.add(layers.Dense(1, kernel_initializer='normal'))

        model.summary()
        model.compile(optimizer=optimizers.Adadelta(),
                      loss='mse',
                      metrics=[rmse])

        # callbacks_list = [
        #     keras.callbacks.TensorBoard(
        #         log_dir="LOF_drop_model_1",
        #         write_graph=True),
        #     keras.callbacks.ModelCheckpoint(
        #         filepath="LOF_drop_model_1.h5",
        #         monitor="val_acc",
        #         save_best_only=True
        #     )
        # ],validation_data=(X_test, y_test)validation_split=0.2,

        model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test), batch_size=100, verbose=2, shuffle=True)

        y_prob_test = model.predict(X_test)

        mse = mean_squared_error(y_test, y_prob_test)
        print(mse)
        score = 1/(1 + math.sqrt(mse))
        rmse_score += score

        print('测试集 rmse：',score)
        y_prob = model.predict(test_x)
#         print(y_prob)

        result['发电量'] += y_prob[:, 0]
    result['发电量'] = result['发电量']/10
    
    print(rmse_score/10)
#     return result
pred_nn()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_430 (Dense)            (None, 64)                14848     
_________________________________________________________________
dense_431 (Dense)            (None, 32)                2080      
_________________________________________________________________
dense_432 (Dense)            (None, 16)                528       
_________________________________________________________________
dense_433 (Dense)            (None, 8)                 136       
_________________________________________________________________
dense_434 (Dense)            (None, 1)                 9         
Total params: 17,601
Trainable params: 17,601
Non-trainable params: 0
_________________________________________________________________
Train on 7134 samples, validate on 1784 samples
Epoch 1/100
 - 7s - loss: 29.4262 - rmse: 4.4171 - val_loss: 6.9170 - val_rmse: 2.1524
Epo

Epoch 84/100
 - 0s - loss: 0.0112 - rmse: 0.0574 - val_loss: 0.0670 - val_rmse: 0.0676
Epoch 85/100
 - 0s - loss: 0.0107 - rmse: 0.0545 - val_loss: 0.0681 - val_rmse: 0.0726
Epoch 86/100
 - 0s - loss: 0.0124 - rmse: 0.0640 - val_loss: 0.1012 - val_rmse: 0.1689
Epoch 87/100
 - 0s - loss: 0.0177 - rmse: 0.0811 - val_loss: 0.0675 - val_rmse: 0.0719
Epoch 88/100
 - 0s - loss: 0.0122 - rmse: 0.0581 - val_loss: 0.0659 - val_rmse: 0.0570
Epoch 89/100
 - 0s - loss: 0.0112 - rmse: 0.0516 - val_loss: 0.0645 - val_rmse: 0.0649
Epoch 90/100
 - 0s - loss: 0.0119 - rmse: 0.0520 - val_loss: 0.0660 - val_rmse: 0.0667
Epoch 91/100
 - 0s - loss: 0.0130 - rmse: 0.0519 - val_loss: 0.0664 - val_rmse: 0.0623
Epoch 92/100
 - 0s - loss: 0.0108 - rmse: 0.0539 - val_loss: 0.0668 - val_rmse: 0.0738
Epoch 93/100
 - 0s - loss: 0.0114 - rmse: 0.0598 - val_loss: 0.0648 - val_rmse: 0.0613
Epoch 94/100
 - 0s - loss: 0.0106 - rmse: 0.0560 - val_loss: 0.0693 - val_rmse: 0.0839
Epoch 95/100
 - 0s - loss: 0.0119 - rmse: 0

Epoch 66/100
 - 0s - loss: 0.0227 - rmse: 0.0642 - val_loss: 0.0470 - val_rmse: 0.0657
Epoch 67/100
 - 0s - loss: 0.0220 - rmse: 0.0601 - val_loss: 0.0461 - val_rmse: 0.0606
Epoch 68/100
 - 0s - loss: 0.0230 - rmse: 0.0657 - val_loss: 0.0463 - val_rmse: 0.0640
Epoch 69/100
 - 0s - loss: 0.0247 - rmse: 0.0693 - val_loss: 0.0694 - val_rmse: 0.1351
Epoch 70/100
 - 0s - loss: 0.0257 - rmse: 0.0749 - val_loss: 0.0468 - val_rmse: 0.0659
Epoch 71/100
 - 0s - loss: 0.0213 - rmse: 0.0571 - val_loss: 0.0487 - val_rmse: 0.0704
Epoch 72/100
 - 0s - loss: 0.0229 - rmse: 0.0657 - val_loss: 0.0458 - val_rmse: 0.0643
Epoch 73/100
 - 0s - loss: 0.0209 - rmse: 0.0561 - val_loss: 0.0502 - val_rmse: 0.0788
Epoch 74/100
 - 0s - loss: 0.0237 - rmse: 0.0666 - val_loss: 0.0481 - val_rmse: 0.0763
Epoch 75/100
 - 0s - loss: 0.0255 - rmse: 0.0774 - val_loss: 0.0452 - val_rmse: 0.0716
Epoch 76/100
 - 0s - loss: 0.0239 - rmse: 0.0702 - val_loss: 0.0429 - val_rmse: 0.0599
Epoch 77/100
 - 0s - loss: 0.0230 - rmse: 0

Epoch 48/100
 - 0s - loss: 0.0233 - rmse: 0.0698 - val_loss: 0.0169 - val_rmse: 0.0652
Epoch 49/100
 - 0s - loss: 0.0215 - rmse: 0.0586 - val_loss: 0.0184 - val_rmse: 0.0673
Epoch 50/100
 - 0s - loss: 0.0231 - rmse: 0.0670 - val_loss: 0.0164 - val_rmse: 0.0600
Epoch 51/100
 - 0s - loss: 0.0241 - rmse: 0.0681 - val_loss: 0.0163 - val_rmse: 0.0602
Epoch 52/100
 - 0s - loss: 0.0221 - rmse: 0.0586 - val_loss: 0.0188 - val_rmse: 0.0717
Epoch 53/100
 - 0s - loss: 0.0243 - rmse: 0.0677 - val_loss: 0.0302 - val_rmse: 0.1121
Epoch 54/100
 - 0s - loss: 0.0247 - rmse: 0.0691 - val_loss: 0.0229 - val_rmse: 0.0960
Epoch 55/100
 - 0s - loss: 0.0237 - rmse: 0.0692 - val_loss: 0.0162 - val_rmse: 0.0585
Epoch 56/100
 - 0s - loss: 0.0231 - rmse: 0.0661 - val_loss: 0.0177 - val_rmse: 0.0624
Epoch 57/100
 - 0s - loss: 0.0240 - rmse: 0.0682 - val_loss: 0.0186 - val_rmse: 0.0637
Epoch 58/100
 - 0s - loss: 0.0236 - rmse: 0.0654 - val_loss: 0.0185 - val_rmse: 0.0709
Epoch 59/100
 - 0s - loss: 0.0212 - rmse: 0

Epoch 30/100
 - 0s - loss: 0.0269 - rmse: 0.0760 - val_loss: 0.0465 - val_rmse: 0.0846
Epoch 31/100
 - 0s - loss: 0.0271 - rmse: 0.0758 - val_loss: 0.0505 - val_rmse: 0.1140
Epoch 32/100
 - 0s - loss: 0.0268 - rmse: 0.0743 - val_loss: 0.0425 - val_rmse: 0.0761
Epoch 33/100
 - 0s - loss: 0.0272 - rmse: 0.0734 - val_loss: 0.0416 - val_rmse: 0.0705
Epoch 34/100
 - 0s - loss: 0.0271 - rmse: 0.0721 - val_loss: 0.0412 - val_rmse: 0.0759
Epoch 35/100
 - 0s - loss: 0.0314 - rmse: 0.0860 - val_loss: 0.0463 - val_rmse: 0.0990
Epoch 36/100
 - 0s - loss: 0.0276 - rmse: 0.0785 - val_loss: 0.0448 - val_rmse: 0.0938
Epoch 37/100
 - 0s - loss: 0.0261 - rmse: 0.0721 - val_loss: 0.0437 - val_rmse: 0.0801
Epoch 38/100
 - 0s - loss: 0.0254 - rmse: 0.0703 - val_loss: 0.0414 - val_rmse: 0.0679
Epoch 39/100
 - 0s - loss: 0.0285 - rmse: 0.0818 - val_loss: 0.0422 - val_rmse: 0.0706
Epoch 40/100
 - 0s - loss: 0.0267 - rmse: 0.0737 - val_loss: 0.0444 - val_rmse: 0.0889
Epoch 41/100
 - 0s - loss: 0.0263 - rmse: 0

Epoch 12/100
 - 0s - loss: 0.0394 - rmse: 0.0955 - val_loss: 0.0275 - val_rmse: 0.0992
Epoch 13/100
 - 0s - loss: 0.0385 - rmse: 0.0933 - val_loss: 0.0262 - val_rmse: 0.0973
Epoch 14/100
 - 0s - loss: 0.0370 - rmse: 0.0900 - val_loss: 0.0354 - val_rmse: 0.1257
Epoch 15/100
 - 0s - loss: 0.0368 - rmse: 0.0922 - val_loss: 0.0225 - val_rmse: 0.0838
Epoch 16/100
 - 0s - loss: 0.0354 - rmse: 0.0878 - val_loss: 0.0212 - val_rmse: 0.0786
Epoch 17/100
 - 0s - loss: 0.0343 - rmse: 0.0857 - val_loss: 0.0210 - val_rmse: 0.0765
Epoch 18/100
 - 0s - loss: 0.0338 - rmse: 0.0849 - val_loss: 0.0200 - val_rmse: 0.0761
Epoch 19/100
 - 0s - loss: 0.0333 - rmse: 0.0828 - val_loss: 0.0188 - val_rmse: 0.0720
Epoch 20/100
 - 0s - loss: 0.0321 - rmse: 0.0797 - val_loss: 0.0186 - val_rmse: 0.0703
Epoch 21/100
 - 0s - loss: 0.0306 - rmse: 0.0787 - val_loss: 0.0207 - val_rmse: 0.0843
Epoch 22/100
 - 0s - loss: 0.0312 - rmse: 0.0809 - val_loss: 0.0195 - val_rmse: 0.0812
Epoch 23/100
 - 0s - loss: 0.0312 - rmse: 0

 - 7s - loss: 30.0233 - rmse: 4.4818 - val_loss: 8.2199 - val_rmse: 2.3362
Epoch 2/100
 - 0s - loss: 4.4856 - rmse: 1.6967 - val_loss: 2.0232 - val_rmse: 1.1430
Epoch 3/100
 - 0s - loss: 0.9667 - rmse: 0.7284 - val_loss: 0.3257 - val_rmse: 0.3883
Epoch 4/100
 - 0s - loss: 0.1839 - rmse: 0.2755 - val_loss: 0.1704 - val_rmse: 0.2271
Epoch 5/100
 - 0s - loss: 0.1058 - rmse: 0.1954 - val_loss: 0.1207 - val_rmse: 0.1833
Epoch 6/100
 - 0s - loss: 0.0771 - rmse: 0.1612 - val_loss: 0.0940 - val_rmse: 0.1514
Epoch 7/100
 - 0s - loss: 0.0608 - rmse: 0.1391 - val_loss: 0.0797 - val_rmse: 0.1391
Epoch 8/100
 - 0s - loss: 0.0509 - rmse: 0.1227 - val_loss: 0.0833 - val_rmse: 0.1425
Epoch 9/100
 - 0s - loss: 0.0461 - rmse: 0.1165 - val_loss: 0.0818 - val_rmse: 0.1178
Epoch 10/100
 - 0s - loss: 0.0412 - rmse: 0.1057 - val_loss: 0.0967 - val_rmse: 0.1387
Epoch 11/100
 - 0s - loss: 0.0400 - rmse: 0.1054 - val_loss: 0.0876 - val_rmse: 0.1031
Epoch 12/100
 - 0s - loss: 0.0383 - rmse: 0.1022 - val_loss: 0.

Epoch 96/100
 - 0s - loss: 0.0209 - rmse: 0.0583 - val_loss: 0.1326 - val_rmse: 0.0667
Epoch 97/100
 - 0s - loss: 0.0220 - rmse: 0.0627 - val_loss: 0.1298 - val_rmse: 0.0670
Epoch 98/100
 - 0s - loss: 0.0213 - rmse: 0.0577 - val_loss: 0.1298 - val_rmse: 0.0661
Epoch 99/100
 - 0s - loss: 0.0212 - rmse: 0.0587 - val_loss: 0.1365 - val_rmse: 0.0815
Epoch 100/100
 - 0s - loss: 0.0218 - rmse: 0.0617 - val_loss: 0.1321 - val_rmse: 0.0659
0.1321188248067944
测试集 rmse： 0.7334166072826889
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_460 (Dense)            (None, 64)                14848     
_________________________________________________________________
dense_461 (Dense)            (None, 32)                2080      
_________________________________________________________________
dense_462 (Dense)            (None, 16)                528       
______________________________________________________

Epoch 78/100
 - 0s - loss: 0.0205 - rmse: 0.0572 - val_loss: 0.0605 - val_rmse: 0.0628
Epoch 79/100
 - 0s - loss: 0.0222 - rmse: 0.0652 - val_loss: 0.0588 - val_rmse: 0.0617
Epoch 80/100
 - 0s - loss: 0.0230 - rmse: 0.0683 - val_loss: 0.0605 - val_rmse: 0.0780
Epoch 81/100
 - 0s - loss: 0.0213 - rmse: 0.0572 - val_loss: 0.0607 - val_rmse: 0.0615
Epoch 82/100
 - 0s - loss: 0.0234 - rmse: 0.0658 - val_loss: 0.0588 - val_rmse: 0.0608
Epoch 83/100
 - 0s - loss: 0.0258 - rmse: 0.0693 - val_loss: 0.0584 - val_rmse: 0.0666
Epoch 84/100
 - 0s - loss: 0.0247 - rmse: 0.0674 - val_loss: 0.0632 - val_rmse: 0.0827
Epoch 85/100
 - 0s - loss: 0.0217 - rmse: 0.0619 - val_loss: 0.0603 - val_rmse: 0.0661
Epoch 86/100
 - 0s - loss: 0.0236 - rmse: 0.0699 - val_loss: 0.0601 - val_rmse: 0.0660
Epoch 87/100
 - 0s - loss: 0.0216 - rmse: 0.0627 - val_loss: 0.0584 - val_rmse: 0.0673
Epoch 88/100
 - 0s - loss: 0.0217 - rmse: 0.0622 - val_loss: 0.0634 - val_rmse: 0.0738
Epoch 89/100
 - 0s - loss: 0.0236 - rmse: 0

Epoch 60/100
 - 0s - loss: 0.0234 - rmse: 0.0695 - val_loss: 0.0231 - val_rmse: 0.0795
Epoch 61/100
 - 0s - loss: 0.0221 - rmse: 0.0612 - val_loss: 0.0250 - val_rmse: 0.0863
Epoch 62/100
 - 0s - loss: 0.0231 - rmse: 0.0671 - val_loss: 0.0338 - val_rmse: 0.1134
Epoch 63/100
 - 0s - loss: 0.0234 - rmse: 0.0676 - val_loss: 0.0208 - val_rmse: 0.0661
Epoch 64/100
 - 0s - loss: 0.0212 - rmse: 0.0580 - val_loss: 0.0205 - val_rmse: 0.0648
Epoch 65/100
 - 0s - loss: 0.0227 - rmse: 0.0658 - val_loss: 0.0206 - val_rmse: 0.0631
Epoch 66/100
 - 0s - loss: 0.0227 - rmse: 0.0661 - val_loss: 0.0193 - val_rmse: 0.0603
Epoch 67/100
 - 0s - loss: 0.0213 - rmse: 0.0589 - val_loss: 0.0296 - val_rmse: 0.1035
Epoch 68/100
 - 0s - loss: 0.0222 - rmse: 0.0655 - val_loss: 0.0221 - val_rmse: 0.0749
Epoch 69/100
 - 0s - loss: 0.0234 - rmse: 0.0683 - val_loss: 0.0247 - val_rmse: 0.0808
Epoch 70/100
 - 0s - loss: 0.0229 - rmse: 0.0679 - val_loss: 0.0219 - val_rmse: 0.0741
Epoch 71/100
 - 0s - loss: 0.0221 - rmse: 0

Epoch 42/100
 - 0s - loss: 0.0250 - rmse: 0.0675 - val_loss: 0.0385 - val_rmse: 0.0799
Epoch 43/100
 - 0s - loss: 0.0263 - rmse: 0.0748 - val_loss: 0.0358 - val_rmse: 0.0594
Epoch 44/100
 - 0s - loss: 0.0260 - rmse: 0.0720 - val_loss: 0.0426 - val_rmse: 0.0896
Epoch 45/100
 - 0s - loss: 0.0258 - rmse: 0.0709 - val_loss: 0.0368 - val_rmse: 0.0654
Epoch 46/100
 - 0s - loss: 0.0233 - rmse: 0.0636 - val_loss: 0.0647 - val_rmse: 0.1559
Epoch 47/100
 - 0s - loss: 0.0285 - rmse: 0.0843 - val_loss: 0.0368 - val_rmse: 0.0592
Epoch 48/100
 - 0s - loss: 0.0246 - rmse: 0.0685 - val_loss: 0.0425 - val_rmse: 0.0818
Epoch 49/100
 - 0s - loss: 0.0290 - rmse: 0.0865 - val_loss: 0.0449 - val_rmse: 0.0850
Epoch 50/100
 - 0s - loss: 0.0271 - rmse: 0.0761 - val_loss: 0.0404 - val_rmse: 0.0725
Epoch 51/100
 - 0s - loss: 0.0292 - rmse: 0.0727 - val_loss: 0.0409 - val_rmse: 0.0688
Epoch 52/100
 - 0s - loss: 0.0281 - rmse: 0.0682 - val_loss: 0.0418 - val_rmse: 0.0691
Epoch 53/100
 - 0s - loss: 0.0254 - rmse: 0

Epoch 24/100
 - 0s - loss: 0.0316 - rmse: 0.0786 - val_loss: 0.0310 - val_rmse: 0.1191
Epoch 25/100
 - 0s - loss: 0.0338 - rmse: 0.0877 - val_loss: 0.0191 - val_rmse: 0.0849
Epoch 26/100
 - 0s - loss: 0.0320 - rmse: 0.0810 - val_loss: 0.0148 - val_rmse: 0.0664
Epoch 27/100
 - 0s - loss: 0.0296 - rmse: 0.0757 - val_loss: 0.0291 - val_rmse: 0.1168
Epoch 28/100
 - 0s - loss: 0.0318 - rmse: 0.0843 - val_loss: 0.0143 - val_rmse: 0.0635
Epoch 29/100
 - 0s - loss: 0.0304 - rmse: 0.0792 - val_loss: 0.0233 - val_rmse: 0.0998
Epoch 30/100
 - 0s - loss: 0.0289 - rmse: 0.0766 - val_loss: 0.0146 - val_rmse: 0.0654
Epoch 31/100
 - 0s - loss: 0.0282 - rmse: 0.0721 - val_loss: 0.0172 - val_rmse: 0.0812
Epoch 32/100
 - 0s - loss: 0.0290 - rmse: 0.0770 - val_loss: 0.0134 - val_rmse: 0.0594
Epoch 33/100
 - 0s - loss: 0.0270 - rmse: 0.0676 - val_loss: 0.0175 - val_rmse: 0.0826
Epoch 34/100
 - 0s - loss: 0.0277 - rmse: 0.0739 - val_loss: 0.0148 - val_rmse: 0.0670
Epoch 35/100
 - 0s - loss: 0.0267 - rmse: 0

In [None]:
def pred_lgb_CV():

    auc_score = 0
    test_id['prob'] = 0
    for num in range(4):
        print('第'+str(num)+'次交叉验证')
        kf = KFold(n_splits = 10, random_state=100*num + 10, shuffle=True)
        for train_ix, val_ix in kf.split(train):
            
            train_y = new_train.loc[train_ix,:]['label']
            train_x = new_train.loc[train_ix,:].drop(['label','user_id'],axis=1)

            X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, test_size=0.2, random_state=num*20)
            lgb_clf = lgb.LGBMClassifier(boosting_type='gbdt', num_leaves=31, max_depth=-1,
                                learning_rate=0.01, n_estimators=2000, max_bin=225,
                                min_child_weight=0.01, min_child_samples=20, subsample=0.7, subsample_freq=1,
                                colsample_bytree=0.7, reg_alpha=0.0, reg_lambda=1, random_state=100*num+500, n_jobs=-1,
                                )
            lgb_clf.fit(X_train, y_train, eval_metric='auc', eval_set=(X_test, y_test), early_stopping_rounds=200)

            auc_score += list(lgb_clf.best_score_.values())[0]['auc']
            y_prob = lgb_clf.predict_proba(new_test_x, num_iteration=lgb_clf.best_iteration_)
            test_id['prob'] += y_prob[:,1]
            
    test_id['prob'] = test_id['prob']/20
    test_id.to_csv('lgb_linxi_080901.csv', index=False, header=False)

    print(auc_score/20)

In [19]:
result1

Unnamed: 0,ID,发电量
0,1,0.379993
1,9,1.310647
2,13,2.148519
3,17,3.399214
4,18,3.637901
5,21,4.142491
6,23,4.267773
7,25,4.791955
8,26,4.956942
9,28,5.234955


In [18]:
index = result1[result1['ID'].isin(special_missing_ID)].index
result1.loc[index, '发电量'] = 0.379993053

result1.to_csv('sub_linxi_081602.csv', index=False, header=False)