# 第3回データ分析コンペティション ： PUBG Finish Placement Prediction

## 方針
1. 特徴量の作成
2. RFEによる特徴量削減
3. スタッキング

## 参考 
https://www.kaggle.com/deffro/eda-is-fun<br>
https://www.kaggle.com/kamalchhirang/5th-place-solution-0-0184-score<br>
https://www.kaggle.com/ceshine/a-simple-post-processing-trick-lb-0237-0204<br>
https://github.com/ghmagazine/kagglebook/blob/master/ch07/ch07-01-stacking.py<br>
<div style = "text-align: right;"> 
    GCI2019-Winter Sekikawa318
</div>

In [1]:
%%html
<style>
    img {
        float:left;
        width: 1300px;
        height: 450px;
        border: 2px solid #000;
    }
    h1{color: #00008b; padding: 0.25em 0.5em; border-left: solid 5px #00008b;}
    h2{color: #2323b1; padding: 0.25em 0.5em; border-left: solid 5px #2323b1;}
    h3{color: #4e4edc; padding: 0.5em; border-bottom: solid 3px #4e4edc;}
    h4{color: #00008b; padding: 0.5em; border-left: solid 3px #00008b;}
</style>

## 扱うモジュールのインポート

In [1]:
import warnings
warnings.simplefilter('ignore')
import numpy as np
import pandas as pd

# lightgbmをインストールしておいてください
import lightgbm as lgb
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, GroupKFold
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import Ridge

# ! pip3 install eli5
from eli5.sklearn import PermutationImportance

%matplotlib inline
pd.set_option('display.max_columns', 1000)

Using TensorFlow backend.


## データのインポート
このファイルを保存したディレクトリに"input"ディレクトリを作成し，その中に"train.csv"と"test.csv"のcsvファイルを保存しておいてください．

In [2]:
test_df = pd.read_csv("./input/test.csv")
train_df = pd.read_csv("./input/train.csv")
train_test_df = pd.concat([train_df, test_df])

## 1. 特徴量作成

### matchTypeを数値に変更する

In [3]:
train_test_df['matchType'].value_counts()

squad-fpp           3439
duo-fpp             2479
solo-fpp             966
squad                828
duo                  657
solo                 398
crashfpp              97
normal-squad-fpp      64
Name: matchType, dtype: int64

In [4]:
def standardize_matchType(df):
    df['team'] = [1 if i>50 else 2 if (i>25 & i<=50) else 4 for i in df['numGroups']]
    df['team'][df['matchType'] == 'solo'] = 1
    df['team'][df['matchType'] == 'solo-fpp'] = 1
    df['team'][df['matchType'] == 'duo'] = 2
    df['team'][df['matchType'] == 'duo-fpp'] = 2
    df['team'][df['matchType'] == 'squad'] = 4
    df['team'][df['matchType'] == 'squad-fpp'] = 4
    df['team'][df['matchType'] == 'normal-squad-fpp'] = 4
    return df

In [5]:
train_test_df = standardize_matchType(train_test_df)
train_test_df = train_test_df.drop(["matchType"], axis=1)

In [6]:
train_test_df["team"].value_counts()

4    4331
2    3233
1    1364
Name: team, dtype: int64

### 基本的な特徴量を追加する
自チームの人数やマッチに参加した総人数など<br>
チームメンバーが5人以上いる場合は,中央値で置き換え<br>
それでも5以上なら4で置き換え

In [7]:
def basic_features(df):
    df['playersJoined'] = df.groupby('matchId')['matchId'].transform('count')
    df['teamMembers'] = df.groupby(['matchId', 'groupId'])['groupId'].transform('count')
    # チームメンバーが5人以上の人たちがいる -> 中央値で置き換え -> それでも5以上なら4で埋める
    temp = df.groupby("matchId")["teamMembers"].transform("median")
    df.loc[df["teamMembers"]>4, "teamMembers"] = temp[df["teamMembers"]>4]
    df.loc[df["teamMembers"]>4, "teamMembers"] = 4
    df['opponents'] = df['playersJoined'] - df['teamMembers']
    return df

In [8]:
train_test_df = basic_features(train_test_df)

In [9]:
# 確認用
# temp_columns = ["playersJoined", "teamMembers", "opponents"]
# train_test_df[temp_columns].head()

### 特徴量を追加する
合計移動距離やヘッドショットキルレートなど

In [10]:
def add_features(df):
    df['totalDistance'] = df['rideDistance'] + df['walkDistance'] + df['swimDistance']
    df['kills_assists'] = df['kills'] + df['assists']
    df['headshot_rate'] = df['headshotKills'] / (df['kills']+0.01)
    df['killStreaks_rate'] = df["killStreaks"] / (df["kills"]+0.01)
    df['pointsSum'] = df['killPoints'] + df['rankPoints'] + df['winPoints']
    df['heals_boosts'] = df['heals'] + df['boosts']
    df['kills_assists_per_heal_boost'] = df['kills_assists'] / (df['heals_boosts']+0.01)
    df['damageDealt_per_heal_boost'] = df['damageDealt'] / (df['heals_boosts'] + 1)
    df['kills_assists_per_heal_boost'] = df['kills_assists'] / (df['heals_boosts'] + 1)
    df['killPerc'] = df.groupby('matchId')['kills'].rank(pct=True).values
    df['killPlacePerc'] = df.groupby('matchId')['killPlace'].rank(pct=True).values
    df['heals_boostsPerc'] = df.groupby('matchId')['heals_boosts'].rank(pct=True).values
    return df

In [11]:
train_test_df = add_features(train_test_df)

In [12]:
# 確認用
# temp_columns = [
#     "totalDistance", "kills_assists", "headshot_rate", "killStreaks_rate", "pointsSum", "heals_boosts", "kills_assists_per_heal_boost",
#     "damageDealt_per_heal_boost", "kills_assists_per_heal_boost", "killPerc", "killPlacePerc", "heals_boostsPerc"
# ]
# train_test_df[temp_columns].head()

### 特徴量を統一化する
対戦相手が99人いる中の3人をkillした場合と，対戦相手が50人いる中の3人をkillした場合では重みが違うはず

In [13]:
def norm_features(df):
    df['teamKills'] = df['teamKills'] / df['teamMembers']
    df['killPlace'] = df['killPlace'] / (df['maxPlace'])
    df['roadKills'] = df['roadKills'] / (df['rideDistance'] + 0.01) / df["opponents"]
    df['maxPlace'] = train_test_df['maxPlace'] / train_test_df['numGroups']
    df['kills_Dis'] = df['kills'] / df["opponents"] /(df['totalDistance']+0.01)
    df['kills'] = df['kills'] / (df['opponents']) / df["matchDuration"]
    df["headshotKills"] = df["headshotKills"] / df["opponents"]
    df["killStreaks"] = df["killStreaks"]  / df["opponents"]

    df["boosts_Dis"] = df["boosts"] / (df["totalDistance"]+0.01)
    df['boosts'] = df['boosts'] / (df['matchDuration'])
    df['heals'] = df['heals'] / (df['matchDuration'])
    df["DBNOs_Dis"] = df["DBNOs"] / (df["totalDistance"]+0.01)
    df['DBNOs'] = df['DBNOs'] / (df['opponents']) / df["matchDuration"]
    df['damageDealt'] = df['damageDealt'] / df["opponents"] / df["matchDuration"]
    df['revives'] = df['revives'] / (df['numGroups']+0.01)
    df["weaponsAcquired_Dis"] = df["weaponsAcquired"] / (df["totalDistance"]+0.01)
    df["weaponsAcquired"] = df["weaponsAcquired"] / df["matchDuration"]
    df["heals_boosts_Dis"] = df["heals_boosts"] / (df["totalDistance"]+0.01)
    df["heals_boosts"] = df["heals_boosts"] / df["matchDuration"]
    df["kills_assists_Dis"] = df["kills_assists"] / df["opponents"] / (df["totalDistance"]+0.01)
    df["kills_assists"] = df["kills_assists"] / df["opponents"] / df["matchDuration"]
    
    df['rideDistance'] = df['rideDistance'] / (df['matchDuration'])
    df['swimDistance'] = df['swimDistance'] / (df['matchDuration'])
    df['walkDistance'] = df['walkDistance'] / (df['matchDuration'])
    df["totalDistance"] = df["totalDistance"] / (df['matchDuration'])
    return df

In [14]:
train_test_df = norm_features(train_test_df)

In [15]:
# 確認用
# temp_columns = [
#     "teamKills", "killPlace", "roadKills", "maxPlace", "kills_Dis", "kills", "headshotKills", "killStreaks", "boosts_Dis", "boosts", "heals", "DBNOs_Dis",
#     "DBNOs", "damageDealt", "revives", "weaponsAcquired_Dis", "weaponsAcquired", "heals_boosts_Dis", "heals_boosts", "kills_assists_Dis",
#     "kills_assists", "rideDistance", "swimDistance", "walkDistance", "totalDistance"
# ]
# train_test_df[temp_columns].head()

### 同じマッチの同じチームにおける特徴量作成
同じチームの合計キル数など

In [16]:
cols_to_drop = ["Id", "groupId", "matchId", "winPlacePerc", "matchType", "opponents", "playersJoined", "team", "missing_groups_percent", "maxPlace", "missingMembers","matchDuration", "numGroups", "teamMembers"]
features = [col for col in train_test_df.columns if col not in cols_to_drop]
def by_team(df): 
    # std
    agg0 = df.groupby(['matchId','groupId'])[features].std()
    agg0 = agg0.replace([np.inf, np.NINF,np.nan], 0)
    df.merge(agg0, suffixes=['', '_std'], how='left', on=['matchId', 'groupId'])
    # std_rank
    agg0 = agg0.groupby(['matchId'])[features].rank(pct=True)
    df = df.merge(agg0, suffixes=['', '_std_rank'], how='left', on=['matchId', 'groupId'])
    # mean
    agg1 = df.groupby(['matchId', 'groupId'])[features].mean()
    df = df.merge(agg1, suffixes=['', '_mean'], how='left', on=['matchId', 'groupId'])
    # mean_rank
    agg1 = agg1.groupby('matchId')[features].rank(pct=True)
    df = df.merge(agg1, suffixes=['', '_mean_rank'], how='left', on=['matchId', 'groupId'])
    # max
    agg2 = df.groupby(['matchId', 'groupId'])[features].max()
    df = df.merge(agg2, suffixes=['', '_max'], how='left', on=['matchId', 'groupId'])
    # max_rank
    agg2 = agg2.groupby("matchId")[features].rank(pct=True)
    df = df.merge(agg2, suffixes = ["", "_max_rank"], how="left", on=["matchId", 'groupId'])
    # min 
    agg3 = df.groupby(['matchId', 'groupId'])[features].min()
    df = df.merge(agg3, suffixes=['', '_min'], how='left', on=['matchId', 'groupId'])
    # min_rank
    agg3 = agg3.groupby("matchId")[features].rank(pct=True)
    df = df.merge(agg3, suffixes=['', '_min_rank'], how='left', on=['matchId', 'groupId'])
    # sum 
    agg4 = df.groupby(['matchId', 'groupId'])[features].sum()
    df = df.merge(agg4, suffixes=['', '_sum'], how='left', on=['matchId', 'groupId'])
    return df

In [17]:
train_test_df = by_team(train_test_df)

### 同じマッチにおける特徴量作成

In [18]:
def by_match(df):  
    # std
    agg0 = df.groupby('matchId')[features].std()
    agg0 = agg0.replace([np.inf, np.NINF,np.nan], 0)
    df.merge(agg0, suffixes=['', '_match_std'], how='left', on='matchId')
    # mean
    agg1 = df.groupby('matchId')[features].mean()
    df = df.merge(agg1, suffixes=['', '_match_mean'], how='left', on='matchId')
    # max
    agg2 = df.groupby('matchId')[features].max()
    df = df.merge(agg2, suffixes=['', '_match_max'], how='left', on='matchId')
    # min 
    agg3 = df.groupby("matchId")[features].min()
    df = df.merge(agg3, suffixes=["", "_match_min"], how="left", on="matchId")
    return df

In [19]:
# コンペ中は下記のように集合を活用してリストの更新をしていたため，columnsの順番が変わってしまい，モデルのスコアにバラツキが出てしまっていました.
# features = list(set(features) - set(["killPerc", "killPlacePerc", "heals_boostsPerc"]))

features.remove("killPerc")
features.remove("killPlacePerc")
features.remove("heals_boostsPerc")
train_test_df = by_match(train_test_df)

### 必要のない特徴量の削除

In [20]:
train_test_df = train_test_df.drop(["Id", "groupId", "matchId"], axis=1)

### 出来上がったデータフレームの確認

In [21]:
print("columns: ", len(train_test_df.columns))
print("isna: ", 1794 - train_test_df.isna().sum().sum())
train_test_df.describe()

columns:  455
isna:  0


Unnamed: 0,DBNOs,assists,boosts,damageDealt,headshotKills,heals,killPlace,killPoints,killStreaks,kills,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPlacePerc,winPoints,team,playersJoined,teamMembers,opponents,totalDistance,kills_assists,headshot_rate,killStreaks_rate,pointsSum,heals_boosts,kills_assists_per_heal_boost,damageDealt_per_heal_boost,killPerc,killPlacePerc,heals_boostsPerc,kills_Dis,boosts_Dis,DBNOs_Dis,weaponsAcquired_Dis,heals_boosts_Dis,kills_assists_Dis,DBNOs_std_rank,assists_std_rank,boosts_std_rank,damageDealt_std_rank,headshotKills_std_rank,heals_std_rank,killPlace_std_rank,killPoints_std_rank,killStreaks_std_rank,kills_std_rank,longestKill_std_rank,rankPoints_std_rank,revives_std_rank,rideDistance_std_rank,roadKills_std_rank,swimDistance_std_rank,teamKills_std_rank,vehicleDestroys_std_rank,walkDistance_std_rank,weaponsAcquired_std_rank,winPoints_std_rank,totalDistance_std_rank,kills_assists_std_rank,headshot_rate_std_rank,killStreaks_rate_std_rank,pointsSum_std_rank,heals_boosts_std_rank,kills_assists_per_heal_boost_std_rank,damageDealt_per_heal_boost_std_rank,killPerc_std_rank,killPlacePerc_std_rank,heals_boostsPerc_std_rank,kills_Dis_std_rank,boosts_Dis_std_rank,DBNOs_Dis_std_rank,weaponsAcquired_Dis_std_rank,heals_boosts_Dis_std_rank,kills_assists_Dis_std_rank,DBNOs_mean,assists_mean,boosts_mean,damageDealt_mean,headshotKills_mean,heals_mean,killPlace_mean,killPoints_mean,killStreaks_mean,kills_mean,longestKill_mean,rankPoints_mean,revives_mean,rideDistance_mean,roadKills_mean,swimDistance_mean,teamKills_mean,vehicleDestroys_mean,walkDistance_mean,weaponsAcquired_mean,winPoints_mean,totalDistance_mean,kills_assists_mean,headshot_rate_mean,killStreaks_rate_mean,pointsSum_mean,heals_boosts_mean,kills_assists_per_heal_boost_mean,damageDealt_per_heal_boost_mean,killPerc_mean,killPlacePerc_mean,heals_boostsPerc_mean,kills_Dis_mean,boosts_Dis_mean,DBNOs_Dis_mean,weaponsAcquired_Dis_mean,heals_boosts_Dis_mean,kills_assists_Dis_mean,DBNOs_mean_rank,assists_mean_rank,boosts_mean_rank,damageDealt_mean_rank,headshotKills_mean_rank,heals_mean_rank,killPlace_mean_rank,killPoints_mean_rank,killStreaks_mean_rank,kills_mean_rank,longestKill_mean_rank,rankPoints_mean_rank,revives_mean_rank,rideDistance_mean_rank,roadKills_mean_rank,swimDistance_mean_rank,teamKills_mean_rank,vehicleDestroys_mean_rank,walkDistance_mean_rank,weaponsAcquired_mean_rank,winPoints_mean_rank,totalDistance_mean_rank,kills_assists_mean_rank,headshot_rate_mean_rank,killStreaks_rate_mean_rank,pointsSum_mean_rank,heals_boosts_mean_rank,kills_assists_per_heal_boost_mean_rank,damageDealt_per_heal_boost_mean_rank,killPerc_mean_rank,killPlacePerc_mean_rank,heals_boostsPerc_mean_rank,kills_Dis_mean_rank,boosts_Dis_mean_rank,DBNOs_Dis_mean_rank,weaponsAcquired_Dis_mean_rank,heals_boosts_Dis_mean_rank,kills_assists_Dis_mean_rank,DBNOs_max,assists_max,boosts_max,damageDealt_max,headshotKills_max,heals_max,killPlace_max,killPoints_max,killStreaks_max,kills_max,longestKill_max,rankPoints_max,revives_max,rideDistance_max,roadKills_max,swimDistance_max,teamKills_max,vehicleDestroys_max,walkDistance_max,weaponsAcquired_max,winPoints_max,totalDistance_max,kills_assists_max,headshot_rate_max,killStreaks_rate_max,pointsSum_max,heals_boosts_max,kills_assists_per_heal_boost_max,damageDealt_per_heal_boost_max,killPerc_max,killPlacePerc_max,heals_boostsPerc_max,kills_Dis_max,boosts_Dis_max,DBNOs_Dis_max,weaponsAcquired_Dis_max,heals_boosts_Dis_max,kills_assists_Dis_max,DBNOs_max_rank,assists_max_rank,boosts_max_rank,damageDealt_max_rank,headshotKills_max_rank,heals_max_rank,killPlace_max_rank,killPoints_max_rank,killStreaks_max_rank,kills_max_rank,longestKill_max_rank,rankPoints_max_rank,revives_max_rank,rideDistance_max_rank,roadKills_max_rank,swimDistance_max_rank,teamKills_max_rank,vehicleDestroys_max_rank,walkDistance_max_rank,weaponsAcquired_max_rank,winPoints_max_rank,totalDistance_max_rank,kills_assists_max_rank,headshot_rate_max_rank,killStreaks_rate_max_rank,pointsSum_max_rank,heals_boosts_max_rank,kills_assists_per_heal_boost_max_rank,damageDealt_per_heal_boost_max_rank,killPerc_max_rank,killPlacePerc_max_rank,heals_boostsPerc_max_rank,kills_Dis_max_rank,boosts_Dis_max_rank,DBNOs_Dis_max_rank,weaponsAcquired_Dis_max_rank,heals_boosts_Dis_max_rank,kills_assists_Dis_max_rank,DBNOs_min,assists_min,boosts_min,damageDealt_min,headshotKills_min,heals_min,killPlace_min,killPoints_min,killStreaks_min,kills_min,longestKill_min,rankPoints_min,revives_min,rideDistance_min,roadKills_min,swimDistance_min,teamKills_min,vehicleDestroys_min,walkDistance_min,weaponsAcquired_min,winPoints_min,totalDistance_min,kills_assists_min,headshot_rate_min,killStreaks_rate_min,pointsSum_min,heals_boosts_min,kills_assists_per_heal_boost_min,damageDealt_per_heal_boost_min,killPerc_min,killPlacePerc_min,heals_boostsPerc_min,kills_Dis_min,boosts_Dis_min,DBNOs_Dis_min,weaponsAcquired_Dis_min,heals_boosts_Dis_min,kills_assists_Dis_min,DBNOs_min_rank,assists_min_rank,boosts_min_rank,damageDealt_min_rank,headshotKills_min_rank,heals_min_rank,killPlace_min_rank,killPoints_min_rank,killStreaks_min_rank,kills_min_rank,longestKill_min_rank,rankPoints_min_rank,revives_min_rank,rideDistance_min_rank,roadKills_min_rank,swimDistance_min_rank,teamKills_min_rank,vehicleDestroys_min_rank,walkDistance_min_rank,weaponsAcquired_min_rank,winPoints_min_rank,totalDistance_min_rank,kills_assists_min_rank,headshot_rate_min_rank,killStreaks_rate_min_rank,pointsSum_min_rank,heals_boosts_min_rank,kills_assists_per_heal_boost_min_rank,damageDealt_per_heal_boost_min_rank,killPerc_min_rank,killPlacePerc_min_rank,heals_boostsPerc_min_rank,kills_Dis_min_rank,boosts_Dis_min_rank,DBNOs_Dis_min_rank,weaponsAcquired_Dis_min_rank,heals_boosts_Dis_min_rank,kills_assists_Dis_min_rank,DBNOs_sum,assists_sum,boosts_sum,damageDealt_sum,headshotKills_sum,heals_sum,killPlace_sum,killPoints_sum,killStreaks_sum,kills_sum,longestKill_sum,rankPoints_sum,revives_sum,rideDistance_sum,roadKills_sum,swimDistance_sum,teamKills_sum,vehicleDestroys_sum,walkDistance_sum,weaponsAcquired_sum,winPoints_sum,totalDistance_sum,kills_assists_sum,headshot_rate_sum,killStreaks_rate_sum,pointsSum_sum,heals_boosts_sum,kills_assists_per_heal_boost_sum,damageDealt_per_heal_boost_sum,killPerc_sum,killPlacePerc_sum,heals_boostsPerc_sum,kills_Dis_sum,boosts_Dis_sum,DBNOs_Dis_sum,weaponsAcquired_Dis_sum,heals_boosts_Dis_sum,kills_assists_Dis_sum,DBNOs_match_mean,assists_match_mean,boosts_match_mean,damageDealt_match_mean,headshotKills_match_mean,heals_match_mean,killPlace_match_mean,killPoints_match_mean,killStreaks_match_mean,kills_match_mean,longestKill_match_mean,rankPoints_match_mean,revives_match_mean,rideDistance_match_mean,roadKills_match_mean,swimDistance_match_mean,teamKills_match_mean,vehicleDestroys_match_mean,walkDistance_match_mean,weaponsAcquired_match_mean,winPoints_match_mean,totalDistance_match_mean,kills_assists_match_mean,headshot_rate_match_mean,killStreaks_rate_match_mean,pointsSum_match_mean,heals_boosts_match_mean,kills_assists_per_heal_boost_match_mean,damageDealt_per_heal_boost_match_mean,kills_Dis_match_mean,boosts_Dis_match_mean,DBNOs_Dis_match_mean,weaponsAcquired_Dis_match_mean,heals_boosts_Dis_match_mean,kills_assists_Dis_match_mean,DBNOs_match_max,assists_match_max,boosts_match_max,damageDealt_match_max,headshotKills_match_max,heals_match_max,killPlace_match_max,killPoints_match_max,killStreaks_match_max,kills_match_max,longestKill_match_max,rankPoints_match_max,revives_match_max,rideDistance_match_max,roadKills_match_max,swimDistance_match_max,teamKills_match_max,vehicleDestroys_match_max,walkDistance_match_max,weaponsAcquired_match_max,winPoints_match_max,totalDistance_match_max,kills_assists_match_max,headshot_rate_match_max,killStreaks_rate_match_max,pointsSum_match_max,heals_boosts_match_max,kills_assists_per_heal_boost_match_max,damageDealt_per_heal_boost_match_max,kills_Dis_match_max,boosts_Dis_match_max,DBNOs_Dis_match_max,weaponsAcquired_Dis_match_max,heals_boosts_Dis_match_max,kills_assists_Dis_match_max,DBNOs_match_min,assists_match_min,boosts_match_min,damageDealt_match_min,headshotKills_match_min,heals_match_min,killPlace_match_min,killPoints_match_min,killStreaks_match_min,kills_match_min,longestKill_match_min,rankPoints_match_min,revives_match_min,rideDistance_match_min,roadKills_match_min,swimDistance_match_min,teamKills_match_min,vehicleDestroys_match_min,walkDistance_match_min,weaponsAcquired_match_min,winPoints_match_min,totalDistance_match_min,kills_assists_match_min,headshot_rate_match_min,killStreaks_rate_match_min,pointsSum_match_min,heals_boosts_match_min,kills_assists_per_heal_boost_match_min,damageDealt_per_heal_boost_match_min,kills_Dis_match_min,boosts_Dis_match_min,DBNOs_Dis_match_min,weaponsAcquired_Dis_match_min,heals_boosts_Dis_match_min,kills_assists_Dis_match_min
count,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,7134.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0,8928.0
mean,5e-06,0.236335,0.000703,0.000914,0.002343,0.000869,1.272324,539.477935,0.005902,6e-06,22.495015,1571.843414,1.052289,43.983759,838.469758,0.005211,0.342786,0.000122,0.002988,0.010103,0.008849,0.727845,0.002299,0.447767,658.293011,2.817428,94.874104,2.644489,92.229615,1.073619,8e-06,0.098233,0.327765,2036.240703,0.001573,0.394853,50.181873,0.50532,0.50532,0.50532,2.1e-05,0.000704,0.001872,0.773972,0.001763,0.000152,0.527737,0.521924,0.519477,0.534731,0.524142,0.520009,0.55244,0.533628,0.528956,0.527885,0.530879,0.538116,0.520143,0.519135,0.514429,0.517307,0.518657,0.515213,0.536484,0.536566,0.535402,0.536962,0.530535,0.527733,0.53506,0.557336,0.521732,0.541884,0.550775,0.533216,0.55244,0.529003,0.546531,0.529911,0.550126,0.562225,0.533947,0.547836,5e-06,0.236335,0.000703,0.000914,0.002343,0.000869,1.272324,539.477935,0.005902,6e-06,22.495015,838.469758,0.005211,0.342786,0.000122,0.002988,0.010103,0.008849,0.727845,0.002299,658.293011,1.073619,8e-06,0.098233,0.327765,2036.240703,0.001573,0.394853,50.181873,0.50532,0.50532,0.50532,2.1e-05,0.000704,0.001872,0.773972,0.001763,0.000152,0.504432,0.518874,0.495628,0.496939,0.517016,0.500406,0.539941,0.513611,0.505418,0.505456,0.510219,0.512518,0.51803,0.506374,0.514422,0.514522,0.517643,0.515022,0.489036,0.489963,0.512084,0.48987,0.506679,0.519077,0.506316,0.511802,0.49582,0.517482,0.516061,0.500766,0.53995,0.492492,0.528771,0.505726,0.531775,0.536357,0.50741,0.52953,9e-06,0.511761,0.001076,0.001542,0.005169,0.001525,1.563598,581.514337,0.010351,1.1e-05,42.265957,854.244064,0.01372,0.444666,0.000487,0.005062,0.026034,0.025874,0.851397,0.003002,668.752464,1.250054,1.3e-05,0.232897,0.58675,2100.294579,0.00247,0.766133,93.992704,0.645363,0.604595,0.609052,4.9e-05,0.001433,0.005957,1.879873,0.00363,0.000433,0.531308,0.524597,0.50484,0.518137,0.526945,0.509194,0.549734,0.525929,0.536197,0.525785,0.519663,0.530498,0.5221,0.508769,0.514429,0.514712,0.517783,0.515076,0.496275,0.509433,0.524862,0.496757,0.525906,0.525871,0.529104,0.541882,0.505848,0.534541,0.540212,0.517996,0.549734,0.505848,0.537153,0.517408,0.540476,0.547023,0.521493,0.538365,1e-06,0.05578,0.000395,0.000434,0.000687,0.000384,0.880779,501.015009,0.002587,3e-06,8.675071,820.490591,0.000546,0.252619,6.881187e-09,0.001809,0.002464,0.000672,0.600368,0.001676,646.376568,0.894151,4e-06,0.025765,0.133067,1973.416779,0.000857,0.152768,20.014601,0.393357,0.375544,0.41458,7e-06,0.000296,0.000158,0.241152,0.000701,1e-05,0.494673,0.510931,0.491292,0.479942,0.508764,0.494042,0.517834,0.502502,0.496408,0.495402,0.495378,0.498607,0.512014,0.50148,0.514257,0.510987,0.513352,0.51408,0.480433,0.475646,0.501407,0.481399,0.494083,0.508702,0.494441,0.487173,0.488193,0.492247,0.479569,0.494562,0.517834,0.488193,0.494193,0.490204,0.493194,0.492929,0.486713,0.492231,1.5e-05,0.738575,0.001865,0.002648,0.006447,0.002427,4.913238,1581.795475,0.016814,1.8e-05,61.188474,2695.197805,0.017999,0.958331,0.000487,0.007429,0.028078,0.027442,2.015775,0.006585,1916.574709,2.981535,2.3e-05,0.281059,0.952225,6193.567988,0.004292,1.185951,153.921605,1.510949,1.692277,1.465292,6.4e-05,0.001986,0.007856,8.011621,0.005069,0.000457,5e-06,0.236335,0.000703,0.000914,0.002343,0.000869,1.272324,539.477935,0.005902,6e-06,22.495015,838.469758,0.005211,0.342786,0.000122,0.002988,0.010103,0.008849,0.727845,0.002299,658.293011,1.073619,8e-06,0.098233,0.327765,2036.240703,0.001573,0.394853,50.181873,2.1e-05,0.000704,0.001872,0.773972,0.001763,0.000152,3.5e-05,2.90401,0.004878,0.006206,0.033148,0.008673,2.517448,746.34599,0.029793,5.6e-05,261.982711,995.947021,0.067552,2.783187,0.01169,0.09685,0.457531,0.429772,2.759856,0.007051,735.723566,4.265876,6.5e-05,0.991053,0.993294,2451.189628,0.011377,3.205304,338.552456,0.000495,0.010982,0.052834,13.499988,0.028437,0.012445,0.0,0.0,0.0,0.0,0.0,0.0,0.026695,396.921259,0.0,0.0,0.0,735.439628,0.0,0.0,0.0,0.0,0.0,0.0,0.001239,8e-06,558.485999,0.002383,0.0,0.0,0.0,1718.349574,0.0,0.0,0.0,0.0,0.0,0.0,7e-06,0.0,0.0
std,8e-06,0.594336,0.001088,0.001211,0.006188,0.00168,0.906398,625.611659,0.007744,1.1e-05,50.282603,259.511623,0.32743,22.974079,743.269932,0.015998,0.864675,0.011504,0.019211,0.075808,0.098323,0.746371,0.001484,0.28962,748.394274,1.193801,5.39692,1.11079,5.647265,1.2545,1.3e-05,0.252244,0.419262,642.714295,0.002436,0.637399,64.685681,0.258356,0.288676,0.26809,8.6e-05,0.001727,0.014033,18.49832,0.004893,0.011507,0.245064,0.220842,0.247129,0.250283,0.218623,0.248253,0.250746,0.174339,0.242123,0.247008,0.251845,0.188641,0.215005,0.212268,0.046778,0.142353,0.12031,0.075168,0.254024,0.253446,0.175361,0.254741,0.247481,0.226176,0.247019,0.254577,0.253525,0.254978,0.255231,0.246837,0.250697,0.255062,0.261167,0.260353,0.260746,0.26145,0.262083,0.261666,6e-06,0.448022,0.000964,0.000964,0.004664,0.001397,0.799843,621.76787,0.005733,9e-06,39.551685,742.507112,0.010047,0.829147,0.005751,0.016768,0.061646,0.064085,0.718737,0.001262,747.72261,1.216653,1.1e-05,0.179052,0.303666,635.799143,0.002127,0.465509,44.651639,0.203055,0.251942,0.23559,7e-05,0.001221,0.00618,14.527088,0.003396,0.006643,0.251279,0.22998,0.272184,0.280837,0.232791,0.26938,0.289832,0.187804,0.267754,0.270348,0.273035,0.210203,0.218444,0.221977,0.047643,0.149015,0.122137,0.075833,0.294902,0.292364,0.190647,0.2953,0.273113,0.235483,0.26902,0.282441,0.277645,0.275053,0.279672,0.268389,0.289828,0.277055,0.281601,0.275598,0.264477,0.292529,0.278777,0.28268,1e-05,0.829119,0.001296,0.001713,0.00859,0.002221,0.909428,672.885475,0.009095,1.3e-05,68.945767,757.860432,0.026527,0.989649,0.023003,0.026304,0.106621,0.166347,0.806312,0.001628,758.973798,1.359749,1.6e-05,0.362831,0.445327,679.85403,0.003012,0.856624,85.732786,0.262727,0.253216,0.271498,0.000126,0.002722,0.034662,35.324338,0.007779,0.019927,0.251001,0.231092,0.273486,0.280641,0.242333,0.271613,0.290725,0.188633,0.271241,0.273446,0.276116,0.210481,0.219213,0.224427,0.047695,0.149448,0.122546,0.076101,0.293565,0.287903,0.187827,0.294175,0.274588,0.243009,0.266041,0.281481,0.279283,0.279794,0.28643,0.270173,0.290725,0.279283,0.28623,0.283322,0.268026,0.293653,0.286777,0.287141,4e-06,0.260163,0.000808,0.000793,0.003391,0.001001,0.730639,580.00148,0.005095,7e-06,27.256099,727.961393,0.004122,0.735125,4.155089e-07,0.014137,0.050514,0.025917,0.679895,0.001196,737.126301,1.143366,9e-06,0.129139,0.286981,612.18652,0.001684,0.36002,37.305496,0.200794,0.282177,0.235081,6.1e-05,0.000882,0.000987,9.284252,0.002094,9.3e-05,0.163927,0.1075,0.227747,0.262323,0.107044,0.213569,0.286988,0.191024,0.203017,0.203673,0.203668,0.213753,0.068395,0.188878,0.018002,0.097027,0.028275,0.017333,0.295609,0.285036,0.195368,0.296307,0.226528,0.106953,0.202204,0.286369,0.2426,0.224579,0.261474,0.201922,0.286988,0.2426,0.202386,0.227152,0.160886,0.29538,0.241075,0.224871,1.9e-05,1.411353,0.002628,0.003438,0.011623,0.003958,6.086171,2397.383282,0.017057,2.3e-05,107.700371,3410.927309,0.036744,2.379036,0.023003,0.040225,0.115303,0.180944,2.282613,0.004715,2866.254401,3.76197,3.1e-05,0.475406,0.9312,4510.995578,0.006086,1.494273,159.510472,1.018,1.769625,0.976166,0.000153,0.003288,0.036373,177.945491,0.00908,0.019927,2e-06,0.098217,0.000139,0.000192,0.000609,0.000284,0.469712,613.585435,0.000651,1e-06,6.834044,740.805646,0.003582,0.392393,0.001168,0.003209,0.008883,0.01574,0.11429,0.000242,746.408708,0.434799,2e-06,0.02368,0.043271,620.49342,0.000375,0.08,7.073098,1.2e-05,0.000193,0.001849,12.827508,0.000581,0.001173,2e-05,1.198031,0.001259,0.001972,0.014451,0.002961,0.928535,850.330782,0.011269,1.7e-05,93.276854,905.337589,0.056224,1.907904,0.112111,0.09466,0.355177,0.573095,0.597492,0.002731,835.005565,1.63212,2e-05,0.043378,0.043416,807.153647,0.003031,1.075873,116.685861,0.0005,0.006456,0.117523,47.045085,0.027861,0.111471,0.0,0.0,0.0,0.0,0.0,0.0,0.010224,468.952334,0.0,0.0,0.0,659.726888,0.0,0.0,0.0,0.0,0.0,0.0,0.003299,7.3e-05,660.303988,0.011703,0.0,0.0,0.0,582.620219,0.0,0.0,0.0,0.0,0.0,0.0,6.8e-05,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,905.0,1.0,2.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,17.0,1.0,13.0,0.0,0.0,0.0,0.0,718.0,0.0,0.0,0.0,0.236842,0.01,0.171875,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.1875,0.09375,0.037037,0.21875,0.0625,0.020833,0.021277,0.111111,0.089286,0.089286,0.035714,0.217391,0.0625,0.340426,0.321429,0.384615,0.43617,0.020833,0.037037,0.042553,0.020833,0.089286,0.21875,0.089286,0.031915,0.0625,0.089286,0.037037,0.089286,0.020833,0.0625,0.089286,0.0625,0.038462,0.020833,0.0625,0.089286,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1135.333333,0.0,0.0,0.0,0.236842,0.010204,0.171875,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.1875,0.057692,0.021277,0.21875,0.055556,0.010417,0.010526,0.0625,0.0625,0.0625,0.010417,0.195652,0.0625,0.340426,0.285714,0.384615,0.425532,0.010526,0.016304,0.010526,0.010526,0.0625,0.21875,0.0625,0.010417,0.055556,0.0625,0.021277,0.023256,0.010417,0.023256,0.0625,0.057692,0.038462,0.016304,0.055556,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1204.0,0.0,0.0,0.0,0.236842,0.010204,0.171875,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.1875,0.057692,0.021277,0.21875,0.055556,0.010417,0.010526,0.0625,0.0625,0.0625,0.010417,0.195652,0.0625,0.340426,0.285714,0.384615,0.425532,0.010526,0.016304,0.010526,0.010526,0.0625,0.21875,0.0625,0.010417,0.055556,0.0625,0.021277,0.0625,0.010417,0.055556,0.0625,0.057692,0.038462,0.016304,0.055556,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,718.0,0.0,0.0,0.0,0.236842,0.01,0.171875,0.0,0.0,0.0,0.0,0.0,0.0,0.339286,0.425532,0.26087,0.132979,0.397849,0.244444,0.010417,0.010526,0.241935,0.241935,0.241935,0.010417,0.467742,0.125,0.489362,0.346154,0.488372,0.489362,0.010526,0.020833,0.010526,0.010526,0.231183,0.397849,0.241935,0.010417,0.217391,0.231183,0.132979,0.241935,0.010417,0.217391,0.241935,0.26087,0.339286,0.020833,0.217391,0.231183,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,-14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1264.0,0.0,0.0,0.0,0.247368,0.010204,0.190722,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000623,0.0,0.0,0.505102,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001139,0.00164,0.0,0.001139,0.0,0.0,0.0,1428.591837,0.0,0.0,30.344398,0.0,0.0,0.0,0.001115,0.0,0.0,0.0,0.0,0.0,0.003314,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019358,0.004167,0.0,0.019358,0.0,0.0,0.0,1500.0,0.0,0.0,193.7,0.0,0.0,0.0,0.005012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,718.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.532979,0.0,0.0,0.0,0.0,1366.0,1.0,27.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.100564,0.001379,0.191378,0.0,2.0,93.0,2.0,91.0,0.102303,0.0,0.0,0.0,1492.0,0.0,0.0,0.0,0.285714,0.255102,0.256984,0.0,0.0,0.0,0.001413,0.0,0.0,0.290865,0.351064,0.291667,0.36,0.357143,0.283333,0.372093,0.505319,0.308511,0.27907,0.267857,0.5,0.363636,0.392857,0.505376,0.457447,0.480769,0.5,0.357143,0.354839,0.505319,0.354839,0.290698,0.351852,0.279149,0.385417,0.26087,0.285714,0.375,0.27907,0.375,0.267442,0.266667,0.255319,0.333333,0.382979,0.260467,0.296296,0.0,0.0,0.0,0.000286,0.0,0.0,0.653061,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111563,0.001376,0.0,0.115478,0.0,0.0,0.0,1486.25,0.0,0.0,19.705914,0.31,0.309086,0.28866,0.0,0.0,0.0,0.001759,0.0,0.0,0.275862,0.339286,0.234043,0.25,0.34375,0.258621,0.294737,0.505249,0.244186,0.244792,0.25,0.468584,0.35,0.361702,0.505319,0.457447,0.48,0.5,0.229167,0.234043,0.505208,0.229167,0.239583,0.34375,0.244792,0.271739,0.229167,0.244681,0.269231,0.244792,0.294737,0.230769,0.244792,0.234043,0.297478,0.282609,0.234043,0.244681,0.0,0.0,0.0,0.000578,0.0,0.0,0.831131,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.165634,0.001952,0.0,0.167732,0.0,0.0,0.0,1500.0,0.0,0.0,33.32,0.31,0.445652,0.292553,0.0,0.0,0.0,0.002244,0.0,0.0,0.357143,0.339286,0.234043,0.275862,0.34375,0.26,0.302326,0.505319,0.25,0.25,0.25,0.505263,0.35,0.362069,0.505319,0.457447,0.48,0.5,0.23913,0.258065,0.505319,0.234043,0.25,0.34375,0.25,0.310345,0.234043,0.247312,0.291667,0.25,0.302326,0.234043,0.244792,0.234043,0.310345,0.295455,0.234043,0.247312,0.0,0.0,0.0,0.0,0.0,0.0,0.326316,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.049677,0.000723,0.0,0.050595,0.0,0.0,0.0,1467.0,0.0,0.0,0.0,0.280612,0.136364,0.247368,0.0,0.0,0.0,0.00084,0.0,0.0,0.407407,0.473958,0.333333,0.258621,0.478261,0.357143,0.275862,0.505208,0.375,0.375,0.375,0.428571,0.489583,0.375,0.510638,0.479167,0.505435,0.510417,0.214286,0.214286,0.505208,0.214286,0.336957,0.478261,0.375,0.23913,0.307692,0.336957,0.258621,0.375,0.275862,0.307692,0.375,0.333333,0.407407,0.229167,0.307692,0.336957,0.0,0.0,0.0,0.000711,0.0,0.0,1.125,0.0,0.0,0.0,0.0,-2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.320946,0.002947,0.0,0.326752,0.0,0.0,0.0,3000.0,0.0,0.0,43.0,0.694737,0.636876,0.742063,0.0,0.0,0.0,0.004122,0.0,0.0,4e-06,0.191919,0.000605,0.000783,0.002007,0.000645,0.98913,0.0,0.005508,5e-06,18.45937,-1.0,0.002763,0.05991,0.0,0.000715,0.002632,0.0,0.64674,0.002144,0.0,0.76858,7e-06,0.081311,0.298816,1484.363636,0.001308,0.333611,44.865557,1.2e-05,0.000561,0.000859,0.007741,0.001386,1.5e-05,2.7e-05,2.0,0.004209,0.004708,0.021277,0.006529,1.956522,0.0,0.021277,4.3e-05,203.1,-1.0,0.041658,1.141379,0.0,0.033819,0.25,0.0,2.461087,0.005759,0.0,2.899631,5.2e-05,0.990099,0.995025,1658.0,0.009319,2.5,261.9,0.000203,0.006228,0.014769,0.079001,0.016587,0.000233,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1302.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.000565,0.0,0.0,1.074074,0.0,0.0,0.0,0.0,1423.0,1.032609,42.0,1426.0,0.0,0.0,0.0,0.0,0.0,0.0,0.418263,0.002157,0.434249,0.0,2.0,96.0,2.0,94.0,0.499894,0.0,0.0,0.0,1540.0,0.0,0.0,29.4625,0.316327,0.505263,0.426136,0.0,0.0,0.0,0.003542,0.0,0.0,0.505376,0.428312,0.505208,0.5,0.42,0.505263,0.5,0.517241,0.505263,0.505263,0.505263,0.511111,0.440476,0.465909,0.511364,0.482143,0.5,0.505435,0.5,0.505208,0.517241,0.5,0.505263,0.42,0.505435,0.510638,0.505319,0.505376,0.505263,0.505376,0.5,0.505376,0.505435,0.505376,0.505435,0.505263,0.505376,0.505435,3e-06,0.0,0.000276,0.000649,0.0,0.000272,1.12069,0.0,0.005319,4e-06,5.48,1434.2,0.0,0.0,0.0,0.0,0.0,0.0,0.437607,0.002201,0.0,0.552241,5e-06,0.0,0.330033,1537.0,0.000698,0.2625,42.046,0.479592,0.5,0.448454,5e-06,0.000334,0.000306,0.004717,0.000894,7e-06,0.505319,0.4375,0.448276,0.483871,0.397849,0.435484,0.543478,0.517241,0.5,0.489362,0.5,0.511111,0.428571,0.444444,0.511364,0.478723,0.5,0.505435,0.481481,0.484043,0.517241,0.480741,0.5,0.397849,0.5,0.510638,0.464286,0.517241,0.518519,0.488372,0.543478,0.458333,0.534884,0.468085,0.505376,0.545455,0.5,0.535714,7e-06,0.0,0.000703,0.001174,0.0,0.000692,1.533333,0.0,0.010638,8e-06,13.62,1470.0,0.0,0.0,0.0,0.0,0.0,0.0,0.559448,0.002846,0.0,0.71291,8e-06,0.0,0.990099,1570.5,0.001441,0.5,76.121667,0.6875,0.623656,0.658163,9e-06,0.00057,0.000618,0.006681,0.001498,1.1e-05,0.505435,0.462366,0.47,0.518519,0.397849,0.47,0.56,0.517241,0.568966,0.530612,0.517241,0.511628,0.428571,0.446809,0.511364,0.478723,0.5,0.505435,0.488372,0.51,0.517241,0.48913,0.534483,0.397849,0.593023,0.541667,0.488867,0.570714,0.555556,0.517857,0.56,0.488867,0.541667,0.489362,0.505376,0.5625,0.517241,0.548387,0.0,0.0,0.0,0.0,0.0,0.0,0.690722,0.0,0.0,0.0,0.0,1369.0,0.0,0.0,0.0,0.0,0.0,0.0,0.275061,0.001524,0.0,0.301118,0.0,0.0,0.0,1521.0,0.0,0.0,0.0,0.294737,0.319588,0.28125,0.0,0.0,0.0,0.001899,0.0,0.0,0.434783,0.488372,0.375,0.357143,0.489583,0.396552,0.518519,0.516667,0.413793,0.413793,0.414894,0.511111,0.505208,0.47,0.511905,0.5,0.511364,0.511905,0.464286,0.468421,0.516667,0.465116,0.396552,0.489583,0.413793,0.481481,0.346154,0.396552,0.357143,0.413793,0.518519,0.346154,0.413793,0.375,0.434783,0.489362,0.346154,0.396552,8e-06,0.0,0.000723,0.001811,0.0,0.000718,2.82,0.0,0.01087,1.1e-05,16.9255,1511.0,0.0,0.0,0.0,0.0,0.0,0.0,1.11936,0.005658,0.0,1.334378,1.3e-05,0.0,0.990099,5335.0,0.001625,0.830303,105.7,1.268817,1.153846,1.25,1.3e-05,0.000837,0.000846,0.011447,0.00223,1.7e-05,5e-06,0.25,0.000706,0.000897,0.002313,0.000861,1.010638,0.0,0.005931,7e-06,21.31105,1458.294737,0.004754,0.13234,0.0,0.001861,0.008065,0.0,0.74114,0.002269,0.0,0.945853,8e-06,0.098799,0.328236,1523.556701,0.001522,0.397894,50.200018,1.8e-05,0.000712,0.001647,0.01119,0.001706,2.3e-05,3.3e-05,3.0,0.00478,0.005819,0.031579,0.00853,2.0,0.0,0.03125,5.4e-05,243.1,1578.0,0.065203,2.138649,0.0,0.068151,0.5,0.0,2.65851,0.006589,0.0,3.810854,6e-05,0.995025,0.995025,1968.0,0.010616,3.0,300.4,0.000371,0.00994,0.035137,0.131978,0.02248,0.000418,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,1055.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1435.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,8e-06,0.0,0.001098,0.001288,0.0,0.001025,1.84,1176.25,0.010753,8e-06,20.6925,1839.0,1.06383,47.0,1500.0,0.0,7e-06,0.0,0.0,0.0,0.0,1.243738,0.003086,0.702232,1500.0,4.0,98.0,4.0,95.0,1.810363,1.2e-05,0.0,0.664452,2669.25,0.00224,0.6,74.567857,0.704542,0.755102,0.753266,1.1e-05,0.000804,0.000627,0.011307,0.00191,1.5e-05,0.697917,0.7,0.72,0.724138,0.714286,0.730769,0.758621,0.520833,0.739583,0.722222,0.730769,0.538462,0.678571,0.607143,0.517857,0.505319,0.505435,0.517241,0.740741,0.73913,0.520833,0.740741,0.729167,0.741379,0.740741,0.767442,0.723588,0.75,0.758621,0.739583,0.758621,0.740741,0.767442,0.75,0.769231,0.785714,0.75,0.769231,6e-06,0.333333,0.001096,0.001198,0.00314,0.001194,1.811563,1199.125,0.010526,8e-06,25.775,1496.5625,0.008925,0.179141,0.0,0.0,0.0,0.0,1.248601,0.003125,1497.818182,1.781993,1.1e-05,0.165017,0.497512,2691.333333,0.002393,0.583333,69.325,0.644558,0.695055,0.709184,1.8e-05,0.00096,0.001529,0.013414,0.002251,2.4e-05,0.696429,0.724138,0.740741,0.73913,0.75,0.741379,0.791667,0.520833,0.75,0.730769,0.75,0.542553,0.68,0.62069,0.517857,0.5,0.505263,0.517241,0.744681,0.75,0.520833,0.747368,0.73913,0.75,0.740741,0.75,0.741379,0.758621,0.75,0.71875,0.791667,0.738636,0.785714,0.75,0.75,0.791667,0.75,0.785714,1.5e-05,1.0,0.001604,0.001995,0.010638,0.002196,2.178571,1286.0,0.012048,1.7e-05,51.45,1504.0,0.02325,0.354478,0.0,0.0,0.0,0.0,1.43004,0.003946,1511.25,2.0477,1.8e-05,0.497512,0.990099,2792.0,0.003771,1.0,132.3,0.859375,0.806452,0.857143,3.9e-05,0.001628,0.003676,0.020774,0.004187,4.9e-05,0.714286,0.74,0.75,0.755556,0.792683,0.759259,0.807692,0.521739,0.758621,0.755319,0.76087,0.595745,0.724138,0.638298,0.517857,0.5,0.505263,0.517241,0.75,0.758621,0.520833,0.75,0.767857,0.777778,0.729167,0.785714,0.755319,0.785714,0.790698,0.744681,0.807692,0.755319,0.793103,0.777778,0.766667,0.807692,0.785138,0.795455,0.0,0.0,0.000542,0.000583,0.0,0.0,1.285714,1078.0,0.0,0.0,0.0,1487.0,0.0,0.0,0.0,0.0,0.0,0.0,1.048195,0.00225,1481.0,1.503551,6e-06,0.0,0.0,2570.0,0.001058,0.142857,26.225,0.326087,0.569919,0.613402,0.0,0.000241,0.0,0.00595,0.000675,3e-06,0.505208,0.5,0.714286,0.714286,0.517857,0.482759,0.758621,0.52,0.480769,0.480769,0.480769,0.53125,0.517857,0.5,0.518519,0.516129,0.517857,0.518519,0.74,0.708333,0.52,0.740741,0.673913,0.517857,0.480769,0.728261,0.72,0.68,0.714286,0.480769,0.758621,0.72,0.480769,0.702128,0.505208,0.75,0.714286,0.68,2.3e-05,1.0,0.0028,0.003517,0.010638,0.003282,6.392857,2510.25,0.022727,2.4e-05,69.31,4473.0,0.02325,0.482846,0.0,0.0,0.0,0.0,2.987768,0.009043,3026.0,4.5016,3.1e-05,0.497512,1.487611,7497.0,0.00597,1.776923,210.3075,2.022222,2.043011,1.979694,5.2e-05,0.002614,0.004775,0.037965,0.006798,7e-05,6e-06,0.309278,0.000772,0.001021,0.002685,0.001079,1.714286,1223.142857,0.006294,7e-06,26.568731,1500.0,0.008149,0.578348,0.0,0.004339,0.015306,0.010753,0.80707,0.00244,1504.44898,1.325233,9e-06,0.113163,0.350101,2724.917526,0.001848,0.452172,54.785685,2.6e-05,0.00083,0.002603,0.015351,0.002129,3.7e-05,4.9e-05,4.0,0.005328,0.007309,0.042553,0.010653,3.392857,1675.0,0.033333,6.7e-05,309.7,1723.0,0.103413,4.187852,0.0,0.133685,0.5,1.0,2.855111,0.007885,1670.0,5.347471,7.6e-05,0.995025,0.996678,3291.0,0.013006,4.0,382.2,0.000559,0.014539,0.065104,0.264784,0.029581,0.000776,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,977.0,0.0,0.0,0.0,1369.0,0.0,0.0,0.0,0.0,0.0,0.0,5.3e-05,0.0,1369.0,0.000157,0.0,0.0,0.0,2423.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.1e-05,6.0,0.008,0.024545,0.09375,0.019231,4.375,1937.0,0.089888,0.000118,537.9,1993.0,8.5,96.0,3076.0,0.428418,8.289503,1.086957,0.57092,2.0,2.0,6.24301,0.027977,1.0,1844.0,4.0,100.0,4.0,99.0,8.736819,0.000126,0.998573,0.998004,3676.0,0.019765,7.0,804.7,1.0,1.0,1.0,0.003842,0.044307,1.145082,800.0,0.224618,1.086957,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.2e-05,3.666667,0.007571,0.009466,0.074468,0.013661,3.90625,1836.0,0.057471,9e-05,404.55,2357.75,0.079968,7.869855,0.271739,0.307321,2.0,1.5,4.524384,0.014964,1844.0,8.376502,0.000118,0.998573,0.998004,3557.5,0.017206,6.0,625.3,1.0,1.0,1.0,0.003842,0.022153,0.163583,800.0,0.112309,0.362319,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,9.1e-05,6.0,0.008,0.024545,0.09375,0.019231,4.375,1937.0,0.089888,0.000118,537.9,3076.0,0.428418,8.289503,1.086957,0.57092,2.0,2.0,6.24301,0.027977,1844.0,8.736819,0.000126,0.998573,0.998004,3676.0,0.019765,7.0,804.7,1.0,1.0,1.0,0.003842,0.044307,1.145082,800.0,0.224618,1.086957,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.2e-05,3.0,0.007571,0.009466,0.074468,0.013661,3.8125,1836.0,0.057471,8.5e-05,398.6,2093.0,0.071403,7.513904,3.745748e-05,0.287719,2.0,1.0,4.524384,0.01183,1844.0,8.016185,0.000114,0.998573,0.998004,3532.0,0.017206,6.0,625.3,1.0,1.0,1.0,0.003842,0.018716,0.03199,800.0,0.077531,0.006146,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.00015,14.0,0.018077,0.053097,0.114583,0.035287,45.576923,14875.0,0.11236,0.000203,995.6,24000.0,0.46412,20.897043,1.086957,0.832788,2.0,3.0,13.676932,0.029928,19122.0,26.555213,0.000312,2.980149,4.955421,33595.0,0.044963,12.6,1248.0,8.470588,12.387097,8.470588,0.003842,0.044307,1.145082,4200.08569,0.224618,1.086957,9e-06,0.5,0.001276,0.003669,0.006242,0.001527,2.253906,1375.505155,0.007796,9e-06,54.285312,1553.621053,0.015615,2.209878,0.011322,0.01575,0.036082,0.103093,0.995606,0.003004,1526.381443,3.096836,1.3e-05,0.167747,0.464548,2900.680412,0.002711,0.677103,89.194118,6.1e-05,0.001185,0.015519,294.122688,0.004429,0.01146,9.1e-05,6.0,0.008,0.024545,0.09375,0.019231,4.375,1937.0,0.089888,0.000118,537.9,3076.0,0.428418,8.289503,1.086957,0.57092,2.0,2.0,6.24301,0.027977,1844.0,8.736819,0.000126,0.998573,0.998004,3676.0,0.019765,7.0,804.7,0.003842,0.044307,1.145082,800.0,0.224618,1.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,1009.0,0.0,0.0,0.0,1500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021614,0.000708,1432.0,0.131042,0.0,0.0,0.0,2501.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000655,0.0,0.0


## 2. データやモデルの準備
データの分割，標準化など

In [22]:
# データの分割
train_df = train_test_df[train_test_df["winPlacePerc"].notnull()]
test_df = train_test_df[train_test_df["winPlacePerc"].isnull()]

X_train = train_df.copy()
y_train = X_train["winPlacePerc"]
X_train = X_train.drop("winPlacePerc", axis=1)
X_test = test_df.copy()
X_test = X_test.drop("winPlacePerc", axis=1)
X_train1, X_valid, y_train1, y_valid = train_test_split(X_train, y_train, random_state=42)

In [23]:
# 何もしてない状態でどの程度のスコアになっているか確認
rfr = RandomForestRegressor(n_estimators=500, n_jobs=-1,random_state=42)
rfr.fit(X_train1, y_train1)
print("MAE: ", MAE(rfr.predict(X_valid), y_valid))

MAE:  0.02391583694869215


### gbm, rfr, svr用のデータを用意する

In [24]:
X_train_gbm = X_train.copy()
X_test_gbm = X_test.copy()
X_train1_gbm = X_train1.copy()
X_valid_gbm = X_valid.copy()

X_train_rfr = X_train.copy()
X_test_rfr = X_test.copy()
X_train1_rfr = X_train1.copy()
X_valid_rfr = X_valid.copy()

# スケールが違いすぎると影響を受けてしまうので標準化
SS = StandardScaler().fit(pd.concat([X_train, X_test]))
X_train_svr = pd.DataFrame(SS.transform(X_train))
X_test_svr = pd.DataFrame(SS.transform(X_test))
X_train1_svr = pd.DataFrame(SS.transform(X_train1))
X_valid_svr = pd.DataFrame(SS.transform(X_valid))

### 特徴量の数をscikit-learnのRFEを使って減らす
特徴量の数を50個,20個,5個まで減らしたデータセットを作成する．(5時間以上かかります)<br>
同じ流れを追いたい人以外は，一つ下のセルは飛ばす事を推奨します(実行してもこれ以降のセルで結果が上書きされます)．<br>

In [26]:
# 時間がかなりかかります．
# 流れを追いたい人のみ実行することを推奨します(実行してもこれ以降のセルで結果が上書きされます)．
# gbm 
perm_gbm = PermutationImportance(lgb.LGBMRegressor(n_estimators=80, random_state=42,n_jobs=-1),random_state=42,cv=None,scoring="neg_mean_absolute_error",n_iter=5)
select_gbm = RFE(perm_gbm, n_features_to_select=200,verbose=1,step=3).fit(X_train_gbm, y_train)
temp_gbm_200 = select_gbm.get_support()
X_train_gbm_200 = X_train_gbm.loc[:, temp_gbm_200]
X_test_gbm_200 = X_test_gbm.loc[:, temp_gbm_200]
X_train1_gbm_200 = X_train1_gbm.loc[:, temp_gbm_200]
X_valid_gbm_200 = X_valid_gbm.loc[:, temp_gbm_200]
select_gbm = RFE(perm_gbm, n_features_to_select=50,verbose=1,step=1).fit(X_train_gbm_200, y_train)
temp_gbm_50 = select_gbm.get_support()
X_train_gbm_50 = X_train_gbm_200.loc[:, temp_gbm_50]
X_test_gbm_50 = X_test_gbm_200.loc[:, temp_gbm_50]
X_train1_gbm_50 = X_train1_gbm_200.loc[:, temp_gbm_50]
X_valid_gbm_50 = X_valid_gbm_200.loc[:, temp_gbm_50]
select_gbm = RFE(perm_gbm, n_features_to_select=20,verbose=1,step=1).fit(X_train_gbm_50, y_train)
temp_gbm_20 = select_gbm.get_support()
X_train_gbm_20 = X_train_gbm_50.loc[:, temp_gbm_20]
X_test_gbm_20 = X_test_gbm_50.loc[:, temp_gbm_20]
X_train1_gbm_20 = X_train1_gbm_50.loc[:, temp_gbm_20]
X_valid_gbm_20 = X_valid_gbm_50.loc[:, temp_gbm_20]
select_gbm = RFE(perm_gbm, n_features_to_select=5,verbose=1,step=1).fit(X_train_gbm_20, y_train)
temp_gbm_5 = select_gbm.get_support()
X_train_gbm_5 = X_train_gbm_20.loc[:, temp_gbm_5]
X_test_gbm_5 = X_test_gbm_20.loc[:, temp_gbm_5]
X_train1_gbm_5 = X_train1_gbm_20.loc[:, temp_gbm_5]
X_valid_gbm_5 = X_valid_gbm_20.loc[:, temp_gbm_5]
# rfr
perm_rfr = PermutationImportance(RandomForestRegressor(n_estimators=50, random_state=42,n_jobs=-1),random_state=42,cv=None,scoring="neg_mean_absolute_error",n_iter=3)
select_rfr = RFE(perm_rfr, n_features_to_select=200, verbose=1,step=5).fit(X_train_rfr, y_train)
temp_rfr_200 = select_rfr.get_support()
X_train_rfr_200 = X_train_rfr.loc[:, temp_rfr_200]
X_test_rfr_200 = X_test_rfr.loc[:, temp_rfr_200]
X_train1_rfr_200 = X_train1_rfr.loc[:, temp_rfr_200]
X_valid_rfr_200 = X_valid_rfr.loc[:, temp_rfr_200]
select_rfr = RFE(perm_rfr, n_features_to_select=50, verbose=1,step=1).fit(X_train_rfr_200, y_train)
temp_rfr_50 = select_rfr.get_support()
X_train_rfr_50 = X_train_rfr_200.loc[:, temp_rfr_50]
X_test_rfr_50 = X_test_rfr_200.loc[:, temp_rfr_50]
X_train1_rfr_50 = X_train1_rfr_200.loc[:, temp_rfr_50]
X_valid_rfr_50 = X_valid_rfr_200.loc[:, temp_rfr_50]
select_rfr = RFE(perm_rfr, n_features_to_select=20, verbose=1,step=1).fit(X_train_rfr_50, y_train)
temp_rfr_20 = select_rfr.get_support()
X_train_rfr_20 = X_train_rfr_50.loc[:, temp_rfr_20]
X_test_rfr_20 = X_test_rfr_50.loc[:, temp_rfr_20]
X_train1_rfr_20 = X_train1_rfr_50.loc[:, temp_rfr_20]
X_valid_rfr_20 = X_valid_rfr_50.loc[:, temp_rfr_20]
select_rfr = RFE(perm_rfr, n_features_to_select=5, verbose=1,step=1).fit(X_train_rfr_20, y_train)
temp_rfr_5 = select_rfr.get_support()
X_train_rfr_5 = X_train_rfr_20.loc[:, temp_rfr_5]
X_test_rfr_5 = X_test_rfr_20.loc[:, temp_rfr_5]
X_train1_rfr_5 = X_train1_rfr_20.loc[:, temp_rfr_5]
X_valid_rfr_5 = X_valid_rfr_20.loc[:, temp_rfr_5]
# svm
perm_svr = PermutationImportance(LinearSVR(random_state=42),random_state=42,cv=None,scoring="neg_mean_absolute_error",n_iter=5)
select_svr = RFE(perm_svr, n_features_to_select=200, verbose=1,step=3).fit(X_train_svr, y_train)
temp_svr_200 = select_svr.get_support()
X_train_svr_200 = pd.DataFrame(X_train_svr).loc[:, temp_svr_200]
X_test_svr_200 = pd.DataFrame(X_test_svr).loc[:, temp_svr_200]
X_train1_svr_200 = pd.DataFrame(X_train1_svr).loc[:, temp_svr_200]
X_valid_svr_200 = pd.DataFrame(X_valid_svr).loc[:, temp_svr_200]
select_svr = RFE(perm_svr, n_features_to_select=50, verbose=1,step=1).fit(X_train_svr_200, y_train)
temp_svr_50 = select_svr.get_support()
X_train_svr_50 = pd.DataFrame(X_train_svr_200).loc[:, temp_svr_50]
X_test_svr_50 = pd.DataFrame(X_test_svr_200).loc[:, temp_svr_50]
X_train1_svr_50 = pd.DataFrame(X_train1_svr_200).loc[:, temp_svr_50]
X_valid_svr_50 = pd.DataFrame(X_valid_svr_200).loc[:, temp_svr_50]
select_svr = RFE(perm_svr, n_features_to_select=20, verbose=1,step=1).fit(X_train_svr_50, y_train)
temp_svr_20 = select_svr.get_support()
X_train_svr_20 = pd.DataFrame(X_train_svr_50).loc[:, temp_svr_20]
X_test_svr_20 = pd.DataFrame(X_test_svr_50).loc[:, temp_svr_20]
X_train1_svr_20 = pd.DataFrame(X_train1_svr_50).loc[:, temp_svr_20]
X_valid_svr_20 = pd.DataFrame(X_valid_svr_50).loc[:, temp_svr_20]
select_svr = RFE(perm_svr, n_features_to_select=5, verbose=1,step=1).fit(X_train_svr_20, y_train)
temp_svr_5 = select_svr.get_support()
X_train_svr_5 = pd.DataFrame(X_train_svr_20).loc[:, temp_svr_5]
X_test_svr_5 = pd.DataFrame(X_test_svr_20).loc[:, temp_svr_5]
X_train1_svr_5 = pd.DataFrame(X_train1_svr_20).loc[:, temp_svr_5]
X_valid_svr_5 = pd.DataFrame(X_valid_svr_20).loc[:, temp_svr_5]

Fitting estimator with 454 features.
Fitting estimator with 451 features.
Fitting estimator with 448 features.
Fitting estimator with 445 features.
Fitting estimator with 442 features.
Fitting estimator with 439 features.
Fitting estimator with 436 features.
Fitting estimator with 433 features.
Fitting estimator with 430 features.
Fitting estimator with 427 features.
Fitting estimator with 424 features.
Fitting estimator with 421 features.
Fitting estimator with 418 features.
Fitting estimator with 415 features.
Fitting estimator with 412 features.
Fitting estimator with 409 features.
Fitting estimator with 406 features.
Fitting estimator with 403 features.
Fitting estimator with 400 features.
Fitting estimator with 397 features.
Fitting estimator with 394 features.
Fitting estimator with 391 features.
Fitting estimator with 388 features.
Fitting estimator with 385 features.
Fitting estimator with 382 features.
Fitting estimator with 379 features.
Fitting estimator with 376 features.
F

Fitting estimator with 62 features.
Fitting estimator with 61 features.
Fitting estimator with 60 features.
Fitting estimator with 59 features.
Fitting estimator with 58 features.
Fitting estimator with 57 features.
Fitting estimator with 56 features.
Fitting estimator with 55 features.
Fitting estimator with 54 features.
Fitting estimator with 53 features.
Fitting estimator with 52 features.
Fitting estimator with 51 features.
Fitting estimator with 50 features.
Fitting estimator with 49 features.
Fitting estimator with 48 features.
Fitting estimator with 47 features.
Fitting estimator with 46 features.
Fitting estimator with 45 features.
Fitting estimator with 44 features.
Fitting estimator with 43 features.
Fitting estimator with 42 features.
Fitting estimator with 41 features.
Fitting estimator with 40 features.
Fitting estimator with 39 features.
Fitting estimator with 38 features.
Fitting estimator with 37 features.
Fitting estimator with 36 features.
Fitting estimator with 35 fe

Fitting estimator with 84 features.
Fitting estimator with 83 features.
Fitting estimator with 82 features.
Fitting estimator with 81 features.
Fitting estimator with 80 features.
Fitting estimator with 79 features.
Fitting estimator with 78 features.
Fitting estimator with 77 features.
Fitting estimator with 76 features.
Fitting estimator with 75 features.
Fitting estimator with 74 features.
Fitting estimator with 73 features.
Fitting estimator with 72 features.
Fitting estimator with 71 features.
Fitting estimator with 70 features.
Fitting estimator with 69 features.
Fitting estimator with 68 features.
Fitting estimator with 67 features.
Fitting estimator with 66 features.
Fitting estimator with 65 features.
Fitting estimator with 64 features.
Fitting estimator with 63 features.
Fitting estimator with 62 features.
Fitting estimator with 61 features.
Fitting estimator with 60 features.
Fitting estimator with 59 features.
Fitting estimator with 58 features.
Fitting estimator with 57 fe

Fitting estimator with 140 features.
Fitting estimator with 139 features.
Fitting estimator with 138 features.
Fitting estimator with 137 features.
Fitting estimator with 136 features.
Fitting estimator with 135 features.
Fitting estimator with 134 features.
Fitting estimator with 133 features.
Fitting estimator with 132 features.
Fitting estimator with 131 features.
Fitting estimator with 130 features.
Fitting estimator with 129 features.
Fitting estimator with 128 features.
Fitting estimator with 127 features.
Fitting estimator with 126 features.
Fitting estimator with 125 features.
Fitting estimator with 124 features.
Fitting estimator with 123 features.
Fitting estimator with 122 features.
Fitting estimator with 121 features.
Fitting estimator with 120 features.
Fitting estimator with 119 features.
Fitting estimator with 118 features.
Fitting estimator with 117 features.
Fitting estimator with 116 features.
Fitting estimator with 115 features.
Fitting estimator with 114 features.
F

In [25]:
temp_gbm_50 = [
       'maxPlace', 'heals_boosts_std_rank', 'kills_mean', 'walkDistance_mean',
       'weaponsAcquired_mean', 'totalDistance_mean', 'killPerc_mean',
       'killPlacePerc_mean', 'kills_Dis_mean', 'boosts_mean_rank',
       'walkDistance_mean_rank', 'weaponsAcquired_mean_rank',
       'winPoints_mean_rank', 'totalDistance_mean_rank',
       'killStreaks_rate_mean_rank', 'heals_boosts_mean_rank',
       'weaponsAcquired_Dis_mean_rank', 'boosts_max', 'killPlace_max',
       'walkDistance_max', 'totalDistance_max', 'killPerc_max',
       'killPlacePerc_max', 'heals_boostsPerc_max', 'boosts_max_rank',
       'killPlace_max_rank', 'killPoints_max_rank', 'rideDistance_max_rank',
       'walkDistance_max_rank', 'totalDistance_max_rank', 'killPlace_min',
       'totalDistance_min', 'killPerc_min', 'killPlacePerc_min',
       'kills_Dis_min', 'headshotKills_min_rank', 'killPlace_min_rank',
       'killStreaks_min_rank', 'longestKill_min_rank', 'rankPoints_min_rank',
       'totalDistance_min_rank', 'kills_assists_min_rank',
       'killStreaks_rate_min_rank', 'killPerc_min_rank', 'killPlacePerc_sum',
       'heals_boosts_match_mean', 'damageDealt_match_mean',
       'longestKill_match_mean', 'boosts_match_mean', 'kills_Dis_match_max']
X_train_gbm_50 = X_train_gbm.loc[:, temp_gbm_50]
X_test_gbm_50 = X_test_gbm.loc[:, temp_gbm_50]
X_train1_gbm_50 = X_train1_gbm.loc[:, temp_gbm_50]
X_valid_gbm_50 = X_valid_gbm.loc[:, temp_gbm_50]

In [26]:
temp_gbm_20 = [
       'maxPlace', 'weaponsAcquired_mean', 'totalDistance_mean',
       'killPerc_mean', 'killPlacePerc_mean', 'totalDistance_mean_rank',
       'killPlace_max', 'walkDistance_max', 'killPerc_max',
       'killPlacePerc_max', 'boosts_max_rank', 'killPlace_max_rank',
       'walkDistance_max_rank', 'totalDistance_max_rank', 'killPerc_min',
       'killPlacePerc_min', 'kills_Dis_min', 'killStreaks_min_rank',
       'killPerc_min_rank', 'boosts_match_mean']
X_train_gbm_20 = X_train_gbm_50.loc[:, temp_gbm_20]
X_test_gbm_20 = X_test_gbm_50.loc[:, temp_gbm_20]
X_train1_gbm_20 = X_train1_gbm_50.loc[:, temp_gbm_20]
X_valid_gbm_20 = X_valid_gbm_50.loc[:, temp_gbm_20]

In [27]:
temp_gbm_5 = [
       'totalDistance_mean_rank', 'killPlacePerc_max', 'killPlace_max_rank',
       'walkDistance_max_rank', 'killPerc_min']
X_train_gbm_5 = X_train_gbm_20.loc[:, temp_gbm_5]
X_test_gbm_5 = X_test_gbm_20.loc[:, temp_gbm_5]
X_train1_gbm_5 = X_train1_gbm_20.loc[:, temp_gbm_5]
X_valid_gbm_5 = X_valid_gbm_20.loc[:, temp_gbm_5]

In [28]:
temp_rfr_50 = [
       'walkDistance', 'pointsSum', 'killPlacePerc', 'weaponsAcquired_Dis',
       'damageDealt_std_rank', 'heals_boosts_std_rank', 'damageDealt_mean',
       'killPlace_mean', 'walkDistance_mean', 'weaponsAcquired_mean',
       'totalDistance_mean', 'killStreaks_rate_mean', 'boosts_mean_rank',
       'swimDistance_mean_rank', 'walkDistance_mean_rank',
       'totalDistance_mean_rank', 'killStreaks_rate_mean_rank',
       'killPlacePerc_mean_rank', 'boosts_max', 'killPlace_max',
       'walkDistance_max', 'totalDistance_max', 'killPlacePerc_max',
       'heals_boostsPerc_max', 'boosts_max_rank', 'killPlace_max_rank',
       'walkDistance_max_rank', 'weaponsAcquired_max_rank',
       'totalDistance_max_rank', 'killPlacePerc_max_rank',
       'DBNOs_Dis_max_rank', 'weaponsAcquired_Dis_max_rank', 'killPlace_min',
       'weaponsAcquired_min', 'totalDistance_min', 'killPerc_min',
       'killPlacePerc_min', 'headshotKills_min_rank', 'heals_min_rank',
       'longestKill_min_rank', 'walkDistance_min_rank',
       'totalDistance_min_rank', 'killStreaks_rate_min_rank',
       'kills_assists_per_heal_boost_min_rank', 'killPlacePerc_min_rank',
       'kills_Dis_min_rank', 'boosts_sum', 'totalDistance_sum',
       'damageDealt_per_heal_boost_sum', 'kills_Dis_match_max']
X_train_rfr_50 = X_train_rfr.loc[:, temp_rfr_50]
X_test_rfr_50 = X_test_rfr.loc[:, temp_rfr_50]
X_train1_rfr_50 = X_train1_rfr.loc[:, temp_rfr_50]
X_valid_rfr_50 = X_valid_rfr.loc[:, temp_rfr_50]

In [29]:
temp_rfr_20 = [
       'weaponsAcquired_mean', 'totalDistance_mean', 'walkDistance_mean_rank',
       'totalDistance_mean_rank', 'killPlace_max', 'walkDistance_max',
       'killPlacePerc_max', 'boosts_max_rank', 'killPlace_max_rank',
       'walkDistance_max_rank', 'totalDistance_max_rank',
       'killPlacePerc_max_rank', 'killPlace_min', 'killPerc_min',
       'killPlacePerc_min', 'totalDistance_min_rank',
       'killStreaks_rate_min_rank', 'kills_Dis_min_rank', 'boosts_sum',
       'totalDistance_sum']
X_train_rfr_20 = X_train_rfr_50.loc[:, temp_rfr_20]
X_test_rfr_20 = X_test_rfr_50.loc[:, temp_rfr_20]
X_train1_rfr_20 = X_train1_rfr_50.loc[:, temp_rfr_20]
X_valid_rfr_20 = X_valid_rfr_50.loc[:, temp_rfr_20]

In [30]:
temp_rfr_5 = [
       'totalDistance_mean_rank', 'killPlacePerc_max', 'killPlace_max_rank',
       'walkDistance_max_rank', 'killPlacePerc_max_rank']
X_train_rfr_5 = X_train_rfr_20.loc[:, temp_rfr_5]
X_test_rfr_5 = X_test_rfr_20.loc[:, temp_rfr_5]
X_train1_rfr_5 = X_train1_rfr_20.loc[:, temp_rfr_5]
X_valid_rfr_5 = X_valid_rfr_20.loc[:, temp_rfr_5]

In [31]:
temp_svr_50 = np.array(
    [12,  16,  21,  28,  79,  84,  92, 101, 103, 105, 113, 127, 134,
     136, 151, 169, 189, 191, 193, 203, 210, 212, 215, 226, 227, 231,
     259, 264, 265, 277, 279, 282, 296, 297, 302, 303, 305, 322, 329,
     331, 333, 336, 343, 345, 346, 408, 444, 430, 426, 439])
X_train_svr_50 = pd.DataFrame(X_train_svr).loc[:, temp_svr_50]
X_test_svr_50 = pd.DataFrame(X_test_svr).loc[:, temp_svr_50]
X_train1_svr_50 = pd.DataFrame(X_train1_svr).loc[:, temp_svr_50]
X_valid_svr_50 = pd.DataFrame(X_valid_svr).loc[:, temp_svr_50]

In [32]:
temp_svr_20 = np.array(
    [16,  21,  28, 101, 127, 136, 151, 189, 203, 212, 227, 259, 282,
     297, 302, 305, 444, 430, 426, 439])
X_train_svr_20 = pd.DataFrame(X_train_svr_50).loc[:, temp_svr_20]
X_test_svr_20 = pd.DataFrame(X_test_svr_50).loc[:, temp_svr_20]
X_train1_svr_20 = pd.DataFrame(X_train1_svr_50).loc[:, temp_svr_20]
X_valid_svr_20 = pd.DataFrame(X_valid_svr_50).loc[:, temp_svr_20]

In [33]:
temp_svr_5 = np.array([203, 227, 282, 302, 305])
X_train_svr_5 = pd.DataFrame(X_train_svr_20).loc[:, temp_svr_5]
X_test_svr_5 = pd.DataFrame(X_test_svr_20).loc[:, temp_svr_5]
X_train1_svr_5 = pd.DataFrame(X_train1_svr_20).loc[:, temp_svr_5]
X_valid_svr_5 = pd.DataFrame(X_valid_svr_20).loc[:, temp_svr_5]

### 出来上がったデータフレームに対してスケール変換をかける
svrとMLPで使うときのためにスケール変換をしておく

In [34]:
# スケール変換
SS = StandardScaler().fit(pd.concat([X_train_gbm_50, X_test_gbm_50]))
X_train_gbm_50_scaled = pd.DataFrame(SS.transform(X_train_gbm_50))
X_test_gbm_50_scaled = pd.DataFrame(SS.transform(X_test_gbm_50))
SS = StandardScaler().fit(pd.concat([X_train_rfr_50, X_test_rfr_50]))
X_train_rfr_50_scaled = pd.DataFrame(SS.transform(X_train_rfr_50))
X_test_rfr_50_scaled = pd.DataFrame(SS.transform(X_test_rfr_50))

SS = StandardScaler().fit(pd.concat([X_train_gbm_20, X_test_gbm_20]))
X_train_gbm_20_scaled = pd.DataFrame(SS.transform(X_train_gbm_20))
X_test_gbm_20_scaled = pd.DataFrame(SS.transform(X_test_gbm_20))
SS = StandardScaler().fit(pd.concat([X_train_rfr_20, X_test_rfr_20]))
X_train_rfr_20_scaled = pd.DataFrame(SS.transform(X_train_rfr_20))
X_test_rfr_20_scaled = pd.DataFrame(SS.transform(X_test_rfr_20))

### 解析に使うモデルの準備

In [35]:
# 1層目
gbm_top50_1 = lgb.LGBMRegressor(n_jobs=-1, random_state=42, n_estimators=5000, max_depth=10, num_leaves=30, learning_rate=0.005, min_data_in_leaf=30)
rfr_top50_1 = RandomForestRegressor(n_jobs=-1, random_state=42, max_depth=30, max_features=25, min_samples_leaf=2, min_samples_split=2, n_estimators=10000)
gbm_top20_1 = lgb.LGBMRegressor(n_jobs=-1, random_state=42, n_estimators=5000, max_depth=10, num_leaves=30, learning_rate=0.005, min_data_in_leaf=30)
rfr_top20_1 = RandomForestRegressor(n_jobs=-1, random_state=42, max_depth=30, max_features=10, min_samples_leaf=2, min_samples_split=2, n_estimators=10000)
svr_1 = SVR(kernel="rbf", C=10, gamma=0.001)
MLP_svr_1 = MLPRegressor(random_state=42, alpha=0.1, hidden_layer_sizes=(50, 50, 50, 50, 50, 50, 50, 50,))
MLP_gbm_1 = MLPRegressor(random_state=42, alpha=0.05, hidden_layer_sizes=(20, 20, 20, 20, 20, 20, 20, 20,))
MLP_rfr_1 = MLPRegressor(random_state=42, alpha= 0.05, hidden_layer_sizes=(20, 20, 20, 20, 20, 20, 20, 20,))
ridge_1 = Ridge(alpha=10, random_state=42)
knn_svr_1 = KNeighborsRegressor(n_neighbors=30, n_jobs=-1, weights="distance")
knn_gbm_1 = KNeighborsRegressor(n_neighbors=30, n_jobs=-1, weights="distance")
knn_rfr_1 = KNeighborsRegressor(n_neighbors=30, n_jobs=-1, weights="distance")
# 2層目
gbm_2 = lgb.LGBMRegressor(n_jobs=-1, random_state=42, learning_rate=0.005, max_depth=2, n_estimators=5000,num_leaves=20, min_data_in_leaf=50)
rfr_2 = RandomForestRegressor(n_jobs=-1, random_state=42, max_depth=6, max_features=8, min_samples_leaf=4, min_samples_split=2, n_estimators=10000)
# 3層目
gbm_3 = lgb.LGBMRegressor(n_jobs=-1, random_state=42, learning_rate=0.005, max_depth=2, n_estimators=5000, num_leaves=20, min_data_in_leaf=50)

## 3. モデル作成(スタッキング)
1時間くらいかかります

In [36]:
# 後で使うデータを事前に持っきておく
test_temp = pd.read_csv("./input/test.csv")
test_maxPlace = test_temp["maxPlace"]
test_matchId = test_temp["matchId"]
train_temp = pd.read_csv("./input/train.csv")
train_maxPlace = train_temp["maxPlace"]
train_matchId = train_temp["matchId"]

In [37]:
# 参考： kaggleで勝つデータ分析の技術
# https://github.com/ghmagazine/kagglebook/blob/master/ch07/ch07-01-stacking.py
def predict_cv(model, train_x, train_y, test_x):
    preds = []
    preds_test = []
    va_idxes = []
    kf = GroupKFold(n_splits=5)

    # クロスバリデーションで学習・予測を行い，予測値とインデックスを保存する
    for i, (tr_idx, va_idx) in enumerate(kf.split(train_x, groups=train_matchId)):
        tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
        tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]
        model.fit(tr_x, tr_y)
        pred = model.predict(va_x)
        preds.append(pred)
        pred_test = model.predict(test_x)
        preds_test.append(pred_test)
        va_idxes.append(va_idx)

    # バリデーションデータに対する予測値を連結し，その後元の順序に並べ直す
    va_idxes = np.concatenate(va_idxes)
    preds = np.concatenate(preds, axis=0)
    order = np.argsort(va_idxes)
    pred_train = preds[order]

    # テストデータに対する予測値の平均をとる
    preds_test = np.mean(preds_test, axis=0)

    return pred_train, preds_test

In [38]:
# 1層目
print("-----Start-----\n")
pred1_train_a, pred1_test_a = predict_cv(gbm_top50_1, X_train_gbm_50, y_train, X_test_gbm_50)
pred1_train_b, pred1_test_b = predict_cv(rfr_top50_1, X_train_rfr_50, y_train, X_test_rfr_50)
pred1_train_c, pred1_test_c = predict_cv(gbm_top20_1, X_train_gbm_20, y_train, X_test_gbm_20)
pred1_train_d, pred1_test_d = predict_cv(rfr_top20_1, X_train_rfr_20, y_train, X_test_rfr_20)

pred1_train_e, pred1_test_e = predict_cv(svr_1, X_train_svr_50, y_train, X_test_svr_50)
pred1_train_f, pred1_test_f = predict_cv(ridge_1, X_train_svr_50, y_train, X_test_svr_50)

pred1_train_g, pred1_test_g = predict_cv(MLP_svr_1, X_train_svr_50, y_train, X_test_svr_50)
pred1_train_h, pred1_test_h = predict_cv(MLP_gbm_1, X_train_gbm_20_scaled, y_train, X_test_gbm_20_scaled)
pred1_train_i, pred1_test_i = predict_cv(MLP_rfr_1, X_train_rfr_20_scaled, y_train, X_test_rfr_20_scaled)

pred1_train_j, pred1_test_j = predict_cv(knn_svr_1, X_train_svr_5, y_train, X_test_svr_5)
pred1_train_k, pred1_test_k = predict_cv(knn_rfr_1, X_train_rfr_5, y_train, X_test_rfr_5)
pred1_train_l, pred1_test_l = predict_cv(knn_gbm_1, X_train_gbm_5, y_train, X_test_gbm_5)

# 2層目
# -----特徴量作成
train_mean_feature1 = (pred1_train_a+pred1_train_b+pred1_train_c+pred1_train_d+pred1_train_e+pred1_train_f+pred1_train_g+pred1_train_h+pred1_train_i+pred1_train_j+pred1_train_k+pred1_train_l)/12
test_mean_feature1 = (pred1_test_a+pred1_test_b+pred1_test_c+pred1_test_d+pred1_test_e+pred1_test_f+pred1_test_g+pred1_test_h+pred1_test_i+pred1_test_j+pred1_test_k+pred1_test_l)/12
train_mean_feature2 = (pred1_train_g+pred1_train_h+pred1_train_i+pred1_train_c+pred1_train_a)/5
test_mean_feature2 = (pred1_test_g+pred1_test_h+pred1_test_i+pred1_test_c+pred1_test_a)/5
train_dif_feature1 = (pred1_train_c+pred1_train_a)/2 - (pred1_train_j+pred1_train_k+pred1_train_l)/3
test_dif_feature1 = (pred1_test_c+pred1_test_a)/2 - (pred1_test_j+pred1_test_k+pred1_test_l)/3
train_dif_feature2 = (pred1_train_c+pred1_train_a)/2 - (pred1_train_g+pred1_train_h+pred1_train_i)/3
test_dif_feature2 = (pred1_test_c+pred1_test_a)/2 - (pred1_test_g+pred1_test_h+pred1_test_i)/3
train_dif_feature3 = (pred1_train_g+pred1_train_h+pred1_train_i)/3 - (pred1_train_j+pred1_train_k+pred1_train_l)/3
test_dif_feature3 = (pred1_test_g+pred1_test_h+pred1_test_i)/3 - (pred1_test_j+pred1_test_k+pred1_test_l)/3
# -----

X_train2 = pd.DataFrame({
    "1a": pred1_train_a,"1b": pred1_train_b,"1c": pred1_train_c,"1d": pred1_train_d,"1e": pred1_train_e, "1f": pred1_train_f,"1g": pred1_train_g,"1h": pred1_train_h,"1i": pred1_train_i,"1j": pred1_train_j,"1k": pred1_train_k,"1l": pred1_train_l,
    "1mean_a": train_mean_feature1,"1mean_b": train_mean_feature2,"1dif_a": train_dif_feature1,"1dif_b": train_dif_feature2,"1dif_c": train_dif_feature3,
    })
X_test2 = pd.DataFrame({
    "1a": pred1_test_a,"1b": pred1_test_b,"1c": pred1_test_c,"1d": pred1_test_d,"1e": pred1_test_e,"1f": pred1_test_f,"1g": pred1_test_g,"1h": pred1_test_h,"1i": pred1_test_i,"1j": pred1_test_j,"1k": pred1_test_k,"1l": pred1_test_l,
    "1mean_a": test_mean_feature1, "1mean_b": test_mean_feature2,"1dif_a": test_dif_feature1,"1dif_b": test_dif_feature2,"1dif_c": test_dif_feature3
    })
print("----- 1層目 -----")
print(" valid mean MAE: ",  MAE(train_mean_feature1, y_train))
print(" valid gbm MAE: ",  MAE((pred1_train_c+pred1_train_a)/2, y_train))
print(" valid rfr MAE: ",  MAE((pred1_train_b+pred1_train_d)/2, y_train))
print(" valid knn MAE: ",  MAE((pred1_train_j+pred1_train_k+pred1_train_l)/3, y_train))
print(" valid MLP MAE: ",  MAE((pred1_train_g+pred1_train_h+pred1_train_i)/3, y_train))
print(" valid svr MAE: ",  MAE(pred1_train_e, y_train))
print(" valid ridge MAE: ",  MAE(pred1_train_f, y_train))
print(" valid (MLP+gbm) MAE: ",  MAE((pred1_train_g+pred1_train_h+pred1_train_i+pred1_train_c+pred1_train_a)/5, y_train))
display(X_train2.head(2))
display(X_test2.head(2))
X_train2_gbm = X_train_gbm_5.reset_index().join(X_train2)
X_test2_gbm = X_test_gbm_5.reset_index().join(X_test2)
X_train2_rfr = X_train_rfr_5.reset_index().join(X_train2)
X_test2_rfr = X_test_rfr_5.reset_index().join(X_test2)
X_train2_svr = X_train_svr_5.reset_index().join(X_train2)
X_test2_svr = X_test_svr_5.reset_index().join(X_test2)

pred2_train_a, pred2_test_a = predict_cv(gbm_2, X_train2_gbm, y_train, X_test2_gbm)
pred2_train_b, pred2_test_b = predict_cv(rfr_2, X_train2_rfr, y_train, X_test2_rfr)

# 3層目
# -----特徴量作成
train_mean_feature2 = (pred2_train_a+pred2_train_b)/2
test_mean_feature2 = (pred2_test_a+pred2_test_b)/2
train_add_feature2 = pred2_train_a + pred2_train_b
test_add_feature2 = pred2_test_a + pred2_test_b
train_dif_feature2 = pred2_train_a - pred2_train_b
test_dif_feature2 = pred2_test_a - pred2_test_b
train_mul_feature2 = pred2_train_a * pred2_train_b
test_mul_feature2 = pred2_test_a * pred2_test_b
# -----

X_train3 = pd.DataFrame({
    "2a": pred2_train_a, "2b": pred2_train_b, 
    "2mean": train_mean_feature2, "2add": train_add_feature2, "2dif": train_dif_feature2, "2mul": train_mul_feature2
})
X_test3 = pd.DataFrame({
    "2a": pred2_test_a, "2b": pred2_test_b, 
    "2mean": test_mean_feature2, "2add": test_add_feature2, "2dif": test_dif_feature2, "2mul": test_mul_feature2
})
print("----- 2層目 -----")
print(" valid mean MAE: ",  MAE(train_mean_feature2, y_train))
print(" valid gbm MAE: ",  MAE(pred2_train_a, y_train))
print(" valid rfr MAE: ",  MAE(pred2_train_b, y_train))
display(X_train3.head(2))
display(X_test3.head(2))

pred3_train, pred3_test = predict_cv(gbm_3, X_train3, y_train, X_test3)
print("----- 3層目 -----")
print(" valid MAE: ", MAE(pred3_train, y_train))
print("\n-----Finish-----")

-----Start-----

----- 1層目 -----
 valid mean MAE:  0.03326791574209489
 valid gbm MAE:  0.03288062560860551
 valid rfr MAE:  0.035324572542379404
 valid knn MAE:  0.03780631129283767
 valid MLP MAE:  0.03376210994443785
 valid svr MAE:  0.0424968182985502
 valid ridge MAE:  0.04125669022189193
 valid (MLP+gbm) MAE:  0.03208460923920183


Unnamed: 0,1a,1b,1c,1d,1e,1f,1g,1h,1i,1j,1k,1l,1mean_a,1mean_b,1dif_a,1dif_b,1dif_c
0,0.180967,0.182083,0.171778,0.173768,0.156772,0.184706,0.16271,0.161109,0.177767,0.161243,0.183092,0.181625,0.173135,0.170866,0.001053,0.009178,-0.008125
1,0.613862,0.639644,0.568659,0.649119,0.680503,0.680148,0.648308,0.655884,0.625622,0.799671,0.675354,0.671259,0.659003,0.622467,-0.124168,-0.052011,-0.072157


Unnamed: 0,1a,1b,1c,1d,1e,1f,1g,1h,1i,1j,1k,1l,1mean_a,1mean_b,1dif_a,1dif_b,1dif_c
0,0.28452,0.291739,0.29002,0.296366,0.309242,0.314528,0.294155,0.298074,0.290449,0.304181,0.306561,0.304231,0.298672,0.291443,-0.017722,-0.006956,-0.010765
1,0.224696,0.224105,0.228659,0.219196,0.246234,0.228668,0.223144,0.210961,0.224174,0.240114,0.228109,0.219812,0.226489,0.222327,-0.002667,0.007252,-0.009919


----- 2層目 -----
 valid mean MAE:  0.03158739394099304
 valid gbm MAE:  0.032495806199072254
 valid rfr MAE:  0.031403379232057306


Unnamed: 0,2a,2b,2mean,2add,2dif,2mul
0,0.16317,0.171954,0.167562,0.335124,-0.008783,0.028058
1,0.600898,0.639799,0.620348,1.240697,-0.038901,0.384454


Unnamed: 0,2a,2b,2mean,2add,2dif,2mul
0,0.291024,0.292918,0.291971,0.583942,-0.001893,0.085246
1,0.218697,0.219747,0.219222,0.438443,-0.00105,0.048058


----- 3層目 -----
 valid MAE:  0.03288962479481427

-----Finish-----


### 一番スコアの良い2層目のrfr (pred2_train_b)を採用

## 提出用のデータの後処理
目的変数は離散値なので,モデルの予測値が離散値に近い値を予測していたならばその離散値に合わせる
<br><br>
目的変数 = (maxPlace - 最終順位) / (maxPlace - 1)
<br>
幅 = 1/(maxPlace-1)

In [39]:
pred_train = pred2_train_b.copy()
cnt=0
for i, pre in enumerate(pred2_train_b):
    gap = 1.0 / (train_maxPlace[i] - 1)
    winPlacePerc = round(pre / gap) * gap
    if abs(winPlacePerc - pre) < (gap/8):
        cnt+=1
        pred_train[i] = winPlacePerc
print("valid MAE: ", MAE(pred_train, y_train))
print("cnt: ", cnt)

valid MAE:  0.03139289003410715
cnt:  2265


In [40]:
pred = pred2_test_b.copy()
cnt=0
for i, pre in enumerate(pred2_test_b):
    gap = 1.0 / (test_maxPlace[i] - 1)
    winPlacePerc = round(pre / gap) * gap
    if abs(winPlacePerc - pre) < (gap/8):
        cnt+=1
        pred[i] = winPlacePerc
print("cnt: ", cnt)

cnt:  658


## 提出用ファイルの作成
このファイルを保存しているディレクトリに，"submission.csv"を保存しておいてください

In [41]:
submission = pd.read_csv("./submission.csv")
submission["winPlacePerc"] = pred
submission.to_csv("submission.csv", index=False)

## ご覧いただきありがとうございました
以上で提出ファイルの再現は終わりです．<br>
下記の内容は雑記なので，提出ファイルに関係はありません．<br>

## 再現性の確保について
random-stateを固定しているからといって必ずしも同じスコアになるわけではない．<br>
columnsの順番が入れ替わっているだけでスコアが変動する場合がある(今回使ったモデルの中だとMLP)．<br>
気づけなくてかなり苦労しました．

In [42]:
# 行の並び順を変える
X_train_gbm_20_sorted = X_train_gbm_20.sort_index(axis=1)

display(X_train_gbm_20.head())
display(X_train_gbm_20_sorted.head())

grid_params = {"alpha": [0.05], "hidden_layer_sizes": [(20, 20, 20, 20, 20, 20, 20, 20,)]}
MLP = MLPRegressor(random_state=42)

generator_train = GroupKFold(n_splits=5).split(X_train_rfr, y_train, train_matchId)
notSort_columns = GridSearchCV(MLP, grid_params, cv=generator_train, n_jobs=-1, verbose=1, scoring="neg_mean_absolute_error")
generator_train = GroupKFold(n_splits=5).split(X_train_rfr, y_train, train_matchId)
Sort_columns = GridSearchCV(MLP, grid_params, cv=generator_train, n_jobs=-1, verbose=1, scoring="neg_mean_absolute_error")

notSort_columns.fit(X_train_gbm_20, y_train)
Sort_columns.fit(X_train_gbm_20_sorted, y_train)
print()

Unnamed: 0,maxPlace,weaponsAcquired_mean,totalDistance_mean,killPerc_mean,killPlacePerc_mean,totalDistance_mean_rank,killPlace_max,walkDistance_max,killPerc_max,killPlacePerc_max,boosts_max_rank,killPlace_max_rank,walkDistance_max_rank,totalDistance_max_rank,killPerc_min,killPlacePerc_min,kills_Dis_min,killStreaks_min_rank,killPerc_min_rank,boosts_match_mean
0,1.0,0.002477,0.150743,0.291667,0.828125,0.208333,1.666667,0.181883,0.291667,0.833333,0.520833,0.833333,0.229167,0.1875,0.291667,0.822917,0.0,0.385417,0.385417,0.000686
1,1.021277,0.002886,0.943759,0.901042,0.119792,0.702128,0.375,0.821789,0.958333,0.1875,0.712766,0.12766,0.680851,0.659574,0.84375,0.052083,1.6e-05,0.840426,0.925532,0.000639
2,1.020833,0.002837,0.601348,0.876316,0.131579,0.583333,0.306122,0.697163,0.926316,0.157895,0.697917,0.083333,0.583333,0.583333,0.826316,0.105263,3e-05,0.84375,0.947917,0.000754
3,1.107143,0.003132,1.721997,0.52399,0.356061,0.821429,1.612903,2.017686,0.853535,0.505051,0.589286,0.392857,0.857143,0.785714,0.277778,0.171717,0.0,0.392857,0.392857,0.000774
4,1.0,0.001434,0.105125,0.4825,0.56,0.178571,2.75,0.134194,0.68,0.77,0.446429,0.785714,0.071429,0.071429,0.285,0.35,0.0,0.410714,0.410714,0.000731


Unnamed: 0,boosts_match_mean,boosts_max_rank,killPerc_max,killPerc_mean,killPerc_min,killPerc_min_rank,killPlacePerc_max,killPlacePerc_mean,killPlacePerc_min,killPlace_max,killPlace_max_rank,killStreaks_min_rank,kills_Dis_min,maxPlace,totalDistance_max_rank,totalDistance_mean,totalDistance_mean_rank,walkDistance_max,walkDistance_max_rank,weaponsAcquired_mean
0,0.000686,0.520833,0.291667,0.291667,0.291667,0.385417,0.833333,0.828125,0.822917,1.666667,0.833333,0.385417,0.0,1.0,0.1875,0.150743,0.208333,0.181883,0.229167,0.002477
1,0.000639,0.712766,0.958333,0.901042,0.84375,0.925532,0.1875,0.119792,0.052083,0.375,0.12766,0.840426,1.6e-05,1.021277,0.659574,0.943759,0.702128,0.821789,0.680851,0.002886
2,0.000754,0.697917,0.926316,0.876316,0.826316,0.947917,0.157895,0.131579,0.105263,0.306122,0.083333,0.84375,3e-05,1.020833,0.583333,0.601348,0.583333,0.697163,0.583333,0.002837
3,0.000774,0.589286,0.853535,0.52399,0.277778,0.392857,0.505051,0.356061,0.171717,1.612903,0.392857,0.392857,0.0,1.107143,0.785714,1.721997,0.821429,2.017686,0.857143,0.003132
4,0.000731,0.446429,0.68,0.4825,0.285,0.410714,0.77,0.56,0.35,2.75,0.785714,0.410714,0.0,1.0,0.071429,0.105125,0.178571,0.134194,0.071429,0.001434


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    7.9s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    6.7s finished





In [43]:
print("notSort score: ", notSort_columns.best_score_)
print("Sort score: ", Sort_columns.best_score_)

notSort score:  -0.04152552433666905
Sort score:  -0.037874443922474396


## KFold vs groupKFold (cvスコアとLeader Boardスコアの乖離)
KFoldで分けた場合とgroupKFoldで分けた場合のスコアの差を見てみる．<br>
(単純な分割(KFold)と，同じmatchIDを持つデータがtrainとtestに混在しないようにした分割(groupKFold)の比較)<br>

参考: https://scikit-learn.org/stable/auto_examples/model_selection/plot_cv_indices.html#sphx-glr-auto-examples-model-selection-plot-cv-indices-py

In [44]:
from sklearn.model_selection import KFold
generator_train = GroupKFold(n_splits=5).split(X_train_rfr, y_train, train_matchId)

grid_params = {"max_depth": [6,10,50], "max_features": [7,8,9,10,11], "min_samples_split": [2,3,4], "min_samples_leaf": [2,3,4]}
rfr = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)

# groupKFold
rfr_groupKFold = GridSearchCV(rfr, grid_params, cv=generator_train, n_jobs=-1, verbose=1, scoring="neg_mean_absolute_error")
rfr_groupKFold.fit(X_train2_rfr, y_train)

# KFold
rfr_KFold = GridSearchCV(rfr, grid_params, cv=KFold(n_splits=5), n_jobs=-1, verbose=1, scoring="neg_mean_absolute_error")
rfr_KFold.fit(X_train2_rfr, y_train)
print()

Fitting 5 folds for each of 135 candidates, totalling 675 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   17.1s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 675 out of 675 | elapsed:  7.2min finished


Fitting 5 folds for each of 135 candidates, totalling 675 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   19.1s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 675 out of 675 | elapsed:  7.5min finished





In [45]:
print("groupKFold params: ", rfr_groupKFold.best_params_)
print("gropuKFold score: ", -1*rfr_groupKFold.best_score_)
print()
print("KFold params: ", rfr_KFold.best_params_)
print("KFold score: ", -1*rfr_KFold.best_score_)

groupKFold params:  {'max_depth': 6, 'max_features': 8, 'min_samples_leaf': 4, 'min_samples_split': 2}
gropuKFold score:  0.0313624214572135

KFold params:  {'max_depth': 50, 'max_features': 10, 'min_samples_leaf': 2, 'min_samples_split': 2}
KFold score:  0.02074197664930879
