In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import sklearn.preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.grid_search import GridSearchCV
import warnings
warnings.filterwarnings("ignore")
from tqdm import tqdm_notebook

In [None]:
# dataframe display 옵션
pd.set_option('display.max_columns' , 30)
pd.set_option('display.width', None)

In [None]:
raw_data = pd.read_csv('./data/final3/final3_Data(version3).csv', dtype={                  
					'matchType'                         : 'int16',
            'DBNOs_max'                         : 'float32',
            'assists_max'                       : 'float32',
            'boosts_max'                        : 'float32',
            'damageDealt_max'                   : 'float32',
            'headshotKills_max'                 : 'float32',
            'heals_max'                         : 'float32',
            'killStreaks_max'                   : 'float32',
            'kills_max'                         : 'float32',
            'longestKill_max'                   : 'float32',
            'revives_max'                       : 'float32',
            'roadKills_max'                     : 'float32',
            'teamKills_max'                     : 'float32',
            'vehicleDestroys_max'               : 'float32',
            'weaponsAcquired_max'               : 'float32',
            'DBNOs_min'                         : 'float32',
            'assists_min'                       : 'float32',
            'boosts_min'                        : 'float32',
            'damageDealt_min'                   : 'float32',
            'headshotKills_min'                 : 'float32',
            'heals_min'                         : 'float32',
            'killStreaks_min'                   : 'float32',
            'kills_min'                         : 'float32',
            'longestKill_min'                   : 'float32',
            'revives_min'                       : 'float32',
            'roadKills_min'                     : 'float32',
            'teamKills_min'                     : 'float32',
            'vehicleDestroys_min'               : 'float32',
            'weaponsAcquired_min'               : 'float32',
            'assists_mean'                      : 'float32',
            'boosts_mean'                       : 'float32',
            'damageDealt_mean'                  : 'float32',
            'DBNOs_mean'                        : 'float32',
            'headshotKills_mean'                : 'float32',
            'heals_mean'                        : 'float32',
            'kills_mean'                        : 'float32',
            'killStreaks_mean'                  : 'float32',
            'longestKill_mean'                  : 'float32',
            'revives_mean'                      : 'float32',
            'roadKills_mean'                    : 'float32',
            'teamKills_mean'                    : 'float32',
            'vehicleDestroys_mean'              : 'float32',
            'weaponsAcquired_mean'              : 'float32',
            'assists_match_mean'                : 'float32',
            'boosts_match_mean'                 : 'float32',
            'damageDealt_match_mean'            : 'float32',
            'DBNOs_match_mean'                  : 'float32',
            'headshotKills_match_mean'          : 'float32',
            'heals_match_mean'                  : 'float32',
            'kills_match_mean'                  : 'float32',
            'killStreaks_match_mean'            : 'float32',
            'longestKill_match_mean'            : 'float32',
            'revives_match_mean'                : 'float32',
            'roadKills_match_mean'              : 'float32',
            'teamKills_match_mean'              : 'float32',
            'vehicleDestroys_match_mean'        : 'float32',
            'weaponsAcquired_match_mean'        : 'float32',
            'match_size'                        : 'int16',
            'total_distance_max'                : 'float32',
            'total_distance_min'                : 'float32',
            'total_distance_mean'               : 'float32',
            'total_distance_match_mean'         : 'float32',
            'winPlacePerc'                      : 'float32'
                })

In [None]:
data = raw_data.copy()

In [None]:
slic_data = data.copy()
slic_data = slic_data.drop(columns=["Id",'matchId','groupId', 'matchType','damageDealt_mean','damageDealt_max','damageDealt_min','damageDealt_match_mean'], axis=1)
slic_data.head(1)

In [None]:
train_df, test_df = train_test_split(slic_data, train_size = 0.7)

scaler = sklearn.preprocessing.minmax_scale

train_y = np.array(train_df['winPlacePerc'])
train_x = scaler(train_df.drop(columns=['winPlacePerc'], axis=1))

test_y = np.array(test_df['winPlacePerc'])
test_x = scaler(test_df.drop(columns=['winPlacePerc'], axis=1))
print(train_x.shape, test_x.shape, train_y.shape, test_y.shape)

In [None]:
train_df.dropna(inplace = True)
print(train_df.isnull().any().any())
test_df.dropna(inplace = True)
print(test_df.isnull().any().any())

In [None]:
regr = RandomForestRegressor(n_estimators=100, min_samples_leaf=2, max_features=0.7, n_jobs=-1,max_depth=100)
# n_estimators : 트리 개수(default=10
# min_samples_leaf : leaf 노드의 최소 개수
# max_features : If float -> int(max_features * n_features)
#                트리 feature 개수 설정
# n_jobs : 성능 관련 파라미터(-1: using all processors)

In [None]:
%%time
regr.fit(train_x, train_y)

In [None]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 2, 0.7,100
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

In [10]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 2, 0.5, 70
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.014936601774823961
mae test:  0.052921071564795835


In [12]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 2, 0.7, 20
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.03936046654168393
mae test:  0.060682321694404504


In [10]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 2, 0.7, 70
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.014642157054311003
mae test:  0.05053937675047791


In [14]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 2, 0.5, 50
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.01493315982996057
mae test:  0.034938198855522996


In [10]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 3, 0.5
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.018600626430326565
mae test:  0.05516105910091987


In [10]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 2, 0.7
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.014634568791349232
mae test:  0.050810893814568325


In [27]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 2, 0.7
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.02713400654636574
mae test:  0.739951011870463


In [22]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 2, 0.3
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.028458961355457772
mae test:  0.7329331499019792


In [19]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 3, 2
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.04904916396395539
mae test:  0.7710277819125936


In [16]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 3, 0.8, leaf 개수를 늘릴수록 과적합됨
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.026486985512584518
mae test:  0.7056412756889169


In [11]:
print('mae train: ', mean_absolute_error(regr.predict(train_x), train_y)) # 100, 2, 0.7
print('mae test: ', mean_absolute_error(regr.predict(test_x), test_y))

mae train:  0.09026902624051832
mae test:  0.22370336004896982
