In [22]:
# For compatibility across multiple platforms
import os
import numpy as np
import pandas as pd
from scipy import spatial


# Load files using DictReader in Python
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import cluster
from sklearn import preprocessing
from sklearn.model_selection import KFold
import sklearn

from fastai.imports import *
from fastai.structured import *

In [23]:
train_filepath = "pubg-finish-placement-prediction/train_V2_clean.csv"

#trainset_file = open(train_filepath,'rU')
trainset = pd.read_csv(train_filepath,index_col=0)


In [24]:
print(len(trainset))

4446965


## Remove Matchtype

In [4]:
trainset = trainset.drop(["matchType"],axis=1)

In [25]:
moddedTrain = trainset

# Normalization and feature engineering

Add a feature for the number of players joined

In [26]:
moddedTrain['playersJoined'] = moddedTrain.groupby('matchId')['matchId'].transform('count')
moddedTrain['killsNorm'] = moddedTrain['kills']*((100-moddedTrain['playersJoined'])/100 + 1)
moddedTrain['damageDealtNorm'] = moddedTrain['damageDealt']*((100-moddedTrain['playersJoined'])/100 + 1)
moddedTrain['maxPlaceNorm'] = moddedTrain['maxPlace']*((100-moddedTrain['playersJoined'])/100 + 1)
moddedTrain['matchDurationNorm'] = moddedTrain['matchDuration']*((100-moddedTrain['playersJoined'])/100 + 1)

In [27]:
moddedTrain['healsandboosts'] = moddedTrain['heals'] + moddedTrain['boosts']

In [28]:
moddedTrain['totalDistance'] = moddedTrain['rideDistance'] + moddedTrain['walkDistance'] + moddedTrain['swimDistance']
moddedTrain['killsWithoutMoving'] = ((moddedTrain['kills'] > 0) & (moddedTrain['totalDistance'] == 0))

In [29]:
moddedTrain['headshot_rate'] = moddedTrain['headshotKills'] / moddedTrain['kills']
moddedTrain['headshot_rate'] = moddedTrain['headshot_rate'].fillna(0)

In [30]:
moddedTrain.drop(moddedTrain[moddedTrain['killsWithoutMoving'] == True].index, inplace=True)
moddedTrain.drop(moddedTrain[moddedTrain['roadKills'] > 8].index, inplace=True)

In [31]:
# moddedTrain['killPlace_over_maxPlace'] = moddedTrain['killPlace'] / moddedTrain['maxPlace']
# moddedTrain['killPlace_over_maxPlace'].fillna(0, inplace=True)
# moddedTrain['killPlace_over_maxPlace'].replace(np.inf, 0, inplace=True)

In [34]:
moddedTrain = pd.get_dummies(moddedTrain, columns=['matchType'])

In [35]:
matchType_encoding = moddedTrain.filter(regex='matchType')
matchType_encoding.head()

Unnamed: 0,matchType_crashfpp,matchType_crashtpp,matchType_duo,matchType_duo-fpp,matchType_flarefpp,matchType_flaretpp,matchType_normal-duo,matchType_normal-duo-fpp,matchType_normal-solo,matchType_normal-solo-fpp,matchType_normal-squad,matchType_normal-squad-fpp,matchType_solo,matchType_solo-fpp,matchType_squad,matchType_squad-fpp
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


# Train on XGBoost

In [36]:
y = trainset["winPlacePerc"]
x = moddedTrain.drop(columns = ['winPlacePerc'])
#x = trainset


x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)

In [37]:
x_train.head()

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,matchType_normal-duo,matchType_normal-duo-fpp,matchType_normal-solo,matchType_normal-solo-fpp,matchType_normal-squad,matchType_normal-squad-fpp,matchType_solo,matchType_solo-fpp,matchType_squad,matchType_squad-fpp
3709879,58778296523824421,57552450899685228,50330766165524780,1,0,160.5,2,0,0,34,...,0,0,0,0,0,0,0,0,0,1
2976726,59645058070396722,67917498821870333,62270442454551656,0,2,211.7,1,0,3,30,...,0,0,0,0,0,0,0,0,0,1
1480965,45179920714392169,51560932637547329,15499807132753007,0,0,0.0,0,0,0,90,...,0,0,0,0,0,0,0,0,1,0
2364069,8170175247629326,18513762154846188,71837108627913102,0,0,79.45,0,0,0,66,...,0,0,0,0,0,0,0,0,0,0
446760,32705388938807401,64111424573105024,3085894303091897,2,2,529.5,5,1,2,6,...,0,0,0,0,0,0,0,0,0,1


In [38]:
y_train.head()

3709879    0.5769
2976726    0.6538
1480965    0.0357
2364069    0.3958
446760     0.8929
Name: winPlacePerc, dtype: float64

In [39]:
trainset_dmatrix = xgb.DMatrix(x_train.values,label=y_train.values)
valset_dmatrix = xgb.DMatrix(x_val.values,label=y_val.values)

In [43]:
params = {
    "max_depth" : 25,
    "eval_metric" : ["mae"],
    "booster" : "dart",
}

In [None]:
clf = xgb.train(params, trainset_dmatrix, evals=[(trainset_dmatrix, "train"),(valset_dmatrix, 'val')], num_boost_round = 50)

predictions = xgb.predict(valset_dmatrix)

[08:24:52] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 566996 extra nodes, 0 pruned nodes, max_depth=25
[08:24:52] /workspace/src/gbm/gbtree.cc:492: drop 0 trees, weight = 1
[0]	train-mae:0.191199	val-mae:0.194797
[08:25:25] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 694258 extra nodes, 0 pruned nodes, max_depth=25
[08:25:25] /workspace/src/gbm/gbtree.cc:492: drop 0 trees, weight = 1
[1]	train-mae:0.136782	val-mae:0.14497
[08:26:01] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 830512 extra nodes, 0 pruned nodes, max_depth=25
[08:26:01] /workspace/src/gbm/gbtree.cc:492: drop 0 trees, weight = 1
[2]	train-mae:0.098925	val-mae:0.112349
[08:26:38] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 952202 extra nodes, 0 pruned nodes, max_depth=25
[08:26:38] /workspace/src/gbm/gbtree.cc:492: drop 0 trees, weight = 1
[3]	train-mae:0.072544	val-mae:0.091505
[08:27:18] /workspace/src/tree/updater_prune.cc:7

In [None]:
.059016

# Remove non-important features

In [18]:
x = moddedTrain[['walkDistance','killPlace','kills','killsNorm','totalDistance','matchDurationNorm','playersJoined','numGroups','matchDuration','boosts','DBNOs','weaponsAcquired','assists','healsandboosts','maxPlaceNorm','killStreaks','longestKill','maxPlace','rankPoints']]

x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)

In [19]:
trainset_dmatrix = xgb.DMatrix(x_train.values,label=y_train.values)
valset_dmatrix = xgb.DMatrix(x_val.values,label=y_val.values)

In [20]:
params = {
    "max_depth" : 20,
    "eval_metric" : ["mae"],
}

In [21]:
clf = xgb.train(params, trainset_dmatrix, evals=[(trainset_dmatrix, "train"),(valset_dmatrix, 'val')], num_boost_round = 20)

predictions = xgb.predict(valset_dmatrix)

[08:07:25] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 225808 extra nodes, 0 pruned nodes, max_depth=20
[0]	train-mae:0.192669	val-mae:0.194264
[08:07:37] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 271808 extra nodes, 0 pruned nodes, max_depth=20
[1]	train-mae:0.139951	val-mae:0.143882
[08:07:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 317174 extra nodes, 0 pruned nodes, max_depth=20
[2]	train-mae:0.104155	val-mae:0.110848
[08:08:01] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 348058 extra nodes, 0 pruned nodes, max_depth=20
[3]	train-mae:0.080209	val-mae:0.089816
[08:08:14] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 372692 extra nodes, 0 pruned nodes, max_depth=20
[4]	train-mae:0.064277	val-mae:0.07673
[08:08:26] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 368040 extra nodes, 0 pruned nodes, max_depth=20
[5]	train-mae:0.053773	val-mae:0.

AttributeError: module 'xgboost' has no attribute 'predict'