In [1]:
# For compatibility across multiple platforms
import os
import numpy as np
import pandas as pd
from scipy import spatial


# Load files using DictReader in Python
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import cluster
from sklearn import preprocessing
from sklearn.model_selection import KFold
import sklearn

from fastai.imports import *
from fastai.structured import *

In [2]:
train_filepath = "pubg-finish-placement-prediction/train_V2_clean.csv"

#trainset_file = open(train_filepath,'rU')
trainset = pd.read_csv(train_filepath,index_col=0)


  mask |= (ar1 == a)


In [3]:
print(len(trainset))

4446965


## Remove Matchtype

In [4]:
trainset = trainset.drop(["matchType"],axis=1)

In [5]:
moddedTrain = trainset

# Normalization and feature engineering

Add a feature for the number of players joined

In [6]:
moddedTrain['playersJoined'] = moddedTrain.groupby('matchId')['matchId'].transform('count')
moddedTrain['killsNorm'] = moddedTrain['kills']*((100-moddedTrain['playersJoined'])/100 + 1)
moddedTrain['damageDealtNorm'] = moddedTrain['damageDealt']*((100-moddedTrain['playersJoined'])/100 + 1)
moddedTrain['maxPlaceNorm'] = moddedTrain['maxPlace']*((100-moddedTrain['playersJoined'])/100 + 1)
moddedTrain['matchDurationNorm'] = moddedTrain['matchDuration']*((100-moddedTrain['playersJoined'])/100 + 1)

In [7]:
moddedTrain['healsandboosts'] = moddedTrain['heals'] + moddedTrain['boosts']

In [8]:
moddedTrain['totalDistance'] = moddedTrain['rideDistance'] + moddedTrain['walkDistance'] + moddedTrain['swimDistance']
moddedTrain['killsWithoutMoving'] = ((moddedTrain['kills'] > 0) & (moddedTrain['totalDistance'] == 0))

In [9]:
moddedTrain['headshot_rate'] = moddedTrain['headshotKills'] / moddedTrain['kills']
moddedTrain['headshot_rate'] = moddedTrain['headshot_rate'].fillna(0)

In [10]:
moddedTrain.drop(moddedTrain[moddedTrain['killsWithoutMoving'] == True].index, inplace=True)
moddedTrain.drop(moddedTrain[moddedTrain['roadKills'] > 8].index, inplace=True)

# Train on XGBoost

In [11]:
y = trainset["winPlacePerc"]
x = moddedTrain.drop(columns = ['winPlacePerc'])
#x = trainset


x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)

In [12]:
x_train.head()

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,winPoints,playersJoined,killsNorm,damageDealtNorm,maxPlaceNorm,matchDurationNorm,healsandboosts,totalDistance,killsWithoutMoving,headshot_rate
2733863,16151320431374440,56148932761981409,33692744009766118,0,0,0.0,0,0,0,94,...,1484,98,0.0,0.0,49.98,1360.68,0,0.0,False,0.0
1828042,35487793292258225,30616487438303001,41604832181806053,0,0,0.0,0,0,0,71,...,1483,91,0.0,0.0,30.52,1491.12,0,392.8,False,0.0
4411012,13669606878203978,7126515945574285,26243540407127332,1,0,149.0,1,0,7,76,...,0,96,0.0,154.96,29.12,1941.68,7,544.8,False,0.0
4063082,42546784394683845,44497644654126391,18405619820915479,1,0,126.0,0,0,0,76,...,0,85,0.0,144.9,29.9,2202.25,0,302.2,False,0.0
500750,40315551687439033,44533641195405447,20632743139291134,0,0,84.21,0,0,0,43,...,0,98,0.0,85.8942,27.54,1464.72,0,1911.0,False,0.0


In [13]:
y_train.head()

2733863    0.0000
1828042    0.1852
4411012    0.1852
4063082    0.0800
500750     0.8462
Name: winPlacePerc, dtype: float64

In [14]:
trainset_dmatrix = xgb.DMatrix(x_train.values,label=y_train.values)
valset_dmatrix = xgb.DMatrix(x_val.values,label=y_val.values)

In [15]:
params = {
    "max_depth" : 10,
    "eval_metric" : ["mae"],
}

In [16]:
clf = xgb.train(params, trainset_dmatrix, evals=[(trainset_dmatrix, "train"),(valset_dmatrix, 'val')], num_boost_round = 10)

predictions = xgb.predict(valset_dmatrix)

[05:17:13] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 1902 extra nodes, 0 pruned nodes, max_depth=10
[0]	train-mae:0.196009	val-mae:0.195865
[05:17:20] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 1908 extra nodes, 0 pruned nodes, max_depth=10
[1]	train-mae:0.146566	val-mae:0.146534
[05:17:26] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 1932 extra nodes, 0 pruned nodes, max_depth=10
[2]	train-mae:0.113714	val-mae:0.113759
[05:17:32] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 1944 extra nodes, 0 pruned nodes, max_depth=10
[3]	train-mae:0.092746	val-mae:0.092879
[05:17:39] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 1984 extra nodes, 0 pruned nodes, max_depth=10
[4]	train-mae:0.078974	val-mae:0.079177
[05:17:46] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 1998 extra nodes, 0 pruned nodes, max_depth=10
[5]	train-mae:0.070697	val-mae:0.070953
[05:

AttributeError: module 'xgboost' has no attribute 'predict'

In [None]:
.059016

# Remove non-important features

In [23]:
x = moddedTrain[['walkDistance','totalDistance','killPlace',]]

In [24]:
x

Unnamed: 0,kills,assists
0,0,0
1,0,0
2,0,1
3,0,0
4,1,0
5,1,0
6,0,0
7,0,0
8,0,0
9,0,0
