#### TSV files are essentially identical to CSV files except that TSV files use "tabs (\t)" while CSV files use commas to store data in tabular structure. As a result, loading TSV files are slightly different from how we've been loading CSV files.
##### Thanks to Clara Meister for providing this tutorial.

In [1]:
# For compatibility across multiple platforms
import os
import numpy as np
import pandas as pd
from scipy import spatial


# Load files using DictReader in Python
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import cluster
from sklearn import preprocessing
from sklearn.model_selection import KFold
import sklearn

In [2]:
train_filepath = "pubg-finish-placement-prediction/train_V2.csv"

test_filepath = "pubg-finish-placement-prediction/test_V2.csv"

#trainset_file = open(train_filepath,'rU')
trainset = pd.read_csv(train_filepath)

#testset_file = open(test_filepath,'rU')
testset = pd.read_csv(test_filepath)

Unnamed: 0           int64
Id                   int64
groupId              int64
matchId              int64
assists              int64
boosts               int64
damageDealt        float64
DBNOs                int64
headshotKills        int64
heals                int64
killPlace            int64
killPoints           int64
kills                int64
killStreaks          int64
longestKill        float64
matchDuration        int64
matchType           object
maxPlace             int64
numGroups            int64
rankPoints           int64
revives              int64
rideDistance       float64
roadKills            int64
swimDistance       float64
teamKills            int64
vehicleDestroys      int64
walkDistance       float64
weaponsAcquired      int64
winPoints            int64
winPlacePerc       float64
dtype: object

In [None]:
for i in trainset.index:
    trainset.at[i,"Id"] = int(trainset.at[i,"Id"],16)
    trainset.at[i,"matchId"] = int(trainset.at[i,"matchId"],16)
    trainset.at[i,"groupId"] = int(trainset.at[i,"groupId"],16)

trainset.head()

In [5]:
trainset.to_csv("pubg-finish-placement-prediction/train_V2_clean.csv")

Unnamed: 0.1,Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,...,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
0,0,35913017459246474,21756414768994750,45321147693812369,0,0,0.0,0,0,0,...,0,0.0,0,0.0,0,0,244.8,1,1466,0.4444
1,1,67264846101073980,29358430787743646,49173965273764108,0,0,91.47,0,0,0,...,0,0.0045,0,11.04,0,0,1434.0,5,0,0.64
2,2,8637285204745842,29917998133566068,4786602953643182,1,0,68.0,0,0,0,...,0,0.0,0,0.0,0,0,161.8,2,0,0.7755
3,3,19728345572649043,47622776820651809,68101503675608446,0,0,32.9,0,0,0,...,0,0.0,0,0.0,0,0,202.7,3,0,0.1667
4,4,13894076435569324,62491847359224029,30901772270576102,0,0,100.0,0,0,0,...,0,0.0,0,0.0,0,0,49.75,2,0,0.1875


In [167]:
matchTypeIndex = 15
trainset = trainset.drop(["matchType"],axis=1)
train_columns = list(trainset.columns[:-1])


x = trainset[train_columns]
y = trainset["winPlacePerc"]


x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)

In [180]:
x_train.head()

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints
1292256,63854951985661453,600250450733236,58790947321507903,0,0,0.0,0,0,0,86,...,1527,0,0.0,0,0.0,0,0,161.9,3,0
130376,30797503102741404,66137564618002392,31638120576856294,0,0,392.3,0,1,0,11,...,1497,0,0.0,0,0.0,0,0,111.6,2,0
531321,20500043549537120,67703441911112710,63956714187954744,0,1,0.0,0,0,1,68,...,-1,0,0.0,0,0.0,0,0,573.0,4,1497
3553841,53991784096945239,43602287311196324,66532421895858817,0,3,93.14,0,0,1,53,...,-1,0,2966.0,0,0.0,0,0,2435.0,3,1536
2531761,55471076664019257,52081892748649858,57819140417290411,0,0,0.0,0,0,0,42,...,-1,0,0.0,0,0.0,0,0,331.2,2,1464


In [None]:
def includeFeatures(trainset,featuresToInclude):
    featuresToInclude = set(featuresToInclude)
    for feature in trainset.columns:
        if feature not in featuresToInclude:
            trainset.drop(feature,axis=1)

In [169]:
print(trainset.columns)

Index([u'Id', u'groupId', u'matchId', u'assists', u'boosts', u'damageDealt',
       u'DBNOs', u'headshotKills', u'heals', u'killPlace', u'killPoints',
       u'kills', u'killStreaks', u'longestKill', u'matchDuration', u'maxPlace',
       u'numGroups', u'rankPoints', u'revives', u'rideDistance', u'roadKills',
       u'swimDistance', u'teamKills', u'vehicleDestroys', u'walkDistance',
       u'weaponsAcquired', u'winPoints', u'winPlacePerc'],
      dtype='object')


In [170]:
trainset_dmatrix = xgb.DMatrix(x_train.values,label=y_train.values)
valset_dmatrix = xgb.DMatrix(x_val.values,label=y_val.values)

In [171]:
params = {
    "max_depth" : 5,
    "eval_metric" : ["mae"],
}

In [172]:
clf = xgb.train(params, trainset_dmatrix, evals=[(trainset_dmatrix, "train"),(valset_dmatrix, 'val')], num_boost_round = 50)

predictions = xgb.predict(valset_dmatrix)

[18:49:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[0]	train-mae:0.201157	val-mae:nan
[18:49:24] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1]	train-mae:0.156014	val-mae:nan
[18:49:34] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2]	train-mae:0.126411	val-mae:nan
[18:49:44] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3]	train-mae:0.106182	val-mae:nan
[18:49:53] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[4]	train-mae:0.092854	val-mae:nan
[18:50:03] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[5]	train-mae:0.085187	val-mae:nan
[18:50:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[6]	tr

KeyboardInterrupt: 

[40]	train-mae:0.061916	val-mae:nan
[18:56:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[41]	train-mae:0.061776	val-mae:nan
[18:56:15] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[42]	train-mae:0.061705	val-mae:nan


KeyboardInterrupt: 

[43]	train-mae:0.061652	val-mae:nan


KeyboardInterrupt: 

[44]	train-mae:0.06163	val-mae:nan
[18:56:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[45]	train-mae:0.061516	val-mae:nan
[18:56:55] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[46]	train-mae:0.061459	val-mae:nan
[18:57:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[47]	train-mae:0.061409	val-mae:nan


KeyboardInterrupt: 

[48]	train-mae:0.06132	val-mae:nan
[18:57:25] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[49]	train-mae:0.061281	val-mae:nan


AttributeError: 'module' object has no attribute 'predict'