#### TSV files are essentially identical to CSV files, except that TSV files use tabs (`\t`) while CSV files use commas to separate the values of a table. As a result, loading TSV files is slightly different from how we've been loading CSV files.
##### Thanks to Clara Meister for providing this tutorial.

In [1]:
# For compatibility across multiple platforms
import os
import numpy as np
import pandas as pd
from scipy import spatial


# Plotting, gradient boosting (xgboost) and scikit-learn utilities
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import cluster
from sklearn import preprocessing
from sklearn.model_selection import KFold
import sklearn

In [25]:
# Locations of the training and test CSV files, relative to the notebook.
train_filepath = "pubg-finish-placement-prediction/train_V2_clean.csv"
test_filepath = "pubg-finish-placement-prediction/test_V2.csv"

# The first column of the training file is used as the row index;
# the test file is read with pandas' default integer index.
trainset = pd.read_csv(train_filepath, index_col=0)
testset = pd.read_csv(test_filepath)

In [26]:
# Remove every row whose target (winPlacePerc) is missing.
# This replaces a drop of one hard-coded index label (2744604): dropna is a
# no-op when the data is already clean, whereas dropping a label that is no
# longer present raises KeyError (e.g. when this cell is run a second time).
trainset = trainset.dropna(subset=["winPlacePerc"])

In [27]:
# Number of rows currently in the training frame
print(trainset.shape[0])

4446965


In [28]:
# Write trainset to CSV. The target path is the same file that train_filepath
# points at in the loading cell, so this overwrites the file the notebook reads.
trainset.to_csv("pubg-finish-placement-prediction/train_V2_clean.csv")

In [29]:
# Not used anywhere in this cell; kept in case another cell still reads it.
matchTypeIndex = 15

# errors="ignore" keeps the cell re-runnable: on a second run matchType is
# already gone and a plain drop would raise KeyError.
trainset = trainset.drop(["matchType"], axis=1, errors="ignore")

# Pick the features by name rather than by position, so the target is excluded
# even if it is not the last column of the frame.
# NOTE(review): this still includes Id, groupId and matchId as features --
# confirm that identifiers are meant to be fed to the model.
train_columns = [column for column in trainset.columns if column != "winPlacePerc"]

x = trainset[train_columns]
y = trainset["winPlacePerc"]

# 80/20 train/validation split; the fixed seed makes the split (and every
# metric computed from it) reproducible across kernel restarts.
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [30]:
# Preview the first rows of the training features
x_train.head()

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints
2714566,58472278889380755,1657434101040035,35597498315359076,1,0,212.1,2,1,2,8,...,-1,0,0.0,0,0.0,0,0,1910.0,3,1458
1985953,53977881098339660,64113720106816327,66431226524456095,0,0,200.5,2,0,0,90,...,1497,0,0.0,0,0.0,0,0,54.52,2,0
1036367,29935273402517032,51006396439529609,64259306844803646,0,2,291.3,2,0,5,12,...,-1,0,0.0,0,0.0,0,0,440.4,5,1336
2172329,52830069417329935,57443433677191739,26979526710759587,0,0,235.7,2,1,2,11,...,1500,0,0.0,0,0.0,0,0,2689.0,3,0
637302,34171362148143934,23116162434332375,50978066672431266,0,0,0.0,0,0,0,87,...,-1,1,0.0,0,0.0,0,0,66.2,1,1513


In [31]:
def includeFeatures(trainset,featuresToInclude):
    """Restrict ``trainset`` to the columns listed in ``featuresToInclude``.

    Every column whose label is not in ``featuresToInclude`` is removed from
    ``trainset`` in place. Labels in ``featuresToInclude`` that do not match
    any column are ignored.

    Parameters
    ----------
    trainset : pandas.DataFrame
        Frame to prune; it is modified in place.
    featuresToInclude : iterable
        Column labels to keep.

    Returns
    -------
    None
    """
    featuresToInclude = set(featuresToInclude)
    unwanted = [feature for feature in trainset.columns
                if feature not in featuresToInclude]
    # DataFrame.drop returns a new frame unless inplace=True is given; without
    # it the result is thrown away and the caller's frame is left unchanged.
    trainset.drop(unwanted, axis=1, inplace=True)

In [32]:
# Show the column labels currently present in trainset
print(trainset.columns)

Index([u'Id', u'groupId', u'matchId', u'assists', u'boosts', u'damageDealt',
       u'DBNOs', u'headshotKills', u'heals', u'killPlace', u'killPoints',
       u'kills', u'killStreaks', u'longestKill', u'matchDuration', u'maxPlace',
       u'numGroups', u'rankPoints', u'revives', u'rideDistance', u'roadKills',
       u'swimDistance', u'teamKills', u'vehicleDestroys', u'walkDistance',
       u'weaponsAcquired', u'winPoints', u'winPlacePerc'],
      dtype='object')


In [33]:
# Wrap the training and validation splits (as raw arrays plus their labels)
# in xgboost DMatrix objects, training split first.
trainset_dmatrix, valset_dmatrix = (
    xgb.DMatrix(features.values, label=target.values)
    for features, target in ((x_train, y_train), (x_val, y_val))
)

In [None]:
# Booster settings handed to xgb.train: tree depth limit and the metric(s)
# reported for each evaluation set.
params = dict(
    max_depth=5,
    eval_metric=["mae"],
)

In [None]:
# Train for 50 boosting rounds, evaluating on both the training and the
# validation DMatrix after every round (labelled "train" and "val" in the log).
clf = xgb.train(
    params,
    trainset_dmatrix,
    num_boost_round=50,
    evals=[(trainset_dmatrix, "train"), (valset_dmatrix, "val")],
)

# predict is a method of the Booster returned by xgb.train; the xgboost
# module itself has no predict function, so xgb.predict raises AttributeError.
predictions = clf.predict(valset_dmatrix)

[12:49:55] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[0]	train-mae:0.201139	val-mae:0.201276
[12:50:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1]	train-mae:0.156018	val-mae:0.15618
[12:50:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2]	train-mae:0.12643	val-mae:0.126615
[12:50:24] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3]	train-mae:0.10606	val-mae:0.106287
[12:50:33] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[4]	train-mae:0.092867	val-mae:0.093095
[12:50:43] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[5]	train-mae:0.085254	val-mae:0.085486
[12:50:53] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 prune