#### TSV files are essentially identical to CSV files except that TSV files use "tabs (\t)" while CSV files use commas to store data in tabular structure. As a result, loading TSV files are slightly different from how we've been loading CSV files.
##### Thanks to Clara Meister for providing this tutorial.

In [130]:
# For compatibility across multiple platforms
import os
import numpy as np
import pandas as pd
from scipy import spatial


# Load files using DictReader in Python
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import cluster
from sklearn import preprocessing
from sklearn.model_selection import KFold
import sklearn


In [142]:
train_filepath = "pubg-finish-placement-prediction/train_V2.csv"

test_filepath = "pubg-finish-placement-prediction/test_V2.csv"

#trainset_file = open(train_filepath,'rU')
trainset = pd.read_csv(train_filepath)

#testset_file = open(test_filepath,'rU')
testset = pd.read_csv(test_filepath)

trainset.dtypes

Id                  object
groupId             object
matchId             object
assists              int64
boosts               int64
damageDealt        float64
DBNOs                int64
headshotKills        int64
heals                int64
killPlace            int64
killPoints           int64
kills                int64
killStreaks          int64
longestKill        float64
matchDuration        int64
matchType           object
maxPlace             int64
numGroups            int64
rankPoints           int64
revives              int64
rideDistance       float64
roadKills            int64
swimDistance       float64
teamKills            int64
vehicleDestroys      int64
walkDistance       float64
weaponsAcquired      int64
winPoints            int64
winPlacePerc       float64
dtype: object

In [147]:
for row in trainset.iterrows():
    row["Id"] = int(row["Id"],16)
    row["matchId"] = int(row["matchId"],16)
    row["groupId"] = int(row["groupId"],16)



trainset["Id"] = pd.to_numeric(trainset["Id"], errors='coerce').fillna(0).astype(np.int64)
trainset["matchId"] = pd.to_numeric(trainset["matchId"], errors='coerce').fillna(0).astype(np.int64)
trainset["groupId"] = pd.to_numeric(trainset["groupId"], errors='coerce').fillna(0).astype(np.int64)
trainset.head()

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
0,0,0,0,0,0,0.0,0,0,0,60,...,0,0.0,0,0.0,0,0,244.8,1,1466,0.4444
1,0,0,0,0,0,91.47,0,0,0,57,...,0,0.0045,0,11.04,0,0,1434.0,5,0,0.64
2,0,0,0,1,0,68.0,0,0,0,47,...,0,0.0,0,0.0,0,0,161.8,2,0,0.7755
3,0,0,0,0,0,32.9,0,0,0,75,...,0,0.0,0,0.0,0,0,202.7,3,0,0.1667
4,0,0,0,0,0,100.0,0,0,0,45,...,0,0.0,0,0.0,0,0,49.75,2,0,0.1875


In [132]:
matchTypeIndex = 15
trainset = trainset.drop(["matchType"],axis=1)
train_columns = list(trainset.columns[:-1])


x = trainset[train_columns]
y = trainset["winPlacePerc"]

x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)


In [133]:
def includeFeatures(trainset,featuresToInclude):
    featuresToInclude = set(featuresToInclude)
    for feature in trainset.columns:
        if feature not in featuresToInclude:
            trainset.drop(feature,axis=1)

In [134]:
print(trainset.columns)

Index([u'assists', u'boosts', u'damageDealt', u'DBNOs', u'headshotKills',
       u'heals', u'killPlace', u'killPoints', u'kills', u'killStreaks',
       u'longestKill', u'matchDuration', u'maxPlace', u'numGroups',
       u'rankPoints', u'revives', u'rideDistance', u'roadKills',
       u'swimDistance', u'teamKills', u'vehicleDestroys', u'walkDistance',
       u'weaponsAcquired', u'winPoints', u'winPlacePerc'],
      dtype='object')


In [135]:
trainset_dmatrix = xgb.DMatrix(x_train.values,label=y_train.values)
valset_dmatrix = xgb.DMatrix(x_val.values,label=y_val.values)

In [136]:
params = {
    "max_depth" : 5,
    "eval_metric" : ["mae"],
}

In [137]:

clf = xgb.train(params, trainset_dmatrix, evals=[(trainset_dmatrix, "train"),(valset_dmatrix, 'val')], num_boost_round = 50)

predictions = xgb.predict(valset_dmatrix)

[17:57:55] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[0]	train-mae:nan	val-mae:0.269341
[17:57:58] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[1]	train-mae:nan	val-mae:0.269341
[17:58:01] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[2]	train-mae:nan	val-mae:0.269341
[17:58:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[3]	train-mae:nan	val-mae:0.269341
[17:58:07] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[4]	train-mae:nan	val-mae:0.269341
[17:58:10] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[5]	train-mae:nan	val-mae:0.269341
[17:58:12] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 0 pruned nodes, max_depth=0
[6]	train-mae

AttributeError: 'module' object has no attribute 'predict'