# Import Important Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import psutil
import gc
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor

# Run a full garbage collection, then switch seaborn to its "darkgrid" theme
# for every plot drawn below.
gc.collect()
sns.set_style("darkgrid")

# Ignore Warnings

In [None]:
import os
# Sets the TF_CPP_MIN_LOG_LEVEL environment variable to '3'.
# NOTE(review): no TensorFlow import is visible in this notebook - confirm the
# setting is still needed.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 
import warnings
# Silence every Python warning for the rest of the session. This also hides
# deprecation notices raised by the libraries used below.
warnings.filterwarnings(action='ignore')

# Import Dataset

In [2]:
# Load the training set from 'train_V2.csv' in the current working directory.
TrainData = pd.read_csv('train_V2.csv')

# Group Data By Match Id

In [None]:
# Per-match average of every numeric column. numeric_only=True keeps this
# working on pandas >= 2.0, where averaging non-numeric columns (for example
# matchType, which is label-encoded further down) raises a TypeError instead
# of silently dropping them.
TrainData.groupby(['matchId']).mean(numeric_only=True)

# Dataset Information

In [None]:
# (number of rows, number of columns) of the raw training frame.
TrainData.shape

In [None]:
# First five rows.
TrainData.head()

In [None]:
# Last five rows.
TrainData.tail()

In [None]:
# Column dtypes and non-null counts; memory_usage="deep" also measures the
# real size of object columns instead of estimating it.
TrainData.info(memory_usage = "deep")

In [None]:
# Memory Optimization
# Shrink every 64-bit numeric column to the smallest float / integer dtype
# that pd.to_numeric considers able to hold its values. Columns of any other
# dtype are left untouched.
for column_name, column_dtype in TrainData.dtypes.items():
    if column_dtype == 'float64':
        TrainData[column_name] = pd.to_numeric(TrainData[column_name], downcast='float')
    elif column_dtype == 'int64':
        TrainData[column_name] = pd.to_numeric(TrainData[column_name], downcast='integer')

In [None]:
# Same report as before the downcast cell, so the memory footprint can be compared.
TrainData.info(memory_usage = "deep")

In [None]:
# Summary statistics, transposed so that each column of the data becomes a row.
TrainData.describe().T

In [None]:
# Names of all columns in the frame.
TrainData.columns

In [None]:
# Number of distinct values in each column.
TrainData.nunique()

# Data Correlation

In [None]:
# Absolute pairwise correlation of the numeric columns. The non-numeric
# columns are filtered out explicitly because DataFrame.corr() no longer
# drops them silently on pandas >= 2.0 and raises instead.
TrainData.select_dtypes(include='number').corr().abs()

In [None]:
# Correlation Matrix
# Heatmap of the absolute correlations between the numeric columns. Non-numeric
# columns are excluded up front because DataFrame.corr() raises on them in
# pandas >= 2.0.
fig, ax = plt.subplots(figsize=(20, 20))
correlation_matrix = TrainData.select_dtypes(include='number').corr().abs()
sns.heatmap(correlation_matrix, annot=True, cmap="BrBG", ax=ax);

# Dealing With Missing Values

In [3]:
# Number of missing values in each column.
TrainData.isnull().sum()

Id                 0
groupId            0
matchId            0
assists            0
boosts             0
damageDealt        0
DBNOs              0
headshotKills      0
heals              0
killPlace          0
killPoints         0
kills              0
killStreaks        0
longestKill        0
matchDuration      0
matchType          0
maxPlace           0
numGroups          0
rankPoints         0
revives            0
rideDistance       0
roadKills          0
swimDistance       0
teamKills          0
vehicleDestroys    0
walkDistance       0
weaponsAcquired    0
winPoints          0
winPlacePerc       1
dtype: int64

In [4]:
# Discard every row that contains at least one missing value.
TrainData = TrainData.dropna()

# Detect Duplicated Data

In [None]:
# Number of rows that are exact duplicates of an earlier row.
TrainData.duplicated().sum()

# Stripplot

In [None]:
# Strip plots of the first eight player statistics on a 4x2 grid, one panel
# per column. The column -> title mapping keeps every panel and its title
# side by side, which avoids copy-paste slips in the titles.
stripplot_titles = {
    'assists': "Assists Stripplot",
    'damageDealt': "Damage Dealt Stripplot",
    'boosts': 'Boosts Stripplot',
    'DBNOs': 'DBNOs Stripplot',
    'headshotKills': 'Head Shot Kills Stripplot',
    'heals': 'Heals Stripplot',
    'killPlace': 'Kill Place Stripplot',
    'killPoints': 'Kill Points Stripplot',
}

fig, axes = plt.subplots(ncols=2, nrows=4, figsize=(22, 22))

# axes.flat walks the grid row by row, matching the order of the mapping.
for ax, (column, title) in zip(axes.flat, stripplot_titles.items()):
    sns.stripplot(data=TrainData, x=column, ax=ax).set_title(title)

In [None]:
# Strip plots of the next six player statistics on a 3x2 grid. The
# longestKill panel is titled "Longest Kill Stripplot" so that it is not
# confused with the kills panel.
stripplot_titles = {
    'kills': 'Kills Stripplot',
    'killStreaks': 'Kill Streaks Stripplot',
    'longestKill': 'Longest Kill Stripplot',
    'matchDuration': 'Match Duration Stripplot',
    'maxPlace': 'Max Place Stripplot',
    'rankPoints': 'Rank Points Stripplot',
}

fig, axes = plt.subplots(ncols=2, nrows=3, figsize=(22, 22))

# axes.flat walks the grid row by row, matching the order of the mapping.
for ax, (column, title) in zip(axes.flat, stripplot_titles.items()):
    sns.stripplot(data=TrainData, x=column, ax=ax).set_title(title)

In [None]:
# Strip plots of the remaining nine player statistics on a 3x3 grid, one
# panel per column.
stripplot_titles = {
    'revives': 'Revives Stripplot',
    'rideDistance': "Ride Distance Stripplot",
    'roadKills': "Road Kills Stripplot",
    'swimDistance': 'Swim Distance Stripplot',
    'teamKills': 'Team Kills Stripplot',
    'vehicleDestroys': 'Vehicle Destroys Stripplot',
    'walkDistance': 'Walk Distance Stripplot',
    'weaponsAcquired': 'Weapons Acquired Stripplot',
    'winPoints': 'Win Points Stripplot',
}

fig, axes = plt.subplots(ncols=3, nrows=3, figsize=(22, 22))

# axes.flat walks the grid row by row, matching the order of the mapping.
for ax, (column, title) in zip(axes.flat, stripplot_titles.items()):
    sns.stripplot(data=TrainData, x=column, ax=ax).set_title(title)

# Count Plot

In [None]:
# Number of rows per match type; the x tick labels are rotated by 60 degrees.
sns.countplot(data=TrainData, x='matchType');
plt.xticks(rotation=60);

# Scatter Plot

In [None]:
# Scatter plots of winPlacePerc against the first eight player statistics on
# a 4x2 grid. Titles are generated from one template so that every panel is
# worded and spaced the same way.
scatter_labels = {
    'assists': 'Assists',
    'damageDealt': 'Damage Dealt',
    'boosts': 'Boosts',
    'DBNOs': 'DBNOs',
    'headshotKills': 'Head Shot Kills',
    'heals': 'Heals',
    'killPlace': 'Kill Place',
    'killPoints': 'Kill Points',
}

fig, axes = plt.subplots(ncols=2, nrows=4, figsize=(22, 22))

# axes.flat walks the grid row by row, matching the order of the mapping.
for ax, (column, label) in zip(axes.flat, scatter_labels.items()):
    sns.scatterplot(data=TrainData, x=column, y='winPlacePerc', ax=ax)
    ax.set_title(f"Relationship Between {label} And Win Place Perc")

In [None]:
# Scatter plots of winPlacePerc against six further player statistics on a
# 3x2 grid, one panel per column.
scatter_labels = {
    'kills': 'Kills',
    'killStreaks': 'Kill Streaks',
    'longestKill': 'Longest Kill',
    'matchDuration': 'Match Duration',
    'maxPlace': 'Max Place',
    'rankPoints': 'Rank Points',
}

fig, axes = plt.subplots(ncols=2, nrows=3, figsize=(22, 22))

# axes.flat walks the grid row by row, matching the order of the mapping.
for ax, (column, label) in zip(axes.flat, scatter_labels.items()):
    panel = sns.scatterplot(data=TrainData, x=column, y='winPlacePerc', ax=ax)
    panel.set_title(f"Relationship Between {label} And Win Place Perc")


In [None]:
# Scatter plots of winPlacePerc against the remaining nine player statistics
# on a 3x3 grid. Titles come from one template so that they are spelled
# consistently.
scatter_labels = {
    'rideDistance': 'Ride Distance',
    'roadKills': 'Road Kills',
    'swimDistance': 'Swim Distance',
    'teamKills': 'Team Kills',
    'vehicleDestroys': 'Vehicle Destroys',
    'walkDistance': 'Walk Distance',
    'weaponsAcquired': 'Weapons Acquired',
    'winPoints': 'Win Points',
    'revives': 'Revives',
}

fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(22, 22))

# axes.flat walks the grid row by row, matching the order of the mapping.
for ax, (column, label) in zip(axes.flat, scatter_labels.items()):
    sns.scatterplot(data=TrainData, x=column, y='winPlacePerc', ax=ax)
    ax.set_title(f'Relationship Between {label} And Win Place Perc')

# Player Travel Distance

In [None]:
# Boolean masks marking the rows whose walked, driven and swum distance is
# exactly zero; summing a mask counts the matching rows.
walk = TrainData["walkDistance"].eq(0)
ride = TrainData["rideDistance"].eq(0)
swim = TrainData["swimDistance"].eq(0)
print(f"{walk.sum()} of Players walk for 0 meter, {ride.sum()} players drive for 0 meter and {swim.sum()} swim for 0 meter.")

# Player Using Heals

In [None]:
# Mask of the rows with at least one heal used; its sum is the row count.
heals = TrainData["heals"].gt(0)
print(f"{heals.sum()} of Players use heals .")

# Players Who Killed With a Head Shot

In [None]:
# Mask of the rows with at least one headshot kill; its sum is the row count.
headshot = TrainData["headshotKills"].gt(0)
print(f"{headshot.sum()} of Players killed with headshot .")

# Kills

In [None]:
# Mask of the rows with one or more kills. The comparison is ">= 1" so that
# players with exactly one kill are counted, as the printed message promises.
kills = TrainData["kills"] >= 1
print("{} of player kill at least one enemy ." .format(kills.sum()))

# Weapons Acquired

In [None]:
# Mask of the rows with one or more weapons picked up. The comparison is
# ">= 1" so that players with exactly one weapon are counted, as the printed
# message promises.
weapon = TrainData["weaponsAcquired"] >= 1
print("{} of player picked up at least one weapon ." .format(weapon.sum()))

# Head Shot Rate

In [None]:
# Share of all kills that were headshots. Series.sum() adds the column up in
# vectorised code, whereas the builtin sum() walks the rows one by one in
# Python. float() keeps the displayed value a plain Python float.
HeadShotRate = float(TrainData['headshotKills'].sum() / TrainData['kills'].sum())
HeadShotRate

# Longest Kill Rate

In [None]:
# Sum of the longestKill column divided by the total number of kills.
# NOTE(review): presumably meant as an average kill distance - confirm the
# intended definition.
# The column is summed as float64 so the total does not lose precision if the
# column was downcast to a narrower float type earlier, and Series.sum()
# replaces the much slower builtin sum().
LongestKillRate = float(TrainData['longestKill'].astype('float64').sum() / TrainData['kills'].sum())
LongestKillRate

# Analyzing Winner Players

In [None]:
# Rows whose winPlacePerc is exactly 1.
Winner = TrainData.loc[TrainData['winPlacePerc'].eq(1)]

# Winner Players Data Correlation

In [None]:
# Absolute pairwise correlation of the numeric columns among the winners.
# Non-numeric columns are filtered out explicitly because DataFrame.corr()
# raises on them in pandas >= 2.0. winPlacePerc is constant in this subset,
# so its row and column have no defined correlation.
Winner.select_dtypes(include='number').corr().abs()

In [None]:
# Heatmap of the absolute correlations between the numeric columns of the
# winners. Non-numeric columns are excluded up front because DataFrame.corr()
# raises on them in pandas >= 2.0.
fig, ax = plt.subplots(figsize=(20, 20))
winner_correlation = Winner.select_dtypes(include='number').corr().abs()
sns.heatmap(winner_correlation, square=True, annot=True, cmap="BrBG", ax=ax);

# Winner Player Visualization

In [None]:
# Histograms of the first eight statistics for the winners on a 4x2 grid.
# Each entry is (column, panel title, x-axis label); the boosts panel is
# labelled "Boosts" to match the column it shows.
winner_panels = [
    ('assists', 'Assist Histogram Plot For Winner Players', 'Assist'),
    ('boosts', 'Boosts Histogram Plot For Winner Players', 'Boosts'),
    ('damageDealt', 'Damage Dealt Histogram Plot For Winner Players', 'Damage Dealt'),
    ('DBNOs', 'DBNOs Histogram Plot For Winner Players', 'DBNOs'),
    ('headshotKills', 'Head Shot Kills Histogram Plot For Winner Players', 'Head Shot Kills'),
    ('heals', 'Heals Histogram Plot For Winner Player', 'heals'),
    ('killPlace', 'Kill Place Histogram Plot For Winner Player', 'Kill Place'),
    ('killPoints', 'Kill Points Histogram Plot For Winner Player', 'Kill Points'),
]

fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(20, 20))

# axes.flat walks the grid row by row, matching the order of the list.
for ax, (column, title, xlabel) in zip(axes.flat, winner_panels):
    sns.histplot(Winner[column], ax=ax)
    ax.set(title=title, xlabel=xlabel, ylabel='Count')


In [None]:
# Histograms of eight further statistics for the winners on a 4x2 grid.
# Each entry is (column, panel title, x-axis label).
winner_panels = [
    ('kills', 'Kills Histogram Plot For Winner Players', 'Kills'),
    ('killStreaks', 'Kill Streaks Histogram Plot For Winner Players', 'Kill Streaks'),
    ('longestKill', 'Longest Kill Histogram Plot For Winner Players', 'Longest Kill'),
    ('matchDuration', 'Match Duration Histogram Plot For Winner Players', 'Match Duration'),
    ('maxPlace', 'Max Place Histogram Plot For Winner Player', 'Max Place'),
    ('rankPoints', 'Rank Points Histogram Plot For Winner Player', 'Rank Points'),
    ('revives', 'Revives Histogram Plot For Winner Player', 'Revives'),
    ('matchType', 'Match Type For Winner Player', 'Match Type'),
]

fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(20, 20))

# axes.flat walks the grid row by row, matching the order of the list.
for ax, (column, title, xlabel) in zip(axes.flat, winner_panels):
    sns.histplot(Winner[column], ax=ax)
    ax.set(title=title, xlabel=xlabel, ylabel='Count')

# Rotate the tick labels of the matchType panel (the last one) by addressing
# its axes directly rather than relying on pyplot's implicit "current axes".
axes.flat[-1].tick_params(axis='x', labelrotation=60)

In [None]:
# Histograms of the remaining eight statistics for the winners on a 4x2 grid.
# Each entry is (column, panel title, x-axis label).
winner_panels = [
    ('rideDistance', 'Ride Distance Histogram Plot For Winner Players', 'Ride Distance'),
    ('roadKills', 'Road Kills Histogram Plot For Winner Players', 'Road Kills'),
    ('swimDistance', 'Swim Distance Histogram Plot For Winner Players', ' Swim Distance'),
    ('teamKills', 'Team Kills Histogram Plot For Winner Players', 'Team Kills'),
    ('vehicleDestroys', 'Vehicle Destroys Histogram Plot For Winner Players', 'Vehicle Destroys'),
    ('walkDistance', 'Walk Distance Histogram Plot For Winner Player', 'Walk Distance'),
    ('weaponsAcquired', 'Weapons Acquired Histogram Plot For Winner Player', 'Weapons Acquired'),
    ('winPoints', 'Win Points Histogram Plot For Winner Player', 'Win Points'),
]

fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(20, 20))

# axes.flat walks the grid row by row, matching the order of the list.
for ax, (column, title, xlabel) in zip(axes.flat, winner_panels):
    sns.histplot(Winner[column], ax=ax)
    ax.set(title=title, xlabel=xlabel, ylabel='Count')


# Players Who Won Without Acquiring Any Weapons

In [None]:
# Mask of the winner rows with zero weapons acquired; its sum is the row count.
weapon = Winner['weaponsAcquired'].eq(0)
print(f"{weapon.sum()} of Players win without acquired any weapon ")

# Players Who Won Without Killing Anyone

In [None]:
# Mask of the winner rows with zero kills; its sum is the row count.
Kill = Winner['kills'].eq(0)
print(f"{Kill.sum()} of Players win without kill anyone")

# Players Who Won Without Using Any Heals

In [None]:
# Mask of the winner rows with zero heals used; its sum is the row count.
heals = Winner['heals'].eq(0)
print(f"{heals.sum()} of Players win without using heals")

# Players Who Won Without Traveling

In [None]:
# Masks of the winner rows whose walked, driven and swum distance is exactly
# zero; summing a mask counts the matching rows.
walk1 = Winner["walkDistance"].eq(0)
ride1 = Winner["rideDistance"].eq(0)
swim1 = Winner["swimDistance"].eq(0)
print(f"{walk1.sum()} of Players win without walk at all, {ride1.sum()} players win without drive and {swim1.sum()} win without swim.")

# Analyzing Loser Players

In [None]:
# Rows whose winPlacePerc is exactly 0.
Losser = TrainData.loc[TrainData['winPlacePerc'].eq(0)]

# Loser Players Data Correlation

In [None]:
# Absolute pairwise correlation of the numeric columns among the last-placed
# players. Non-numeric columns are filtered out explicitly because
# DataFrame.corr() raises on them in pandas >= 2.0. winPlacePerc is constant
# in this subset, so its row and column have no defined correlation.
Losser.select_dtypes(include='number').corr().abs()

In [None]:
# Heatmap of the absolute correlations between the numeric columns of the
# last-placed players. The matrix is computed from Losser, the subset this
# section analyses. Non-numeric columns are excluded up front because
# DataFrame.corr() raises on them in pandas >= 2.0.
fig, ax = plt.subplots(figsize=(20, 20))
loser_correlation = Losser.select_dtypes(include='number').corr().abs()
sns.heatmap(loser_correlation, square=True, annot=True, cmap="BrBG", ax=ax);

# Loser Players Data Visualization

In [None]:
# Histograms of the first eight statistics for the last-placed players on a
# 4x2 grid. Each entry is (column, panel title, x-axis label); the boosts
# panel is labelled "Boosts" to match the column it shows.
loser_panels = [
    ('assists', 'Assist Histogram Plot For Loser Players', 'Assist'),
    ('boosts', 'Boosts Histogram Plot For Loser Players', 'Boosts'),
    ('damageDealt', 'Damage Dealt Histogram Plot For Loser Players', 'Damage Dealt'),
    ('DBNOs', 'DBNOs Histogram Plot For Loser Players', 'DBNOs'),
    ('headshotKills', 'Head Shot Kills Histogram Plot For Loser Players', 'Head Shot Kills'),
    ('heals', 'Heals Histogram Plot For Loser Player', 'heals'),
    ('killPlace', 'Kill Place Histogram Plot For Loser Player', 'Kill Place'),
    ('killPoints', 'Kill Points Histogram Plot For Loser Player', 'Kill Points'),
]

fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(20, 20))

# axes.flat walks the grid row by row, matching the order of the list.
for ax, (column, title, xlabel) in zip(axes.flat, loser_panels):
    sns.histplot(Losser[column], ax=ax)
    ax.set(title=title, xlabel=xlabel, ylabel='Count')


In [None]:
# Histograms of eight further statistics for the last-placed players on a
# 4x2 grid. Each entry is (column, panel title, x-axis label).
loser_panels = [
    ('kills', 'Kills Histogram Plot For Loser Players', 'Kills'),
    ('killStreaks', 'Kill Streaks Histogram Plot For Loser Players', 'Kill Streaks'),
    ('longestKill', 'Longest Kill Histogram Plot For Loser Players', 'Longest Kill'),
    ('matchDuration', 'Match Duration Histogram Plot For Loser Players', 'Match Duration'),
    ('maxPlace', 'Max Place Histogram Plot For Loser Player', 'Max Place'),
    ('rankPoints', 'Rank Points Histogram Plot For Loser Player', 'Rank Points'),
    ('revives', 'Revives Histogram Plot For Loser Player', 'Revives'),
    ('matchType', 'Match Type For Loser Player', 'Match Type'),
]

fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(20, 20))

# axes.flat walks the grid row by row, matching the order of the list.
for ax, (column, title, xlabel) in zip(axes.flat, loser_panels):
    sns.histplot(Losser[column], ax=ax)
    ax.set(title=title, xlabel=xlabel, ylabel='Count')

# Rotate the tick labels of the matchType panel (the last one) by addressing
# its axes directly rather than relying on pyplot's implicit "current axes".
axes.flat[-1].tick_params(axis='x', labelrotation=60)

In [None]:
# Histograms of the remaining eight statistics for the last-placed players on
# a 4x2 grid. Each entry is (column, panel title, x-axis label).
loser_panels = [
    ('rideDistance', 'Ride Distance Histogram Plot For Loser Players', 'Ride Distance'),
    ('roadKills', 'Road Kills Histogram Plot For Loser Players', 'Road Kills'),
    ('swimDistance', 'Swim Distance Histogram Plot For Loser Players', 'Swim Distance'),
    ('teamKills', 'Team Kills Histogram Plot For Loser Players', 'Team Kills'),
    ('vehicleDestroys', 'Vehicle Destroys Histogram Plot For Loser Players', 'Vehicle Destroys'),
    ('walkDistance', 'Walk Distance Histogram Plot For Loser Player', 'Walk Distance'),
    ('weaponsAcquired', 'Weapons Acquired Histogram Plot For Loser Player', 'Weapons Acquired'),
    ('winPoints', 'Win Points Histogram Plot For Loser Player', 'Win Points'),
]

fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(20, 20))

# axes.flat walks the grid row by row, matching the order of the list.
for ax, (column, title, xlabel) in zip(axes.flat, loser_panels):
    sns.histplot(Losser[column], ax=ax)
    ax.set(title=title, xlabel=xlabel, ylabel='Count')


# Analyzing Kills 

In [None]:
# Scatter plots of kills against twelve other statistics on a 6x2 grid.
# Each entry is (column on the x axis, panel title).
kill_panels = [
    ('killPlace', 'Relationship Between Kills And Kill Place'),
    ('killPoints', 'Relationship Between Kills And Kill Points'),
    ('headshotKills', 'Relationship Between Kills And  Head shot Kills'),
    ('longestKill', 'Relationship Between Kills And Longest Kill'),
    ('killStreaks', 'Relationship Between Kills And kill Streaks'),
    ('roadKills', 'Relationship Between Kills And Road Kills '),
    ('heals', 'Relationship Between Kills And Heals'),
    ('swimDistance', 'Relationship Between Kills And Swim Distance'),
    ('walkDistance', 'Relationship Between Kills And Walk Distance'),
    ('rideDistance', 'Relationship Between Kills And Ride Distance'),
    ('weaponsAcquired', 'Relationship Between Kill And Weapons Acquired'),
    ('vehicleDestroys', 'Relationship Between Kills And Vehicle Destroys'),
]

fig, axes = plt.subplots(nrows=6, ncols=2, figsize=(30, 30))

# axes.flat walks the grid row by row, matching the order of the list.
for ax, (column, title) in zip(axes.flat, kill_panels):
    panel = sns.scatterplot(data=TrainData, x=column, y='kills', ax=ax)
    panel.set_title(title)

# Drop Features

In [5]:
# Remove the three identifier columns and numGroups from the frame.
TrainData = TrainData.drop(columns=['Id', 'groupId', 'matchId', 'numGroups'])

# Label Encoding

In [6]:
# Replace the matchType values with integer codes.
# The fitted encoder gets its own name: binding the instance to the name
# "LabelEncoder" would overwrite the imported class, and running this cell a
# second time would then fail because an encoder instance is not callable.
match_type_encoder = LabelEncoder()
TrainData['matchType'] = match_type_encoder.fit_transform(TrainData['matchType'])

# Normalization

In [7]:
# Rescale each listed column with a MinMaxScaler. One scaler object is re-fitted
# column by column, so after the loop Norm holds only the fit of the last
# column in the list.
Norm = MinMaxScaler()

columns_to_scale = [
    'assists', 'boosts', 'damageDealt', 'DBNOs', 'headshotKills', 'heals',
    'killPlace', 'killPoints', 'kills', 'killStreaks', 'longestKill',
    'matchDuration', 'maxPlace', 'rankPoints', 'revives', 'rideDistance',
    'roadKills', 'swimDistance', 'teamKills', 'vehicleDestroys',
    'walkDistance', 'weaponsAcquired', 'winPoints', 'matchType',
]

for column_name in columns_to_scale:
    # The scaler expects a 2-D input, hence the single-column reshape.
    column_values = np.array(TrainData[column_name]).reshape(-1, 1)
    TrainData[column_name] = Norm.fit_transform(column_values)

# Detect Outliers


In [None]:
# Box plots of the first six statistics on a 3x2 grid, one panel per column.
boxplot_columns = ['assists', 'damageDealt', 'boosts', 'DBNOs', 'headshotKills', 'heals']

fig, axes = plt.subplots(ncols=2, nrows=3, figsize=(22, 22))

# axes.flat walks the grid row by row, matching the order of the list.
for ax, column in zip(axes.flat, boxplot_columns):
    sns.boxplot(data=TrainData, x=column, ax=ax)




In [None]:
# Box plots of nine further statistics on a 3x3 grid, one panel per column.
# The last panel shows killPoints, so that revives is not drawn twice.
# NOTE(review): killPoints was chosen because it appears in no other box plot
# figure - confirm it is the intended column.
boxplot_columns = [
    'killPlace', 'kills', 'killStreaks',
    'longestKill', 'matchDuration', 'maxPlace',
    'rankPoints', 'revives', 'killPoints',
]

fig, axes = plt.subplots(ncols=3, nrows=3, figsize=(22, 22))

# axes.flat walks the grid row by row, matching the order of the list.
for ax, column in zip(axes.flat, boxplot_columns):
    sns.boxplot(data=TrainData, x=column, ax=ax)

In [None]:
# Box plots of the remaining eight statistics on a 4x2 grid. The titles name
# the plot type that is actually drawn (box plot).
boxplot_titles = {
    'rideDistance': "Ride Distance Boxplot",
    'roadKills': "Road Kills Boxplot",
    'swimDistance': 'Swim Distance Boxplot',
    'teamKills': 'Team Kills Boxplot',
    'vehicleDestroys': 'Vehicle Destroys Boxplot',
    'walkDistance': 'Walk Distance Boxplot',
    'weaponsAcquired': 'Weapons Acquired Boxplot',
    'winPoints': 'Win Points Boxplot',
}

fig, axes = plt.subplots(ncols=2, nrows=4, figsize=(22, 22))

# axes.flat walks the grid row by row, matching the order of the mapping.
for ax, (column, title) in zip(axes.flat, boxplot_titles.items()):
    sns.boxplot(data=TrainData, x=column, ax=ax).set_title(title)

# Remove Outliers

In [8]:
# Mark outliers as missing, column by column, using the 1.5 * IQR rule:
# any value below Q1 - 1.5*IQR or above Q3 + 1.5*IQR is replaced with NaN.
# The bounds use descriptive names so that the builtin max() and min()
# functions are not overwritten for the rest of the notebook.
for column_name in TrainData:
    q75, q25 = np.percentile(TrainData.loc[:, column_name], [75, 25])
    interquartile_range = q75 - q25

    upper_bound = q75 + (1.5 * interquartile_range)
    lower_bound = q25 - (1.5 * interquartile_range)

    # When a column has an IQR of zero, both bounds equal its quartile value
    # and every other value in that column is marked as an outlier.
    is_outlier = (TrainData[column_name] < lower_bound) | (TrainData[column_name] > upper_bound)
    TrainData.loc[is_outlier, column_name] = np.nan

In [9]:
# Discard every row that contains a missing value, then show the remaining
# (rows, columns) count.
TrainData = TrainData.dropna()
TrainData.shape

(1644128, 25)

# Split The Data Into X And y

In [10]:
# y is the winPlacePerc column; X is every other column.
y = TrainData['winPlacePerc']
X = TrainData.drop(columns=['winPlacePerc'])

# Models

**A. Decision Tree**

In [11]:
# Decision tree evaluated with nested cross-validation: the outer 3-fold split
# measures the error on held-out data, and inside each outer fold a 2-fold
# grid search chooses max_depth using the training part only.
kf = KFold(n_splits=3)
meanabsoluteerrorDT = []
meansquarederrorDT = []

# The search space is the same for every fold, so it is built once.
DTParam = {
    'max_depth': [5, 6, 7, 8, 9, 10],
}

for train_index, test_index in kf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    DTGrid = GridSearchCV(
        estimator=DecisionTreeRegressor(random_state=0, criterion='squared_error'),
        param_grid=DTParam,
        cv=2,
        refit=True,
        # 0 means silent. Negative values are rejected by the parameter
        # validation of newer scikit-learn releases.
        verbose=0,
        return_train_score=True,
    )

    # Fit the grid search on the training part, then score the refitted best
    # tree on the held-out part.
    DTGrid.fit(X_train, y_train)
    y_pred = DTGrid.predict(X_test)

    meanabsoluteerrorDT.append(mean_absolute_error(y_test, y_pred))
    meansquarederrorDT.append(mean_squared_error(y_test, y_pred))

# One row per outer fold; built once after the loop.
maeDT = pd.DataFrame(meanabsoluteerrorDT, columns=['Mean Absolute Error'])
mseDT = pd.DataFrame(meansquarederrorDT, columns=['Mean Squared Error'])

print(" ")
# DTGrid is the search fitted on the last outer fold, so these are that
# fold's best parameters only.
print("The Best Decision Tree Parameters Are:", DTGrid.best_params_)
print(" ")
# DataFrame.mean() averages over the folds without the warning that
# np.mean(DataFrame) triggers.
print("Decision Tree Mean Absolute Error:", maeDT.mean())
print('')
print('Decision Tree Mean Squared Error:', mseDT.mean())

 
The Best Decision Tree Parameters Are: {'max_depth': 10}
 
Decision Tree Mean Absolute Error: Mean Absulote Error    0.054078
dtype: float64

Decision Tree Mean Squred Error: r    0.006246
dtype: float64


  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


**B. Linear Regression**

In [12]:
# Linear regression evaluated with 3-fold cross-validation: the model is
# fitted on two thirds of the rows and scored on the remaining third, three
# times over.
kf = KFold(n_splits=3)
meanabsoluteerrorLR = []
meansquarederrorLR = []

for train_index, test_index in kf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    lr = LinearRegression()

    # Fit the linear regression on the training part and predict the
    # held-out part.
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)

    meanabsoluteerrorLR.append(mean_absolute_error(y_test, y_pred))
    meansquarederrorLR.append(mean_squared_error(y_test, y_pred))

# One row per fold; built once after the loop.
maeLR = pd.DataFrame(meanabsoluteerrorLR, columns=['Mean Absolute Error'])
mseLR = pd.DataFrame(meansquarederrorLR, columns=['Mean Squared Error'])

print(" ")
print('Linear Regression Mean Absolute Error Is', maeLR.mean())
print(" ")
print('Linear Regression Mean Squared Error Is', mseLR.mean())

 
Linear Regression Mean Absolutle Error Is Mean Absulote Error    0.071917
dtype: float64
 
Linear Regression Mean Squrad Error Is Mean Squred Error    0.012144
dtype: float64


**C. Random forest**

In [13]:
# Random forest evaluated with nested cross-validation: the outer 3-fold split
# measures the error on held-out data, and inside each outer fold a 2-fold
# grid search chooses max_depth using the training part only.
kf = KFold(n_splits=3)
meanabsoluteerrorRF = []
meansquarederrorRF = []

# The search space is the same for every fold, so it is built once.
RFDTParam = {
    'max_depth': [5, 6, 7, 8, 9, 10],
}

for train_index, test_index in kf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    RFGrid = GridSearchCV(
        estimator=RandomForestRegressor(random_state=0, n_estimators=10, criterion='squared_error'),
        param_grid=RFDTParam,
        cv=2,
        refit=True,
        # 0 means silent. Negative values are rejected by the parameter
        # validation of newer scikit-learn releases.
        verbose=0,
        return_train_score=True,
    )

    # Fit the grid search on the training part, then score the refitted best
    # forest on the held-out part.
    RFGrid.fit(X_train, y_train)
    y_pred = RFGrid.predict(X_test)

    meanabsoluteerrorRF.append(mean_absolute_error(y_test, y_pred))
    meansquarederrorRF.append(mean_squared_error(y_test, y_pred))

# One row per outer fold; built once after the loop.
maeRF = pd.DataFrame(meanabsoluteerrorRF, columns=['Mean Absolute Error'])
mseRF = pd.DataFrame(meansquarederrorRF, columns=['Mean Squared Error'])

print(" ")
# Report the random forest search (RFGrid). It was fitted on the last outer
# fold, so these are that fold's best parameters only.
print("The Best Random Forest Parameters Are:", RFGrid.best_params_)
print(" ")
print('Random Forest Mean Absolute Error Is', maeRF.mean())
print(" ")
print('Random Forest Mean Squared Error Is', mseRF.mean())


 
The Best Random Forest Parameters Are: {'max_depth': 10}
 
Random Forest Mean Absolutle Error Is Mean Absulote Error    0.053207
dtype: float64
 
Random Forest Mean Squred Erroe Error Is Mean Squred Error    0.006011
dtype: float64
