In [1]:
!pip install pandas
!pip install seaborn
!pip install plotly
!pip install -U scikit-learn
!pip install xgboost



In [2]:
import pandas as pd
import joblib
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Read the match dataset into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
file_path = "match_data.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [4]:
# Show summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of the loaded frame.
df.describe()

Unnamed: 0,gameId,winner,firstBlood,firstTower,firstInhibitor,firstBaron,firstDragon,firstRiftHerald,t1_towerKills,t1_inhibitorKills,...,t2_dragonKills,t2_riftHeraldKills,t2_totalGold,t2_totalLevel,t2_avgLevel,t2_totalMinionKills,t2_totalJungleMinionKills,t2_totalKills,t2_totalDeath,t2_totalAssist
count,49063.0,49063.0,49063.0,49063.0,49063.0,49063.0,49063.0,49063.0,49063.0,49063.0,...,49063.0,49063.0,49063.0,49063.0,49063.0,49063.0,49063.0,49063.0,49063.0,49063.0
mean,4830716000.0,1.503271,1.491348,1.157797,1.247926,0.86978,1.199234,1.157736,4.793408,0.847482,...,1.484275,0.635224,59233.734301,73.732793,14.746559,528.604753,126.812914,34.559179,34.524734,59.432587
std,6403314.0,0.499994,0.512254,0.761903,0.719554,0.838967,0.782533,0.738703,3.420099,1.064008,...,1.406972,0.741077,16479.825777,12.636222,2.527244,205.527186,77.241291,17.046219,17.085266,44.278045
min,4745276000.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,12773.0,25.0,5.0,13.0,0.0,0.0,0.0,0.0
25%,4829145000.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.0,0.0,...,0.0,0.0,48322.5,65.0,13.0,355.0,87.0,22.0,22.0,28.0
50%,4832694000.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,0.0,...,1.0,0.0,60094.0,75.0,15.0,564.0,139.0,33.0,33.0,47.0
75%,4834641000.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,8.0,1.0,...,3.0,1.0,70183.0,84.0,16.8,680.0,182.0,44.0,44.0,74.0
max,4836633000.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,11.0,9.0,...,6.0,2.0,153277.0,90.0,18.0,1325.0,433.0,134.0,129.0,289.0


In [5]:
# List the column labels available in the loaded frame.
df.columns

Index(['gameId', 'winner', 'firstBlood', 'firstTower', 'firstInhibitor',
       'firstBaron', 'firstDragon', 'firstRiftHerald', 't1_champ1Name',
       't1_champ2Name', 't1_champ3Name', 't1_champ4Name', 't1_champ5Name',
       't1_towerKills', 't1_inhibitorKills', 't1_baronKills', 't1_dragonKills',
       't1_riftHeraldKills', 't1_totalGold', 't1_totalLevel', 't1_avgLevel',
       't1_totalMinionKills', 't1_totalJungleMinionKills', 't1_totalKills',
       't1_totalDeath', 't1_totalAssist', 't2_champ1Name', 't2_champ2Name',
       't2_champ3Name', 't2_champ4Name', 't2_champ5Name', 't2_towerKills',
       't2_inhibitorKills', 't2_baronKills', 't2_dragonKills',
       't2_riftHeraldKills', 't2_totalGold', 't2_totalLevel', 't2_avgLevel',
       't2_totalMinionKills', 't2_totalJungleMinionKills', 't2_totalKills',
       't2_totalDeath', 't2_totalAssist'],
      dtype='object')

In [None]:
# Proportion of each objective secured by the winning team

In [6]:
# Split the matches by outcome: one frame per value of the `winner` column.
first_team = df.loc[df["winner"].eq(1)]
second_team = df.loc[df["winner"].eq(2)]

In [None]:
# Count the matches for every (winner, firstBlood) combination.
win_counts = df.groupby(["winner", "firstBlood"]).size().reset_index(name="count")

# Total number of wins for each team.
team_1_wins = win_counts.loc[win_counts["winner"] == 1, "count"].sum()
team_2_wins = win_counts.loc[win_counts["winner"] == 2, "count"].sum()


def _first_blood_share(counts, winner, first_blood):
    """Return the fraction of ``winner``'s wins that have ``firstBlood == first_blood``.

    Parameters
    ----------
    counts : pandas.DataFrame
        Frame with ``winner``, ``firstBlood`` and ``count`` columns, one row
        per combination that occurs in the data.
    winner : int
        Value of the ``winner`` column to condition on.
    first_blood : int
        Value of the ``firstBlood`` column to select.

    Returns
    -------
    float
        ``count(winner, first_blood) / count(winner)``. A combination that
        never occurs contributes 0 instead of raising ``IndexError``; if the
        team has no wins at all the share is undefined and NaN is returned.
    """
    is_winner = counts["winner"] == winner
    total = counts.loc[is_winner, "count"].sum()
    if total == 0:
        return float("nan")
    is_match = is_winner & (counts["firstBlood"] == first_blood)
    return counts.loc[is_match, "count"].sum() / total


# Share of team 1's wins by who took first blood. The denominator is team 1's
# total wins, so the three shares sum to 1.
# firstBlood == 0 means neither team is recorded as taking first blood;
# presumably these are remade games -- TODO confirm against the dataset docs.
team_1_win_first_blood_for_team_1 = _first_blood_share(win_counts, 1, 1)
team_1_win_first_blood_for_team_2 = _first_blood_share(win_counts, 1, 2)
team_1_win_first_blood_for_remake = _first_blood_share(win_counts, 1, 0)

# Same breakdown for team 2's wins.
team_2_win_first_blood_for_team_1 = _first_blood_share(win_counts, 2, 1)
team_2_win_first_blood_for_team_2 = _first_blood_share(win_counts, 2, 2)
team_2_win_first_blood_for_remake = _first_blood_share(win_counts, 2, 0)

# Print the results.
print(f"Proportion of wins for team 1 when they had the first blood: {team_1_win_first_blood_for_team_1:.1%}"
,f"Proportion of wins for team 1 when the opponent had the first blood: {team_1_win_first_blood_for_team_2:.1%}"
,f"Proportion of wins for team 1 in remakes: {team_1_win_first_blood_for_remake:.1%}",sep='\n')
# For team 2, "they" is firstBlood == 2 and "the opponent" is firstBlood == 1.
print(f"Proportion of wins for team 2 when they had the first blood: {team_2_win_first_blood_for_team_2:.1%}"
,f"Proportion of wins for team 2 when the opponent had the first blood: {team_2_win_first_blood_for_team_1:.1%}"
,f"Proportion of wins for team 2 in remakes: {team_2_win_first_blood_for_remake:.1%}",sep='\n')