In [2]:
import os
from urllib.request import urlretrieve
from zipfile import ZipFile

# constants
URL = "https://www.kaggle.com/api/v1/datasets/download/datasnaek/league-of-legends"
DATA_DIR = "data/"
ZIP_NAME = "compressed.zip"

# Remember the directory the notebook started in the first time this cell runs.
# The cell changes the working directory below, so without this anchor a second
# run in the same kernel would resolve DATA_DIR inside data/ and create data/data/.
if "NOTEBOOK_ROOT" not in globals():
    NOTEBOOK_ROOT = os.getcwd()

data_path = os.path.join(NOTEBOOK_ROOT, DATA_DIR)
os.makedirs(data_path, exist_ok=True)

# Later cells open the dataset files by bare name, so keep working inside DATA_DIR.
os.chdir(data_path)

if not os.path.exists(ZIP_NAME):
    # Download under a temporary name and rename only on success, so an
    # interrupted download never leaves a truncated archive that looks complete.
    partial_name = ZIP_NAME + ".part"
    urlretrieve(URL, partial_name)
    os.replace(partial_name, ZIP_NAME)

# Extract whenever any archive member is missing, not only right after a download;
# otherwise a run interrupted between download and extraction is never repaired.
with ZipFile(ZIP_NAME) as archive:
    if not all(os.path.exists(member) for member in archive.namelist()):
        archive.extractall()

In [3]:
import pandas as pd

# Read games.csv from the current working directory, using gameId as the row index.
games_data = pd.read_csv(
    "games.csv",
    index_col=["gameId"],
)

In [4]:
import json

def _read_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return the decoded object."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)


# First champion file: keep the decoded document and its "data" entry, then
# build a DataFrame with one row per key of that entry.
data = _read_json("champion_info.json")
champion_data = data["data"]
df_champions_keyID = pd.DataFrame.from_dict(champion_data, orient="index")

# Second champion file, processed the same way. `data` and `champion_data` are
# rebound here, so after this cell they refer to the second file.
data = _read_json("champion_info_2.json")
champion_data = data["data"]
df_champions_nameID = pd.DataFrame.from_dict(champion_data, orient="index")

In [5]:
# Add one boolean column per team that is True where the team's dragon kill
# count is at least 4.
for flag_name, kills_name in (
    ("t1_hasDragonSoul", "t1_dragonKills"),
    ("t2_hasDragonSoul", "t2_dragonKills"),
):
    games_data[flag_name] = games_data[kills_name].ge(4)

In [6]:
def win_conditional_prob(data: pd.DataFrame, on_column: str, winner: int = 1) -> float:
    """Estimate the probability that ``winner`` wins given that ``on_column`` equals 1.

    Args:
        data: Frame containing a "winner" column and the ``on_column`` column.
        on_column: Name of the conditioning column. Rows where it equals 1
            (True also compares equal to 1) form the conditioning event.
        winner: Value of the "winner" column that counts as a win. Defaults
            to 1, which matches the previous hard-coded behavior.

    Returns:
        The fraction of conditioning rows whose "winner" equals ``winner``,
        or NaN when no row satisfies the condition (the ratio is undefined).
    """
    condition = data[on_column] == 1
    # Count with boolean masks instead of materializing filtered copies of the frame.
    condition_count = int(condition.sum())
    if condition_count == 0:
        # Avoid ZeroDivisionError: an empty conditioning event has no win rate.
        return float("nan")

    wins_count = int(((data["winner"] == winner) & condition).sum())
    return wins_count / condition_count

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Work on a copy so the boolean recoding below does not modify games_data.
corr_frame = games_data.copy()
columns = [
    'winner',
    'firstBlood',
    'firstTower',
    'firstInhibitor',
    'firstBaron',
    'firstDragon',
    'firstRiftHerald',
    't1_hasDragonSoul',
    't2_hasDragonSoul',
]

# Recode every selected column as a boolean that is True where the value equals 1.
for column in columns:
    corr_frame[column] = corr_frame[column].eq(1)

# Pairwise correlations of the recoded columns, displayed as a bar-styled table.
(
    corr_frame[columns]
    .corr()
    .style.format('{:.2f}', na_rep="")
    .bar(align=0, vmin=-1, vmax=1, cmap="RdYlGn", height=50, width=60)
)

firstBlood: 59.48%
firstTower: 70.84%
firstInhibitor: 90.87%
firstBaron: 81.16%
firstDragon: 68.60%
firstRiftHerald: 69.45%
t1_hasDragonSoul: 81.57%
t2_hasDragonSoul: 18.27%


Unnamed: 0,winner,firstBlood,firstTower,firstInhibitor,firstBaron,firstDragon,firstRiftHerald,t1_hasDragonSoul,t2_hasDragonSoul
winner,1.0,0.18,0.41,0.72,0.39,0.34,0.22,0.14,-0.15
firstBlood,0.18,1.0,0.22,0.16,0.06,0.14,0.11,0.03,-0.02
firstTower,0.41,0.22,1.0,0.4,0.15,0.32,0.26,0.08,-0.08
firstInhibitor,0.72,0.16,0.4,1.0,0.42,0.32,0.24,0.16,-0.11
firstBaron,0.39,0.06,0.15,0.42,1.0,0.17,0.13,0.16,-0.02
firstDragon,0.34,0.14,0.32,0.32,0.17,1.0,0.19,0.21,-0.2
firstRiftHerald,0.22,0.11,0.26,0.24,0.13,0.19,1.0,0.06,-0.04
t1_hasDragonSoul,0.14,0.03,0.08,0.16,0.16,0.21,0.06,1.0,-0.05
t2_hasDragonSoul,-0.15,-0.02,-0.08,-0.11,-0.02,-0.2,-0.04,-0.05,1.0


In [None]:
# Conditional win rate for every listed column except the first one ('winner').
win_rate_by_column = {
    name: win_conditional_prob(games_data, name)
    for name in columns[1:]
}
win_probabilities = pd.DataFrame(win_rate_by_column, index=["Win Rate"])

# Display the single-row table with two decimals and a bar per cell.
(
    win_probabilities.style
    .format('{:.2f}')
    .bar(align=0, vmin=0, vmax=1, cmap="YlGn", height=50, width=50)
)

Unnamed: 0,firstBlood,firstTower,firstInhibitor,firstBaron,firstDragon,firstRiftHerald,t1_hasDragonSoul,t2_hasDragonSoul
Win Rate,0.59,0.71,0.91,0.81,0.69,0.69,0.82,0.18


In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# Naive Bayes model for predicting win based on objectives

from sklearn.naive_bayes import GaussianNB

# Copy of the games table with the first-objective / dragon-soul columns recoded
# as booleans (True where the original value equals 1).
model_data = games_data.copy()

for column in columns[1:]:
    model_data[column] = model_data[column] == 1

model_columns = columns[1:] + ["t1_dragonKills", "t2_dragonKills", "t1_baronKills", "t2_baronKills", "t1_towerKills", "t2_towerKills"]

# NOTE(review): the features are taken from games_data, not from the recoded
# model_data prepared above, so this model is fitted on the unrecoded column
# values. Confirm this is intended before switching frames; it changes the score.
X, y = (games_data[model_columns], games_data["winner"] == 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
win_predictor = GaussianNB()
y_pred = win_predictor.fit(X_train, y_train).predict(X_test)

# Accuracy on the held-out test split (fraction of correct predictions).
print((y_test == y_pred).sum() / len(X_test))

0.9305366737877905


In [10]:
# Bernoulli Naive Bayes for predicting win based on first objectives

from sklearn.naive_bayes import BernoulliNB

# Features: every listed column except the first one ('winner'), read from model_data.
model_columns = columns[1:]

X = model_data[model_columns]
y = model_data["winner"] == 1

# Half of the rows are held out for testing; the split is seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

win_predictor = BernoulliNB()
win_predictor.fit(X_train, y_train)
y_pred = win_predictor.predict(X_test)

# Fraction of test rows predicted correctly.
correct_predictions = (y_test == y_pred).sum()
print(correct_predictions / len(X_test))

0.8493688094775684


In [11]:
# Display the column labels of games_data.
games_data.columns

Index(['creationTime', 'gameDuration', 'seasonId', 'winner', 'firstBlood',
       'firstTower', 'firstInhibitor', 'firstBaron', 'firstDragon',
       'firstRiftHerald', 't1_champ1id', 't1_champ1_sum1', 't1_champ1_sum2',
       't1_champ2id', 't1_champ2_sum1', 't1_champ2_sum2', 't1_champ3id',
       't1_champ3_sum1', 't1_champ3_sum2', 't1_champ4id', 't1_champ4_sum1',
       't1_champ4_sum2', 't1_champ5id', 't1_champ5_sum1', 't1_champ5_sum2',
       't1_towerKills', 't1_inhibitorKills', 't1_baronKills', 't1_dragonKills',
       't1_riftHeraldKills', 't1_ban1', 't1_ban2', 't1_ban3', 't1_ban4',
       't1_ban5', 't2_champ1id', 't2_champ1_sum1', 't2_champ1_sum2',
       't2_champ2id', 't2_champ2_sum1', 't2_champ2_sum2', 't2_champ3id',
       't2_champ3_sum1', 't2_champ3_sum2', 't2_champ4id', 't2_champ4_sum1',
       't2_champ4_sum2', 't2_champ5id', 't2_champ5_sum1', 't2_champ5_sum2',
       't2_towerKills', 't2_inhibitorKills', 't2_baronKills', 't2_dragonKills',
       't2_riftHeraldKills',