In [3]:
import os
from urllib.request import urlretrieve
from zipfile import ZipFile

# constants
URL = "https://www.kaggle.com/api/v1/datasets/download/datasnaek/league-of-legends"
DATA_DIR = "data/"
ZIP_NAME = "compressed.zip"

# Remember the directory the kernel started in. Without this, re-running the
# cell would resolve DATA_DIR against the directory we already moved into and
# end up creating (and downloading into) data/data/.
_NOTEBOOK_ROOT = globals().get("_NOTEBOOK_ROOT", os.getcwd())
_data_path = os.path.join(_NOTEBOOK_ROOT, DATA_DIR)

os.makedirs(_data_path, exist_ok=True)

# The dataset files are opened by bare file name further down, so the working
# directory has to be the data directory from here on.
os.chdir(_data_path)

if not os.path.exists(ZIP_NAME):
    # Download under a temporary name and give the archive its final name only
    # after extraction succeeded. That way the presence of ZIP_NAME means
    # "downloaded and extracted", and an interrupted run is retried next time
    # instead of being skipped by the existence check above.
    partial_name = ZIP_NAME + ".part"
    urlretrieve(URL, partial_name)
    # Open the archive and extract everything into the current directory.
    with ZipFile(partial_name) as f:
        f.extractall()
    os.replace(partial_name, ZIP_NAME)

In [4]:
import pandas as pd

# Read the match table from the current directory; rows are indexed by the
# `gameId` column.
games_data = pd.read_csv(
    "games.csv",
    index_col=["gameId"],
)

In [5]:
import json

def _load_json(path):
    """Return the parsed content of the UTF-8 encoded JSON file at ``path``."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)


# First champion file: keep the parsed document, pick its "data" entry (a
# dictionary of champions) and turn it into a frame with one row per key.
data = _load_json("champion_info.json")
champion_data = data["data"]
df_champions_keyID = pd.DataFrame.from_dict(champion_data, orient="index")

# Second champion file, processed exactly the same way. `data` and
# `champion_data` are rebound, so afterwards they refer to this file.
data = _load_json("champion_info_2.json")
champion_data = data["data"]
df_champions_nameID = pd.DataFrame.from_dict(champion_data, orient="index")

In [47]:
# A team is flagged as having the dragon soul once it has at least this many
# dragon kills. The flags are added to games_data as new boolean columns.
soul_threshold = 4
games_data["t1_hasDragonSoul"] = games_data["t1_dragonKills"].ge(soul_threshold)
games_data["t2_hasDragonSoul"] = games_data["t2_dragonKills"].ge(soul_threshold)

In [None]:
def win_conditional_prob(data: pd.DataFrame, on_column: str) -> float:
    """Estimate P(winner == 1 | on_column == 1) from the rows of ``data``.

    Args:
        data: Frame containing a ``winner`` column and the ``on_column`` column.
        on_column: Name of the column whose rows equal to 1 form the
            conditioning event.

    Returns:
        The fraction of rows with ``on_column == 1`` that also have
        ``winner == 1``. ``nan`` is returned when no row has
        ``on_column == 1``, because the conditional probability is undefined
        in that case.
    """
    condition = data[on_column] == 1
    condition_count = int(condition.sum())
    if condition_count == 0:
        # Nothing to condition on: report "undefined" instead of failing with
        # a ZeroDivisionError.
        return float("nan")

    # Summing the boolean masks counts the matching rows without building
    # filtered copies of the frame.
    joint_count = int(((data["winner"] == 1) & condition).sum())
    return joint_count / condition_count

In [38]:
import numpy as np

# Columns examined in this cell. `winner` comes first so that `columns[1:]`
# yields only the indicator columns.
columns = ['winner', 'firstBlood', 'firstTower', 'firstInhibitor', 'firstBaron', 'firstDragon', 'firstRiftHerald', 't1_hasDragonSoul', 't2_hasDragonSoul']

# Separate frame in which each of those columns is reduced to a boolean
# "equals 1" flag; games_data itself is not modified.
corr_frame = games_data.assign(**{name: games_data[name] == 1 for name in columns})

# For every indicator, print the share of rows with winner == 1 among the rows
# where the indicator equals 1.
for column in columns[1:]:
    win_share = win_conditional_prob(games_data, column) * 100
    print(f"{column}: {win_share:.2f}%")

# Pairwise correlation of the boolean flags, shown as the cell output.
corr_frame[columns].corr()

firstBlood: 59.48%
firstTower: 70.84%
firstInhibitor: 90.87%
firstBaron: 81.16%
firstDragon: 68.60%
firstRiftHerald: 69.45%
t1_hasDragonSoul: 81.62%
t2_hasDragonSoul: 17.88%


Unnamed: 0,winner,firstBlood,firstTower,firstInhibitor,firstBaron,firstDragon,firstRiftHerald,t1_hasDragonSoul,t2_hasDragonSoul
winner,1.0,0.179262,0.405767,0.724428,0.386913,0.34468,0.218066,0.129437,-0.142178
firstBlood,0.179262,1.0,0.217051,0.16126,0.064734,0.141406,0.109546,0.021655,-0.01988
firstTower,0.405767,0.217051,1.0,0.397682,0.147643,0.315932,0.259083,0.067933,-0.070053
firstInhibitor,0.724428,0.16126,0.397682,1.0,0.419108,0.323766,0.23808,0.147592,-0.104515
firstBaron,0.386913,0.064734,0.147643,0.419108,1.0,0.166733,0.134819,0.147085,-0.019177
firstDragon,0.34468,0.141406,0.315932,0.323766,0.166733,1.0,0.185238,0.185268,-0.177488
firstRiftHerald,0.218066,0.109546,0.259083,0.23808,0.134819,0.185238,1.0,0.056644,-0.033515
t1_hasDragonSoul,0.129437,0.021655,0.067933,0.147592,0.147085,0.185268,0.056644,1.0,-0.045336
t2_hasDragonSoul,-0.142178,-0.01988,-0.070053,-0.104515,-0.019177,-0.177488,-0.033515,-0.045336,1.0


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Naive Bayes model for predicting win based on objectives

from sklearn.naive_bayes import GaussianNB

# Copy of the match table in which every indicator column is reduced to a
# boolean "equals 1" flag.
model_data = games_data.copy()
for column in columns[1:]:
    model_data[column] = model_data[column].eq(1)

# Features: the indicator columns plus the per-team kill counts.
model_columns = columns[1:] + ["t1_dragonKills", "t2_dragonKills", "t1_baronKills", "t2_baronKills", "t1_towerKills", "t2_towerKills"]

# NOTE(review): features and target are read from games_data, not from the
# boolean model_data prepared above, so this model is fitted on the raw column
# values. Confirm that this is intended.
X = games_data[model_columns]
y = games_data["winner"].eq(1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

win_predictor = GaussianNB()
win_predictor.fit(X_train, y_train)
y_pred = win_predictor.predict(X_test)

# Accuracy on the held-out split: fraction of test rows predicted correctly.
n_correct = (y_test == y_pred).sum()
print(n_correct / len(X_test))

0.9304719362983104


In [None]:
# Bernoulli Naive Bayes for predicting win based on first objectives

from sklearn.naive_bayes import BernoulliNB

# Only the indicator columns (everything in `columns` after `winner`) are used
# as features here.
model_columns = columns[1:]

# Features and target both come from the boolean model_data frame.
X = model_data[model_columns]
y = model_data["winner"].eq(1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

win_predictor = BernoulliNB()
win_predictor.fit(X_train, y_train)
y_pred = win_predictor.predict(X_test)

# Accuracy on the held-out split: fraction of test rows predicted correctly.
n_correct = (y_test == y_pred).sum()
print(n_correct / len(X_test))


0.8493688094775684


In [25]:
# Display the column labels currently present in games_data.
games_data.columns

Index(['creationTime', 'gameDuration', 'seasonId', 'winner', 'firstBlood',
       'firstTower', 'firstInhibitor', 'firstBaron', 'firstDragon',
       'firstRiftHerald', 't1_champ1id', 't1_champ1_sum1', 't1_champ1_sum2',
       't1_champ2id', 't1_champ2_sum1', 't1_champ2_sum2', 't1_champ3id',
       't1_champ3_sum1', 't1_champ3_sum2', 't1_champ4id', 't1_champ4_sum1',
       't1_champ4_sum2', 't1_champ5id', 't1_champ5_sum1', 't1_champ5_sum2',
       't1_towerKills', 't1_inhibitorKills', 't1_baronKills', 't1_dragonKills',
       't1_riftHeraldKills', 't1_ban1', 't1_ban2', 't1_ban3', 't1_ban4',
       't1_ban5', 't2_champ1id', 't2_champ1_sum1', 't2_champ1_sum2',
       't2_champ2id', 't2_champ2_sum1', 't2_champ2_sum2', 't2_champ3id',
       't2_champ3_sum1', 't2_champ3_sum2', 't2_champ4id', 't2_champ4_sum1',
       't2_champ4_sum2', 't2_champ5id', 't2_champ5_sum1', 't2_champ5_sum2',
       't2_towerKills', 't2_inhibitorKills', 't2_baronKills', 't2_dragonKills',
       't2_riftHeraldKills',