In [1]:
import os
import pandas as pd
import numpy as np
from scipy import stats
import json

In [2]:
def get_regional_csvs(region: str):
    """
    Returns a list of dataframes of by region
    """
    dfs = {}
    type_dict = {'Champion1': 'int16', 'Champion2': 'int16',
             'Champion3': 'int16', 'Champion4': 'int16',
             'Champion5': 'int16'}
    for f in os.listdir('data'):
        if region in f:
            dfs[f.split('_')[1].split('.')[0]] = pd.read_csv(f'data/{f}', dtype=type_dict)
    return dfs

In [2]:
def get_top3_dfs(region: str):
    """
    Returns at list of dataframes of by region of the top 3 champions per lane per player.
    """
    dfs = {}
    for f in os.listdir('data/top3'):
        if region in f:
            dfs[f.split('_')[1].split('.')[0]] = pd.read_csv(f'data/top3/{f}')
    return dfs

In [6]:
na_top3 = get_top3_dfs('na')
na_top3['diamond'].head()

Unnamed: 0,Champion1,Mastery1,Lane1,Champion2,Mastery2,Lane2,Champion3,Mastery3,Lane3,Champion4,...,Lane12,Champion13,Mastery13,Lane13,Champion14,Mastery14,Lane14,Champion15,Mastery15,Lane15
0,51,272295,bot,145,88254,bot,103,74100,mid,84,...,jg,99,9746,sup,254,7431,jg,114,1286,top
1,55,67222,mid,238,51605,mid,3,51363,mid,39,...,sup,16,10862,sup,202,9299,bot,21,8998,bot
2,39,956886,top,90,109817,mid,98,74811,top,245,...,mid,350,16135,sup,1,14686,mid,18,13146,bot
3,64,154967,jg,517,97859,mid,238,66095,mid,104,...,bot,145,17146,bot,497,6812,sup,555,6105,sup
4,131,514349,jg,103,214265,mid,99,163929,sup,25,...,mid,38,10785,mid,11,9387,jg,113,6642,jg


In [8]:
na_top3_combined = pd.concat(na_top3.values(), axis=0)
na_top3_combined.head()

Unnamed: 0,Champion1,Mastery1,Lane1,Champion2,Mastery2,Lane2,Champion3,Mastery3,Lane3,Champion4,...,Lane12,Champion13,Mastery13,Lane13,Champion14,Mastery14,Lane14,Champion15,Mastery15,Lane15
0,11,228838,jg,141,127298,jg,107,75772,top,555,...,sup,12,26501,sup,84,21316,top,51,12740,bot
1,99,158190,sup,16,108973,sup,267,101700,sup,21,...,jg,163,17509,jg,39,16153,top,10,11956,top
2,84,134256,top,145,124395,bot,157,87764,mid,99,...,sup,245,27580,jg,254,25642,jg,875,24216,top
3,267,107035,sup,350,99720,sup,16,46863,sup,68,...,mid,81,9618,bot,202,8821,bot,523,5726,bot
4,21,131246,bot,75,74165,top,54,67741,top,17,...,mid,32,10750,sup,90,9949,mid,45,8198,mid


In [9]:
na_top3_combined.shape

(3665, 45)

In [13]:
na_top3_combined = na_top3_combined.replace(champion_ids_name)

In [14]:
na_top3_combined.head()

Unnamed: 0,Champion1,Mastery1,Lane1,Champion2,Mastery2,Lane2,Champion3,Mastery3,Lane3,Champion4,...,Lane12,Champion13,Mastery13,Lane13,Champion14,Mastery14,Lane14,Champion15,Mastery15,Lane15
0,MasterYi,228838,jg,Kayn,127298,jg,Rengar,75772,top,Pyke,...,sup,Alistar,26501,sup,Akali,21316,top,Caitlyn,12740,bot
1,Lux,158190,sup,Soraka,108973,sup,Nami,101700,sup,MissFortune,...,jg,Taliyah,17509,jg,Irelia,16153,top,Kayle,11956,top
2,Akali,134256,top,Kaisa,124395,bot,Yasuo,87764,mid,Lux,...,sup,Ekko,27580,jg,Vi,25642,jg,Sett,24216,top
3,Nami,107035,sup,Yuumi,99720,sup,Soraka,46863,sup,Rumble,...,mid,Ezreal,9618,bot,Jhin,8821,bot,Aphelios,5726,bot
4,MissFortune,131246,bot,Nasus,74165,top,Malphite,67741,top,Teemo,...,mid,Amumu,10750,sup,Malzahar,9949,mid,Veigar,8198,mid


In [21]:
na_top3_combined.groupby(['Champion1', 'Lane2'])[['Champion2']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Champion2
Champion1,Lane2,Unnamed: 2_level_1
Aatrox,bot,3
Aatrox,jg,3
Aatrox,mid,4
Aatrox,sup,3
Aatrox,top,28
...,...,...
Zoe,top,6
Zyra,bot,1
Zyra,jg,2
Zyra,sup,9


In [85]:
na_top3_combined[na_top3_combined['Champion1'] == 'Aatrox'][['Champion2', 'Mastery2']].groupby('Champion2').sum().sort_values('Mastery2', ascending=False)

Unnamed: 0_level_0,Mastery2
Champion2,Unnamed: 1_level_1
Kayn,562731
Mordekaiser,543204
Renekton,452821
Yasuo,436275
Sylas,429179
Sett,383784
Irelia,347417
Amumu,334748
Jayce,330000
Camille,288810


In [87]:
na_top3_combined[na_top3_combined['Champion1'] == 'Aatrox'][['Champion3', 'Mastery3']].groupby('Champion3').sum().sort_values('Mastery3', ascending=False)

Unnamed: 0_level_0,Mastery3
Champion3,Unnamed: 1_level_1
Darius,525135
Camille,401388
Pyke,384349
Sylas,347116
Rakan,283469
Yasuo,244348
Nasus,241029
Talon,240547
Fiora,208416
Volibear,205599


In [66]:
values_by_champion

{'Aatrox': [0, 0],
 'Ahri': [0, 0],
 'Akali': [0, 0],
 'Akshan': [0, 0],
 'Alistar': [0, 0],
 'Amumu': [0, 0],
 'Anivia': [0, 0],
 'Annie': [0, 0],
 'Aphelios': [0, 0],
 'Ashe': [0, 0],
 'AurelionSol': [0, 0],
 'Azir': [0, 0],
 'Bard': [0, 0],
 'Blitzcrank': [0, 0],
 'Brand': [0, 0],
 'Braum': [0, 0],
 'Caitlyn': [0, 0],
 'Camille': [0, 0],
 'Cassiopeia': [0, 0],
 'Chogath': [0, 0],
 'Corki': [0, 0],
 'Darius': [0, 0],
 'Diana': [0, 0],
 'Draven': [0, 0],
 'DrMundo': [0, 0],
 'Ekko': [0, 0],
 'Elise': [0, 0],
 'Evelynn': [0, 0],
 'Ezreal': [0, 0],
 'Fiddlesticks': [0, 0],
 'Fiora': [0, 0],
 'Fizz': [0, 0],
 'Galio': [0, 0],
 'Gangplank': [0, 0],
 'Garen': [0, 0],
 'Gnar': [0, 0],
 'Gragas': [0, 0],
 'Graves': [0, 0],
 'Gwen': [0, 0],
 'Hecarim': [0, 0],
 'Heimerdinger': [0, 0],
 'Illaoi': [0, 0],
 'Irelia': [0, 0],
 'Ivern': [0, 0],
 'Janna': [0, 0],
 'JarvanIV': [0, 0],
 'Jax': [0, 0],
 'Jayce': [0, 0],
 'Jhin': [0, 0],
 'Jinx': [0, 0],
 'Kaisa': [0, 0],
 'Kalista': [0, 0],
 'Karma': 

In [52]:
na_top3_combined[na_top3_combined['Champion1'] == 'Aatrox'][[f'Champion{i}' for i in range(2, 16)]].T.stack().reset_index().set_index('level_1').drop('level_0', axis=1)

Unnamed: 0_level_0,0
level_1,Unnamed: 1_level_1
244,Ornn
452,Urgot
456,Lucian
283,Mordekaiser
327,MissFortune
...,...
595,Yasuo
706,Velkoz
741,Lucian
747,Lucian


In [42]:
pd.value_counts(na_top3_combined[na_top3_combined['Champion1'] == 'Aatrox'][[f'Champion{i}' for i in range(2, 16)]].values.flatten())

Pantheon    22
Kayn        19
Ezreal      19
Yasuo       18
Lucian      18
            ..
Teemo        1
Katarina     1
Kled         1
Kayle        1
Vi           1
Length: 127, dtype: int64

In [3]:
na_dfs = get_regional_csvs('na')
kr_dfs = get_regional_csvs('kr')
euw_dfs = get_regional_csvs('euw')

In [4]:
na_dfs['diamond'].head()

Unnamed: 0,Champion1,Mastery1,Champion2,Mastery2,Champion3,Mastery3,Champion4,Mastery4,Champion5,Mastery5
0,875,392982,240,281442,245,271693,14,239201,13,161435
1,92,178830,24,168553,126,119510,164,101045,114,79445
2,55,397277,34,292924,112,96573,121,81492,432,49467
3,412,416106,74,168156,432,132571,117,87439,111,68316
4,18,98100,518,92424,222,70200,89,68836,145,36349


In [5]:
kr_dfs['diamond'].head()

Unnamed: 0,Champion1,Mastery1,Champion2,Mastery2,Champion3,Mastery3,Champion4,Mastery4,Champion5,Mastery5
0,266,124094,126,77659,39,64466,164,40700,517,39502
1,238,429884,236,134557,145,130125,67,101330,64,85474
2,234,225320,10,179661,777,176145,120,161611,104,119311
3,412,360116,238,212892,92,212037,107,193670,7,182946
4,157,172559,92,118584,91,111408,64,103294,238,101800


In [6]:
euw_dfs['diamond'].head()

Unnamed: 0,Champion1,Mastery1,Champion2,Mastery2,Champion3,Mastery3,Champion4,Mastery4,Champion5,Mastery5
0,117,145461,267,103061,43,96639,497,85432,235,81444
1,55,2152096,40,434336,37,163254,117,45074,16,30901
2,29,343736,145,227872,81,163469,222,156098,96,102269
3,76,75694,131,75250,104,67350,5,56997,102,44987
4,236,57761,238,34488,8,28604,103,24610,7,24509


In [7]:
na_combined = pd.concat(na_dfs.values(), axis=0)
na_combined.head()

Unnamed: 0,Champion1,Mastery1,Champion2,Mastery2,Champion3,Mastery3,Champion4,Mastery4,Champion5,Mastery5
0,50,114359,114,92161,427,79025,32,63376,350,57265
1,21,98748,58,65117,13,55159,236,54129,67,53659
2,99,227286,25,90071,61,85030,103,66518,115,65364
3,58,98576,82,44299,3,41376,6,40942,86,39732
4,84,391931,145,139374,67,98345,28,93254,142,85232


In [8]:
na_combined.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7944 entries, 0 to 1836
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   Champion1  7944 non-null   int16
 1   Mastery1   7944 non-null   int64
 2   Champion2  7944 non-null   int16
 3   Mastery2   7944 non-null   int64
 4   Champion3  7944 non-null   int16
 5   Mastery3   7944 non-null   int64
 6   Champion4  7944 non-null   int16
 7   Mastery4   7944 non-null   int64
 8   Champion5  7944 non-null   int16
 9   Mastery5   7944 non-null   int64
dtypes: int16(5), int64(5)
memory usage: 450.0 KB


In [17]:
na_combined[['Mastery2', 'Mastery3', 'Mastery4', 'Mastery5']] = na_combined[['Mastery2', 'Mastery3', 'Mastery4', 'Mastery5']].div(na_combined[['Mastery2', 'Mastery3', 'Mastery4', 'Mastery5']].sum(axis=1), axis=0)

In [19]:
# Don't need the first champion's mastery
na_combined = na_combined.drop(['Mastery1'], axis=1)
na_combined.head()

Unnamed: 0,Champion1,Champion2,Mastery2,Champion3,Mastery3,Champion4,Mastery4,Champion5,Mastery5
0,50,114,0.315807,427,0.270794,32,0.21717,350,0.196229
1,21,58,0.285521,13,0.241858,236,0.237341,67,0.23528
2,99,25,0.293407,61,0.276986,103,0.216683,115,0.212924
3,58,82,0.266302,3,0.24873,6,0.246121,86,0.238847
4,84,145,0.334869,67,0.23629,28,0.224058,142,0.204784


In [24]:
import json

In [10]:
raw_champions = None
with open('data/champions.json', encoding="cp866") as f:
    raw_champions = json.load(f)

In [11]:
champion_ids_name = {}
for champ in raw_champions['data'].keys():
    champion_ids_name[int(raw_champions['data'][champ]['key'])] = champ

In [34]:
champion_ids_name

{266: 'Aatrox',
 103: 'Ahri',
 84: 'Akali',
 166: 'Akshan',
 12: 'Alistar',
 32: 'Amumu',
 34: 'Anivia',
 1: 'Annie',
 523: 'Aphelios',
 22: 'Ashe',
 136: 'AurelionSol',
 268: 'Azir',
 432: 'Bard',
 53: 'Blitzcrank',
 63: 'Brand',
 201: 'Braum',
 51: 'Caitlyn',
 164: 'Camille',
 69: 'Cassiopeia',
 31: 'Chogath',
 42: 'Corki',
 122: 'Darius',
 131: 'Diana',
 119: 'Draven',
 36: 'DrMundo',
 245: 'Ekko',
 60: 'Elise',
 28: 'Evelynn',
 81: 'Ezreal',
 9: 'Fiddlesticks',
 114: 'Fiora',
 105: 'Fizz',
 3: 'Galio',
 41: 'Gangplank',
 86: 'Garen',
 150: 'Gnar',
 79: 'Gragas',
 104: 'Graves',
 887: 'Gwen',
 120: 'Hecarim',
 74: 'Heimerdinger',
 420: 'Illaoi',
 39: 'Irelia',
 427: 'Ivern',
 40: 'Janna',
 59: 'JarvanIV',
 24: 'Jax',
 126: 'Jayce',
 202: 'Jhin',
 222: 'Jinx',
 145: 'Kaisa',
 429: 'Kalista',
 43: 'Karma',
 30: 'Karthus',
 38: 'Kassadin',
 55: 'Katarina',
 10: 'Kayle',
 141: 'Kayn',
 85: 'Kennen',
 121: 'Khazix',
 203: 'Kindred',
 240: 'Kled',
 96: 'KogMaw',
 7: 'Leblanc',
 64: 'LeeSi

In [35]:
na_combined[[f'Champion{i}' for i in range(1, 6)]] = na_combined[[f'Champion{i}' for i in range(1, 6)]].replace(champion_ids_name)
na_combined.head()

Unnamed: 0,Champion1,Champion2,Mastery2,Champion3,Mastery3,Champion4,Mastery4,Champion5,Mastery5
0,Swain,Fiora,0.315807,Ivern,0.270794,Amumu,0.21717,Yuumi,0.196229
1,MissFortune,Renekton,0.285521,Ryze,0.241858,Lucian,0.237341,Vayne,0.23528
2,Lux,Morgana,0.293407,Orianna,0.276986,Ahri,0.216683,Ziggs,0.212924
3,Renekton,Mordekaiser,0.266302,Galio,0.24873,Urgot,0.246121,Garen,0.238847
4,Akali,Kaisa,0.334869,Vayne,0.23629,Evelynn,0.224058,Zoe,0.204784


In [54]:
pd.value_counts(na_combined[na_combined.isin(['Ziggs']).any(axis=1)][[f'Champion{i}' for i in range(1, 6)]].values.flatten())

Ziggs           47
Lux              8
Xerath           6
Morgana          6
Diana            5
                ..
Maokai           1
Rakan            1
Fiddlesticks     1
Zoe              1
Sivir            1
Length: 97, dtype: int64

In [40]:
na_combined.groupby('Champion1')[[f'Champion{i}' for i in range(2, 6)]].agg(pd.Series.mode)

Unnamed: 0_level_0,Champion2,Champion3,Champion4,Champion5
Champion1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aatrox,"[Darius, Mordekaiser]",Darius,Sett,Camille
Ahri,Lux,Lux,Neeko,Lux
Akali,Yasuo,Yasuo,Ahri,Ezreal
Akshan,"[Katarina, Khazix, Sett, Yorick]","[Kayn, Seraphine, Swain, Viego]","[Graves, Illaoi, Thresh, Zac]","[Camille, Ekko, Morgana, Sylas]"
Alistar,Nautilus,Blitzcrank,Blitzcrank,Rakan
...,...,...,...,...
Zeri,"[Ezreal, Lux]","[Kaisa, MissFortune]","[Teemo, Tristana]","[Caitlyn, Morgana]"
Ziggs,"[Diana, Galio, Neeko, Rumble, Vi]",Akali,"[Corki, Katarina, Malphite, Neeko, Sylas]","[Ahri, Fizz, Lux, Morgana, Xerath]"
Zilean,Thresh,"[Janna, Morgana, Thresh]",Morgana,Janna
Zoe,Lux,Neeko,Akali,"[Ezreal, Qiyana]"


In [37]:
pd.value_counts(na_combined[na_combined.isin(['Swain']).any(axis=1)][[f'Champion{i}' for i in range(2, 6)]].values.flatten())

Swain          169
Lux             23
Jhin            19
Xerath          18
Mordekaiser     18
              ... 
Hecarim          1
Xayah            1
Shyvana          1
Elise            1
Quinn            1
Length: 142, dtype: int64

In [81]:
na_combined.value_counts()

Champion1  Champion2  Champion3  Champion4  Champion5
99         267        16         37         40           2
103        84         99         142        147          2
1          3          74         497        267          1
145        84         157        81         39           1
           81         523        236        202          1
                                                        ..
75         157        83         62         38           1
           145        81         235        134          1
           141        45         91         86           1
                      8          121        238          1
888        412        43         432        16           1
Length: 6894, dtype: int64

In [90]:
na_combined.groupby('Champion1').size()

Champion1
1      22
2      11
3      30
4      25
5      17
       ..
777    33
875    88
876     9
887     4
888     1
Length: 159, dtype: int64

In [95]:
champion_ids = sorted(na_combined['Champion1'].unique())
champion_ids[:5]

[1, 2, 3, 4, 5]

In [116]:
na_combined[na_combined.isin([1]).any(axis=1)].head()

Unnamed: 0,Champion1,Champion2,Champion3,Champion4,Champion5
119,1,99,246,25,518
183,133,101,53,1,6
202,22,63,1,21,145
221,90,3,74,1,86
233,7,18,222,69,1


In [125]:
pd.value_counts(na_combined[na_combined.isin([1]).any(axis=1)].values.flatten())

1      101
99      14
202     13
45      11
245     10
      ... 
15       1
76       1
41       1
56       1
34       1
Length: 124, dtype: int64

In [None]:
def most_common_ids(id):
    # get all rows with the given id
    return

In [98]:
na_combined.groupby('Champion1')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fb690641130>

In [89]:
na_combined.groupby('Champion1')['Champion2'].value_counts().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Champion2
Champion1,Champion2,Unnamed: 2_level_1
1,45,2
1,99,2
1,134,2
1,3,1
1,8,1
...,...,...
876,266,1
887,875,2
887,38,1
887,518,1


In [77]:
na_combined[na_combined['Champion1'] == 1]['Champion2'].value_counts()

99     2
134    2
45     2
18     1
31     1
11     1
77     1
143    1
53     1
58     1
8      1
202    1
117    1
30     1
131    1
157    1
63     1
875    1
3      1
Name: Champion2, dtype: int64

In [78]:
na_combined[na_combined['Champion1'] == 1]['Champion3'].value_counts()

103    2
61     2
22     2
246    1
12     1
32     1
17     1
16     1
98     1
102    1
203    1
64     1
112    1
222    1
90     1
29     1
5      1
245    1
74     1
Name: Champion3, dtype: int64

In [79]:
na_combined[na_combined['Champion1'] == 1]['Champion4'].value_counts()

25     1
29     1
497    1
420    1
103    1
245    1
30     1
58     1
203    1
127    1
201    1
82     1
421    1
98     1
81     1
18     1
63     1
11     1
31     1
120    1
202    1
59     1
Name: Champion4, dtype: int64

In [80]:
na_combined[na_combined['Champion1'] == 1]['Champion5'].value_counts()

45     3
267    2
518    1
13     1
74     1
75     1
28     1
18     1
497    1
117    1
91     1
145    1
22     1
56     1
238    1
50     1
77     1
141    1
134    1
Name: Champion5, dtype: int64

In [68]:
na_combined.pivot_table(index='Champion1', aggfunc=stats.mode)

Unnamed: 0_level_0,Champion2,Champion3,Champion4,Champion5
Champion1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,"([45], [2])","([22], [2])","([11], [1])","([45], [3])"
2,"([3], [1])","([141], [2])","([11], [1])","([5], [1])"
3,"([99], [2])","([111], [3])","([104], [2])","([68], [2])"
4,"([35], [2])","([112], [4])","([5], [2])","([22], [2])"
5,"([64], [3])","([9], [2])","([777], [2])","([11], [2])"
...,...,...,...,...
777,"([157], [6])","([36], [2])","([145], [3])","([157], [6])"
875,"([82], [5])","([82], [6])","([157], [4])","([122], [6])"
876,"([81], [2])","([3], [1])","([6], [1])","([51], [1])"
887,"([875], [2])","([23], [1])","([8], [1])","([82], [1])"


In [55]:
kr_combined = pd.concat(kr_dfs.values(), axis=0)
kr_combined.head()

Unnamed: 0,Champion1,Mastery1,Champion2,Mastery2,Champion3,Mastery3,Champion4,Mastery4,Champion5,Mastery5
0,157,109353,555,80202,122,53546,81,31706,517,28081
1,266,117626,54,77185,36,60833,53,39345,14,28882
2,58,250388,875,61353,141,53829,516,48399,234,34646
3,157,188270,777,65476,555,49046,517,41462,84,38462
4,122,38797,7,32084,238,25974,875,24392,777,21579


In [81]:
kr_combined[[f'Champion{i}' for i in range(1, 6)]] = kr_combined[[f'Champion{i}' for i in range(1, 6)]].replace(champion_ids_name)

In [82]:
euw_combined = pd.concat(euw_dfs.values(), axis=0)
euw_combined.head()

Unnamed: 0,Champion1,Mastery1,Champion2,Mastery2,Champion3,Mastery3,Champion4,Mastery4,Champion5,Mastery5
0,86,91814,75,89144,99,77068,32,56638,117,35227
1,157,137297,777,57998,62,27081,141,24226,266,22471
2,222,395187,518,213365,350,127739,498,113112,145,79875
3,98,153711,141,79765,5,75819,19,75211,11,73228
4,143,64077,59,53913,11,46980,412,41843,126,38971


In [83]:
euw_combined[[f'Champion{i}' for i in range(1, 6)]] = euw_combined[[f'Champion{i}' for i in range(1, 6)]].replace(champion_ids_name)

In [84]:
euw_combined[['Mastery2', 'Mastery3', 'Mastery4', 'Mastery5']] = euw_combined[['Mastery2', 'Mastery3', 'Mastery4', 'Mastery5']].div(euw_combined[['Mastery2', 'Mastery3', 'Mastery4', 'Mastery5']].sum(axis=1), axis=0)

In [85]:
kr_combined[['Mastery2', 'Mastery3', 'Mastery4', 'Mastery5']] = kr_combined[['Mastery2', 'Mastery3', 'Mastery4', 'Mastery5']].div(kr_combined[['Mastery2', 'Mastery3', 'Mastery4', 'Mastery5']].sum(axis=1), axis=0)

In [86]:
all_combined = pd.concat([na_combined, euw_combined, kr_combined], axis=0)
all_combined.head()

Unnamed: 0,Champion1,Champion2,Mastery2,Champion3,Mastery3,Champion4,Mastery4,Champion5,Mastery5,Mastery1
0,Swain,Fiora,0.315807,Ivern,0.270794,Amumu,0.21717,Yuumi,0.196229,
1,MissFortune,Renekton,0.285521,Ryze,0.241858,Lucian,0.237341,Vayne,0.23528,
2,Lux,Morgana,0.293407,Orianna,0.276986,Ahri,0.216683,Ziggs,0.212924,
3,Renekton,Mordekaiser,0.266302,Galio,0.24873,Urgot,0.246121,Garen,0.238847,
4,Akali,Kaisa,0.334869,Vayne,0.23629,Evelynn,0.224058,Zoe,0.204784,


In [87]:
all_combined.shape

(31674, 10)

In [126]:
champ_value_counts = all_combined[all_combined['Champion1'] == 'Aatrox'][['Champion1', 'Champion2', 'Mastery2']][['Champion2']].value_counts().to_frame()
champ_value_counts.head()

Unnamed: 0_level_0,0
Champion2,Unnamed: 1_level_1
Darius,13
Camille,12
Mordekaiser,12
Yasuo,10
Sylas,9


In [133]:
all_combined[all_combined['Champion1'] == 'Aatrox'][['Champion1', 'Champion2', 'Mastery2']].groupby('Champion2')[['Champion2', 'Mastery2']].mean().reset_index()

Unnamed: 0,Champion2,Mastery2
0,Akali,0.326925
1,Amumu,0.400151
2,Brand,0.362340
3,Camille,0.400544
4,Chogath,0.308308
...,...,...
80,Yorick,0.441589
81,Zac,0.401014
82,Zed,0.337586
83,Zilean,0.297490


In [136]:
testing_df = all_combined[all_combined['Champion1'] == 'Aatrox'][['Champion1', 'Champion2', 'Mastery2']].groupby('Champion2')[['Champion2', 'Mastery2']].mean().reset_index().merge(champ_value_counts, on='Champion2')
testing_df.head()

Unnamed: 0,Champion2,Mastery2,0
0,Akali,0.326925,3
1,Amumu,0.400151,2
2,Brand,0.36234,1
3,Camille,0.400544,12
4,Chogath,0.308308,1


In [138]:
(testing_df['Mastery2'] * testing_df[0]).sum()

81.85010772614656

In [142]:
testing_df.loc[5]

Champion2      Darius
Mastery2     0.386705
0                  13
Name: 5, dtype: object

In [141]:
((testing_df['Mastery2'] * testing_df[0]) / (testing_df['Mastery2'] * testing_df[0]).sum()).sort_values(ascending=False)

5     0.061419
40    0.060074
3     0.058724
78    0.041676
60    0.036711
        ...   
16    0.003486
61    0.003460
42    0.003449
74    0.003226
20    0.003215
Length: 85, dtype: float64

In [121]:
all_combined[all_combined['Champion1'] == 'Aatrox'][['Champion1', 'Champion2', 'Mastery2']]['Champion2'].value_counts()

Darius         13
Camille        12
Mordekaiser    12
Yasuo          10
Sylas           9
               ..
Illaoi          1
Zilean          1
Trundle         1
Shaco           1
Zac             1
Name: Champion2, Length: 85, dtype: int64

In [112]:
all_combined.groupby(['Champion1', 'Champion2'])[['Champion1', 'Champion2', 'Mastery2']].mean().loc['Aatrox']

Unnamed: 0_level_0,Mastery2
Champion2,Unnamed: 1_level_1
Akali,0.326925
Amumu,0.400151
Brand,0.362340
Camille,0.400544
Chogath,0.308308
...,...
Yorick,0.441589
Zac,0.401014
Zed,0.337586
Zilean,0.297490


In [75]:
all_combined.groupby('Champion1')[[f'Champion{i}' for i in range(2, 6)]].agg(pd.Series.mode)['Champion5'].apply(lambda x: isinstance(x, np.ndarray)).sum()

48

In [29]:
pd.value_counts(all_combined[all_combined.isin([1]).any(axis=1)].values.flatten()).index[1:4]

Int64Index([99, 157, 45], dtype='int64')

In [35]:
def top3_by_id(id):
    return np.array(pd.value_counts(all_combined[all_combined.isin([id]).any(axis=1)].values.flatten()).index[1:4], dtype=np.int16)

In [37]:
import json

In [38]:
raw_champions = None
with open('data/champions.json') as f:
    raw_champions = json.load(f)

In [43]:
int(raw_champions['data']['Aatrox']['key'])

'266'

In [45]:
champion_ids = []
for champ in raw_champions['data'].keys():
    champion_ids.append(int(raw_champions['data'][champ]['key']))
champion_ids = np.array(champion_ids, dtype=np.int16)

In [48]:
champion_ids

array([266, 103,  84, 166,  12,  32,  34,   1, 523,  22, 136, 268, 432,
        53,  63, 201,  51, 164,  69,  31,  42, 122, 131, 119,  36, 245,
        60,  28,  81,   9, 114, 105,   3,  41,  86, 150,  79, 104, 887,
       120,  74, 420,  39, 427,  40,  59,  24, 126, 202, 222, 145, 429,
        43,  30,  38,  55,  10, 141,  85, 121, 203, 240,  96,   7,  64,
        89, 876, 127, 236, 117,  99,  54,  90,  57,  11,  21,  62,  82,
        25, 267,  75, 111, 518,  76,  56,  20,   2,  61, 516,  80,  78,
       555, 246, 133, 497,  33, 421, 526, 888,  58, 107,  92,  68,  13,
       360, 113, 235, 147, 875,  35,  98, 102,  27,  14,  15,  72,  37,
        16,  50, 517, 134, 223, 163,  91,  44,  17, 412,  18,  48,  23,
         4,  29,  77,   6, 110,  67,  45, 161, 711, 254, 234, 112,   8,
       106,  19, 498, 101,   5, 157, 777,  83, 350, 154, 238, 221, 115,
        26, 142, 143], dtype=int16)

In [36]:
top3_by_id(2)

array([ 64, 104, 141], dtype=int16)

In [51]:
champion_ids = sorted(champion_ids)

In [52]:
connections = []
for id in champion_ids:
    connections.append(top3_by_id(id))

In [54]:
connections_df = pd.DataFrame(connections, index=champion_ids, columns=['Common1', 'Common2', 'Common3'])
connections_df.head()

Unnamed: 0,Common1,Common2,Common3
1,99,157,45
2,64,104,141
3,517,99,157
4,81,64,157
5,64,11,141


In [56]:
connections_df.shape

(159, 3)

In [58]:
connections_df.to_csv('out_data/champion_connections.csv')