In [1]:
import os
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
def get_regional_csvs(region: str):
    """
    Returns a list of dataframes of by region
    """
    dfs = {}
    type_dict = {'Champion1': 'int16', 'Champion2': 'int16',
             'Champion3': 'int16', 'Champion4': 'int16',
             'Champion5': 'int16'}
    for f in os.listdir('data'):
        if region in f:
            dfs[f.split('_')[1].split('.')[0]] = pd.read_csv(f'data/{f}', dtype=type_dict)
    return dfs

In [15]:
na_dfs = get_regional_csvs('na')
kr_dfs = get_regional_csvs('kr')
euw_dfs = get_regional_csvs('euw')

In [5]:
na_dfs['diamond'].head()

Unnamed: 0,Champion1,Champion2,Champion3,Champion4,Champion5
0,145,8,555,81,67
1,84,147,157,235,62
2,76,64,104,121,131
3,432,25,38,30,79
4,29,555,350,235,267


In [6]:
kr_dfs['diamond'].head()

Unnamed: 0,Champion1,Champion2,Champion3,Champion4,Champion5
0,45,9,57,60,35
1,7,64,421,101,69
2,24,238,122,67,64
3,35,53,81,555,101
4,81,523,517,236,777


In [7]:
euw_dfs['diamond'].head()

Unnamed: 0,Champion1,Champion2,Champion3,Champion4,Champion5
0,119,104,67,29,51
1,68,91,103,51,119
2,121,141,120,80,5
3,79,9,106,104,48
4,4,24,17,121,517


In [19]:
na_combined = pd.concat(na_dfs.values(), axis=0)
na_combined.head()

Unnamed: 0,Champion1,Champion2,Champion3,Champion4,Champion5
0,17,25,99,63,43
1,142,99,223,555,202
2,16,37,555,25,26
3,245,104,81,202,11
4,84,157,11,24,91


In [66]:
na_combined.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6896 entries, 0 to 1040
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   Champion1  6896 non-null   int16
 1   Champion2  6896 non-null   int16
 2   Champion3  6896 non-null   int16
 3   Champion4  6896 non-null   int16
 4   Champion5  6896 non-null   int16
dtypes: int16(5)
memory usage: 121.2 KB


In [81]:
na_combined.value_counts()

Champion1  Champion2  Champion3  Champion4  Champion5
99         267        16         37         40           2
103        84         99         142        147          2
1          3          74         497        267          1
145        84         157        81         39           1
           81         523        236        202          1
                                                        ..
75         157        83         62         38           1
           145        81         235        134          1
           141        45         91         86           1
                      8          121        238          1
888        412        43         432        16           1
Length: 6894, dtype: int64

In [90]:
na_combined.groupby('Champion1').size()

Champion1
1      22
2      11
3      30
4      25
5      17
       ..
777    33
875    88
876     9
887     4
888     1
Length: 159, dtype: int64

In [95]:
champion_ids = sorted(na_combined['Champion1'].unique())
champion_ids[:5]

[1, 2, 3, 4, 5]

In [116]:
na_combined[na_combined.isin([1]).any(axis=1)].head()

Unnamed: 0,Champion1,Champion2,Champion3,Champion4,Champion5
119,1,99,246,25,518
183,133,101,53,1,6
202,22,63,1,21,145
221,90,3,74,1,86
233,7,18,222,69,1


In [125]:
pd.value_counts(na_combined[na_combined.isin([1]).any(axis=1)].values.flatten())

1      101
99      14
202     13
45      11
245     10
      ... 
15       1
76       1
41       1
56       1
34       1
Length: 124, dtype: int64

In [None]:
def most_common_ids(id):
    # get all rows with the given id
    return

In [98]:
na_combined.groupby('Champion1')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fb690641130>

In [89]:
na_combined.groupby('Champion1')['Champion2'].value_counts().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Champion2
Champion1,Champion2,Unnamed: 2_level_1
1,45,2
1,99,2
1,134,2
1,3,1
1,8,1
...,...,...
876,266,1
887,875,2
887,38,1
887,518,1


In [77]:
na_combined[na_combined['Champion1'] == 1]['Champion2'].value_counts()

99     2
134    2
45     2
18     1
31     1
11     1
77     1
143    1
53     1
58     1
8      1
202    1
117    1
30     1
131    1
157    1
63     1
875    1
3      1
Name: Champion2, dtype: int64

In [78]:
na_combined[na_combined['Champion1'] == 1]['Champion3'].value_counts()

103    2
61     2
22     2
246    1
12     1
32     1
17     1
16     1
98     1
102    1
203    1
64     1
112    1
222    1
90     1
29     1
5      1
245    1
74     1
Name: Champion3, dtype: int64

In [79]:
na_combined[na_combined['Champion1'] == 1]['Champion4'].value_counts()

25     1
29     1
497    1
420    1
103    1
245    1
30     1
58     1
203    1
127    1
201    1
82     1
421    1
98     1
81     1
18     1
63     1
11     1
31     1
120    1
202    1
59     1
Name: Champion4, dtype: int64

In [80]:
na_combined[na_combined['Champion1'] == 1]['Champion5'].value_counts()

45     3
267    2
518    1
13     1
74     1
75     1
28     1
18     1
497    1
117    1
91     1
145    1
22     1
56     1
238    1
50     1
77     1
141    1
134    1
Name: Champion5, dtype: int64

In [68]:
na_combined.pivot_table(index='Champion1', aggfunc=stats.mode)

Unnamed: 0_level_0,Champion2,Champion3,Champion4,Champion5
Champion1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,"([45], [2])","([22], [2])","([11], [1])","([45], [3])"
2,"([3], [1])","([141], [2])","([11], [1])","([5], [1])"
3,"([99], [2])","([111], [3])","([104], [2])","([68], [2])"
4,"([35], [2])","([112], [4])","([5], [2])","([22], [2])"
5,"([64], [3])","([9], [2])","([777], [2])","([11], [2])"
...,...,...,...,...
777,"([157], [6])","([36], [2])","([145], [3])","([157], [6])"
875,"([82], [5])","([82], [6])","([157], [4])","([122], [6])"
876,"([81], [2])","([3], [1])","([6], [1])","([51], [1])"
887,"([875], [2])","([23], [1])","([8], [1])","([82], [1])"


In [16]:
kr_combined = pd.concat(kr_dfs.values(), axis=0)
kr_combined.head()

Unnamed: 0,Champion1,Champion2,Champion3,Champion4,Champion5
0,222,21,81,67,28
1,777,517,64,142,91
2,876,245,84,517,3
3,114,126,4,24,266
4,41,67,266,51,222


In [17]:
euw_combined = pd.concat(euw_dfs.values(), axis=0)
euw_combined.head()

Unnamed: 0,Champion1,Champion2,Champion3,Champion4,Champion5
0,555,111,245,145,89
1,238,157,64,105,875
2,21,15,25,96,143
3,202,81,84,141,121
4,122,24,19,875,82


In [20]:
all_combined = pd.concat([na_combined, euw_combined, kr_combined], axis=0)
all_combined.head()

Unnamed: 0,Champion1,Champion2,Champion3,Champion4,Champion5
0,17,25,99,63,43
1,142,99,223,555,202
2,16,37,555,25,26
3,245,104,81,202,11
4,84,157,11,24,91


In [21]:
all_combined.shape

(30630, 5)

In [29]:
pd.value_counts(all_combined[all_combined.isin([1]).any(axis=1)].values.flatten()).index[1:4]

Int64Index([99, 157, 45], dtype='int64')

In [35]:
def top3_by_id(id):
    return np.array(pd.value_counts(all_combined[all_combined.isin([id]).any(axis=1)].values.flatten()).index[1:4], dtype=np.int16)

In [37]:
import json

In [38]:
raw_champions = None
with open('data/champions.json') as f:
    raw_champions = json.load(f)

In [43]:
int(raw_champions['data']['Aatrox']['key'])

'266'

In [45]:
champion_ids = []
for champ in raw_champions['data'].keys():
    champion_ids.append(int(raw_champions['data'][champ]['key']))
champion_ids = np.array(champion_ids, dtype=np.int16)

In [48]:
champion_ids

array([266, 103,  84, 166,  12,  32,  34,   1, 523,  22, 136, 268, 432,
        53,  63, 201,  51, 164,  69,  31,  42, 122, 131, 119,  36, 245,
        60,  28,  81,   9, 114, 105,   3,  41,  86, 150,  79, 104, 887,
       120,  74, 420,  39, 427,  40,  59,  24, 126, 202, 222, 145, 429,
        43,  30,  38,  55,  10, 141,  85, 121, 203, 240,  96,   7,  64,
        89, 876, 127, 236, 117,  99,  54,  90,  57,  11,  21,  62,  82,
        25, 267,  75, 111, 518,  76,  56,  20,   2,  61, 516,  80,  78,
       555, 246, 133, 497,  33, 421, 526, 888,  58, 107,  92,  68,  13,
       360, 113, 235, 147, 875,  35,  98, 102,  27,  14,  15,  72,  37,
        16,  50, 517, 134, 223, 163,  91,  44,  17, 412,  18,  48,  23,
         4,  29,  77,   6, 110,  67,  45, 161, 711, 254, 234, 112,   8,
       106,  19, 498, 101,   5, 157, 777,  83, 350, 154, 238, 221, 115,
        26, 142, 143], dtype=int16)

In [36]:
top3_by_id(2)

array([ 64, 104, 141], dtype=int16)

In [51]:
champion_ids = sorted(champion_ids)

In [52]:
connections = []
for id in champion_ids:
    connections.append(top3_by_id(id))

In [54]:
connections_df = pd.DataFrame(connections, index=champion_ids, columns=['Common1', 'Common2', 'Common3'])
connections_df.head()

Unnamed: 0,Common1,Common2,Common3
1,99,157,45
2,64,104,141
3,517,99,157
4,81,64,157
5,64,11,141


In [56]:
connections_df.shape

(159, 3)

In [58]:
connections_df.to_csv('out_data/champion_connections.csv')