In [1]:
import pandas as pd
from tqdm import tqdm
import os


class DatasetLoader():
    """Locate and load the per-split CSV files of a dataset directory.

    The directory is searched for ``train``, ``val`` and ``test``
    sub-directories; every file found anywhere below one of them is treated
    as a CSV file belonging to that split.
    """

    def __init__(self, dataset_directory_path="./12.12.-splits/") -> None:
        """Index the files of every split under ``dataset_directory_path``.

        Only file paths are collected here; no file is read until
        ``load_datasets`` is called.
        """
        self.dataset_directory_path = dataset_directory_path
        # Mapping of split name -> list of file paths.
        self.splits_csv_list = self._get_list_of_files_splitted(
            self.dataset_directory_path
        )

    def _get_list_of_files_splitted(self, data_dir: str):
        """Return ``{split: [file paths]}`` for the train/val/test splits.

        Args:
            data_dir: Directory containing the split sub-directories.

        Returns:
            A dict with the keys ``"train"``, ``"val"`` and ``"test"``. Each
            value is the sorted list of paths of all files found (recursively)
            under that split's directory; it is empty when the directory does
            not exist or holds no files.
        """
        files_by_split = {}
        for split in ["train", "val", "test"]:
            split_data_dir = os.path.join(data_dir, split)
            # os.walk reports entries in a filesystem-dependent order; sorting
            # keeps the row order of the loaded data reproducible across runs.
            files_by_split[split] = sorted(
                os.path.join(root, name)
                for root, _, files in os.walk(split_data_dir)
                for name in files
            )
        return files_by_split

    def _get_X_Y(
        self,
        csv_list: list
    ):
        """Read every CSV in ``csv_list`` and split it into features and target.

        Args:
            csv_list: Paths of the CSV files to read. Each file must contain
                a ``teamVictory`` column.

        Returns:
            A tuple ``(X, Y)`` of DataFrames sharing a fresh 0..n-1 index.
            ``X`` holds every column except ``teamVictory`` and the
            ``Unnamed: 0`` column (dropped when present); ``Y`` holds only the
            ``teamVictory`` column.

        Raises:
            ValueError: If ``csv_list`` is empty.
            KeyError: If the loaded data has no ``teamVictory`` column.
        """
        if not csv_list:
            # An empty list usually means a wrong directory or split name;
            # fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                "No CSV files to load; check the dataset directory path and split name."
            )

        # Read everything first and concatenate once: calling pd.concat inside
        # the loop re-copies all previously loaded rows on every iteration.
        frames = [pd.read_csv(csv_path) for csv_path in tqdm(csv_list)]
        df_all = pd.concat(frames, ignore_index=True)

        Y = df_all[['teamVictory']]
        X = df_all.drop(columns='teamVictory')
        # 'Unnamed: 0' is only present when the CSVs carry a nameless first
        # column, so its absence is not treated as an error.
        X = X.drop(columns='Unnamed: 0', errors='ignore')
        return X, Y

    def load_datasets(self, split="train"):
        """Load one split and return its ``(X, Y)`` DataFrames.

        Args:
            split: One of ``"train"``, ``"val"`` or ``"test"``.

        Returns:
            The ``(X, Y)`` tuple produced by ``_get_X_Y`` for that split.

        Raises:
            KeyError: If ``split`` is not one of the indexed splits.
            ValueError: If no files were found for the split.
        """
        csv_list = self.splits_csv_list[split]
        print(f"Loading {len(csv_list)} files from {split} split")
        return self._get_X_Y(csv_list=csv_list)


# Index the files of every split under the dataset root; no data is read yet.
dl = DatasetLoader(
    dataset_directory_path="./datasets/12.12.-splits",
)

In [2]:
# Load the three splits in order (train, val, test) as (features, target) pairs.
(X_train, Y_train), (X_val, Y_val), (X_test, Y_test) = (
    dl.load_datasets(split=split_name)
    for split_name in ("train", "val", "test")
)

Loading 87 files from train split


100%|██████████| 87/87 [00:03<00:00, 26.57it/s]


Loading 5 files from val split


100%|██████████| 5/5 [00:00<00:00, 59.06it/s]


Loading 5 files from test split


100%|██████████| 5/5 [00:00<00:00, 57.40it/s]


In [18]:
# Number of distinct values in the Player_3_pick column of the training features, as a 1-tuple.
(len(X_train['Player_3_pick'].unique()),)

(160,)

In [3]:
# Keep only the rows in which slots 1-5 and slots 6-10 are each ordered
# TOP, JUNGLE, MIDDLE, BOTTOM, UTILITY, so slot i and slot i+5 share a position.
df = X_train.loc[
    pd.concat(
        [
            X_train[f'Player_{slot}_position'] == lane
            for slot, lane in enumerate(
                ['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY'] * 2, start=1
            )
        ],
        axis=1,
    ).all(axis=1)
]

In [4]:
def lane_counter_table(matches, first_slot, second_slot):
    """Average the gold of two player slots for every pair of their picks.

    Args:
        matches: DataFrame containing ``Player_<n>_pick`` and
            ``Player_<n>_gold`` columns for both slots.
        first_slot: Slot number whose gold is the minuend of the score.
        second_slot: Slot number whose gold is subtracted.

    Returns:
        One row per (first pick, second pick) combination with the mean gold
        of each slot and ``counter_Score`` = mean first-slot gold minus mean
        second-slot gold.
    """
    first_pick, first_gold = f'Player_{first_slot}_pick', f'Player_{first_slot}_gold'
    second_pick, second_gold = f'Player_{second_slot}_pick', f'Player_{second_slot}_gold'
    table = (
        matches[[first_pick, first_gold, second_pick, second_gold]]
        .groupby([first_pick, second_pick])
        .mean()
        .reset_index()
    )
    # Column arithmetic instead of a row-wise apply: same values, no Python
    # call per row, and it also works when the table is empty.
    table['counter_Score'] = table[first_gold] - table[second_gold]
    return table


# One table per position; slot i is paired with slot i+5, which the filtered
# frame guarantees to hold the same position.
top_counter = lane_counter_table(df, 1, 6)
jug_counter = lane_counter_table(df, 2, 7)
mid_counter = lane_counter_table(df, 3, 8)
bot_counter = lane_counter_table(df, 4, 9)
sup_counter = lane_counter_table(df, 5, 10)



In [20]:
# Rows whose Player_1_pick is 266, ordered from lowest to highest counter_Score.
top_counter[top_counter['Player_1_pick'] == 266].sort_values(by="counter_Score")

Unnamed: 0,Player_1_pick,Player_6_pick,Player_1_gold,Player_6_gold,counter_Score
5405,266,103,12821.000000,18657.000000,-5836.000000
5364,266,34,8399.000000,14154.000000,-5755.000000
5419,266,145,15046.000000,19999.000000,-4953.000000
5406,266,104,10352.666667,14908.333333,-4555.666667
5437,266,429,6061.000000,10013.000000,-3952.000000
...,...,...,...,...,...
5367,266,38,20893.000000,15439.000000,5454.000000
5344,266,5,10810.000000,5159.000000,5651.000000
5366,266,37,13056.000000,7079.000000,5977.000000
5363,266,33,13818.000000,6951.000000,6867.000000


In [6]:
# Display the table of Player_2 / Player_7 pick pairs with their mean gold and counter_Score.
jug_counter

Unnamed: 0,Player_2_pick,Player_7_pick,Player_2_gold,Player_7_gold,counter_Score
0,1,203,10071.0,17524.000000,-7453.000000
1,2,5,12568.0,8272.000000,4296.000000
2,2,9,10704.0,8959.500000,1744.500000
3,2,11,12913.0,12946.000000,-33.000000
4,2,17,13362.0,8970.000000,4392.000000
...,...,...,...,...,...
3059,887,254,12165.0,12384.000000,-219.000000
3060,887,266,12061.0,8490.000000,3571.000000
3061,887,421,5352.0,7663.000000,-2311.000000
3062,887,427,12776.0,9112.000000,3664.000000


In [15]:
# Number of distinct Player_1_pick values left after the position filter, as a 1-tuple.
(len(df['Player_1_pick'].unique()),)

(156,)