In [214]:
import pandas as pd
from tqdm import tqdm
import os

1. The strength of a champion is represented by a comparison of its win rates
2. Data for champions with too few games played is not removed

In [215]:
# Read the three training CSVs and the champion id table, in this order.
df_train_pre, df_train_mid, df_train_late, championID = map(
    pd.read_csv,
    (
        "../datasets/train_pre.csv",
        "../datasets/train_mid.csv",
        "../datasets/train_late.csv",
        "../datasets/championID.csv",
    ),
)

In [216]:
def pos_win_rate(df,pos,role):
    """Count games and wins per champion for one player slot and derive a win rate.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``teamVictory`` column and the two pick columns
        ``Player_<pos>_pick`` and ``Player_<pos+5>_pick``.
    pos : int
        Slot number; the slot ``pos`` is credited with a win when
        ``teamVictory == 100`` and the slot ``pos + 5`` when
        ``teamVictory == 200``.
    role : str
        Prefix used to name the output columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``<role>_sum`` (games), ``<role>_win`` (wins) and
        ``<role>_winRate`` (percent, rounded to 2 decimals).
    """
    first_slot = "Player_"+str(pos)+"_pick"
    second_slot = "Player_"+str(pos+5)+"_pick"
    games_col = role +'_sum'
    wins_col = role +'_win'
    rate_col = role +'_winRate'

    def tally(frame, pick_col):
        # Number of rows per champion id found in `pick_col`.
        counts = frame.groupby([pick_col]).size().reset_index(name='n')
        return counts.rename(columns = {pick_col:'id'})

    def combine(first, second, out_col):
        # Add the two per-slot tallies; a champion missing on one side counts as 0 there.
        both = first.merge(second,on='id',how='outer').fillna(0)
        both[out_col] = both['n_x'] + both['n_y']
        return both.drop(['n_x','n_y'],axis=1)

    # Total number of games for each champion, over both slots
    games = combine(tally(df, first_slot), tally(df, second_slot), games_col)

    # Number of wins for each champion: each slot only counts rows its side won
    wins = combine(
        tally(df[df['teamVictory'] == 100], first_slot),
        tally(df[df['teamVictory'] == 200], second_slot),
        wins_col,
    )

    # Champions that never won are absent from `wins`; the left merge + fillna gives them 0
    stats = games.merge(wins,on='id',how='left').fillna(0).astype('int')
    stats[rate_col] = stats[wins_col]/stats[games_col] * 100 # The unit is percent

    return stats.round({rate_col:2})
    

In [217]:
def win_rate(df):
    """Build one table of per-role and overall win statistics for every champion.

    For each of the five slots (1..5, prefixed ``T``, ``J``, ``M``, ``B``,
    ``U``) the result of ``pos_win_rate`` is left-joined onto the
    module-level ``championID`` table on ``id``. Missing values produced by
    the joins are replaced with 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Match table passed unchanged to ``pos_win_rate``.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``championID``. Columns start with ``id``,
        ``name``, ``Overall_winRate``, ``T_winRate``, ``J_winRate``,
        ``M_winRate``, ``B_winRate``, ``U_winRate``; the remaining columns
        follow in their original order.
    """
    slots = [(1, "T"), (2, "J"), (3, "M"), (4, "B"), (5, "U")]

    # Win rates at 5 different positions
    champion_winRate = championID
    for pos, role in slots:
        champion_winRate = pd.merge(champion_winRate, pos_win_rate(df, pos, role), how = 'left', on ='id')
    # Champions never seen in a slot get NaN from the left merge; treat that as 0
    champion_winRate = champion_winRate.fillna(0)

    # OverAll win rates
    champion_winRate['Overall_sum'] = champion_winRate[[role + '_sum' for _, role in slots]].sum(axis=1)
    champion_winRate['Overall_win'] = champion_winRate[[role + '_win' for _, role in slots]].sum(axis=1)
    overall_rate = champion_winRate['Overall_win']/champion_winRate['Overall_sum'] * 100
    # A champion with no games at all yields 0/0 = NaN here; report 0 so the
    # overall column follows the same convention as the per-role columns.
    champion_winRate['Overall_winRate'] = overall_rate.fillna(0).round(2)

    # Better looking: identifying and win-rate columns first, everything else after
    leading = ['id','name','Overall_winRate','T_winRate','J_winRate','M_winRate','B_winRate','U_winRate']
    trailing = [c for c in champion_winRate.columns if c not in leading]

    return champion_winRate[leading + trailing]


In [218]:
# Compute the win-rate table for each of the three training sets.
champion_winRate_pre, champion_winRate_mid, champion_winRate_late = [
    win_rate(train_df) for train_df in (df_train_pre, df_train_mid, df_train_late)
]

# Write each table to its CSV file, without the index column.
for table, out_path in (
    (champion_winRate_pre, "../Useful Features/initial/3.championWinRate_pre.csv"),
    (champion_winRate_mid, "../Useful Features/initial/3.championWinRate_mid.csv"),
    (champion_winRate_late, "../Useful Features/initial/3.championWinRate_late.csv"),
):
    table.to_csv(out_path, index=False)