# 플레이어 데이터 전처리

In [46]:
import pandas as pd
import numpy as np



In [47]:
# Columns loaded from each season's player CSV; passed to read_csv as usecols.
useful_attributes = [
    'Player', 'Team', 'Pos',
    'W%', 'KDA', 'KP',
    'GD10', 'XPD10', 'CSD10',
    'DPM',
]

In [48]:
# Collects one processed DataFrame per (season file, position).
data_list = list()

# NOTE(review): not referenced in the visible cells; presumably the number of
# tier levels -- confirm before relying on it.
player_tier = 5

In [49]:
# Two-digit year codes '15' .. '21' used in the CSV file names.
year = [str(two_digit) for two_digit in range(15, 22)]
# Season names used in the CSV file names.
season = ['Spring', 'Summer']
# Directory prefix of the raw per-season player CSV files.
file_directory = './data_before/player/'

## 팀 이름 교체 함수

In [50]:
def change_team_name(x):
    """Return the replacement team name for ``x``.

    'Liiv SANDBOX' becomes 'SANDBOX Gaming' and 'DWG KIA' becomes
    'DAMWON Gaming'; any other value is returned unchanged.
    """
    replacements = (
        ('Liiv SANDBOX', 'SANDBOX Gaming'),
        ('DWG KIA', 'DAMWON Gaming'),
    )
    for source, target in replacements:
        if x == source:
            return target
    return x
    
    
def change_season(x):
    """Encode a season name as an integer.

    Returns 1 for 'Spring', 2 for 'Summer', and -1 for anything else.
    """
    if x == 'Spring':
        return 1
    return 2 if x == 'Summer' else -1

In [51]:
def minmax_norm(df_input):
    """Rescale ``df_input`` linearly using its own minimum and maximum.

    Computes ``(value - min) / (max - min)``, so the smallest value maps to 0
    and the largest to 1.
    """
    lowest = df_input.min()
    span = df_input.max() - df_input.min()
    return (df_input - lowest) / span

def normalize(df_input):
    """Standardize ``df_input``: subtract its mean, then divide by its std.

    Uses the object's own ``mean()`` and ``std()`` methods.
    """
    centered = df_input - df_input.mean()
    spread = df_input.std()
    return centered / spread



def give_level(x):
    """Map a score to a level from 1 to 5, where larger scores get lower numbers.

    ``x`` is assumed to be a Z value of a normal distribution. The cut-offs
    come from these approximate tail probabilities:

        P(Z >= 0.84)  ~= 0.2
        P(Z >= 0.25)  ~= 0.4
        P(Z >= -0.25) ~= 0.6
        P(Z >= -0.84) ~= 0.8

    Values below every cut-off get level 5.
    """
    cutoffs = (0.84, 0.25, -0.25, -0.84)
    for level, cutoff in enumerate(cutoffs, start=1):
        if x >= cutoff:
            return level
    return 5

def player_preprocess(df):
    """Add Laning, Engage, Fight and Tier columns to a player DataFrame.

    Each sub-score is standardized with ``normalize`` and turned into a level
    from 1 to 5 with ``give_level``. The scores are computed relative to the
    rows of ``df`` only, so the caller decides the comparison group.

    Args:
        df: DataFrame with numeric columns 'W%', 'KDA', 'KP', 'DPM', 'GD10',
            'XPD10' and 'CSD10'.

    Returns:
        A new DataFrame: the input columns plus 'Laning', 'Engage', 'Fight'
        and 'Tier', with 'GD10', 'XPD10' and 'CSD10' removed. The result is
        also printed.
    """
    # Laning score from the three differential columns (presumably gold / XP /
    # CS leads at 10 minutes -- confirm against the data source). These are the
    # columns actually loaded; the frame has no 'Gold' or 'CS' column.
    gold = minmax_norm(df['GD10'])
    xp = minmax_norm(df['XPD10'])
    cs = minmax_norm(df['CSD10'])
    laning = normalize(gold + xp + cs).apply(give_level).rename('Laning')

    # Engage score: weighted mix of kill participation and win rate.
    # NOTE(review): KP is z-scored while W% is min-max scaled, as in the
    # original formula.
    kp = normalize(df['KP'])
    win = minmax_norm(df['W%'])
    engage = normalize(0.4 * kp + 0.6 * win).apply(give_level).rename('Engage')

    # Fight score: damage per minute plus KDA, both min-max scaled.
    dpm = minmax_norm(df['DPM'])
    kda = minmax_norm(df['KDA'])
    fight = normalize(dpm + kda).apply(give_level).rename('Fight')

    # Overall tier is the mean of the three levels, rounded down. np.floor
    # keeps the column as float, e.g. 3.0.
    tier = np.floor((laning + engage + fight) / 3).rename('Tier')

    # concat builds a new frame, so the caller's slice is never written to.
    df = pd.concat([df, laning, engage, fight, tier], axis=1)
    df = df.drop(columns=['GD10', 'XPD10', 'CSD10'])
    print("처리된 데이터프레임은?")
    print(df)
    return df
        
        
    

## 전처리 과정

In [52]:

# Load each season's player CSV, clean it, score every position separately,
# and collect the results in data_list.
for year_code in year:
    for season_name in season:
        # The '21' Summer combination is skipped; no file is read for it.
        if year_code == '21' and season_name == 'Summer':
            continue

        csv_path = "{0}{1}{2}Player.csv".format(file_directory, year_code, season_name)
        data = pd.read_csv(csv_path, usecols=useful_attributes)
        print(year_code, season_name)
        # DataFrame.info() prints its own report and returns None, so it is
        # called directly; wrapping it in print() emitted a stray "None".
        data.info()

        # Keep only the text before '%' so the columns can be cast to float.
        for pct_column in ('KP', 'W%'):
            data[pct_column] = data[pct_column].apply(lambda x: str(x).split('%')[0])

        # Team names are rewritten only for the '20' files.
        if year_code == '20':
            data['Team'] = data['Team'].apply(change_team_name)

        data = data.astype({'KP': 'float64', 'W%': 'float64'})

        # Scalars broadcast to every row.
        data['Year'] = year_code
        data['Season'] = season_name

        # Scores are relative within a position, so each position is processed
        # on its own. The order here fixes the row order of the final table.
        for position in ('Middle', 'Jungle', 'ADC', 'Support', 'Top'):
            position_rows = data.loc[data['Pos'] == position]
            data_list.append(player_preprocess(position_rows))
        
        
        
    
        

15 Spring
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53 entries, 0 to 52
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Player  53 non-null     object 
 1   Team    53 non-null     object 
 2   Pos     53 non-null     object 
 3   W%      53 non-null     object 
 4   KDA     53 non-null     float64
 5   KP      53 non-null     object 
 6   GD10    53 non-null     int64  
 7   XPD10   53 non-null     int64  
 8   CSD10   53 non-null     float64
 9   DPM     53 non-null     int64  
dtypes: float64(2), int64(3), object(5)
memory usage: 4.3+ KB
None
처리된 데이터프레임은?
      Player                 Team     Pos    W%  KDA    KP  DPM Year  Season  \
0        Ace       Samsung Galaxy  Middle  40.0  2.1  67.3  478   15  Spring   
6      BlisS       Samsung Galaxy  Middle   9.0  1.4  63.3  325   15  Spring   
10      Coco             CJ Entus  Middle  63.0  4.7  69.2  571   15  Spring   
14  Easyhoon        SK Telecom T1  Middle  

처리된 데이터프레임은?
    Player                 Team     Pos    W%  KDA     KP  DPM Year  Season  \
3      Bdd             CJ Entus  Middle  41.0  3.2   83.3  500   16  Spring   
9     Coco       Longzhu Gaming  Middle  45.0  2.9   68.4  574   16  Spring   
13   Crown       Samsung Galaxy  Middle  50.0  3.5   75.1  557   16  Spring   
18    Edge      Kongdoo Monster  Middle  20.0  2.9   73.5  553   16  Spring   
19    Edge              e-mFire  Middle  17.0  2.6   70.0  463   16  Spring   
21   Faker        SK Telecom T1  Middle  65.0  4.2   74.1  638   16  Spring   
24     Fly           KT Rolster  Middle  67.0  4.6   72.6  497   16  Spring   
25  Frozen       Longzhu Gaming  Middle  60.0  4.7   69.6  482   16  Spring   
36    Kuro           ROX Tigers  Middle  83.0  7.2   68.8  530   16  Spring   
37   Kuzan  Jin Air Green Wings  Middle  55.0  4.3   73.2  493   16  Spring   
40  Mickey       Afreeca Freecs  Middle  52.0  3.3   69.8  504   16  Spring   
49   SaSin          SBENU Korea  Middle

처리된 데이터프레임은?
        Player                 Team     Pos     W%  KDA    KP  DPM Year  \
1     Ambition       Samsung Galaxy  Jungle   58.0  3.0  66.0  323   17   
3       Beyond                  MVP  Jungle   51.0  2.6  69.9  331   17   
4        Blank        SK Telecom T1  Jungle  100.0  9.2  71.3  367   17   
5        Bless          bbq Olivers  Jungle   36.0  2.2  60.0  259   17   
7        Crash       Longzhu Gaming  Jungle   44.0  2.7  64.7  289   17   
19        Haru       Samsung Galaxy  Jungle   77.0  4.2  68.5  305   17   
33  Mightybear           ROX Tigers  Jungle   63.0  3.9  65.3  304   17   
34      Mowgli       Afreeca Freecs  Jungle   67.0  3.5  61.1  265   17   
37      Peanut        SK Telecom T1  Jungle   72.0  5.1  65.0  294   17   
40       Punch      Kongdoo Monster  Jungle   27.0  2.0  70.1  237   17   
41       Raise  Jin Air Green Wings  Jungle   57.0  2.8  63.4  243   17   
45       Score           KT Rolster  Jungle   66.0  4.6  69.5  331   17   
47   SeongHw

처리된 데이터프레임은?
       Player                 Team  Pos    W%  KDA    KP  DPM Year  Season  \
0         ADD                  MVP  Top  33.0  1.9  64.7  503   18  Spring   
10      Crazy          bbq Olivers  Top  39.0  3.3  62.7  434   18  Spring   
12      CuVee          KSV eSports  Top  50.0  2.2  64.3  441   18  Spring   
25       Khan     Kingzone DragonX  Top  88.0  5.9  60.9  570   18  Spring   
26       Kiin       Afreeca Freecs  Top  68.0  3.9  59.2  425   18  Spring   
31  Lindarang           ROX Tigers  Top  49.0  2.8  63.0  418   18  Spring   
42     Rascal     Kingzone DragonX  Top  57.0  4.1  55.4  410   18  Spring   
43      Roach      Kongdoo Monster  Top  22.0  1.7  54.1  346   18  Spring   
50       Smeb           KT Rolster  Top  65.0  3.9  67.7  481   18  Spring   
51     SoHwan  Jin Air Green Wings  Top  43.0  3.1  54.8  383   18  Spring   
56       Thal        SK Telecom T1  Top  60.0  3.6  56.4  489   18  Spring   
61     Untara        SK Telecom T1  Top  38.0  2.3 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71 entries, 0 to 70
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Player  71 non-null     object 
 1   Team    71 non-null     object 
 2   Pos     71 non-null     object 
 3   W%      71 non-null     object 
 4   KDA     71 non-null     float64
 5   KP      71 non-null     object 
 6   GD10    71 non-null     int64  
 7   XPD10   71 non-null     int64  
 8   CSD10   71 non-null     float64
 9   DPM     71 non-null     int64  
dtypes: float64(2), int64(3), object(5)
memory usage: 5.7+ KB
None
처리된 데이터프레임은?
       Player                 Team     Pos    W%  KDA    KP  DPM Year  Season  \
1         Bdd           KT Rolster  Middle  36.0  3.4  71.2  438   19  Summer   
5     CheonGo  Jin Air Green Wings  Middle  10.0  1.2  53.6  312   19  Summer   
6       Chovy              Griffin  Middle  69.0  5.9  59.8  378   19  Summer   
12       Dove         Liiv SANDBOX  Middle  60.0 

68       3       4      4   3.0  
처리된 데이터프레임은?
     Player                 Team      Pos    W%  KDA    KP  DPM Year  Season  \
2       Ben       Afreeca Freecs  Support  54.0  3.3  66.1  141   20  Summer   
3     BeryL        DAMWON Gaming  Support  87.0  5.2  60.1  222   20  Summer   
20   Effort                   T1  Support  68.0  4.0  64.0  123   20  Summer   
27  GorillA       SANDBOX Gaming  Support  43.0  2.3  65.0  126   20  Summer   
28    GuGer        Team Dynamics  Support  36.0  2.3  65.7  194   20  Summer   
32    Jelly       Afreeca Freecs  Support   0.0  0.6  43.8  172   20  Summer   
33   Kabbie       SANDBOX Gaming  Support   0.0  0.8  63.6   85   20  Summer   
35   Kellin                Gen.G  Support  57.0  4.1  68.1  160   20  Summer   
36    Keria                  DRX  Support  71.0  5.5  72.3  192   20  Summer   
39     Kuri                   T1  Support  50.0  1.3  31.0   94   20  Summer   
43  Lehends  Hanwha Life Esports  Support  19.0  2.4  69.1  137   20  Sum

In [53]:
# Stack every processed frame into one table with a fresh 0..n-1 index.
all_data = pd.concat(
    data_list,
    axis=0,
    ignore_index=True,
)
print(all_data)

       Player                 Team     Pos    W%  KDA    KP  DPM Year  Season  \
0         Ace       Samsung Galaxy  Middle  40.0  2.1  67.3  478   15  Spring   
1       BlisS       Samsung Galaxy  Middle   9.0  1.4  63.3  325   15  Spring   
2        Coco             CJ Entus  Middle  63.0  4.7  69.2  571   15  Spring   
3    Easyhoon        SK Telecom T1  Middle  75.0  9.2  71.0  533   15  Spring   
4       Faker        SK Telecom T1  Middle  70.0  4.4  71.2  687   15  Spring   
..        ...                  ...     ...   ...  ...   ...  ...  ...     ...   
901    Morgan  Hanwha Life Esports     Top  69.0  4.1  58.3  416   21  Spring   
902    Rascal                Gen.G     Top  66.0  3.1  59.2  396   21  Spring   
903      Rich    Nongshim RedForce     Top  43.0  2.3  60.1  482   21  Spring   
904    Summit         Liiv SANDBOX     Top  38.0  2.3  62.9  490   21  Spring   
905      Zeus                   T1     Top  63.0  3.1  59.3  421   21  Spring   

     Laning  Engage  Fight 

## 이름 바꾸는 함수

In [54]:
def change_name(x):
    """Return the replacement player name for ``x``.

    'Ssol' becomes 'SS' and 'Yeongjae' becomes 'YoungJae'; any other value is
    returned unchanged.
    """
    if x == 'Ssol':
        return 'SS'
    return 'YoungJae' if x == 'Yeongjae' else x

In [55]:
# Apply the player-name replacements to every row.
all_data['Player'] = all_data['Player'].apply(change_name)

In [56]:
# Index the table by (Year, Season); both columns move into the index.
all_data = all_data.set_index(keys=['Year', 'Season'])

In [57]:
# Write the final table, including its (Year, Season) index, to disk.
all_data.to_csv(path_or_buf='./data_after/player.csv')