# 플레이어 데이터 전처리

In [1]:
import pandas as pd
import numpy as np



In [2]:
# Columns loaded from each per-split player CSV; every other column in the
# file is ignored at read time.
useful_attributes = [
    'Player',
    'Team',
    'Pos',
    'W%',
    'KDA',
    'KP',
    'GD10',
    'XPD10',
    'CSD10',
    'DPM',
]

In [3]:
# Accumulates one processed DataFrame per (year, season) split.
data_list = list()

# Number of levels/tiers (1..5) used when grading players.
# NOTE(review): not referenced anywhere in the visible code - confirm it is
# still needed.
player_tier = 5

In [4]:
# Two-digit year labels '15' .. '21', used as the prefix of each input file name.
year = [str(two_digit) for two_digit in range(15, 22)]

# Split names, used as the middle part of each input file name.
season = ['Spring', 'Summer']

# Directory that holds the raw "<year><season>Player.csv" files.
file_directory = './data_before/player/'

## 팀 이름 교체 함수

In [5]:
def change_team_name(x):
    """Replace two specific team names with their alternative spelling.

    'Liiv SANDBOX' becomes 'SANDBOX Gaming' and 'DWG KIA' becomes
    'DAMWON Gaming'; any other value is returned unchanged.
    """
    renames = (
        ('Liiv SANDBOX', 'SANDBOX Gaming'),
        ('DWG KIA', 'DAMWON Gaming'),
    )
    for current, replacement in renames:
        if x == current:
            return replacement
    return x
    
    
def change_season(x):
    """Encode a split name as an integer.

    Returns 1 for 'Spring', 2 for 'Summer', and -1 for anything else.
    """
    return 1 if x == 'Spring' else (2 if x == 'Summer' else -1)

In [6]:
def minmax_norm(df_input):
    """Rescale ``df_input`` linearly so its minimum maps to 0 and its maximum to 1.

    The input must provide ``min()`` and ``max()`` and support arithmetic
    (e.g. a pandas Series). There is no guard for a zero range: when the
    maximum equals the minimum the division is by zero.
    """
    lowest = df_input.min()
    value_range = df_input.max() - lowest
    return (df_input - lowest) / value_range

def normalize(df_input):
    """Standardize ``df_input``: subtract its mean and divide by its ``std()``.

    The input must provide ``mean()`` and ``std()`` and support arithmetic
    (e.g. a pandas Series). There is no guard for a zero standard deviation.
    """
    centered = df_input - df_input.mean()
    spread = df_input.std()
    return centered / spread



'''
P(Z>=0.84) = 약 0.2
P(Z>=0.25) = 약 0.4
P(Z>=-0.25) = 약 0.6
P(Z>=-0.84) = 약 0.8
'''
#이 함수에서 x는 정규분포의 Z값이라 가정
def give_level(x):
    """Bucket a z-score into a level from 1 (highest scores) to 5 (lowest).

    ``x`` is assumed to be a standard-normal Z value. The cut-offs 0.84,
    0.25, -0.25 and -0.84 are the points where P(Z >= cut-off) is roughly
    0.2, 0.4, 0.6 and 0.8, so each level covers about a fifth of a normal
    distribution. A value that satisfies none of the comparisons (including
    NaN) gets level 5.
    """
    cutoffs = (0.84, 0.25, -0.25, -0.84)
    for level, cutoff in enumerate(cutoffs, start=1):
        if x >= cutoff:
            return level
    return 5


            
        
    

## 전처리 과정

In [7]:

# Build one scored DataFrame per (year, season) split and append it to
# data_list. For every split the loop:
#   1. loads the raw player CSV (only the columns in useful_attributes),
#   2. converts the percentage text columns 'KP' and 'W%' to floats,
#   3. tags each row with its Year and Season,
#   4. grades every player 1..5 (via give_level) on Laning, Engage and Fight,
#   5. derives an overall Tier from those three grades.
for elem1 in year:
    for elem2 in season:

        # The 2021 Summer split is skipped explicitly, so its file is never read.
        if elem1 == '21' and elem2 == 'Summer':
            continue

        data = pd.read_csv(
            "{0}{1}{2}Player.csv".format(file_directory, elem1, elem2),
            usecols=useful_attributes,
        )
        print(elem1, elem2)
        # DataFrame.info() prints its report itself and returns None, so it is
        # not wrapped in print() (that would emit a stray "None" line).
        data.info()

        # 'KP' and 'W%' are kept as text up to the first '%' sign and then
        # converted to float64.
        for pct_col in ('KP', 'W%'):
            data[pct_col] = data[pct_col].apply(lambda x: str(x).split('%')[0])
        data = data.astype({'KP': 'float64', 'W%': 'float64'})

        # NOTE(review): the rename is applied to the '20' files only - confirm
        # that no other year's files contain 'Liiv SANDBOX' / 'DWG KIA'.
        if elem1 == '20':
            data['Team'] = data['Team'].apply(change_team_name)

        # Scalar assignment broadcasts the label to every row.
        data['Year'] = elem1
        data['Season'] = elem2

        # Laning: the three @10-minute differentials are min-max scaled so they
        # contribute on the same 0..1 range, summed, standardized, then graded.
        laning_raw = (
            minmax_norm(data['GD10'])
            + minmax_norm(data['XPD10'])
            + minmax_norm(data['CSD10'])
        )
        laning = normalize(laning_raw).apply(give_level)
        data['Laning'] = laning

        # Engage is graded from standardized KP, Fight from standardized KDA.
        engage = normalize(data['KP']).apply(give_level)
        data['Engage'] = engage

        fight = normalize(data['KDA']).apply(give_level)
        data['Fight'] = fight

        # Tier is the mean of the three grades rounded down; np.floor keeps the
        # column as float (e.g. 4.0).
        total_ability = (laning + engage + fight) / 3
        data['Tier'] = np.floor(total_ability)

        # The raw differentials are only needed to compute 'Laning'.
        data = data.drop(columns=['GD10', 'XPD10', 'CSD10'])

        data_list.append(data)
        

15 Spring
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53 entries, 0 to 52
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Player  53 non-null     object 
 1   Team    53 non-null     object 
 2   Pos     53 non-null     object 
 3   W%      53 non-null     object 
 4   KDA     53 non-null     float64
 5   KP      53 non-null     object 
 6   GD10    53 non-null     int64  
 7   XPD10   53 non-null     int64  
 8   CSD10   53 non-null     float64
 9   DPM     53 non-null     int64  
dtypes: float64(2), int64(3), object(5)
memory usage: 4.3+ KB
None
15 Summer
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Player  77 non-null     object 
 1   Team    77 non-null     object 
 2   Pos     77 non-null     object 
 3   W%      77 non-null     object 
 4   KDA     77 non-null     float64
 5  

In [8]:
# Stack every per-split frame row-wise (concat's default axis) and renumber
# the rows 0..n-1.
all_data = pd.concat(data_list, ignore_index=True)
print(all_data)

       Player                 Team      Pos    W%  KDA    KP  DPM Year  \
0         Ace       Samsung Galaxy   Middle  40.0  2.1  67.3  478   15   
1    Ambition             CJ Entus   Jungle  63.0  4.0  72.1  229   15   
2        Ares   Incredible Miracle   Jungle   7.0  1.7  79.1  157   15   
3       Arrow           KT Rolster      ADC  45.0  3.8  73.7  438   15   
4        Bang        SK Telecom T1      ADC  72.0  6.4  69.7  547   15   
..        ...                  ...      ...   ...  ...   ...  ...  ...   
901      Vsta  Hanwha Life Esports  Support  59.0  3.0  65.8  177   21   
902  Yaharong         Fredit BRION   Middle  44.0  4.6  73.1  453   21   
903     yoHan  Hanwha Life Esports   Jungle  71.0  3.1  63.3  329   21   
904      Zeus                   T1      Top  63.0  3.1  59.3  421   21   
905      Zzus           KT Rolster  Support  40.0  2.3  66.0   98   21   

     Season  Laning  Engage  Fight  Tier  
0    Spring       3       4      5   4.0  
1    Spring       1      

## 이름 바꾸는 함수

In [9]:
def change_name(x):
    """Replace two specific player names with their alternative spelling.

    'Ssol' becomes 'SS' and 'Yeongjae' becomes 'YoungJae'; any other value
    is returned unchanged.
    """
    aliases = (
        ('Ssol', 'SS'),
        ('Yeongjae', 'YoungJae'),
    )
    for current, replacement in aliases:
        if x == current:
            return replacement
    return x

In [10]:
# Rewrite the 'Player' column element by element through change_name.
all_data['Player'] = all_data['Player'].map(change_name)

In [11]:
# Move 'Year' and 'Season' out of the columns into a two-level row index.
all_data = all_data.set_index(keys=['Year', 'Season'])

In [12]:
# Write the combined table to disk; the index set on all_data is written
# along with the columns (to_csv's default).
all_data.to_csv(path_or_buf='./data_after/player.csv')