In [1]:
import numpy as np
import pandas as pd

In [2]:
# The constant C used in the FIP formula is the cFIP value scraped from the STATIZ site.
from io import StringIO

import requests
from bs4 import BeautifulSoup

# A timeout keeps the notebook from hanging on an unresponsive server, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
req = requests.get('http://www.statiz.co.kr/constant.php', timeout=30)
req.raise_for_status()
html = req.text
soup = BeautifulSoup(html, 'html.parser')
tables = soup.select('body > div.wrapper > div.content-wrapper > div > section.content > div > div.col-md-12.col-xs-12.col-sm-12.col-lg-9 > div > div > div > div.box-body.no-padding.table-responsive > table')
if not tables:
    # The CSS selector is tied to the page layout; report that clearly instead
    # of failing with a bare IndexError on tables[0].
    raise RuntimeError('cFIP constants table not found on the STATIZ page; the page layout may have changed')
table = tables[0]

table_html = str(table)
# read_html expects a path/URL or file-like object; passing literal HTML is
# deprecated in recent pandas, so wrap the markup in StringIO.
table_df_list = pd.read_html(StringIO(table_html))
table_df = table_df_list[0]

# Keep the year and cFIP columns of the first five table rows and rename the
# year column to GYEAR, the merge key used by add_variables.
# NOTE(review): selecting rows 0-4 by position assumes they are the seasons
# being processed (2016-2020) - confirm against the live table.
idx = table_df.loc[0:4, ['연도', 'cFIP']].rename(columns={'연도': 'GYEAR'})

In [3]:
def add_variables(data, year, cfip_table=None):
    """Add cumulative per-pitcher stats and derived rate metrics, then save a CSV.

    Reads the per-game pitcher records, keeps the rows whose ``G_ID`` contains
    ``year``, computes running totals for each pitcher in ``GDAY_DS`` order
    (``CUM_*`` columns, current game included), attaches that season's
    ``cFIP`` constant, derives the rate metrics (ERA, K/9, ..., OOPS) from the
    running totals and writes the result to ``pitcher_revised_<year>.csv`` in
    the current working directory. Rows are written in their input order.

    Parameters
    ----------
    data : str or path-like
        CSV file passed to ``pd.read_csv``. Must contain the columns G_ID,
        GDAY_DS, P_ID, HIT, H2, H3, HR, ER, INN2, KK, BB, AB, SF, HP and R.
    year : int
        Season to process; also used in the output file name.
    cfip_table : pandas.DataFrame, optional
        Table with ``GYEAR`` and ``cFIP`` columns. Defaults to the
        module-level ``idx`` table built by the scraping cell.

    Returns
    -------
    None
        The result is only written to disk.

    Raises
    ------
    ValueError
        If ``cfip_table`` does not hold exactly one row for ``year`` (the
        merge would otherwise silently drop or duplicate game rows).
    """
    # Counting stats accumulated per pitcher; each produces a CUM_<name> column.
    # NOTE(review): the previous implementation addressed these by position
    # (iloc 34, 13, 28, ...); the names are inferred from the CUM_* targets and
    # the columns already referenced by name - confirm against the data files.
    counting_cols = ['ER', 'INN2', 'KK', 'BB', 'HIT', 'HR', 'AB',
                     'SF', 'HP', 'R', 'H1', 'H2', 'H3']

    if cfip_table is None:
        cfip_table = idx  # module-level table scraped from STATIZ

    data = pd.read_csv(data)
    # Singles = hits that are not doubles, triples or home runs.
    data['H1'] = data['HIT'] - data['H2'] - data['H3'] - data['HR']

    # Keep only this season's games. Resetting the index gives unique labels
    # so the running totals computed below align back onto the right rows.
    in_season = data['G_ID'].astype(str).str.contains(str(year), regex=False)
    data = data.loc[in_season].reset_index(drop=True)

    # Running totals per pitcher in chronological order. The cumsum is taken on
    # a sorted view; assigning by index label restores the input row order.
    chronological = data.sort_values(by=['P_ID', 'GDAY_DS'])
    running = chronological.groupby('P_ID')[counting_cols].cumsum()
    for col in counting_cols:
        data['CUM_' + col] = running[col]

    # Attach the season constant through a temporary integer merge key.
    n_rows = len(data)
    data['GYEAR'] = year
    data = pd.merge(data, cfip_table[['GYEAR', 'cFIP']], on='GYEAR')
    data = data.drop('GYEAR', axis=1)
    if len(data) != n_rows:
        raise ValueError('cfip_table must contain exactly one cFIP row for year %d' % year)

    # INN2 is divided by 3 to obtain innings.
    # NOTE(review): presumably INN2 counts outs (thirds of an inning) - confirm.
    innings = data['CUM_INN2'] / 3
    walks_and_hbp = data['CUM_BB'] + data['CUM_HP']

    data['ERA'] = 9 * data['CUM_ER'] / innings
    data['K/9'] = 9 * data['CUM_KK'] / innings
    data['BB/9'] = 9 * data['CUM_BB'] / innings
    data['K/BB'] = data['CUM_KK'] / data['CUM_BB']
    data['WHIP'] = (data['CUM_HIT'] + data['CUM_BB']) / innings
    data['BABIP'] = (data['CUM_HIT'] - data['CUM_HR']) / (
        data['CUM_AB'] - data['CUM_KK'] - data['CUM_HR'] + data['CUM_SF'])
    # DICE uses the cumulative hit-by-pitch count, like FIP and kFIP below
    # (it previously mixed in the single-game HP value).
    data['DICE'] = 3.00 + (13 * data['CUM_HR'] + 3 * walks_and_hbp - 2 * data['CUM_KK']) / innings
    data['FIP'] = data['cFIP'] + (13 * data['CUM_HR'] + 3 * walks_and_hbp - 2 * data['CUM_KK']) / innings
    data['kFIP'] = data['cFIP'] + (14 * data['CUM_HR'] + 3 * walks_and_hbp - data['CUM_KK']) / innings
    data['RA9'] = 9 * data['CUM_R'] / innings
    data['HR/9'] = 9 * data['CUM_HR'] / innings
    data['H/9'] = 9 * data['CUM_HIT'] / innings
    data['OAVG'] = data['CUM_HIT'] / data['CUM_AB']
    data['OOBP'] = (data['CUM_HIT'] + walks_and_hbp) / (
        data['CUM_AB'] + walks_and_hbp + data['CUM_SF'])
    data['OSLG'] = (data['CUM_H1'] + 2 * data['CUM_H2'] + 3 * data['CUM_H3']
                    + 4 * data['CUM_HR']) / data['CUM_AB']
    data['OOPS'] = data['OOBP'] + data['OSLG']

    # Zero denominators (e.g. no outs or no walks yet) yield NaN/inf; store 0 instead.
    data = data.replace([np.nan, np.inf, -np.inf], 0)
    data.to_csv('pitcher_revised_%d.csv' % year, index=False)

In [4]:
# Build the revised pitcher table for each season covered by the provided
# data files (2016 through 2020), one output CSV per season.
for season in range(2016, 2021):
    add_variables('2020빅콘테스트_스포츠투아이_제공데이터_개인투수_%d.csv' % season, season)