In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the pitcher report first, then the batter report, into two DataFrames.
# The file-name suffixes (xgb / rf) presumably name the model that produced
# each report — TODO confirm.
private_pitcher, private_batter = (
    pd.read_csv(csv_name)
    for csv_name in ('private_pitcher_report_xgb.csv',
                     'private_batter_report_rf.csv')
)

In [3]:
# Preview the first five pitcher rows to check the loaded columns.
private_pitcher.head(n=5)

Unnamed: 0.1,Unnamed: 0,T_ID,P_ID,TB_SC,INN2,PA-AB,H1,H2,H3,HR,SB_SR,WP,BABIP,KK9,BB9,ER
0,0,OB,77263,B,2.77,0.72,0.81,0.21,0.08,0.01,0.01,0.03,0.37,8.28,10.23,0.31
1,1,NC,63959,B,2.09,0.43,0.45,0.05,0.0,0.01,0.0,0.0,0.26,6.03,7.02,-0.05
2,2,NC,61295,T,3.26,0.52,0.64,0.08,0.0,0.26,0.05,0.0,0.2,6.17,3.99,0.01
3,3,SK,60841,T,15.77,3.26,4.08,0.74,0.11,0.43,0.63,0.08,0.33,8.07,6.16,1.23
4,4,LT,64021,T,14.93,2.5,3.96,0.91,0.05,0.69,0.36,0.66,0.31,6.81,4.59,3.36


In [4]:
# Preview the first five batter rows to check the loaded columns.
private_batter.head(n=5)

Unnamed: 0.1,Unnamed: 0,T_ID,P_ID,TB_SC,PA-AB,AB,RUN,RBI,SH+SF,KK,SB_trial,BABIP,HIT
0,0,NC,62907,B,0.37,4.01,0.73,0.45,0.08,0.38,0.07,0.35,1.0
1,1,NC,62934,B,0.14,2.08,0.31,0.33,0.03,0.37,0.26,0.15,1.0
2,2,HT,64646,T,0.21,3.74,0.61,0.28,0.03,0.46,0.14,0.31,1.0
3,3,NC,78122,B,0.14,1.58,0.19,0.26,0.05,0.23,0.0,0.24,1.0
4,4,KT,64006,T,0.2,3.07,0.24,0.34,0.07,0.82,0.2,0.17,1.0


## 타율(AVG)

In [5]:
# Sum the HIT column over all batter rows that share a team ID (T_ID).
hit_by_team = private_batter.groupby('T_ID')['HIT'].sum()

hit_by_team

T_ID
HH    18.00
HT    10.98
KT     9.00
LG     9.03
LT    12.98
NC    14.00
OB     6.00
SK    10.00
SS    18.00
WO    14.00
Name: HIT, dtype: float64

In [6]:
# Sum the AB column over all batter rows that share a team ID (T_ID).
ab_by_team = private_batter.groupby('T_ID')['AB'].sum()

ab_by_team

T_ID
HH    37.16
HT    26.82
KT    23.77
LG    21.92
LT    36.63
NC    36.05
OB    11.94
SK    27.52
SS    40.63
WO    32.31
Name: AB, dtype: float64

In [7]:
# Team batting average: summed hits divided by summed at-bats, aligned on T_ID.
avg_by_team = hit_by_team.div(ab_by_team)

avg_by_team

T_ID
HH    0.484392
HT    0.409396
KT    0.378629
LG    0.411953
LT    0.354354
NC    0.388350
OB    0.502513
SK    0.363372
SS    0.443022
WO    0.433302
dtype: float64

## 방어율(ERA)

In [8]:
# Sum the ER column over all pitcher rows that share a team ID (T_ID).
er_by_team = private_pitcher.groupby('T_ID')['ER'].sum()

er_by_team

T_ID
HH    16.18
HT    12.67
KT    26.89
LG     9.56
LT    17.74
NC    11.26
OB     9.99
SK    15.04
SS    18.35
WO    16.56
Name: ER, dtype: float64

In [9]:
# Sum INN2 per team and divide by 3.
# NOTE(review): the division by 3 suggests INN2 counts outs (thirds of an
# inning) rather than whole innings — TODO confirm against the data schema.
inning_by_team = private_pitcher.groupby('T_ID')['INN2'].sum().div(3)

inning_by_team

T_ID
HH    35.293333
HT    38.790000
KT    46.303333
LG    33.503333
LT    38.440000
NC    32.926667
OB    35.440000
SK    40.913333
SS    37.496667
WO    47.540000
Name: INN2, dtype: float64

In [10]:
# Team ERA: ER total per inning total, scaled to 9 innings.
# The divide-then-multiply order is kept so floating-point results match.
era_by_team = er_by_team.div(inning_by_team).mul(9)

era_by_team

T_ID
HH    4.125992
HT    2.939675
KT    5.226622
LG    2.568103
LT    4.153486
NC    3.077749
OB    2.536964
SK    3.308457
SS    4.404392
WO    3.135044
dtype: float64

## 승률(피타고리안)

* 피타고리안 승률 = $\frac {득점^2} {득점^2 + 실점^2}$<br>
<br>
득점 = RUN<br>
실점 = R (R 컬럼은 제거했으므로 자책점 ER로 대체; 비자책점이 빠져 실제 실점보다 작을 수 있음)

In [11]:
# Sum the RUN column over all batter rows that share a team ID (T_ID).
run_by_team = private_batter.groupby('T_ID')['RUN'].sum()

run_by_team

T_ID
HH    3.40
HT    3.99
KT    3.76
LG    3.95
LT    4.26
NC    5.06
OB    2.09
SK    3.59
SS    5.64
WO    4.18
Name: RUN, dtype: float64

In [12]:
# Pythagorean win rate per team: RUN^2 / (RUN^2 + ER^2), aligned on T_ID.
# NOTE(review): RUN is summed over batter rows and ER over pitcher rows;
# verify that both totals are on the same scale (same games / same unit)
# before reading the result as a win probability — TODO confirm.
wr_by_team = run_by_team.pow(2) / (run_by_team.pow(2) + er_by_team.pow(2))

wr_by_team

T_ID
HH    0.042290
HT    0.090225
KT    0.019177
LG    0.145823
LT    0.054521
NC    0.168012
OB    0.041933
SK    0.053905
SS    0.086314
WO    0.059897
dtype: float64

## 팀별 타율, 방어율, 승률

In [13]:
# Combine the three per-team Series into one table indexed by T_ID.
# Column labels (Korean): batting average, ERA, win rate — in that order.
team_data = pd.DataFrame(dict([
    ('타율', avg_by_team),
    ('방어율', era_by_team),
    ('승률', wr_by_team),
]))

team_data

Unnamed: 0_level_0,타율,방어율,승률
T_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
HH,0.484392,4.125992,0.04229
HT,0.409396,2.939675,0.090225
KT,0.378629,5.226622,0.019177
LG,0.411953,2.568103,0.145823
LT,0.354354,4.153486,0.054521
NC,0.38835,3.077749,0.168012
OB,0.502513,2.536964,0.041933
SK,0.363372,3.308457,0.053905
SS,0.443022,4.404392,0.086314
WO,0.433302,3.135044,0.059897


In [14]:
# Sanity check: total of the per-team win rates.
# NOTE(review): with ten teams one would presumably expect the win rates to
# average near 0.5; a total well below that hints that the inputs to the
# formula are mis-scaled — TODO confirm.
wr_by_team.agg('sum')

0.7620978858490889