In [1]:
import numpy as np
import pandas as pd

In [2]:
# Input files, resolved relative to the notebook's working directory.
BATTER_CSV = 'private_batter_report.csv'
PITCHER_CSV = 'private_pitcher_report.csv'

# One row per player (P_ID) with the team code in T_ID, as shown by the previews below.
private_batter = pd.read_csv(BATTER_CSV)
private_pitcher = pd.read_csv(PITCHER_CSV)

In [3]:
# Preview the first five batter rows to check the columns that were loaded.
private_batter.head(n=5)

Unnamed: 0.1,Unnamed: 0,T_ID,P_ID,TB_SC,PA-AB,AB,RUN,RBI,SH+SF,KK,SB_trial,BABIP,HIT
0,0,NC,62907,B,0.37,4.01,0.73,0.45,0.08,0.38,0.07,0.35,0.99
1,1,NC,62934,B,0.14,2.08,0.31,0.33,0.03,0.37,0.26,0.15,0.91
2,2,HT,64646,T,0.21,3.74,0.61,0.28,0.03,0.46,0.14,0.31,0.99
3,3,NC,78122,B,0.14,1.58,0.19,0.26,0.05,0.23,0.0,0.24,0.97
4,4,KT,64006,T,0.2,3.07,0.24,0.34,0.07,0.82,0.2,0.17,0.97


In [4]:
# Preview the first five pitcher rows to check the columns that were loaded.
private_pitcher.head(n=5)

Unnamed: 0.1,Unnamed: 0,T_ID,P_ID,TB_SC,INN2,BF,PA-AB,AB,HIT,H1,...,H3,HR,SB_SR,KK,WP,SLG,BABIP,KK9,BB9,ER
0,0,OB,77263,B,2.77,18.24,0.72,3.78,1.12,0.81,...,0.08,0.01,0.01,0.98,0.03,0.3,0.37,8.28,10.23,0.22
1,1,NC,63959,B,2.09,12.21,0.43,2.55,0.51,0.45,...,0.0,0.01,0.0,0.43,0.0,0.06,0.26,6.03,7.02,-0.1
2,2,NC,61295,T,3.26,16.05,0.52,3.9,0.98,0.64,...,0.0,0.26,0.05,0.73,0.0,0.34,0.2,6.17,3.99,0.02
3,3,SK,60841,T,15.77,94.08,3.26,20.18,5.36,4.08,...,0.11,0.43,0.63,4.85,0.08,1.28,0.33,8.07,6.16,1.5
4,4,LT,64021,T,14.93,86.98,2.5,20.1,5.62,3.96,...,0.05,0.69,0.36,3.8,0.66,1.66,0.31,6.81,4.59,3.26


## 타율(AVG)

In [5]:
# Total of the HIT column per team (T_ID), summed over that team's batter rows.
hit_by_team = private_batter.groupby('T_ID')['HIT'].sum()

hit_by_team

T_ID
HH    17.33
HT    10.68
KT    10.54
LG    11.16
LT    14.40
NC    13.51
OB     6.41
SK    11.50
SS    17.37
WO    12.40
Name: HIT, dtype: float64

In [6]:
# Total of the AB column per team (T_ID), summed over that team's batter rows.
ab_by_team = private_batter.groupby('T_ID')['AB'].sum()

ab_by_team

T_ID
HH    37.16
HT    26.82
KT    23.77
LG    21.92
LT    36.63
NC    36.05
OB    11.94
SK    27.52
SS    40.63
WO    32.31
Name: AB, dtype: float64

In [7]:
# Team batting average: summed HIT divided by summed AB, aligned on T_ID.
avg_by_team = hit_by_team.div(ab_by_team)

avg_by_team

T_ID
HH    0.466362
HT    0.398210
KT    0.443416
LG    0.509124
LT    0.393120
NC    0.374757
OB    0.536851
SK    0.417878
SS    0.427517
WO    0.383782
dtype: float64

## 방어율(ERA)

In [8]:
# Total of the ER column per team (T_ID), summed over that team's pitcher rows.
er_by_team = private_pitcher.groupby('T_ID')['ER'].sum()

er_by_team

T_ID
HH    17.59
HT    14.75
KT    27.28
LG    12.44
LT    17.52
NC    12.15
OB    12.29
SK    17.44
SS    18.58
WO    15.84
Name: ER, dtype: float64

In [9]:
# Total of the INN2 column per team (T_ID), summed over that team's pitcher rows.
inning_by_team = private_pitcher.groupby('T_ID')['INN2'].sum()

inning_by_team

T_ID
HH    105.88
HT    116.37
KT    138.91
LG    100.51
LT    115.32
NC     98.78
OB    106.32
SK    122.74
SS    112.49
WO    142.62
Name: INN2, dtype: float64

In [10]:
# Team "ERA" as used in this notebook: summed ER divided by summed INN2, aligned on T_ID.
# NOTE(review): this is ER per INN2 unit, not the conventional ERA (9 x ER / innings).
# In the pitcher preview the KK9 column is close to 27 x KK / INN2, so INN2 is
# presumably recorded in outs -- confirm, and if a conventional ERA is wanted,
# rescale this value and the per-player ERA together so they stay comparable.
era_by_team = er_by_team.div(inning_by_team)

era_by_team

T_ID
HH    0.166131
HT    0.126751
KT    0.196386
LG    0.123769
LT    0.151925
NC    0.123001
OB    0.115594
SK    0.142089
SS    0.165170
WO    0.111064
dtype: float64

## 승률(피타고리안)

* 피타고리안 승률 = $\frac{\text{득점}^2}{\text{득점}^2 + \text{실점}^2}$<br>
<br>
득점 = RUN<br>
실점 = R (R 컬럼은 제거했으므로 자책점 ER로 대체)

In [11]:
# Total of the RUN column per team (T_ID), summed over that team's batter rows.
run_by_team = private_batter.groupby('T_ID')['RUN'].sum()

run_by_team

T_ID
HH    3.40
HT    3.99
KT    3.76
LG    3.95
LT    4.26
NC    5.06
OB    2.09
SK    3.59
SS    5.64
WO    4.18
Name: RUN, dtype: float64

In [12]:
# Pythagorean expectation: runs^2 / (runs^2 + runs_allowed^2), with ER standing in
# for runs allowed.
# NOTE(review): RUN is summed over batter rows and ER over pitcher rows; every
# resulting value is far below 0.5, so the two totals are likely not on a
# comparable scale -- verify before interpreting these as win rates.
runs_sq = run_by_team.pow(2)
allowed_sq = er_by_team.pow(2)
wr_by_team = runs_sq / (runs_sq + allowed_sq)

wr_by_team

T_ID
HH    0.036016
HT    0.068185
KT    0.018643
LG    0.091588
LT    0.055822
NC    0.147805
OB    0.028107
SK    0.040651
SS    0.084370
WO    0.065104
dtype: float64

## 팀별 타율, 방어율, 승률

In [13]:
# Combine the three per-team metrics into one table indexed by T_ID.
metric_names = ['타율', '방어율', '승률']
metric_series = [avg_by_team, era_by_team, wr_by_team]
team_data = pd.DataFrame(dict(zip(metric_names, metric_series)))

team_data

Unnamed: 0_level_0,타율,방어율,승률
T_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
HH,0.466362,0.166131,0.036016
HT,0.39821,0.126751,0.068185
KT,0.443416,0.196386,0.018643
LG,0.509124,0.123769,0.091588
LT,0.39312,0.151925,0.055822
NC,0.374757,0.123001,0.147805
OB,0.536851,0.115594,0.028107
SK,0.417878,0.142089,0.040651
SS,0.427517,0.16517,0.08437
WO,0.383782,0.111064,0.065104


## 시각화

In [14]:
import matplotlib.pyplot as plt

# Use the NanumGothic font so Korean labels render, and keep the ASCII minus sign
# on axes instead of the Unicode one.
plt.rcParams['font.family'] = 'NanumGothic'
plt.rcParams['axes.unicode_minus'] = False

import seaborn as sns
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
import ipywidgets as widgets
from ipywidgets import interact

In [15]:
# Route bokeh output to the notebook. No bokeh figure is created in the cells
# visible here; the charts below are drawn with seaborn/matplotlib.
output_notebook()

### 팀당 선수별 타율

In [16]:
# Per-player batting average (HIT / AB) alongside the player's id and team code.
# Rows where the division yields NaN are counted and dropped in the following cells.
player_avg_team = private_batter[['P_ID']].assign(
    AVG=private_batter['HIT'].div(private_batter['AB']),
    T_ID=private_batter['T_ID'],
)

player_avg_team.head()

Unnamed: 0,P_ID,AVG,T_ID
0,62907,0.246883,NC
1,62934,0.4375,NC
2,64646,0.264706,HT
3,78122,0.613924,NC
4,64006,0.315961,KT


In [17]:
# Count missing values per column (AVG is NaN wherever HIT / AB was undefined).
player_avg_team.isna().sum()

P_ID     0
AVG     31
T_ID     0
dtype: int64

In [18]:
# Drop every player row that has a missing value in any column.
player_avg_team = player_avg_team.dropna(axis='index', how='any')

In [19]:
# One summary row per team, labelled '팀 전체' in P_ID, carrying the team-level
# batting average so it can be plotted next to the individual players.
add_team = (
    team_data['타율']
    .rename('AVG')
    .to_frame()
    .assign(T_ID=team_data.index)
)
add_team.insert(0, 'P_ID', '팀 전체')

In [20]:
# Add the per-team summary rows so each team's chart also shows a '팀 전체' bar.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and keeps the original row labels, as append did.
# Any '팀 전체' rows left by a previous run of this cell are dropped first, so
# re-running the cell does not stack duplicate summary rows.
is_team_row = player_avg_team['P_ID'] == '팀 전체'
player_avg_team = pd.concat([player_avg_team[~is_team_row], add_team])

In [21]:
# Distinct team codes, in order of first appearance, used as the dropdown options.
t = player_avg_team['T_ID'].unique().tolist()

In [22]:
sns.set(font='NanumGothic', font_scale=1.3, style='whitegrid')


@interact(team=t)
def update(team):
    """Draw a bar chart of AVG per P_ID for the team chosen in the dropdown.

    `team` is one of the T_ID values offered by `t`. The rows plotted are all
    rows of `player_avg_team` with that T_ID, including the '팀 전체' row.
    """
    team_rows = player_avg_team.loc[player_avg_team['T_ID'] == team]
    sns.catplot(data=team_rows, x='P_ID', y='AVG', kind='bar',
                height=5, aspect=3)
    plt.show()

interactive(children=(Dropdown(description='team', options=('NC', 'HT', 'KT', 'WO', 'SS', 'OB', 'SK', 'LG', 'H…

### 팀당 선수별 방어율

In [23]:
# Per-pitcher ER / INN2 alongside the pitcher's id and team code.
# NOTE(review): like the team-level value, this is ER per INN2 unit rather than a
# conventional ERA (9 x ER / innings); the preview also shows negative values
# because ER itself is negative in some rows -- confirm this is expected.
player_era_team = private_pitcher[['P_ID']].assign(
    ERA=private_pitcher['ER'].div(private_pitcher['INN2']),
    T_ID=private_pitcher['T_ID'],
)

player_era_team.head()

Unnamed: 0,P_ID,ERA,T_ID
0,77263,0.079422,OB
1,63959,-0.047847,NC
2,61295,0.006135,NC
3,60841,0.095117,SK
4,64021,0.218352,LT


In [24]:
# Count missing values per column to check whether any pitcher rows need dropping.
player_era_team.isna().sum()

P_ID    0
ERA     0
T_ID    0
dtype: int64

In [25]:
# One summary row per team, labelled '팀 전체' in P_ID, carrying the team-level
# ERA value so it can be plotted next to the individual pitchers.
add_team = (
    team_data['방어율']
    .rename('ERA')
    .to_frame()
    .assign(T_ID=team_data.index)
)
add_team.insert(0, 'P_ID', '팀 전체')

In [26]:
# Add the per-team summary rows so each team's chart also shows a '팀 전체' bar.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and keeps the original row labels, as append did.
# Any '팀 전체' rows left by a previous run of this cell are dropped first, so
# re-running the cell does not stack duplicate summary rows.
is_team_row = player_era_team['P_ID'] == '팀 전체'
player_era_team = pd.concat([player_era_team[~is_team_row], add_team])

In [27]:
# Distinct team codes, in order of first appearance, used as the dropdown options.
# This rebinds `t`, which the batting-average widget cell also used.
t = player_era_team['T_ID'].unique().tolist()

In [28]:
sns.set(font='NanumGothic', font_scale=1.3, style='whitegrid')


# NOTE: this redefines `update`, shadowing the batting-average callback defined
# in the earlier widget cell.
@interact(team=t)
def update(team):
    """Draw a bar chart of ERA per P_ID for the team chosen in the dropdown.

    `team` is one of the T_ID values offered by `t`. The rows plotted are all
    rows of `player_era_team` with that T_ID, including the '팀 전체' row.
    """
    team_rows = player_era_team.loc[player_era_team['T_ID'] == team]
    sns.catplot(data=team_rows, x='P_ID', y='ERA', kind='bar',
                height=5, aspect=3)
    plt.show()

interactive(children=(Dropdown(description='team', options=('OB', 'NC', 'SK', 'LT', 'SS', 'WO', 'HH', 'KT', 'L…