Analyzing hitter data with wRC+ and BABIP

In [1]:
import pandas as pd
import numpy as np

Load data

In [2]:
# Load the hitter statistics CSV; the path is relative to the working directory.
hitter_data = pd.read_csv('../datasets/hitter_detailed_stats.csv')

In [3]:
# Preview the first rows to sanity-check the load.
hitter_data.head()

Unnamed: 0,name,id,birthday,연도,팀,나이,P,G,타석,타수,...,희타,희비,타율,출루,장타,OPS,wOBA,wRC+,WAR*,WPA
0,차영화,10005,1957-06-27,1982,해태,25,SS,76,323,282,...,5,2,0.259,0.337,0.309,0.645,0.305,82.5,1.63,
1,차영화,10005,1957-06-27,1983,해태,26,SS,82,252,229,...,5,3,0.266,0.308,0.323,0.631,0.291,81.5,1.01,
2,차영화,10005,1957-06-27,1984,해태,27,SS,69,185,162,...,6,1,0.204,0.274,0.235,0.508,0.241,43.4,-0.02,
3,차영화,10005,1957-06-27,1985,해태,28,SS,106,266,238,...,15,0,0.24,0.276,0.269,0.545,0.246,43.5,-0.0,
4,차영화,10005,1957-06-27,1986,해태,29,SS,85,192,176,...,6,0,0.199,0.242,0.222,0.464,0.22,30.8,-0.46,


In [4]:
# List the columns available in the raw data.
hitter_data.columns


Index(['name', 'id', 'birthday', '연도', '팀', '나이', 'P', 'G', '타석', '타수', '득점',
       '안타', '2타', '3타', '홈런', '루타', '타점', '도루', '도실', '볼넷', '사구', '고4', '삼진',
       '병살', '희타', '희비', '타율', '출루', '장타', 'OPS', 'wOBA', 'wRC+', 'WAR*',
       'WPA'],
      dtype='object')

# Preprocessing

## Calculate K%, BB% and HR/PA

In [5]:
# Per-plate-appearance rates: strikeouts and walks to 3 decimals, home runs to 4.
plate_appearances = hitter_data['타석']
hitter_data['삼진 비율'] = hitter_data['삼진'].div(plate_appearances).round(3)
hitter_data['볼넷 비율'] = hitter_data['볼넷'].div(plate_appearances).round(3)
hitter_data['타석 당 홈런'] = hitter_data['홈런'].div(plate_appearances).round(4)
hitter_data.columns

Index(['name', 'id', 'birthday', '연도', '팀', '나이', 'P', 'G', '타석', '타수', '득점',
       '안타', '2타', '3타', '홈런', '루타', '타점', '도루', '도실', '볼넷', '사구', '고4', '삼진',
       '병살', '희타', '희비', '타율', '출루', '장타', 'OPS', 'wOBA', 'wRC+', 'WAR*',
       'WPA', '삼진 비율', '볼넷 비율', '타석 당 홈런'],
      dtype='object')

## Calculate BABIP

In [6]:
def calc_BABIP(hit, HR, AB, K, SF):
    """Return batting average on balls in play.

    Computed as (hit - HR) / (AB - K - HR + SF). The arguments may be
    plain numbers or pandas Series; with Series the arithmetic is
    element-wise.

    Args:
        hit: hits.
        HR: home runs.
        AB: at-bats.
        K: strikeouts.
        SF: sacrifice flies.

    Returns:
        The unrounded ratio, of whatever type the division produces.
    """
    hits_in_play = hit - HR
    balls_in_play = AB - K - HR + SF
    return hits_in_play / balls_in_play

In [7]:
# Season-level BABIP for every row, rounded to three decimals.
hitter_data['BABIP'] = calc_BABIP(
    hit=hitter_data['안타'],
    HR=hitter_data['홈런'],
    AB=hitter_data['타수'],
    K=hitter_data['삼진'],
    SF=hitter_data['희비'],
).round(3)

In [8]:
# Preview the frame again, now with the derived rate and BABIP columns.
hitter_data.head()

Unnamed: 0,name,id,birthday,연도,팀,나이,P,G,타석,타수,...,장타,OPS,wOBA,wRC+,WAR*,WPA,삼진 비율,볼넷 비율,타석 당 홈런,BABIP
0,차영화,10005,1957-06-27,1982,해태,25,SS,76,323,282,...,0.309,0.645,0.305,82.5,1.63,,0.062,0.096,0.0031,0.274
1,차영화,10005,1957-06-27,1983,해태,26,SS,82,252,229,...,0.323,0.631,0.291,81.5,1.01,,0.067,0.052,0.004,0.28
2,차영화,10005,1957-06-27,1984,해태,27,SS,69,185,162,...,0.235,0.508,0.241,43.4,-0.02,,0.13,0.081,0.0,0.237
3,차영화,10005,1957-06-27,1985,해태,28,SS,106,266,238,...,0.269,0.545,0.246,43.5,-0.0,,0.079,0.041,0.0,0.263
4,차영화,10005,1957-06-27,1986,해태,29,SS,85,192,176,...,0.222,0.464,0.22,30.8,-0.46,,0.047,0.047,0.0,0.21


---
# Analysis

Check Hanwha Eagles hitters

In [9]:
# Keep only the 2023 season rows of the Hanwha ('한화') team.
is_hanwha = hitter_data['팀'] == '한화'
is_2023 = hitter_data['연도'] == 2023
hanwha_hitter_2023 = hitter_data[is_hanwha & is_2023]
hanwha_hitter_2023

Unnamed: 0,name,id,birthday,연도,팀,나이,P,G,타석,타수,...,장타,OPS,wOBA,wRC+,WAR*,WPA,삼진 비율,볼넷 비율,타석 당 홈런,BABIP
114,박정현,50709,2001-07-27,2023,한화,22,SS,53,113,105,...,0.295,0.513,0.234,34.6,-0.31,-1.52,0.319,0.027,0.0177,0.254
116,강재민,50705,1997-04-03,2023,한화,26,P,1,0,0,...,,,,,0.0,0.0,,,,
119,최인호,50707,2000-01-30,2023,한화,23,LF,41,148,131,...,0.427,0.791,0.364,124.5,1.0,0.33,0.149,0.068,0.0135,0.343
199,장지승,51706,1998-07-04,2023,한화,25,,3,3,3,...,0.0,0.0,0.0,-123.9,-0.07,-0.02,0.0,0.0,0.0,0.0
268,이상혁,52704,2001-09-14,2023,한화,22,CF,7,2,2,...,0.0,0.0,0.0,-123.9,-0.07,0.04,0.5,0.0,0.0,0.0
270,유상빈,52705,2000-05-09,2023,한화,23,DH,3,8,6,...,0.0,0.25,0.186,0.4,-0.08,-0.11,0.125,0.125,0.0,0.0
272,박준영,52731,2003-03-02,2023,한화,20,P,1,0,0,...,,,,,0.0,0.0,,,,
275,권광민,52769,1997-12-12,2023,한화,26,LF,66,81,73,...,0.274,0.499,0.235,36.1,-0.3,-0.84,0.296,0.074,0.0247,0.191
304,오그레디,53712,1992-05-17,2023,한화,31,LF,22,86,80,...,0.163,0.337,0.167,-10.4,-0.9,-1.42,0.465,0.058,0.0,0.244
305,윌리엄스,53716,1993-09-08,2023,한화,30,LF,68,273,258,...,0.403,0.678,0.31,86.0,0.32,0.0,0.231,0.029,0.033,0.286


## wRC+ Analysis

Hanwha Eagles hitters with PA > 100

In [10]:
# Hanwha 2023 hitters with more than 100 plate appearances.
hanwha_hitter_2023.loc[hanwha_hitter_2023['타석'].gt(100)]

Unnamed: 0,name,id,birthday,연도,팀,나이,P,G,타석,타수,...,장타,OPS,wOBA,wRC+,WAR*,WPA,삼진 비율,볼넷 비율,타석 당 홈런,BABIP
114,박정현,50709,2001-07-27,2023,한화,22,SS,53,113,105,...,0.295,0.513,0.234,34.6,-0.31,-1.52,0.319,0.027,0.0177,0.254
119,최인호,50707,2000-01-30,2023,한화,23,LF,41,148,131,...,0.427,0.791,0.364,124.5,1.0,0.33,0.149,0.068,0.0135,0.343
305,윌리엄스,53716,1993-09-08,2023,한화,30,LF,68,273,258,...,0.403,0.678,0.31,86.0,0.32,0.0,0.231,0.029,0.033,0.286
307,문현빈,53764,2004-04-20,2023,한화,19,CF,137,481,428,...,0.362,0.686,0.316,90.8,1.55,-0.21,0.175,0.069,0.0104,0.317
1880,이도윤,65703,1996-10-07,2023,한화,27,SS,106,346,309,...,0.317,0.619,0.282,68.1,0.82,-1.72,0.159,0.052,0.0029,0.304
2029,이진영,66657,1997-07-21,2023,한화,26,RF,121,422,358,...,0.394,0.738,0.343,108.8,1.93,1.58,0.301,0.126,0.0237,0.35
2047,김태연,66704,1997-06-10,2023,한화,26,RF,91,280,245,...,0.363,0.7,0.329,99.2,0.75,0.03,0.211,0.096,0.0143,0.323
2053,박상언,66707,1997-03-03,2023,한화,26,C,86,161,145,...,0.269,0.522,0.245,43.0,-0.23,-1.48,0.236,0.062,0.0062,0.259
2058,장진혁,66706,1993-09-30,2023,한화,30,CF,68,178,162,...,0.265,0.552,0.269,59.3,-0.18,-0.69,0.23,0.084,0.0,0.295
2066,김인환,66715,1994-01-28,2023,한화,29,1B,112,365,325,...,0.338,0.64,0.301,80.2,-0.33,-0.92,0.249,0.096,0.0192,0.287


Hitters whose wRC+ > 100 among those with PA > 100  
리그 평균보다 공격력이 좋은 타자가 5명 밖에 없다. 심지어 200타석 이상으로 보면 4명.  
한화의 공격력은 그만큼 처참했음을 알 수 있다.


In [11]:
# Hanwha 2023 hitters with more than 100 plate appearances and wRC+ above 100.
enough_pa_mask = hanwha_hitter_2023['타석'].gt(100)
wrc_over_100_mask = hanwha_hitter_2023['wRC+'].gt(100)
hanwha_hitter_2023[enough_pa_mask & wrc_over_100_mask]

Unnamed: 0,name,id,birthday,연도,팀,나이,P,G,타석,타수,...,장타,OPS,wOBA,wRC+,WAR*,WPA,삼진 비율,볼넷 비율,타석 당 홈런,BABIP
119,최인호,50707,2000-01-30,2023,한화,23,LF,41,148,131,...,0.427,0.791,0.364,124.5,1.0,0.33,0.149,0.068,0.0135,0.343
2029,이진영,66657,1997-07-21,2023,한화,26,RF,121,422,358,...,0.394,0.738,0.343,108.8,1.93,1.58,0.301,0.126,0.0237,0.35
2528,노시환,69737,2000-12-03,2023,한화,23,3B,131,595,514,...,0.541,0.929,0.418,159.3,6.93,4.25,0.198,0.124,0.0521,0.332
5321,최재훈,78288,1989-08-27,2023,한화,34,C,125,417,327,...,0.294,0.686,0.338,104.7,2.8,0.28,0.115,0.134,0.0024,0.286
5630,채은성,79192,1990-02-06,2023,한화,33,1B,137,596,521,...,0.428,0.779,0.358,118.6,2.64,2.17,0.171,0.087,0.0386,0.286


The number of hitters whose wRC+ > 100 by team

In [12]:
# Team labels as they appear in the '팀' column; the order here fixes the
# order of the per-team counts computed from this list.
team_2023 = [
    'KT',
    'LG',
    '키움',
    '롯데',
    '두산',
    '삼성',
    'KIA',
    '한화',
    'SSG',
    'NC',
]

In [13]:

# 2023 hitters with more than 100 plate appearances and wRC+ above 100.
in_2023 = hitter_data['연도'] == 2023
wrc_over_100 = hitter_data['wRC+'] > 100
enough_pa = hitter_data['타석'] > 100
hitter_2023 = hitter_data[in_2023 & wrc_over_100 & enough_pa]

# One count per team, in the order given by team_2023.
num_WRC_player_list = [len(hitter_2023[hitter_2023['팀'] == team]) for team in team_2023]
num_WRC_player_list

[6, 7, 7, 9, 6, 5, 9, 5, 7, 6]

In [14]:
# Save the filtered 2023 hitters to the working directory; with pandas defaults
# the DataFrame index is written as the first CSV column.
hitter_2023.to_csv("hitter2023.csv")

## WAR Analysis

Among hitters with more than 100 PA, the total WAR of Eagles hitters was the second poorest among the 10 teams (only Samsung's was lower).

In [16]:
# Name and WAR of Hanwha 2023 hitters with more than 100 plate appearances.
hanwha_hitter_2023.loc[hanwha_hitter_2023['타석'] > 100, ['name', 'WAR*']]

Unnamed: 0,name,WAR*
114,박정현,-0.31
119,최인호,1.0
305,윌리엄스,0.32
307,문현빈,1.55
1880,이도윤,0.82
2029,이진영,1.93
2047,김태연,0.75
2053,박상언,-0.23
2058,장진혁,-0.18
2066,김인환,-0.33


In [17]:
# Total WAR of Hanwha's 2023 hitters with more than 100 plate appearances.
# Series.sum() skips missing WAR values, whereas the builtin sum() would return
# NaN if any row were missing; this matches how the per-team groupby total is
# computed. Rounding to 2 decimals (the precision of the WAR* column) keeps
# float accumulation noise out of the printed figure.
hanwha_regulars_war = hanwha_hitter_2023.loc[hanwha_hitter_2023['타석'] > 100, 'WAR*']
hitter_war_sum = round(hanwha_regulars_war.sum(), 2)
print(f"Hanwha hitters' WAR in total: {hitter_war_sum}")

Hanwha hitters' WAR in total: 19.03


In [18]:
# Rank teams by the summed WAR of their 2023 hitters with more than 100
# plate appearances, highest first.
(
    hitter_data[(hitter_data['연도'] == 2023) & (hitter_data['타석'] > 100)]
    .groupby('팀')['WAR*']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

팀
LG     30.08
KIA    27.43
NC     24.57
SSG    23.97
두산     22.12
KT     20.81
키움     20.63
롯데     19.40
한화     19.03
삼성     18.27
Name: WAR*, dtype: float64

## BABIP Analysis
Find Eagles players whose recent BABIP was lower than their own career (total) BABIP.


In [19]:
# Re-list the columns, which now include the derived rate and BABIP columns.
hitter_data.columns

Index(['name', 'id', 'birthday', '연도', '팀', '나이', 'P', 'G', '타석', '타수', '득점',
       '안타', '2타', '3타', '홈런', '루타', '타점', '도루', '도실', '볼넷', '사구', '고4', '삼진',
       '병살', '희타', '희비', '타율', '출루', '장타', 'OPS', 'wOBA', 'wRC+', 'WAR*',
       'WPA', '삼진 비율', '볼넷 비율', '타석 당 홈런', 'BABIP'],
      dtype='object')

Extract Eagles hitters in 2023

In [20]:
# All seasons in the data for players who appear in Hanwha's 2023 rows;
# a row is kept when both its name and its id occur in that 2023 subset.
names_2023 = hanwha_hitter_2023['name']
ids_2023 = hanwha_hitter_2023['id']
name_matches = hitter_data['name'].isin(names_2023)
id_matches = hitter_data['id'].isin(ids_2023)
hanwha_hitter = hitter_data[name_matches & id_matches]

Calculate balls in play

In [21]:
# Career totals per player of the counting stats BABIP needs.
# The columns are selected BEFORE aggregating, so only these five numeric
# columns are summed; summing the whole frame first would also try to
# aggregate the string columns (birthday, team, position) and then discard them.
babip_inputs = ['안타', '홈런', '타수', '삼진', '희비']
hanwha_hitter_sum = hanwha_hitter.groupby(['id', 'name'])[babip_inputs].sum()
# Group on (id, name) so each player stays a separate row, then drop the id
# level so the table is indexed by player name only.
hanwha_hitter_sum.index = hanwha_hitter_sum.index.droplevel('id')
hanwha_hitter_sum

Unnamed: 0_level_0,안타,홈런,타수,삼진,희비
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
강재민,0,0,0,0,0
최인호,97,6,394,99,5
박정현,107,6,478,141,1
장지승,11,1,64,24,1
이상혁,0,0,2,1,0
유상빈,15,0,52,12,1
박준영,0,0,0,0,0
권광민,27,2,144,54,2
오그레디,10,0,80,40,1
윌리엄스,63,9,258,63,3


Calculate BABIP in total

In [22]:
# Career BABIP from the summed counting stats, rounded to three decimals.
career_babip = calc_BABIP(
    hanwha_hitter_sum['안타'],
    hanwha_hitter_sum['홈런'],
    hanwha_hitter_sum['타수'],
    hanwha_hitter_sum['삼진'],
    hanwha_hitter_sum['희비'],
)
hanwha_hitter_sum['BABIP'] = career_babip.round(3)

# Balls in play: at-bats minus home runs and strikeouts, plus sacrifice flies.
career_balls_in_play = (
    hanwha_hitter_sum['타수']
    - hanwha_hitter_sum['홈런']
    - hanwha_hitter_sum['삼진']
    + hanwha_hitter_sum['희비']
)
hanwha_hitter_sum["balls_in_play"] = career_balls_in_play
hanwha_hitter_sum

Unnamed: 0_level_0,안타,홈런,타수,삼진,희비,BABIP,balls_in_play
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
강재민,0,0,0,0,0,,0
최인호,97,6,394,99,5,0.31,294
박정현,107,6,478,141,1,0.304,332
장지승,11,1,64,24,1,0.25,40
이상혁,0,0,2,1,0,0.0,1
유상빈,15,0,52,12,1,0.366,41
박준영,0,0,0,0,0,,0
권광민,27,2,144,54,2,0.278,90
오그레디,10,0,80,40,1,0.244,41
윌리엄스,63,9,258,63,3,0.286,189


Criterion for a meaningful BABIP sample size (the filter below uses more than 820 balls in play)  
https://yagongso.com/?p=4405

In [23]:
# Players with more than 820 career balls in play.
hanwha_hitter_sum.loc[hanwha_hitter_sum['balls_in_play'].gt(820)]

Unnamed: 0_level_0,안타,홈런,타수,삼진,희비,BABIP,balls_in_play
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
하주석,727,48,2754,727,10,0.341,1989
노수광,813,35,3021,684,20,0.335,2322
정은원,627,31,2410,479,15,0.311,1915
노시환,487,68,1851,508,13,0.325,1288
이명기,1245,30,4101,619,25,0.349,3477
최재훈,779,26,3023,498,19,0.299,2518
오선진,629,18,2612,509,14,0.291,2099
채은성,1129,119,3857,718,58,0.328,3078


In [28]:
# Narrow the 2023 Hanwha rows to the identifying, counting and rate columns
# used in the summary.
summary_columns = [
    'name', 'id', 'G', '타석', '안타', '홈런', '타점',
    '삼진 비율', '볼넷 비율', '타석 당 홈런',
    'OPS', 'wRC+', 'WAR*', 'BABIP',
]
hanwha_hitter_2023_summary = hanwha_hitter_2023[summary_columns]

In [29]:
# Display the 2023 summary table.
hanwha_hitter_2023_summary

Unnamed: 0,name,id,G,타석,안타,홈런,타점,삼진 비율,볼넷 비율,타석 당 홈런,OPS,wRC+,WAR*,BABIP
114,박정현,50709,53,113,19,2,4,0.319,0.027,0.0177,0.513,34.6,-0.31,0.254
116,강재민,50705,1,0,0,0,0,,,,,,0.0,
119,최인호,50707,41,148,39,2,11,0.149,0.068,0.0135,0.791,124.5,1.0,0.343
199,장지승,51706,3,3,0,0,0,0.0,0.0,0.0,0.0,-123.9,-0.07,0.0
268,이상혁,52704,7,2,0,0,0,0.5,0.0,0.0,0.0,-123.9,-0.07,0.0
270,유상빈,52705,3,8,0,0,0,0.125,0.125,0.0,0.25,0.4,-0.08,0.0
272,박준영,52731,1,0,0,0,0,,,,,,0.0,
275,권광민,52769,66,81,11,2,9,0.296,0.074,0.0247,0.499,36.1,-0.3,0.191
304,오그레디,53712,22,86,10,0,8,0.465,0.058,0.0,0.337,-10.4,-0.9,0.244
305,윌리엄스,53716,68,273,63,9,45,0.231,0.029,0.033,0.678,86.0,0.32,0.286


In [30]:
# Players with more than 820 career balls in play.
hanwha_hitter_sum.loc[hanwha_hitter_sum['balls_in_play'].gt(820)]

Unnamed: 0_level_0,안타,홈런,타수,삼진,희비,BABIP,balls_in_play
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
하주석,727,48,2754,727,10,0.341,1989
노수광,813,35,3021,684,20,0.335,2322
정은원,627,31,2410,479,15,0.311,1915
노시환,487,68,1851,508,13,0.325,1288
이명기,1245,30,4101,619,25,0.349,3477
최재훈,779,26,3023,498,19,0.299,2518
오선진,629,18,2612,509,14,0.291,2099
채은성,1129,119,3857,718,58,0.328,3078


Compare each player's total (career) BABIP against his records over the three most recent seasons

In [68]:
# Loop-invariant selections, computed once instead of on every iteration.
# Career BABIP of players with more than 820 career balls in play.
career_babip = hanwha_hitter_sum.loc[hanwha_hitter_sum['balls_in_play']>820, 'BABIP']
# 2023 Hanwha hitters with more than 100 plate appearances.
regulars_2023 = hanwha_hitter_2023_summary[hanwha_hitter_2023_summary['타석']>100]
# Season rows for 2021 through 2023.
recent_seasons = hitter_data[hitter_data['연도'].isin(range(2021, 2024))]
report_columns = ['name', 'id', '연도', '나이', 'G', '타석', '삼진 비율', '볼넷 비율',
                  '타석 당 홈런', 'OPS', 'wRC+', 'WAR*','BABIP']

for hitter_name, babip_total in career_babip.items():
    # Resolve the player's id from the 2023 summary and select seasons by id:
    # selecting by name alone would also pull in the seasons of any other
    # player in the data who shares the same name.
    hitter_ids = regulars_2023.loc[regulars_2023['name']==hitter_name, 'id']
    if hitter_ids.empty:
        # Not a 2023 regular (100 PA or fewer), so there is nothing to compare.
        continue
    print(f"{hitter_name} - BABIP in total: {babip_total}")
    print("record in recent 3 seasons")
    display(recent_seasons.loc[recent_seasons['id'].isin(hitter_ids), report_columns]
            .reset_index(drop = True))
    print("-"*100)
        

정은원 - BABIP in total: 0.311
record in recent 3 seasons


Unnamed: 0,name,id,연도,나이,G,타석,삼진 비율,볼넷 비율,타석 당 홈런,OPS,wRC+,WAR*,BABIP
0,정은원,68743,2021,21,139,608,0.173,0.173,0.0099,0.791,126.6,4.46,0.346
1,정은원,68743,2022,22,140,601,0.181,0.141,0.0133,0.749,118.2,3.7,0.335
2,정은원,68743,2023,23,122,459,0.159,0.135,0.0044,0.601,76.8,0.84,0.268


----------------------------------------------------------------------------------------------------
노시환 - BABIP in total: 0.325
record in recent 3 seasons


Unnamed: 0,name,id,연도,나이,G,타석,삼진 비율,볼넷 비율,타석 당 홈런,OPS,wRC+,WAR*,BABIP
0,노시환,69737,2021,21,107,458,0.234,0.159,0.0393,0.852,131.8,3.75,0.328
1,노시환,69737,2022,22,115,490,0.194,0.098,0.0122,0.738,112.5,2.65,0.344
2,노시환,69737,2023,23,131,595,0.198,0.124,0.0521,0.929,159.3,6.93,0.332


----------------------------------------------------------------------------------------------------
최재훈 - BABIP in total: 0.299
record in recent 3 seasons


Unnamed: 0,name,id,연도,나이,G,타석,삼진 비율,볼넷 비율,타석 당 홈런,OPS,wRC+,WAR*,BABIP
0,최재훈,78288,2021,32,116,467,0.146,0.152,0.015,0.791,124.6,3.67,0.317
1,최재훈,78288,2022,33,114,437,0.149,0.101,0.0114,0.641,87.4,1.78,0.257
2,최재훈,78288,2023,34,125,417,0.115,0.134,0.0024,0.686,104.7,2.8,0.286


----------------------------------------------------------------------------------------------------
오선진 - BABIP in total: 0.291
record in recent 3 seasons


Unnamed: 0,name,id,연도,나이,G,타석,삼진 비율,볼넷 비율,타석 당 홈런,OPS,wRC+,WAR*,BABIP
0,오선진,78756,2021,32,23,46,0.174,0.065,0.0,0.497,41.8,-0.08,0.265
1,오선진,78756,2022,33,100,306,0.167,0.059,0.0098,0.667,79.9,0.53,0.33
2,오선진,78756,2023,34,90,199,0.171,0.121,0.0,0.609,79.8,0.46,0.286


----------------------------------------------------------------------------------------------------
채은성 - BABIP in total: 0.328
record in recent 3 seasons


Unnamed: 0,name,id,연도,나이,G,타석,삼진 비율,볼넷 비율,타석 당 홈런,OPS,wRC+,WAR*,BABIP
0,채은성,79192,2021,31,110,448,0.179,0.085,0.0357,0.812,128.2,2.86,0.305
1,채은성,79192,2022,32,126,526,0.167,0.051,0.0228,0.79,127.3,2.89,0.333
2,채은성,79192,2023,33,137,596,0.171,0.087,0.0386,0.779,118.6,2.64,0.286


----------------------------------------------------------------------------------------------------


Conclusion  
반등 가능성 있는 타자: 정은원, 최재훈, 오선진, 채은성  
특히 채은성은 통산 BABIP과 지난 시즌 BABIP이 0.04가량 차이나므로 지난 시즌이 상당히 불운한 시즌이었다고 볼 수 있다.