Detailed analysis of hitters using season statistics and tracking data

In [None]:
import numpy as np
import pandas as pd


## Analysis with "hitter_detailed_stats.csv"

Read hitter data

In [None]:
# Load the detailed hitter statistics CSV (path is relative to the notebook's working directory).
hitter_data = pd.read_csv(
    filepath_or_buffer='../datasets/hitter_detailed_stats.csv',
)

### Calculate K%, BB% and HR/PA (home runs per plate appearance)

In [None]:
# Per-plate-appearance rates: each count below is divided by '타석' and rounded.
plate_appearances = hitter_data['타석']
hitter_data['삼진 비율'] = hitter_data['삼진'].div(plate_appearances).round(3)
hitter_data['볼넷 비율'] = hitter_data['볼넷'].div(plate_appearances).round(3)
hitter_data['타석 당 홈런'] = hitter_data['홈런'].div(plate_appearances).round(4)

# Show the column index so the added columns can be checked.
hitter_data.columns

Index(['name', 'id', 'birthday', '연도', '팀', '나이', 'P', 'G', '타석', '타수', '득점',
       '안타', '2타', '3타', '홈런', '루타', '타점', '도루', '도실', '볼넷', '사구', '고4', '삼진',
       '병살', '희타', '희비', '타율', '출루', '장타', 'OPS', 'wOBA', 'wRC+', 'WAR*',
       'WPA', 'BABIP', '삼진율', '볼넷율', '타석 당 홈런', '삼진 비율', '볼넷 비율'],
      dtype='object')

## Calculate BABIP

In [None]:
def calc_BABIP(hit, HR, AB, K, SF):
    """Return batting average on balls in play.

    Evaluates ``(hit - HR) / (AB - K - HR + SF)``.

    Parameters
    ----------
    hit, HR, AB, K, SF
        Hits, home runs, at-bats, strikeouts and sacrifice flies. Each may be
        a plain number or an object supporting element-wise arithmetic (the
        notebook passes pandas Series).

    Returns
    -------
    The unrounded ratio, of the same kind as the inputs. No rounding and no
    zero-denominator handling is performed here.
    """
    balls_in_play = AB - K - HR + SF
    return (hit - HR) / balls_in_play

In [None]:
# BABIP's denominator is built from at-bats, so pass '타수' (at-bats) as the AB
# argument. '타석' (plate appearances) also counts walks, hit-by-pitches and
# sacrifices, which would enlarge the denominator and understate BABIP.
# Note: an existing 'BABIP' column in hitter_data, if any, is overwritten.
hitter_data["BABIP"] = calc_BABIP(hitter_data['안타'],
                                  hitter_data['홈런'],
                                  hitter_data['타수'],
                                  hitter_data['삼진'],
                                  hitter_data['희비']).round(3)

In [None]:
def total_BABIP(df, name, id):
    """Sum one player's counting stats over all of his rows and compute BABIP.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'name', 'id', '안타', '홈런', '타석', '타수', '삼진' and '희비'.
    name, id
        Values matched exactly against the 'name' and 'id' columns; both must
        match for a row to be included.

    Returns
    -------
    pandas.DataFrame
        Indexed by ('id', 'name') with the summed counting columns plus a
        'BABIP' column rounded to 3 decimals. Empty if no row matches.
    """
    # Select the needed columns before summing so unrelated (and non-numeric)
    # columns are never aggregated.
    stat_columns = ['안타', '홈런', '타석', '타수', '삼진', '희비']
    player_rows = df[(df['name'] == name) & (df['id'] == id)]
    totals = player_rows.groupby(['id', 'name'])[stat_columns].sum()
    # The AB argument must be at-bats ('타수'), not plate appearances ('타석').
    totals["BABIP"] = calc_BABIP(totals['안타'],
                                 totals['홈런'],
                                 totals['타수'],
                                 totals['삼진'],
                                 totals['희비']).round(3)
    return totals

### Function for looking up a player's seasons

In [None]:
def search_player(df, name, id, year):
    """Return the rows of ``df`` for one player, restricted to the given season(s).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'name', 'id' and '연도' columns.
    name, id
        Values matched exactly against the 'name' and 'id' columns.
    year
        A single year, or any list-like of years (list, range, ...).

    Returns
    -------
    pandas.DataFrame
        The matching rows with a fresh 0..n-1 index; empty if nothing matches.
    """
    # Series.isin rejects scalars, so wrap a single year in a list.
    years = [year] if pd.api.types.is_scalar(year) else year
    is_player = (df['name'] == name) & (df['id'] == id)
    in_years = df['연도'].isin(years)
    return df[is_player & in_years].reset_index(drop=True)

In [None]:
# Inspect the available column names (same check as after the rate columns were added).
hitter_data.columns

Index(['name', 'id', 'birthday', '연도', '팀', '나이', 'P', 'G', '타석', '타수', '득점',
       '안타', '2타', '3타', '홈런', '루타', '타점', '도루', '도실', '볼넷', '사구', '고4', '삼진',
       '병살', '희타', '희비', '타율', '출루', '장타', 'OPS', 'wOBA', 'wRC+', 'WAR*',
       'WPA', 'BABIP', '삼진율', '볼넷율', '타석 당 홈런', '삼진 비율', '볼넷 비율'],
      dtype='object')

New players in 2024

In [None]:
# Each entry is a [name, id] pair; both values are passed to total_BABIP / search_player.
new_player_list = [
    ['안치홍', 79608],
    ['김강민', 71837],
    ['이재원', 76812],
]

### Compare each player's overall BABIP (all seasons in the data) against his records for the most recent three seasons (2021–2023)

In [None]:
# Aggregate every row for the first listed player and print the overall BABIP.
player_totals = total_BABIP(hitter_data, new_player_list[0][0], new_player_list[0][1])
babip = player_totals['BABIP']
print(f"{new_player_list[0][0]} - BABIP in total: {babip.values}")

# Season lines for 2021-2023, limited to the columns of interest.
recent_seasons = search_player(hitter_data,
                               new_player_list[0][0],
                               new_player_list[0][1],
                               range(2021,2024))
recent_seasons[['name', 'id', '연도', '나이', 'G', '타석', '삼진 비율', '볼넷 비율',
                '타석 당 홈런', 'OPS', 'wRC+', 'WAR*','BABIP']]

안치홍 - BABIP in total: [0.28]


Unnamed: 0,name,id,연도,나이,G,타석,삼진 비율,볼넷 비율,타석 당 홈런,OPS,wRC+,WAR*,BABIP
0,안치홍,79608,2021,31,119,490,0.118,0.106,0.0204,0.838,124.3,3.45,0.276
1,안치홍,79608,2022,32,132,562,0.093,0.091,0.0249,0.79,117.2,3.24,0.251
2,안치홍,79608,2023,33,121,494,0.107,0.099,0.0162,0.774,119.1,2.85,0.265


In [None]:
# Print the aggregated totals (including BABIP) for the second listed player.
player_totals = total_BABIP(hitter_data, new_player_list[1][0], new_player_list[1][1])
print(player_totals)

# Season lines for 2021-2023, limited to the columns of interest.
recent_seasons = search_player(hitter_data,
                               new_player_list[1][0],
                               new_player_list[1][1],
                               range(2021,2024))
recent_seasons[['name', 'id', '연도', '나이', 'G', '타석', '삼진 비율', '볼넷 비율',
                '타석 당 홈런', 'OPS', 'wRC+', 'WAR*','BABIP']]

              안타   홈런    타석    삼진  희비  BABIP
id    name                                  
71837 김강민   1470  138  6047  1114  33  0.276


Unnamed: 0,name,id,나이,G,타석,삼진율,볼넷율,타석 당 홈런,OPS,wRC+,WAR*,BABIP
3155,김강민,71837,39,122,260,0.2,0.096,0.0308,0.735,95.1,0.88,0.222
3156,김강민,71837,40,84,202,0.193,0.089,0.0248,0.824,129.5,1.33,0.308
3157,김강민,71837,41,70,166,0.229,0.139,0.012,0.627,79.8,0.16,0.225


In [None]:
# Print the aggregated totals (including BABIP) for the third listed player.
player_totals = total_BABIP(hitter_data, new_player_list[2][0], new_player_list[2][1])
print(player_totals)

# Season lines for 2021-2023, limited to the columns of interest.
recent_seasons = search_player(hitter_data,
                               new_player_list[2][0],
                               new_player_list[2][1],
                               [2021, 2022, 2023])
recent_seasons[['name', 'id', '연도', '나이', 'G', '타석', '삼진 비율', '볼넷 비율',
                '타석 당 홈런', 'OPS', 'wRC+', 'WAR*','BABIP']]

              안타   홈런    타석   삼진  희비  BABIP
id    name                                 
76812 이재원   1087  108  4471  623  53  0.258


Unnamed: 0,name,id,연도,나이,G,타석,삼진율,볼넷율,타석 당 홈런,OPS,wRC+,WAR*,BABIP
4944,이재원,76812,2021,33,107,313,0.15,0.093,0.0096,0.72,95.2,1.5,0.275
4945,이재원,76812,2022,34,105,279,0.158,0.09,0.0143,0.574,57.8,0.28,0.184
4946,이재원,76812,2023,35,27,48,0.167,0.042,0.0,0.241,-43.3,-0.56,0.098


## Analysis with tracking data

Load the tracking data

In [None]:
# Load the hitter tracking CSV, using its first column as the row index.
hitter_tracking = pd.read_csv(
    '../datasets/hitter_tracking_data.csv',
    index_col=0,
)

# Show the available columns.
hitter_tracking.columns

Index(['연도', '선수', '팀', '구종', '상대%', '타구 속도', '상하 각도', '타율', '장타율', '땅볼%',
       '라인드라이브%', '뜬공%', '팝플라이%', '강한 타구%', '안타', '안타 타구 속도', '홈런', '홈런 타구 속도',
       '장타', '장타 타구 속도'],
      dtype='object')

Display a player's year-by-year tracking data for the most recent three seasons (2021–2023)

In [None]:
# Rows for 2021-2023 belonging to the named player.
in_recent_seasons = hitter_tracking['연도'].isin(range(2021,2024))
is_target_player = hitter_tracking['선수'] == '안치홍'

# Keep only the batted-ball columns of interest, ordered by season.
(
    hitter_tracking.loc[
        in_recent_seasons & is_target_player,
        ['연도', '선수', '타구 속도', '상하 각도', '장타율', '땅볼%', '라인드라이브%', '뜬공%', '팝플라이%',
         '강한 타구%', '홈런', '장타'],
    ]
    .sort_values(by='연도')
    .reset_index(drop=True)
)

Unnamed: 0,연도,선수,타구 속도,상하 각도,장타율,땅볼%,라인드라이브%,뜬공%,팝플라이%,강한 타구%,홈런,장타
0,2021,안치홍,136.6,15.2,0.464,35.7,37.6,16.9,9.9,29.3,10,42
1,2022,안치홍,135.9,17.8,0.44,33.8,31.1,23.2,12.0,24.8,14,43
2,2023,안치홍,134.2,20.7,0.406,33.0,29.0,18.2,19.9,21.9,8,29


In [None]:
player_name = "채은성"
# Rows for 2021-2023 belonging to `player_name` (set on the line above this block).
in_recent_seasons = hitter_tracking['연도'].isin(range(2021,2024))
is_target_player = hitter_tracking['선수'] == player_name

# Keep only the batted-ball columns of interest, ordered by season.
(
    hitter_tracking.loc[
        in_recent_seasons & is_target_player,
        ['연도', '선수', '타구 속도', '상하 각도', '장타율', '땅볼%', '라인드라이브%', '뜬공%', '팝플라이%',
         '강한 타구%', '홈런', '장타'],
    ]
    .sort_values(by='연도')
    .reset_index(drop=True)
)

Unnamed: 0,연도,선수,타구 속도,상하 각도,장타율,땅볼%,라인드라이브%,뜬공%,팝플라이%,강한 타구%,홈런,장타
0,2021,채은성,137.8,19.4,0.452,32.1,28.6,24.7,14.6,27.9,16,36
1,2022,채은성,137.5,17.6,0.437,32.5,33.0,23.5,11.0,27.8,12,40
2,2023,채은성,138.2,21.7,0.428,25.3,33.8,24.4,16.5,33.0,23,40


In [None]:
player_name = "정은원"
# Rows for 2021-2023 belonging to `player_name` (set on the line above this block).
in_recent_seasons = hitter_tracking['연도'].isin(range(2021,2024))
is_target_player = hitter_tracking['선수'] == player_name

# Keep only the batted-ball columns of interest, ordered by season.
(
    hitter_tracking.loc[
        in_recent_seasons & is_target_player,
        ['연도', '선수', '타구 속도', '상하 각도', '장타율', '땅볼%', '라인드라이브%', '뜬공%', '팝플라이%',
         '강한 타구%', '홈런', '장타'],
    ]
    .sort_values(by='연도')
    .reset_index(drop=True)
)

Unnamed: 0,연도,선수,타구 속도,상하 각도,장타율,땅볼%,라인드라이브%,뜬공%,팝플라이%,강한 타구%,홈런,장타
0,2021,정은원,130.4,14.5,0.385,35.2,36.9,20.8,7.0,9.6,6,33
1,2022,정은원,128.9,16.7,0.365,31.7,37.1,19.2,12.0,2.1,7,29
2,2023,정은원,130.1,21.3,0.268,31.1,29.9,19.7,19.3,8.7,2,14
