In [120]:
from pybaseball import statcast_batter
from pybaseball import batting_stats_bref
from pybaseball import retrosheet
from pybaseball import playerid_lookup
from pybaseball.lahman import *
from pybaseball import playerid_reverse_lookup
import pandas as pd

In [97]:
download_lahman()

In [98]:
batting = batting()

In [99]:
recent_bats = batting[batting['yearID'] > 2000]

In [100]:
career_totals = recent_bats.groupby('playerID').sum()
career_totals.reset_index()

# Limit to only players with 100+ AB
career_totals = career_totals[career_totals['AB'] > 100]

# Set career stat columns
# Batting Average
career_totals['BA'] = career_totals['H'] / career_totals['AB']
# 2B/AB
career_totals['2B/AB'] = career_totals['2B'] / career_totals['AB']
# 3B/AB
career_totals['3B/AB'] = career_totals['3B'] / career_totals['AB']
# HR/AB
career_totals['HR/AB'] = career_totals['HR'] / career_totals['AB']
# RBI/G
career_totals['RBI/G'] = career_totals['RBI'] / career_totals['G']
# R/G
career_totals['R/G'] = career_totals['R'] / career_totals['G']
# BB/G
career_totals['BB/G'] = career_totals['BB'] / career_totals['G']
# SB/G
career_totals['SB/G'] = career_totals['SB'] / career_totals['G']
# HBP/G
career_totals['HBP/G'] = career_totals['HBP'] / career_totals['G']




In [101]:
# Drop columns we don't need
career_totals = career_totals.iloc[:,19:]
career_totals = career_totals.reset_index()

In [102]:
data = pd.read_csv('../meana2020.txt', header=None)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2424 entries, 0 to 2423
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       2424 non-null   object
 1   1       2424 non-null   object
 2   2       2424 non-null   object
 3   3       2424 non-null   object
 4   4       2424 non-null   object
 5   5       798 non-null    object
 6   6       523 non-null    object
 7   7       265 non-null    object
 8   8       2424 non-null   int64 
 9   9       2424 non-null   int64 
 10  10      2424 non-null   int64 
 11  11      2424 non-null   int64 
 12  12      2424 non-null   int64 
 13  13      2424 non-null   object
 14  14      2424 non-null   object
 15  15      2424 non-null   object
dtypes: int64(5), object(11)
memory usage: 303.1+ KB


In [103]:
columns = {0:'game_id', 1:'batter', 2:'batter_hand', 3:'pitcher', 4:'pitcher_hand',
           5:'run_first', 6:'run_second', 7:'run_third', 8:'result', 9:'rbi',
           10:'first_dest', 11:'second_dest', 12:'third_dest',13:'sb_first',
           14:'sb_second', 15:'sb_third'}

new_data = data.rename(columns=columns)

In [118]:

def calc_runs(game_df, batter, hrs):
    runs = hrs
    runs += game_df[(game_df['run_first'] == batter) & (game_df['first_dest'].isin([4,5,6]))].shape[0]
    runs += game_df[(game_df['run_second'] == batter) & (game_df['second_dest'].isin([4,5,6]))].shape[0]
    runs += game_df[(game_df['run_third'] == batter) & (game_df['third_dest'].isin([4,5,6]))].shape[0]
    return runs

def calc_sb(game_df, batter):
    sb = 0 
    sb += game_df[(game_df['run_first'] == batter) & (game_df['sb_first'] == 'T')].shape[0]
    sb += game_df[(game_df['run_second'] == batter) & (game_df['sb_second'] == 'T')].shape[0]
    sb += game_df[(game_df['run_third'] == batter) & (game_df['sb_third'] == 'T')].shape[0]
    return sb

In [119]:
games = new_data['game_id'].unique()
for game in games:
    current_game = new_data[new_data['game_id'] == game]
    for batter in current_game['batter'].unique():
        current_batter = current_game[current_game['batter'] == batter]
        singles = current_batter[current_batter['result'] == 20].shape[0]
        doubles = current_batter[current_batter['result'] == 21].shape[0]
        triples = current_batter[current_batter['result'] == 22].shape[0]
        hrs = current_batter[current_batter['result'] == 23].shape[0] 
        rbis = current_batter['rbi'].sum()
        runs = calc_runs(current_game, batter, hrs)
        walks = current_batter[current_batter['result'] == 14].shape[0]
        sb = calc_sb(current_game, batter)
        hbp = current_batter[current_batter['result'] == 16].shape[0]
        fantasy_points = 3*singles + 6*doubles + 9*triples + 12*hrs + 3.5*rbis + 3.2*runs + 3*walks + 6*sb + 3*hbp
        
        # Convert name to that used by Baseball-Reference
        bbref_name = playerid_reverse_lookup(['semim001'], key_type='retro')['key_bbref']
        career_nums = career_totals['playerID'] == bbref_name
        
        
  

semim001 1
laurr001 0
olsom001 0
chapm001 0
fletd002 0
lastt001 0
troum001 0
renda001 0
ohtas001 1
pujoa001 0
canhm001 0
grosr001 0
davik003 0
kempt001 0
allea001 0
castj006 0
uptoj001 0
goodb001 0
rengl001 0
machv001 0


In [124]:
playerid_reverse_lookup(['semim001'], key_type='retro')

Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last
0,semien,marcus,543760,semim001,semiema01,12533,2013.0,2021.0


In [127]:
career_totals[career_totals['playerID'] == 'semiema01']


Unnamed: 0,playerID,BA,2B/AB,3B/AB,HR/AB,RBI/G,R/G,BB/G,SB/G,HBP/G
1768,semiema01,0.254133,0.052051,0.006736,0.035211,0.44289,0.544289,0.379953,0.076923,0.008159
