In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import statsapi
import datetime
import string
import json
import os

In [None]:
# Comprehensive matchups file.
# The path is assembled with os.path.join so the notebook is not tied to the
# Windows path separator; on Windows the resulting path is unchanged.
matchups = pd.read_csv(os.path.join("data", "matchups", "all_matchups.csv"), index_col=0)

# Sorted unique batter ids and pitcher ids found in the matchups frame.
batter_list = sorted(matchups['batter_id'].unique())
pitcher_list = sorted(matchups['pitcher_id'].unique())

In [None]:
# Player/Team lookup files.
# Paths are built with os.path.join so they resolve on any platform, not only
# where "\\" is the separator.
player_search = pd.read_csv(os.path.join("data", "player_search.csv"), encoding="ISO-8859-1")
team_search = pd.read_csv(os.path.join("data", "team_search.csv"))

# Constants (each file's first column becomes the index).
wOBA_constants = pd.read_csv(os.path.join("data", "wOBA_constants.csv"), index_col=0)
park_factors = pd.read_csv(os.path.join("data", "park_factors.csv"), index_col=0)
league_averages = pd.read_csv(os.path.join("data", "league_averages.csv"), index_col=0)

In [None]:
def _load_json(filename):
    """Return the parsed contents of a JSON file located in the data directory.

    The path is built with os.path.join so it resolves regardless of the
    platform's path separator.
    """
    with open(os.path.join("data", filename)) as f:
        return json.load(f)


# Player hands dictionaries
batter_hands = _load_json("batter_hands.json")
pitcher_hands = _load_json("pitcher_hands.json")

# Player platoon stats dictionary
batter_platoons = _load_json("batter_platoons.json")
pitcher_platoons = _load_json("pitcher_platoons.json")

In [None]:
def _platoon_value(row, lookup, id_column, hand_column, position):
    """Return lookup[str(row[id_column])][row[hand_column]][position], or 0 if unavailable.

    Only lookup failures are treated as "no data": a missing row field, a
    player or hand key that is absent (KeyError), an entry that is too short
    (IndexError), or an entry that cannot be indexed (TypeError). Any other
    error propagates instead of being silently replaced with 0.
    """
    try:
        return lookup[f"{row[id_column]}"][row[hand_column]][position]
    except (KeyError, IndexError, TypeError):
        return 0


def add_platoon_stats(row, batter_lookup=None, pitcher_lookup=None):
    """Attach platoon-split values for the row's batter and pitcher.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series passed by DataFrame.apply)
        Read for 'batter_id', 'pitcher_id', 'batter_hand' and 'pitcher_hand'.
    batter_lookup : dict, optional
        Nested mapping indexed as [str(batter_id)][pitcher_hand][position].
        Defaults to the module-level ``batter_platoons``.
    pitcher_lookup : dict, optional
        Nested mapping indexed as [str(pitcher_id)][batter_hand][position].
        Defaults to the module-level ``pitcher_platoons``.

    Returns
    -------
    The same ``row`` object with four fields set: 'h_platoon_ab' and
    'h_platoon_wOBA' (positions 0 and 1 of the batter entry) and
    'p_platoon_ab' and 'p_platoon_wOBA' (positions 0 and 1 of the pitcher
    entry). Each field falls back to 0 independently when its value cannot
    be looked up.
    """
    if batter_lookup is None:
        batter_lookup = batter_platoons
    if pitcher_lookup is None:
        pitcher_lookup = pitcher_platoons

    # The batter's split is keyed by the hand of the pitcher he is facing.
    row['h_platoon_ab'] = _platoon_value(row, batter_lookup, 'batter_id', 'pitcher_hand', 0)
    row['h_platoon_wOBA'] = _platoon_value(row, batter_lookup, 'batter_id', 'pitcher_hand', 1)

    # The pitcher's split is keyed by the hand of the batter he is facing.
    row['p_platoon_ab'] = _platoon_value(row, pitcher_lookup, 'pitcher_id', 'batter_hand', 0)
    row['p_platoon_wOBA'] = _platoon_value(row, pitcher_lookup, 'pitcher_id', 'batter_hand', 1)

    return row


# Create the four platoon columns filled with zeros, then let
# add_platoon_stats fill them in for every row of the matchups frame.
for platoon_column in ('h_platoon_ab', 'h_platoon_wOBA', 'p_platoon_ab', 'p_platoon_wOBA'):
    matchups[platoon_column] = 0

matchups = matchups.apply(add_platoon_stats, axis=1)

In [None]:
def _count_outcomes(outcome_counts, events):
    """Add up the counts of the listed outcome codes, ignoring codes that never occurred."""
    return sum(int(outcome_counts[event]) for event in events if event in outcome_counts.index)


def _rate_per_pa(count, num_pa):
    """Return count / num_pa rounded to three decimals, or 0 when num_pa is 0."""
    if not num_pa:
        return 0
    return round(count / num_pa, 3)


def add_h2h_stats(row, all_matchups=None, woba_weights=None):
    """Attach head-to-head (batter vs. pitcher) summary stats to a matchup row.

    Parameters
    ----------
    row : pandas Series (as passed by DataFrame.apply with axis=1)
        Read for 'batter_id' and 'pitcher_id'.
    all_matchups : DataFrame, optional
        Frame with 'batter_id', 'pitcher_id' and 'outcome' columns to search.
        Defaults to the module-level ``matchups``.
    woba_weights : mapping-like, optional
        Provides 'wBB', 'wHBP', 'w1B', 'w2B', 'w3B' and 'wHR'. Defaults to
        the 'Average' row of the module-level ``wOBA_constants``.

    Returns
    -------
    The same ``row`` with 'h2h_wOBA', 'h2h_pa' and the per-plate-appearance
    rates 'h2h_1B_pa', 'h2h_2B_pa', 'h2h_3B_pa', 'h2h_HR_pa', 'h2h_BB_pa'
    and 'h2h_K_pa' set. All rates are rounded to three decimals and are 0
    when no row matches the batter/pitcher pair.

    Notes
    -----
    NOTE(review): the sample is every row of ``all_matchups`` with the same
    batter and pitcher ids, so it includes the row's own plate appearance;
    confirm that is intended if these columns feed a predictive model.
    NOTE(review): every call scans the whole frame, so applying this to each
    row is quadratic in the number of rows.
    """
    if all_matchups is None:
        all_matchups = matchups
    if woba_weights is None:
        woba_weights = wOBA_constants.loc['Average']

    same_pair = (
        (all_matchups['batter_id'] == row['batter_id'])
        & (all_matchups['pitcher_id'] == row['pitcher_id'])
    )
    h2h_stats = all_matchups.loc[same_pair]

    num_pa = len(h2h_stats)
    # Number of occurrences of each outcome code within this pairing.
    outcomes = h2h_stats.groupby('outcome').size()

    # Hit codes carry a 1-9 suffix; every suffix counts toward the same hit type.
    singles = _count_outcomes(outcomes, [f"h_S{n}" for n in range(1, 10)])
    doubles = _count_outcomes(outcomes, [f"h_D{n}" for n in range(1, 10)])
    triples = _count_outcomes(outcomes, [f"h_T{n}" for n in range(1, 10)])
    home_runs = _count_outcomes(outcomes, ['h_HR'])
    bb = _count_outcomes(outcomes, ['p_W'])
    ibb = _count_outcomes(outcomes, ['IW'])
    k = _count_outcomes(outcomes, ['p_K'])
    hbp = _count_outcomes(outcomes, ['HP'])

    # At-bats are taken as every recorded outcome except 'p_W' and 'HP'.
    at_bats = outcomes.sum() - bb - hbp

    wOBA_num = (
        (woba_weights['wBB'] * bb)
        + (woba_weights['wHBP'] * hbp)
        + (woba_weights['w1B'] * singles)
        + (woba_weights['w2B'] * doubles)
        + (woba_weights['w3B'] * triples)
        + (woba_weights['wHR'] * home_runs)
    )
    wOBA_denom = at_bats + (bb - ibb) + hbp

    if wOBA_denom:
        wOBA = round(wOBA_num / wOBA_denom, 3)
    else:
        wOBA = 0

    row['h2h_wOBA'] = wOBA
    row['h2h_pa'] = num_pa
    # _rate_per_pa guards against num_pa == 0 (e.g. NaN ids never compare
    # equal, so such a row matches nothing) instead of dividing by zero.
    row['h2h_1B_pa'] = _rate_per_pa(singles, num_pa)
    row['h2h_2B_pa'] = _rate_per_pa(doubles, num_pa)
    row['h2h_3B_pa'] = _rate_per_pa(triples, num_pa)
    row['h2h_HR_pa'] = _rate_per_pa(home_runs, num_pa)
    row['h2h_BB_pa'] = _rate_per_pa(bb, num_pa)
    row['h2h_K_pa'] = _rate_per_pa(k, num_pa)

    return row


# Compute head-to-head stats for every row. add_h2h_stats reads the
# module-level `matchups`, which still refers to the pre-apply frame until
# this assignment completes.
matchups = matchups.apply(add_h2h_stats, axis=1)

# Write the enriched frame; os.path.join keeps the output path portable
# (on Windows the resulting path is unchanged).
matchups.to_csv(os.path.join("data", "matchups", "all_matchups_test.csv"))