# Position Player fWAR
___

           {    (Batting Runs + Base Running Runs + Fielding Runs +        }
    fWAR = {  Positional Adjustment + League Adjustment +Replacement Runs) }
           {                      / (Runs Per Win)                         }


## Dynamic inputs

In [1]:
import pandas as pd
import numpy as np
import sqlite3

import config

In [2]:
con = sqlite3.connect(config.mlb_db_path)

In [3]:
# df = pd.read_sql_query("""SELECT * 
#                        FROM {} 
#                        WHERE (gameid like '%OAK%' OR vteam='OAK')
#                        """.format('bevent'),
#                        con=con)
df = pd.read_sql_query("""SELECT * 
                       FROM {}
                       WHERE gameid like '%2018%'
                       """.format('bevent'),
                       con=con)

In [4]:
play_defs = {
     0:'Unknown event',
     1:'No event',
     2:'Generic out',
     3:'Strikeout',
     4:'Stolen base',
     5:'Defensive indifference',
     6:'Caught stealing',
     7:'Pickoff error',
     8:'Pickoff',
     9:'Wild pitch',
     10:'Passed ball',
     11:'Balk',
     12:'Other advance',
     13:'Foul error',
     14:'Walk',
     15:'Intentional walk',
     16:'Hit by pitch',
     17:'Interference',
     18:'Error',
     19:'Fielders choice',
     20:'Single',
     21:'Double',
     22:'Triple',
     23:'Home run',
     24:'Missing play'}

## Batting Runs
___
    player’s:
        wOBA
        PA
        home park factor 
    League: 
        Average wOBA (lgwOBA)
        the wOBA Scale
        MLB R/PA (lgR/PA)
        Specific league (AL or NL) wRC and PA for non-pitchers
___
    >> wRAA = ((wOBA – lgwOBA)/wOBA Scale) * PA
    >>> Batting Runs = wRAA + (lgR/PA – (PF*lgR/PA))*PA + (lgR/PA – (AL or NL non-pitcher wRC/PA))*PA

In [5]:
class metric_calculator(object):
    def __init__(self, data):
        self.mts = stat_metrics(data)
        self.metric_fxns = {
            'BA':self.mts.batting_avg,
            'SLG':self.mts.slugging,
            'wOBA':self.mts.wOBA,
            'wRAA':self.mts.wRAA,
            'UZR':self.mts.UZR,
            'fWAR':self.mts.fWAR}
            
    def calculate(self, player_ids, metric):
        self.metric_exists(metric)
        metric_calc = self.metric_fxns[metric]
        value = metric_calc(player_ids)
        return value
        
    def metric_exists(self, metric):
        assert metric in self.metric_fxns.keys(), "Invalid metric, metric not defined"
        return None

In [140]:
class stat_metrics(object):
    def __init__(self, data):
        self.df = data
        self.fg_constants = {'wOBA':.315,
                        'wOBAScale':1.226,
                        'wBB':.690,
                        'wHBP':.720,
                        'w1B':.880,
                        'w2B':1.247,
                        'w3B':1.578,
                        'wHR':2.031}
        self.position_adj = {1:0,
            2:+12.5,
            3:-12.5,
            4:+2.5,
            6:+7.5,
            5:+2.5,
            7:-7.5,
            8:+2.5,
            9:-7.5,
            10:-17.5}
        
    def pre_process(self, df, process_dict):
#         event_specs =  pd.DataFrame(
#             index=np.arange(26)
#             ).join(df.groupby('eventtype').agg(process_dict)).fillna(0)
        event_specs = df.groupby('eventtype').agg(process_dict)
        event_specs = self.check_eventtype(event_specs)
        return event_specs
    
    def check_eventtype(self,df):
        for num in range(26):
            if num not in df.index:
                df.loc[num] = 0.0
            else:
                pass
        return df
    
    def player_df(self, player_ids, stat_type='batter'):
        if type(player_ids) == str:
            player_ids = [player_id]
        elif type(player_ids) == list:
            pass
        else:
            assert "Invalid Player ID/List"
        
        return self.df[self.df[stat_type].isin(player_ids)]
            
    #####################################
    ############ BATTING  ###############
    #####################################
    def wOBA(self, player_ids):
        df = self.player_df(player_ids=player_ids, stat_type='batter')
        ## General Stats
        df_ = self.pre_process(df, {'gameid':'count'})['gameid']
        hbp = df_.loc[16]
        ibb = df_.loc[15]
        bb = df_.loc[14] + ibb
        b1 = df_.loc[20]
        b2 = df_.loc[21]
        b3 = df_.loc[22]
        hr = df_.loc[23]
        ab = len(df[df['abflag'] == 'T'])
        ibb = df_.loc[15]
        ## SF calculation
        sf = len(df[df['sfflag'] == 'T'])
        value = (
            (self.fg_constants['wBB']*(bb-ibb) + 
            self.fg_constants['wHBP']*hbp + 
            self.fg_constants['w1B']*b1 + 
            self.fg_constants['w2B']*b2 + 
            self.fg_constants['w3B']*b3 + 
            self.fg_constants['wHR']*hr)/
                (ab+bb-ibb+sf+hbp))
        return value
    
    def batting_avg(self, player_ids):
        df = self.player_df(player_ids=player_ids, stat_type='batter')
        df_ = self.pre_process(df, {'gameid':'count'})['gameid']
        b1 = df_.loc[20]
        b2 = df_.loc[21]
        b3 = df_.loc[22]
        hr = df_.loc[23]
        ## AB flag count
        ab = len(df[df['abflag'] == 'T'])
        value = (b1+b2+b3+hr)/ab
        return value

    def slugging(self, player_ids):
        df = self.player_df(player_ids=player_ids, stat_type='batter')
        df_ = self.pre_process(df, {'gameid':'count'})['gameid']
        b1 = df_.loc[20]
        b2 = df_.loc[21]
        b3 = df_.loc[22]
        hr = df_.loc[23]
        ## AB flag count
        ab = len(df[df['abflag'] == 'T'])
        value = (b1+(b2*2)+(b3*3)+(hr*4))/ab
        return value
    
    def wRAA(self, player_ids):
        df = self.player_df(player_ids=player_ids, stat_type='batter')
        wOBA = self.wOBA(player_ids=player_ids)
        df_ = self.pre_process(df, {'gameid':'count'})['gameid']
        ab = len(df[df['abflag'] == 'T'])
        hbp = df_.loc[16]
        ibb = df_.loc[15]
        bb = df_.loc[14] + ibb
        sh = len(df[df['shflag'] == 'T'])
        ## SF calculation
        sf = len(df[df['sfflag'] == 'T'])
        value = ((wOBA-self.fg_constants['wOBA'])/self.fg_constants['wOBAScale'])*(ab+bb+hbp+sf+sh)
        return value
    
    def UZR(self, player_ids):
        df = self.player_df(player_ids=player_ids, stat_type='batter')
        value = 0
        return value
    
    def position_determination(self, player_ids):
        df = self.player_df(player_ids=player_ids, stat_type='batter')
        pos_group = df[['gameid','defensiveposition']].groupby('defensiveposition').count()
        position = pos_group.sort_values('gameid', ascending=False).index.tolist()[0]
        return position
    
    def fWAR(self, player_ids):
        df = self.player_df(player_ids=player_ids, stat_type='batter')
        wRAA = self.wRAA(player_ids)
        UZR = self.UZR(player_ids)
        pos = self.position_determination(player_ids)
        position = self.position_adj[pos]
        pa = len(df)
        value = wRAA + 0 + position + (20/600)*pa
        return value
        

In [141]:
calc = metric_calculator(data=df)

In [142]:
df['year'] = df['gameid'].apply(lambda x: int(x[3:7]))

In [143]:
team_id = 'ANA'

team = list(set(
    list(set(df[(df['year'] == 2018) & (df['gameid'].str.contains(team_id)) & (df['battingteam'] == 1)]['batter'])) + 
    list(set(df[(df['year'] == 2018) & (df['vteam']==team_id) & (df['battingteam'] == 0)]['batter']))
))

In [144]:
for player_id in team:
    print('{}:\t{}'.format(player_id, calc.calculate(player_ids=[player_id], metric='fWAR')))
#     except:
#         print('No Data')

penaf002:	-1.54159869494
pujoa001:	-18.8235750655
hermm001:	-9.81729200653
calhk001:	-21.7392872106
blasj001:	-13.0774877651
gratj001:	12.1859706362
wardt002:	-4.68433931485
mcgud001:	-0.0530179445351
valbl001:	-12.0998797974
troum001:	67.7051246475
bricj001:	10.9248885692
maldm001:	-0.686814702994
youne003:	-4.53099510604
hudsj002:	11.1517128874
younc004:	-12.364266355
barrj003:	-1.28466557912
johns002:	0.26101141925
cowak001:	-8.77406199021
arcif001:	9.20554649266
kinsi001:	-4.9339314845
ohtas001:	4.80377086209
despo001:	-0.566884176183
fontn001:	6.63621533442
uptoj001:	9.07189542484
heana001:	-1.33768352365
schir001:	3.4836867863
simma001:	12.8841802815
martj007:	-18.3246329527
river003:	11.2659053834
cozaz001:	-2.80424143556
fernj004:	-13.6150081566
fletd002:	-1.99498529664


In [97]:
df[df['batter'].isin(['penaf002'])][['gameid','defensiveposition']].groupby('defensiveposition').count()

Unnamed: 0_level_0,gameid
defensiveposition,Unnamed: 1_level_1
1,6


In [87]:
df.columns

Index([u'gameid', u'vteam', u'inning', u'battingteam', u'outs', u'balls',
       u'strikes', u'pitchsequence', u'vscore', u'hscore', u'batter',
       u'batterhand', u'resbatter', u'resbatterhand', u'pitcher',
       u'pitcherhand', u'respitcher', u'respitcherhand', u'catcher',
       u'firstbase', u'secondbase', u'thirdbase', u'shortstop', u'leftfield',
       u'centerfield', u'rightfield', u'firstrunner', u'secondrunner',
       u'thirdrunner', u'eventtext', u'leadoff', u'pinchhit',
       u'defensiveposition', u'lineupposition', u'eventtype',
       u'battereventflag', u'abflag', u'hitvalue', u'shflag', u'sfflag',
       u'outsonplay', u'doubleplayflag', u'tripleplayflag', u'rbionplay',
       u'wildpitchflag', u'passedballflag', u'fieldedby', u'battedballtype',
       u'buntflag', u'foulflag', u'hitlocation', u'numerrors', u'firsterror',
       u'firsterrortype', u'seconderror', u'seconderrortype', u'thirderror',
       u'thirderrortype', u'batterdest', u'firstdest', u'seconddest',