# Predicting Optimal Challenge Usage

# Part I Modeling Expected Payoff of Successful Challenge

In [85]:
import pandas as pd

In [86]:
win_exp_table=pd.read_csv('WinExp/BigTable-Table 1.csv')

In [87]:
win_exp_table.head()

Unnamed: 0.1,Unnamed: 0,Inning,HalfInning,BaseSit,Outs,BaseOuts,InnBaseOut,-15,-14,-13,...,11,12,13,14,15,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42
0,Top,1.0,11.0,1.0,0.0,10.0,1110.0,0.1%,0.2%,0.3%,...,0.0%,0.0%,0.0%,0.0%,0.0%,,,,,
1,Top,1.0,11.0,2.0,0.0,20.0,1120.0,0.1%,0.2%,0.3%,...,,,,,,,,,,
2,Top,1.0,11.0,3.0,0.0,30.0,1130.0,0.1%,0.1%,0.2%,...,,,,,,,,,,
3,Top,1.0,11.0,4.0,0.0,40.0,1140.0,0.1%,0.1%,0.2%,...,,,,,,,,,,
4,Top,1.0,11.0,5.0,0.0,50.0,1150.0,0.1%,0.1%,0.2%,...,,,,,,,,,,


In [88]:
#function takes inning, half, runner on each base, out, run difference and outputs win prob
#call it twice to see result of challenge
def win_exp(inning,half,first,second,third,outs,run_dif):
    sit=int(str(first)+str(second)+str(third),2)+1
    sit_code=str(inning)+str(half)+str(sit)+str(outs)
    return float(win_exp_table[win_exp_table.InnBaseOut==int(sit_code)][str(run_dif)].tolist()[0].strip('%'))/100

# Part II Modeling probability of success of challenge

In [89]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [90]:
#scrape all replays from 2014 season from mlb.com
mlb14=requests.get('https://baseballsavant.mlb.com/replay?challenge_type=&year=2014&challenger=teamchallenging&team=')

In [6]:
#same for 15
mlb15=requests.get('https://baseballsavant.mlb.com/replay?challenge_type=&year=2015&challenger=teamchallenging&team=')

In [91]:
#same for 16
mlb16=requests.get('https://baseballsavant.mlb.com/replay?challenge_type=&year=2016&challenger=teamchallenging&team=')

In [92]:
#format
data=[BeautifulSoup(season.text) for season in [mlb14,mlb15,mlb16]]

In [93]:
#initialize lists to put data into
num=[]
date=[]
teams=[]
challenger=[]
play_type=[]
overturned=[]
pitcher=[]
batter=[]
inning=[]
description=[]

In [94]:
#iterate through data to place on lists
for season in data:
    for event in season.table.findAll('tr')[1::2]:
        num.append(event.findAll('td')[0].text)
        date.append(event.findAll('td')[1].text)
        teams.append(event.findAll('td')[2].text)
        challenger.append(event.findAll('td')[3].text)
        play_type.append(event.findAll('td')[4].text)
        overturned.append(event.findAll('td')[5].text)
        pitcher.append(event.findAll('td')[6].text)
        batter.append(event.findAll('td')[7].text)
        inning.append(event.findAll('td')[8].text)

    for event in season.table.findAll('tr')[2::2]:
        description.append(event.text)       
        
        
    

In [95]:
#create column names
titles=['num',
'date',
'teams',
'challenger',
'play_type',
'overturned',
'pitcher',
'batter',
'inning',
'description']

In [96]:
#reformat and put into df
challenges=pd.DataFrame(np.matrix([num,
date,
teams,
challenger,
play_type,
overturned,
pitcher,
batter,
inning,description]).T,columns=titles)

In [97]:
#make inning into int
challenges['inning']=pd.to_numeric(challenges['inning'])

In [98]:
#read in text from retrosheet, copied and pasted from http://retrosheet.org/Replay2016.htm and saved locally
ump_stats_head=open('mlbreplay.txt').read().split('\n')[0] #the header
ump_stats=open('mlbreplay.txt').read().split('\n')[1:] #the rest of the data

In [99]:
#wasn't formatted nicely to read in, so figured out spacing 
#inds are simply the indexes that precede a new column
inds=[10,14,18,21,45,65,87,99] 

In [100]:
#copied over header, but inserted commas bf new column
ump_stats_head_fixed=''
for num,char in enumerate(ump_stats_head):
    if num in inds:
        ump_stats_head_fixed+=','
    else:
        ump_stats_head_fixed+=char

In [101]:
#split and stripped each column title
ump_stats_head_fixed=[word.rstrip() for word in ump_stats_head_fixed.split(',')]

In [102]:
#added new column to classify if umpire challenging or manager
ump_stats_head_fixed=ump_stats_head_fixed[:4]+['Init_type']+ump_stats_head_fixed[4:]

In [103]:
#same thing as before except transformed the data into a list of lists of vals
ump_stats_fixed=[]
for line in ump_stats:
    info=''
    for num,char in enumerate(line):
        if num in inds or num==25:
            info+=','
        else:
            info+=char
    ump_stats_fixed.append(map(str.rstrip,info.split(',')))


In [104]:
#read into df
ump_table=pd.DataFrame(ump_stats_fixed,columns=ump_stats_head_fixed)

In [105]:
ump_table.head()

Unnamed: 0,Date,Tm,Opp,In,Init_type,Initiator,Umpire,Type,Ruling,Time
0,03/31/2014,ATL,MIL,6,Mgr,Fredi Gonzalez,Greg Gibson,Force play,Overturned,1:05
1,03/31/2014,WAS,NYN,10,Mgr,Matt Williams,Clint Fagan,Force play,Confirmed,1:44
2,03/31/2014,CLE,OAK,6,Ump,Mike Winters,Mike Winters,HP collision,Confirmed,1:06
3,03/31/2014,CHN,PIT,5,Mgr,Rich Renteria,Bob Davidson,Force play,Confirmed,1:39
4,03/31/2014,PIT,CHN,10,Mgr,Clint Hurdle,Bob Davidson,Tag play,Overturned,1:46


In [106]:
#stripped out whitespace
for col in ump_table:
    try:
        ump_table[col].apply(lambda x: x.rstrip(),axis=1)
    except:
        pass

In [107]:
#created dictionary to map full name of team to abbreviation, to merge together both datasets
team_abbrev={
    'Giants':'SFN',
    'Angels':'ANA',
    'Astros':'HOU',
    'Athletics':'OAK',
    'Blue Jays':'TOR',
    'Braves':'ATL',
    'Brewers':'MIL',
    'Cardinals':'SLN',
    'Cubs':'CHN',
    'D-backs':'ARI',
    'Dodgers':'LAN',
    'Indians':'CLE',
    'Mariners':'SEA',
    'Marlins':'MIA',
    'Mets':'NYN',
    'Nationals':'WAS',
    'Orioles':'BAL',
    'Padres':'SDN',
    'Phillies':'PHI',
    'Pirates':'PIT',
    'Rangers':'TEX',
    'Rays':'TBA',
    'Red Sox':'BOS',
    'Reds':'CIN',
    'Rockies':'COL',
    'Royals':'KCA',
    'Tigers':'DET',
    'Twins':'MIN',
    'White Sox':'CHA',
    'Yankees':'NYA',
    'Umpire':'UMP',
    'NL':'NL'
    
}

In [108]:
#reverse the dicitonary to enable mapping back and forth
inv_teams = {team_abbr: team for team, team_abbr in team_abbrev.items()}

In [109]:
#make the changes to abbrev.
challenges.challenger=challenges.challenger.apply(lambda x: team_abbrev[x])

In [110]:
ump_table.head()

Unnamed: 0,Date,Tm,Opp,In,Init_type,Initiator,Umpire,Type,Ruling,Time
0,03/31/2014,ATL,MIL,6,Mgr,Fredi Gonzalez,Greg Gibson,Force play,Overturned,1:05
1,03/31/2014,WAS,NYN,10,Mgr,Matt Williams,Clint Fagan,Force play,Confirmed,1:44
2,03/31/2014,CLE,OAK,6,Ump,Mike Winters,Mike Winters,HP collision,Confirmed,1:06
3,03/31/2014,CHN,PIT,5,Mgr,Rich Renteria,Bob Davidson,Force play,Confirmed,1:39
4,03/31/2014,PIT,CHN,10,Mgr,Clint Hurdle,Bob Davidson,Tag play,Overturned,1:46


In [111]:
#create new column in umpire dataset to impute who challenged call
ump_table['challenger2']=np.where(ump_table['Init_type']=='Ump','UMP',ump_table['Tm'])

In [112]:
#map the play category from one dataset to another
play_dict={
    'catch or drop':'Catch/no catch',
 'fair or foul in outfield':'Fair/foul (outfield)',
 'fan interference':'Fan interference',
 'force play':'Force play',
 'grounds rule':'Grounds rule',
 'hit by pitch':'Hit by pitch',
 'home run':'Home run',
 'home-plate collision':'HP collision',
 'other':'Other',
 'passing runners':'Passing runners',
 'play at 1st':'Force play',
 'record keeping':'Rcd keeping',
 'rules check':'Rules check',
 'slide interference':'Slide rule',
 'stadium boundary call':'Boundary call',
 'tag play':'Tag play',
 'tag-up play':'Tag-up',
 'timing play':'Timing play',
 'touching a base':'Touching a base',
 'trap play':'Trap play (outfield)'
}

In [113]:
challenges.play_type=challenges.play_type.apply(lambda x: play_dict[x])

In [114]:
#change innings to num
ump_table.In=pd.to_numeric(ump_table.In)


In [115]:
#change date to datetime
challenges.date=pd.to_datetime(challenges.date)

In [116]:
ump_table.Date=pd.to_datetime(ump_table.Date)

In [117]:
#eliminates duplicate rows, which were found in mlb.com dataset
challenges=challenges.drop_duplicates(['date','description'])

In [118]:
#modify mlb.com df to drop out the more recent challenges that are not yet recorded by retrosheet
short_challenges=challenges[challenges.date<=sorted(ump_table.Date)[-1]]

In [119]:
#merge both datasets
inner_merge=ump_table.merge(short_challenges,how='inner', left_on=['challenger2','In','Date'], right_on=['challenger','inning','date']).sort_values('Date')

In [120]:
#make day of week variable (based on SABR showing it's predictive)
inner_merge['day']=inner_merge.date.dt.dayofweek

In [121]:
#standardize the play call labels
inner_merge.Ruling=inner_merge.Ruling.apply(lambda x: x.rstrip())

In [122]:
#get rid of retrosheet's 'Rcd keeping' box and default to mlb's ruling (assuming 0=confirmed)
inner_merge['ruling_res']=np.where(inner_merge.Ruling=='Rcd keeping', inner_merge.overturned,inner_merge.Ruling)

In [123]:
#delete the All-Star Game entry
inner_merge = inner_merge[inner_merge.challenger2 != 'NL']

In [124]:
#convert ruling to binary number
inner_merge.ruling_res=inner_merge.ruling_res.apply(lambda x: 'Confirmed' if x==0 else 'Overturned' if x==1 else x)

In [125]:
#inputs a feature in the inner_merge dataframe and calculates the 3 part breakdown of accuracy given the feature, 
#returns a dict with the unique features as keys 
def accuracy(feature):
    accuracy_dict={}
    unique_vars=feature.unique()
    for case in unique_vars:
        total=len(inner_merge[feature==case])
        overturned=len(inner_merge[(feature==case)&(inner_merge.ruling_res=='Overturned')])
        stands=len(inner_merge[(feature==case)&(inner_merge.ruling_res=='Stands')])
        confirmed=len(inner_merge[(feature==case)&(inner_merge.ruling_res=='Confirmed')])
        accuracy_dict[case]=[float(overturned)/total,float(stands)/total,float(confirmed)/total,total]
        
    return accuracy_dict


In [126]:
#create dictionaries that break down the distribution of overturned/stands/confirmed for each unique subcategory
challenger_dict=accuracy(inner_merge.challenger2)
umpire_dict=accuracy(inner_merge.Umpire)
play_dict=accuracy(inner_merge.play_type)
day_dict=accuracy(inner_merge.day)
inning_dict=accuracy(inner_merge.inning)

In [127]:
#create new vals for accuracy given each situation
inner_merge['team_overturned']=inner_merge.challenger2.apply(lambda x: challenger_dict[x][0])
inner_merge['team_stands']=inner_merge.challenger2.apply(lambda x: challenger_dict[x][1])
inner_merge['team_confirmed']=inner_merge.challenger2.apply(lambda x: challenger_dict[x][2])
inner_merge['ump_ov']=inner_merge.Umpire.apply(lambda x: umpire_dict[x][0])
inner_merge['ump_st']=inner_merge.Umpire.apply(lambda x: umpire_dict[x][1])
inner_merge['ump_con']=inner_merge.Umpire.apply(lambda x: umpire_dict[x][2])
inner_merge['type_ov']=inner_merge.play_type.apply(lambda x: play_dict[x][0])
inner_merge['type_st']=inner_merge.play_type.apply(lambda x: play_dict[x][1])
inner_merge['type_con']=inner_merge.play_type.apply(lambda x: play_dict[x][2])
inner_merge['day_ov']=inner_merge.day.apply(lambda x: day_dict[x][0])
inner_merge['day_st']=inner_merge.day.apply(lambda x: day_dict[x][1])
inner_merge['day_con']=inner_merge.day.apply(lambda x: day_dict[x][2])
inner_merge['inn_ov']=inner_merge.inning.apply(lambda x: inning_dict[x][0])
inner_merge['inn_st']=inner_merge.inning.apply(lambda x: inning_dict[x][1])
inner_merge['inn_con']=inner_merge.inning.apply(lambda x: inning_dict[x][2])

In [128]:
inner_merge['labels']=inner_merge.overturned.apply(lambda x: 1 if x=='Yes' else 0)

In [129]:
#used for modeling that includes team challenging
X=inner_merge.iloc[:,-16:-1]

In [131]:
#used when exclude team challenging
X2=inner_merge.iloc[:,-13:-1]

In [132]:
y=inner_merge['labels']

In [138]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,log_loss
from sklearn.cross_validation import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from  sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV

Using Logistic Regression and/or SVM to optimize for log-loss, trained on first set

In [139]:
log_reg=LogisticRegression()

In [137]:
cross_val_score(log_reg,X,y,scoring='log_loss').mean()

-0.61478277655527236

In [140]:
svm=SVC(probability=True)

In [141]:
param_grid = [{'kernel': ['rbf'], 'gamma': [1,1e-1,1e-2,1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]},{'kernel': ['linear'], 'C': [1, 10, 100, 1000]},{'kernel':['poly'],'degree':[1,2,3]}]

In [142]:
grid = GridSearchCV(svm, param_grid, scoring='log_loss')
grid.fit(X, y)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid=[{'kernel': ['rbf'], 'C': [1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001]}, {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}, {'kernel': ['poly'], 'degree': [1, 2, 3]}],
       pre_dispatch='2*n_jobs', refit=True, scoring='log_loss', verbose=0)

In [143]:
print grid.best_score_
print grid.best_params_

-0.612855525027
{'kernel': 'linear', 'C': 1000}


In [144]:
svm=SVC(kernel='linear',C=1000,probability=True)

In [145]:
#SVM is better
cross_val_score(svm,X,y,scoring='log_loss').mean()

-0.61291663469162849

In [200]:
svm.fit(X,y)

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Now, same for making model trained for dataset excluding team

In [146]:
log_reg2=LogisticRegression()

In [148]:
cross_val_score(log_reg2,X2,y,scoring='log_loss').mean()

-0.62057711181744113

In [152]:
log_reg2.fit(X2,y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [149]:
svm2=SVC(probability=True)

In [150]:
grid = GridSearchCV(svm2, param_grid, scoring='log_loss')
grid.fit(X2, y)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid=[{'kernel': ['rbf'], 'C': [1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001]}, {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}, {'kernel': ['poly'], 'degree': [1, 2, 3]}],
       pre_dispatch='2*n_jobs', refit=True, scoring='log_loss', verbose=0)

In [151]:
print grid.best_score_
print grid.best_params_

-0.620760065367
{'kernel': 'rbf', 'C': 100, 'gamma': 0.01}


Logistic Regression Performs better

Now, the model will output expected probability. Now we have to convert that value taking into consideration that we are modeling on all "challengeable" plays. To model the total number of reasonably "challengeable" plays find the max for any team. To model the total "successes" find the max of successes for any team. (Maybe come back to it, but find the umpire challenges also).

In [153]:
team_games={'ANA': 3,
 'ARI': 0,
 'ATL': 0,
 'BAL': 7,
 'BOS': 0,
 'CHA': 0,
 'CHN': 9,
 'CIN': 0,
 'CLE': 0,
 'COL': 0,
 'DET': 3,
 'HOU': 6,
 'KCA': 31,
 'LAN': 9,
 'MIA': 0,
 'MIL': 0,
 'MIN': 0,
 'NYA': 1,
 'NYN': 14,
 'OAK': 1,
 'PHI': 0,
 'PIT': 2,
 'SDN': 0,
 'SEA': 0,
 'SFN': 16,
 'SLN': 13,
 'TBA': 0,
 'TEX': 5,
 'TOR': 11,
 'UMP': 0,
 'WAS': 4}

In [154]:
#calculating total umpire games
162*2*30/2+27*3*30/2+25*30/2+67

6517

In [155]:
#calculate games played per team, 162 for 2 + 106 from 2016 (through 7/31) plus playoffs, ump=6515 from previous summing
inner_merge['games_played']=inner_merge.challenger2.apply(lambda x: 162*2+106+team_games[x] if x!='UMP' else 6517)

In [156]:
#total challenges for a team
inner_merge['tot_challenges']=inner_merge.challenger2.apply(lambda x: len(inner_merge[inner_merge.challenger2==x]))

In [157]:
#divided by total games played
inner_merge['challenge_p_g']=inner_merge.tot_challenges/inner_merge.games_played

In [158]:
#total succesful challenges per team per game
inner_merge['tot_suc']=inner_merge.challenger2.apply(lambda x: len(inner_merge[(inner_merge.challenger2==x)&(inner_merge.overturned=='Yes')]))

In [159]:
#total successful challenges per team per game
inner_merge['tot_suc_p_g']=inner_merge.tot_suc/inner_merge.games_played

In [160]:
#list of challenges only iniatable by umpire
ump_plays=[x for x in inner_merge.play_type.unique() if x not in inner_merge[inner_merge.challenger2!='UMP'].play_type.unique()]

In [162]:
#total umpire initiated challenges
tot_ump_c=len(inner_merge[(inner_merge.play_type.isin(ump_plays)==False)&(inner_merge.challenger2=='UMP')])/2

In [163]:
#total umpire-initiated where overturned
tot_ump_s=len(inner_merge[(inner_merge.play_type.isin(ump_plays)==False)&(inner_merge.challenger2=='UMP')&(inner_merge.overturned=='Yes')])/2

In [164]:
#average umpire issued challenges per game
ucpg=float(tot_ump_c)/6517

In [165]:
#average umpire overturned per game
uspg=float(tot_ump_s)/6517

In [175]:
#average team challenge per game
avg_c_p_g=inner_merge[inner_merge.challenger2!='UMP'].groupby('challenger2').challenge_p_g.mean().mean()

In [176]:
#average team success per game
avg_s_p_g=inner_merge[inner_merge.challenger2!='UMP'].groupby('challenger2').tot_suc_p_g.mean().mean()

In [181]:
#approximating "close calls" set by finding most aggressive and most succesfull counts
max_c_p_g=inner_merge.challenge_p_g.unique().max()+ucpg

In [184]:
max_s_p_g=inner_merge.tot_suc_p_g.unique().max()+uspg

In [189]:
#finally, we use conv_f to factor in how much to lower the output of the probability model
conv_f=(max_s_p_g/max_c_p_g)/(avg_s_p_g/avg_c_p_g)

In [190]:
conv_f

0.93218625109714881

# Part III Calculate the "opportunity cost" of a lost challenge

In [191]:
import mlbgame

In [192]:
#winner function inputs a team and date and leverages mlbgame package to output if the team won that game
def winner(challenger,date):
    winner=mlbgame.day(date.year,date.month,date.day,home=inv_teams[challenger],away=inv_teams[challenger])
    if len(winner)<1:
        return 'Wrong Input'
    elif winner[0].w_team==inv_teams[challenger]:
        return 1
    else:
        return 0

In [193]:
inner_merge['won_game']=inner_merge.apply(lambda row: winner(row['Tm'], row['Date']), axis=1)

In [194]:
#finding winning percentage, given lost challenge
winning_pct_lost=inner_merge[(inner_merge.challenger2!='UMP')&(inner_merge.overturned=='No')].won_game.mean()

In [195]:
#finding winning percentage, given won challenge
winning_pct_won=inner_merge[(inner_merge.challenger2!='UMP')&(inner_merge.overturned=='Yes')].won_game.mean()

In [196]:
ocpo=(.5-inner_merge[(inner_merge.challenger2!='UMP')&(inner_merge.overturned=='No')&(inner_merge.inning==1)].won_game.mean())/49

In [197]:
#opportunity cost per out
ocpo

0.00029720626114523467

# Testing Model

Inputs needed:
1. Team
2. day
3. umpire
4. play type
5. before challenge
 - Inning
 - Top/Bottom
 - Outs
 - Runners on What base
 - score differential
6. after challenge:
 - Inning
 - Top/Bottom
 - Outs
 - Runners on What base
 - score differential
7. team vs. general variable

In [203]:
#logistic regression with gridsearch
def exp_val(team,day,umpire,play_type,inning1,inning2,half1,half2,outs1,outs2,fb1,fb2,sb1,sb2,tb1,tb2,dif1,dif2,weights=.5):

    #finds the potential gain of an overturned call in WPA
    gain=abs(win_exp(inning2,half2,fb2,sb2,tb2,outs2,dif2)-win_exp(inning1,half1,fb1,sb1,tb1,outs1,dif1))
    
    #transforms the input data into a vector to feed into the model
    vec=[challenger_dict[team][0],
    challenger_dict[team][1], 
    challenger_dict[team][2],
    umpire_dict[umpire][0],
    umpire_dict[umpire][1],
    umpire_dict[umpire][2],
    play_dict[play_type][0],
    play_dict[play_type][1],
    play_dict[play_type][2],
    day_dict[day][0],
    day_dict[day][1],
    day_dict[day][2],
    inning_dict[inning1][0],
    inning_dict[inning1][1],
    inning_dict[inning1][2]]
    
    #generates the probability of success, using team tendency
    prob=svm.predict_proba(vec)[0][1]
    
    #generates the probability of success based on general model
    prob2=(log_reg2.predict_proba(vec[3:])[0][1])*conv_f
    
    #attaches weights to each of the two probabilities
    weighted_prob=weights*prob+(1-weights)*prob2
    
    #multiplies all together to get the expected upside of overturned call
    upside=gain*weighted_prob
    
    #find out how many outs remaining to project downside of call
    if half1==1:
        remaining=(9-inning1)*6+4+3+3-outs1
    else:
        remaining=(9-inning1)*6+4+3-outs1
    
    #downside is number of outs remaining times cost per out
    downside=(1-weighted_prob)*remaining*ocpo
    
    print 'Expected upside:', upside
    print 'Expected downside: ', downside
    print 'Net Gain: ', upside-downside
    return 

In [204]:
#predict a Mets overturned call, on Tuesday, Joe West umpire, force play at home, 1st inning none on no outs
exp_val('NYN',3,'Joe West','Force play',1,1,1,1,1,0,0,1,0,0,0,0,0,0,0)

Expected upside: 0.0719264455751
Expected downside:  0.00544558303519
Net Gain:  0.0664808625399


