In [1]:
import pandas as pd

In [2]:
# --- Analysis parameters ---------------------------------------------------
# Dates are floats in month.day form with a two-digit day (the encoding that
# modi_date produces), so 7.10 is July 10 and 8.27 is August 27.
# Each pair of tuples is walked together by position: entry i of the first
# tuple opens an interval and entry i of the second tuple closes it.
injury_date = (7.10,)
recovered_date = (8.27,)
# (0,) / (0,) describes an empty window, so is_Strong is False for every date.
strong_start = (0,)
strong_end = (0,)

# Thresholds on an opponent's 'W' value used by get_oppo_status.
min_to_be_Strong = 87
min_to_be_Average = 78

# --- Input data --------------------------------------------------------------
target = pd.read_csv("YM2014.csv")
league = pd.read_csv("MLB2014.csv")

In [3]:
# Keep only the columns the analysis uses.
target = target[['Date', 'Tm', 'Opp', 'W/L']]

# Reduce the league table to ['Opp', 'W'] exactly once. After the first run
# `league` no longer has a 'Tm' column, so selecting ['Tm', 'W'] again would
# raise KeyError; the guard makes this cell safe to re-run.
if 'Tm' in league.columns:
    league = league[['Tm', 'W']].rename(columns={'Tm': 'Opp'})

# Attach each opponent's 'W' value to every game row.
# Inner join: game rows whose 'Opp' has no match in `league` are dropped.
df = pd.merge(target, league, on='Opp', how='inner')

In [4]:
def modi_date(date):
    """Convert a date string into a sortable float of the form month.day.

    The string is split on single spaces and the first token is discarded;
    the second token must be a month abbreviation ('Mar' .. 'Oct') and the
    third token is the day. Any further tokens are ignored. The day is
    left-padded to two digits so that e.g. the 6th sorts before the 10th.

    Example (assuming the CSV uses a 'Weekday, Mon D' layout - confirm):
        'Monday, Mar 31' -> 3.31
        'Sunday, Apr 6'  -> 4.06

    Raises:
        KeyError: the month token is not one of the abbreviations below.
        IndexError: the string has fewer than three space-separated tokens.
    """
    date_dict = {
        'Mar': '3',
        'Apr': '4',
        'May': '5',
        'Jun': '6',
        'Jul': '7',
        'Aug': '8',
        'Sep': '9',
        'Oct': '10'
    }
    # Drop the leading token; what remains starts with month, then day.
    date_split = date.split(" ")[1:]
    month = date_dict[date_split[0]]
    # '{:0>2}' right-aligns the day in a width of 2, padding with '0'.
    return float('{}.{:0>2}'.format(month, date_split[1]))

In [5]:
# Replace the textual dates with their numeric month.day form (modi_date),
# then order the rows by that value.
df = (
    df.assign(Date=df['Date'].apply(modi_date))
      .sort_values(by=['Date'])
)

In [6]:
def get_winning_games_number(team_name: str):
    """Return the 'W' value of the first `league` row whose 'Opp' is team_name.

    Reads the module-level `league` frame. Raises IndexError when no row
    matches.
    """
    team_rows = league[league['Opp'] == team_name]
    win_values = team_rows['W'].values
    return win_values[0]

In [7]:
def get_oppo_status(team_name: str):
    """Classify an opponent as "Strong", 'Average' or "Weak".

    The opponent's win count comes from get_winning_games_number and is
    compared against the module-level thresholds:
        wins >= min_to_be_Strong                       -> "Strong"
        min_to_be_Average <= wins < min_to_be_Strong   -> 'Average'
        otherwise                                      -> "Weak"
    """
    wins = get_winning_games_number(team_name)
    if wins >= min_to_be_Strong:
        return "Strong"
    # Reaching this point already means wins < min_to_be_Strong.
    if wins >= min_to_be_Average:
        return 'Average'
    return "Weak"

In [8]:
def get_avaliablity(date):
    """Tell whether the athlete is available on `date`.

    `date` is compared against the module-level `injury_date` and
    `recovered_date` tuples, which are paired by position into closed
    intervals [injury, recovered]. Returns False when `date` lies inside any
    of those intervals (both end points included), otherwise True.
    """
    injury_windows = zip(injury_date, recovered_date)
    is_injured = any(inj <= date <= rec for inj, rec in injury_windows)
    return not is_injured

In [9]:
def is_Strong(date):
    """Tell whether `date` falls inside any "strong" window.

    The module-level `strong_start` and `strong_end` tuples are paired by
    position into half-open intervals [start, end): the start is included,
    the end is not. Returns True when `date` is inside at least one of them,
    otherwise False.

    NOTE(review): presumably these windows mark periods in which the target
    team is considered strong - confirm.
    """
    strong_windows = zip(strong_start, strong_end)
    return any(start <= date < end for start, end in strong_windows)

In [10]:
# Per-game table with one row per row of `df` (same index), columns in the
# order: Date, Aval, Strong, Opp-Status, Opp, W/L, Opp-W.
frame = df[['Date']].assign(**{
    'Aval': df['Date'].apply(get_avaliablity),
    'Strong': df['Date'].apply(is_Strong),
    'Opp-Status': df['Opp'].apply(get_oppo_status),
    'Opp': df['Opp'],
    'W/L': df['W/L'],
    'Opp-W': df['W'],
})

In [11]:
# Persist the per-game table; with to_csv's defaults the index is written as
# an unnamed first column.
# NOTE(review): the inputs loaded earlier are the 2014 files (YM2014.csv,
# MLB2014.csv) but this output is named 2015 - confirm the intended year.
frame.to_csv('Yadier_Molina_2015.csv')

In [12]:
def count_category(data, strong=False, opp_status=None):
    """Return the share of games in `data` that fall in one status category.

    A game is in the category when its 'Strong' flag equals `strong` and its
    'Opp-Status' matches `opp_status`. The share is taken over ALL rows of
    `data`. The numerator, denominator and result are also printed.

    Both filters are read from `data` itself, so the function works for any
    frame with 'Strong' and 'Opp-Status' columns and does not depend on the
    module-level `frame`.

    Args:
        data: DataFrame with a boolean 'Strong' column and a text
            'Opp-Status' column.
        strong: select games whose 'Strong' flag is True (truthy) or False
            (falsy).
        opp_status: pattern handed to Series.str.match, i.e. matched as a
            regular expression at the start of each 'Opp-Status' value.
            A value must be supplied; the default None is not a usable
            pattern.

    Returns:
        matching games / len(data), or 0 when `data` has no rows (the same
        convention count_win uses for an empty denominator).
    """
    total_games = len(data)
    if total_games == 0:
        # Nothing to count; avoid dividing by zero.
        print('numerator :    ', 0)
        print('denumerator : ', 0)
        print('output: ', 0)
        return 0

    team_mask = data['Strong'] == bool(strong)
    opp_mask = data['Opp-Status'].str.match(opp_status)
    matching_games = len(data[team_mask & opp_mask])

    share = matching_games / total_games
    print('numerator :    ', matching_games)
    print('denumerator : ', total_games)
    print('output: ', share)
    return share

In [13]:
def count_win(data, aval=False, strong=False, opp_status=None):
    """Return the win rate among the games of `data` in one category.

    A game is in the category when its 'Aval' flag equals `aval`, its
    'Strong' flag equals `strong`, and its 'Opp-Status' matches `opp_status`.
    A game counts as a win when its 'W/L' text contains "W". The numerator,
    denominator and result are also printed.

    All filters are read from `data` itself, so the function works for any
    frame with the required columns and does not depend on the module-level
    `frame`.

    Args:
        data: DataFrame with boolean 'Aval' and 'Strong' columns and text
            'W/L' and 'Opp-Status' columns.
        aval: select games whose 'Aval' flag is True (truthy) or False.
        strong: select games whose 'Strong' flag is True (truthy) or False.
        opp_status: pattern handed to Series.str.match, i.e. matched as a
            regular expression at the start of each 'Opp-Status' value.
            A value must be supplied; the default None is not a usable
            pattern.

    Returns:
        wins / games in the category, or 0 when the category is empty.
    """
    in_category = (
        (data['Aval'] == bool(aval))
        & (data['Strong'] == bool(strong))
        & data['Opp-Status'].str.match(opp_status)
    )
    is_win = data['W/L'].str.contains("W")

    win = len(data[is_win & in_category])
    number_of_all_games = len(data[in_category])

    print('numerator :    ', win)
    print('denumerator : ', number_of_all_games)
    if number_of_all_games == 0:
        # Empty category: report 0 rather than dividing by zero.
        print('output: ', 0)
        return 0

    win_rate = win / number_of_all_games
    print('output: ', win_rate)
    return win_rate

In [18]:
def WAR(data=None, n_games=110):
    """Compute the availability-weighted win difference over all categories.

    Games are split into six categories: team status (Weak / Strong, i.e.
    the 'Strong' flag False / True) crossed with opponent status
    (Weak / Average / Strong). For each category this evaluates

        (count_win(aval=True) - count_win(aval=False)) * count_category(...)

    i.e. the difference in win rate with and without the athlete, weighted by
    the share of games in that category. The six terms are summed and
    multiplied by `n_games`. count_win returns 0 for a category with no
    games, so such a rate enters the difference as 0.

    A label is printed before each of the 18 helper calls (the helpers print
    their own numerator / denominator / output lines).

    Args:
        data: per-game DataFrame passed through to count_win and
            count_category. When omitted, the module-level `frame` is used,
            looked up at call time so that a rebuilt `frame` is picked up.
        n_games: multiplier applied to the summed per-game difference.
            110 is the value that was previously hard-coded (presumably the
            number of games the athlete played - TODO confirm).

    Returns:
        The weighted sum multiplied by `n_games`.
    """
    if data is None:
        data = frame

    # (value of the 'Strong' flag, label used in the printed headings)
    team_states = ((False, 'Weak'), (True, 'Strong'))
    opp_states = ('Weak', 'Average', 'Strong')

    total = 0
    for team_is_strong, team_label in team_states:
        for opp_status in opp_states:
            # e.g. 'Weak_averageOpp'
            cell = '{}_{}Opp'.format(team_label, opp_status.lower())

            print('P_Aval_{} :'.format(cell))
            p_win_aval = count_win(data, aval=True, strong=team_is_strong,
                                   opp_status=opp_status)
            print('P_Unaval_{} :'.format(cell))
            p_win_unaval = count_win(data, aval=False, strong=team_is_strong,
                                     opp_status=opp_status)
            print('P X = {} :'.format(cell))
            p_cell = count_category(data, strong=team_is_strong,
                                    opp_status=opp_status)

            total += (p_win_aval - p_win_unaval) * p_cell

    return total * n_games
           


In [19]:
# Run the calculation with WAR's default `data` argument; the returned value
# is displayed as this cell's output, after the lines printed during the call.
WAR()

P_Aval_Weak_weakOpp :
numerator :     20
denumerator :  33
output:  0.6060606060606061
P_Unaval_Weak_weakOpp :
numerator :     2
denumerator :  7
output:  0.2857142857142857
P X = Weak_weakOpp :
numerator :     40
denumerator :  162
output:  0.24691358024691357
P_Aval_Weak_averageOpp :
numerator :     13
denumerator :  25
output:  0.52
P_Unaval_Weak_averageOpp :
numerator :     2
denumerator :  3
output:  0.6666666666666666
P X = Weak_averageOpp :
numerator :     28
denumerator :  162
output:  0.1728395061728395
P_Aval_Weak_strongOpp :
numerator :     17
denumerator :  34
output:  0.5
P_Unaval_Weak_strongOpp :
numerator :     2
denumerator :  4
output:  0.5
P X = Weak_strongOpp :
numerator :     38
denumerator :  162
output:  0.2345679012345679
P_Aval_Strong_weakOpp :
numerator :     11
denumerator :  20
output:  0.55
P_Unaval_Strong_weakOpp :
numerator :     11
denumerator :  17
output:  0.6470588235294118
P X = Strong_weakOpp :
numerator :     37
denumerator :  162
output:  0.2283950

8.515258136924807