In [1]:
import pandas as pd
import numpy as np

In [2]:
# --- Input data -------------------------------------------------------------
# Two CSV tables; the next cell keeps only the columns it needs from each
# ('Date', 'Tm', 'Opp', 'W/L' from target and 'Tm', 'W' from league).
target = pd.read_csv("YM2014.csv")
league = pd.read_csv("MLB2014.csv")

# --- Date windows -----------------------------------------------------------
# Dates are month.day floats as produced by modi_date (7.10 is Jul 10).
# Each pair of tuples is read in parallel: entry k of the first tuple opens a
# window and entry k of the second closes it.
injury_date, recovered_date = (7.10,), (8.27,)   # read by get_avaliablity
strong_start, strong_end = (7.31,), (12.31,)     # read by is_Strong

# --- Opponent classification thresholds (opponent win totals) ---------------
# Read by get_oppo_status. While both are equal, no opponent can be labelled
# 'Average': anything below the threshold is "Weak".
min_to_be_Average = min_to_be_Strong = 162

In [3]:
# Keep only the columns the analysis uses.
target = target[['Date', 'Tm', 'Opp', 'W/L']]

# Reduce the league table to (team, wins) and rename the team column to 'Opp'
# so it can be joined against each game's opponent. The guard makes this cell
# safe to re-run: once 'Tm' has been renamed, selecting it again would raise
# KeyError.
if 'Tm' in league.columns:
    league = league[['Tm', 'W']].rename(columns={'Tm': 'Opp'})
else:
    league = league[['Opp', 'W']]

# Attach the opponent's win total ('W') to every game (inner join on 'Opp').
df = pd.merge(target, league, on='Opp')

In [4]:
# Helper that turns a date string into a sortable month.day float for later analysis.
def modi_date(date):
    """Convert a date string such as "Tuesday, Apr 1" into a float such as 4.01.

    The string is split on whitespace. The first token is ignored, the second
    is looked up as a three-letter month abbreviation, and the third is used
    as the day, left-padded with zeros to two characters so that Jul 1 (7.01)
    and Jul 10 (7.1) stay distinct and compare in calendar order. Any tokens
    after the day are ignored.

    Args:
        date: the date string to convert.

    Returns:
        float: month number plus day / 100.

    Raises:
        KeyError: if the month token is not one of Mar through Oct.
        IndexError: if the string has fewer than three tokens.
    """
    month_numbers = {
        'Mar': '3',
        'Apr': '4',
        'May': '5',
        'Jun': '6',
        'Jul': '7',
        'Aug': '8',
        'Sep': '9',
        'Oct': '10'
    }
    # split() with no argument tolerates leading or repeated whitespace;
    # split(" ") would produce empty tokens there and shift the month/day slots.
    tokens = date.split()
    return float('{}.{:0>2}'.format(month_numbers[tokens[1]], tokens[2]))

In [5]:
# Replace the date strings with month.day floats (see modi_date), then order
# the games by that value.
df = df.assign(Date=df['Date'].apply(modi_date)).sort_values(by='Date')

In [6]:
def get_winning_games_number(team_name: str):
    """Return the 'W' value of the first `league` row whose 'Opp' equals `team_name`.

    Raises IndexError when no row of `league` matches.
    """
    is_team = league['Opp'] == team_name
    return league.loc[is_team, 'W'].values[0]

In [7]:
# Helper that classifies an opponent's status: weak, average, or strong.
def get_oppo_status(team_name: str):
    """Label a team by comparing its win total with the global thresholds.

    Returns "Strong" when the wins reach min_to_be_Strong, 'Average' when they
    reach min_to_be_Average but not min_to_be_Strong, and "Weak" otherwise.
    """
    wins = get_winning_games_number(team_name)
    if wins >= min_to_be_Strong:
        return "Strong"
    # Past this point wins is below min_to_be_Strong, so only the lower bound
    # still has to be checked.
    if wins >= min_to_be_Average:
        return 'Average'
    return "Weak"

In [8]:
def get_avaliablity(date):
    """Return False if `date` falls inside any injury window, True otherwise.

    Windows are formed by pairing injury_date[k] with recovered_date[k]; both
    endpoints count as inside the window.
    """
    windows = zip(injury_date, recovered_date)
    return not any(begin <= date <= finish for begin, finish in windows)

In [9]:
def is_Strong(date):
    """Return True if `date` falls inside any strong window, False otherwise.

    Windows are formed by pairing strong_start[k] with strong_end[k]; the start
    is inside the window and the end is not.
    """
    windows = zip(strong_start, strong_end)
    return any(begin <= date < finish for begin, finish in windows)

In [10]:
# Per-game analysis table, built column by column on top of the game dates.
frame = df[['Date']].copy()
# Date-derived flags: outside every injury window / inside a strong window.
frame['Aval'] = frame['Date'].apply(get_avaliablity)
frame['Strong'] = frame['Date'].apply(is_Strong)
# Opponent label ("Strong" / 'Average' / "Weak") under the global thresholds.
frame['Opp-Status'] = df['Opp'].apply(get_oppo_status)
# Columns carried over from the merged game log.
frame['Opp'] = df['Opp']
frame['W/L'] = df['W/L']
frame['Opp-W'] = df['W']

In [11]:
# Helper function that calculates the share of games in which the target team
# and its opponent are classified as a particular status.
def count_category(data, strong=False, opp_status=None):
    """Return the fraction of rows of `data` that belong to one category.

    Args:
        data: DataFrame with a boolean 'Strong' column and a string
            'Opp-Status' column.
        strong: select the rows whose 'Strong' flag is True (truthy value) or
            False (falsy value).
        opp_status: pattern the start of 'Opp-Status' must match (passed to
            Series.str.match). None puts no condition on the opponent status.

    Returns:
        Number of selected rows divided by the number of rows in `data`, or 0
        when `data` has no rows.
    """
    total_games = len(data)
    if total_games == 0:
        # Nothing to divide by; report an empty share instead of raising.
        return 0

    selected = data['Strong'] == bool(strong)
    if opp_status is not None:
        # Filter on the frame that was passed in, not on a global one, so the
        # function works for any DataFrame with these columns.
        selected = selected & data['Opp-Status'].str.match(opp_status)
    return int(selected.sum()) / total_games

In [12]:
def count_win(data, aval=False, strong=False, opp_status=None):
    """Return the win rate among the games of `data` that belong to one category.

    Args:
        data: DataFrame with boolean 'Aval' and 'Strong' columns and string
            'W/L' and 'Opp-Status' columns.
        aval: select games whose 'Aval' flag is True (truthy) or False (falsy).
        strong: select games whose 'Strong' flag is True (truthy) or False (falsy).
        opp_status: pattern the start of 'Opp-Status' must match (passed to
            Series.str.match). None puts no condition on the opponent status.

    Returns:
        Wins divided by games within the selection, or 0 when the selection is
        empty. A game counts as a win when its 'W/L' value contains "W".
    """
    selected = (data['Aval'] == bool(aval)) & (data['Strong'] == bool(strong))
    if opp_status is not None:
        # Filter on the frame that was passed in, not on a global one, so the
        # function works for any DataFrame with these columns.
        selected = selected & data['Opp-Status'].str.match(opp_status)

    number_of_all_games = int(selected.sum())
    if number_of_all_games == 0:
        return 0

    # na=False: a game with a missing result is not counted as a win.
    won = data['W/L'].str.contains("W", na=False)
    win = int((selected & won).sum())
    return win / number_of_all_games

In [14]:
def get_WAR(data=None, scale=110):
    """Combine per-category win-rate differences into a single WAR figure.

    For each of the six categories (team 'Strong' flag False/True crossed with
    opponent status "Weak"/"Average"/"Strong") this computes

        (win rate with Aval=True - win rate with Aval=False) * category share

    using count_win and count_category, adds the six terms up and multiplies
    the sum by `scale`.

    Args:
        data: per-game DataFrame to analyse. None (the default) means the
            global `frame`, looked up when the function is called.
        scale: factor applied to the weighted sum.
            NOTE(review): 110 is presumably a number of games - confirm.

    Returns:
        The scaled, share-weighted sum of win-rate differences.
    """
    if data is None:
        # Resolve the default at call time: a default of `frame` in the
        # signature would stay bound to whatever object `frame` was when this
        # cell ran, even after `frame` is rebuilt.
        data = frame

    # Same category order as a hand-written sum: weak team first, and within
    # each team state Weak -> Average -> Strong opponents.
    weighted_diff = 0
    for strong in (False, True):
        for opp_status in ("Weak", "Average", "Strong"):
            rate_aval = count_win(data, aval=True, strong=strong, opp_status=opp_status)
            rate_unaval = count_win(data, aval=False, strong=strong, opp_status=opp_status)
            share = count_category(data, strong=strong, opp_status=opp_status)
            weighted_diff = weighted_diff + (rate_aval - rate_unaval) * share

    return weighted_diff * scale

In [15]:
def WAR_change_oppSAW(strong_thresholds=range(80, 95), lowest_average_threshold=70):
    """Recompute WAR for a grid of opponent-classification thresholds.

    For every strong threshold i in `strong_thresholds` and every average
    threshold j in range(lowest_average_threshold, i), opponents are relabelled
    ("Strong" if wins >= i, 'Average' if j <= wins < i, "Weak" otherwise) and
    get_WAR is evaluated on the relabelled table.

    Args:
        strong_thresholds: iterable of minimum win totals for "Strong".
        lowest_average_threshold: smallest minimum win total tried for 'Average'.

    Returns:
        A list of [i, j, WAR rounded to 4 decimals] rows, in sweep order.
    """
    def classify(wins, strong_min, average_min):
        # One opponent's label for a given pair of thresholds.
        if wins >= strong_min:
            return "Strong"
        if wins >= average_min:
            return 'Average'
        return "Weak"

    # Work on `frame` itself rather than on a copy, so the result is the same
    # whether the helpers behind get_WAR read the table they are handed or the
    # global `frame`.
    new_df = frame

    # The league table does not change during the sweep, so look up each
    # game's opponent win total once instead of once per threshold pair.
    opp_wins = new_df['Opp'].apply(get_winning_games_number)

    # Remember the current labels so the shared table can be put back afterwards.
    original_status = new_df['Opp-Status'].copy()

    output = []
    try:
        for i in strong_thresholds:
            for j in range(lowest_average_threshold, i):
                new_df['Opp-Status'] = opp_wins.apply(classify, args=(i, j))
                twar = round(get_WAR(new_df), 4)
                output.append([i, j, twar])
    finally:
        # Do not leave the last sweep's labels behind in the global table.
        new_df['Opp-Status'] = original_status
    return output

In [16]:
# Run the threshold sweep; x is a list of [strong threshold, average threshold, WAR] rows.
x = WAR_change_oppSAW()

In [17]:
# Display the sweep result returned by WAR_change_oppSAW.
x

[[80, 70, 12.642],
 [80, 71, 12.642],
 [80, 72, 10.7904],
 [80, 73, 10.7904],
 [80, 74, 11.0243],
 [80, 75, 11.0243],
 [80, 76, 11.0243],
 [80, 77, 3.2921],
 [80, 78, 13.032],
 [80, 79, 13.032],
 [81, 70, 12.642],
 [81, 71, 12.642],
 [81, 72, 10.7904],
 [81, 73, 10.7904],
 [81, 74, 11.0243],
 [81, 75, 11.0243],
 [81, 76, 11.0243],
 [81, 77, 3.2921],
 [81, 78, 13.032],
 [81, 79, 13.032],
 [81, 80, 10.4191],
 [82, 70, 12.642],
 [82, 71, 12.642],
 [82, 72, 10.7904],
 [82, 73, 10.7904],
 [82, 74, 11.0243],
 [82, 75, 11.0243],
 [82, 76, 11.0243],
 [82, 77, 3.2921],
 [82, 78, 13.032],
 [82, 79, 13.032],
 [82, 80, 10.4191],
 [82, 81, 10.4191],
 [83, 70, 12.8068],
 [83, 71, 12.8068],
 [83, 72, 10.7632],
 [83, 73, 10.7632],
 [83, 74, 9.3772],
 [83, 75, 9.3772],
 [83, 76, 9.3772],
 [83, 77, 6.7385],
 [83, 78, 8.5631],
 [83, 79, 8.5631],
 [83, 80, 11.7814],
 [83, 81, 11.7814],
 [83, 82, 11.7814],
 [84, 70, 13.025],
 [84, 71, 13.025],
 [84, 72, 10.9814],
 [84, 73, 10.9814],
 [84, 74, 9.825],
 [84,

In [18]:
# Split the sweep rows ([strong threshold, average threshold, WAR]) into one
# list per column.
factor1 = [entry[0] for entry in x]
factor2 = [entry[1] for entry in x]
result = [entry[2] for entry in x]

# NOTE: this rebinds `df`, the name the earlier cells use for the merged game log.
sweep_columns = {
    'min_to_be_Strong': factor1,
    'min_to_be_Average': factor2,
    'war': result,
}
df = pd.DataFrame(sweep_columns)

# Write the table (index included) to a single worksheet.
df.to_excel('YM2014.xlsx', sheet_name='with_Weak_Strong')