# Marcel Model Demonstration

### The league average function

In [1]:
def leagueAverage(df, perGame):
    """Return the mean of every non-text column of ``df``.

    Args:
        df: DataFrame whose columns are averaged.
        perGame: when equal to 1, ``df`` is first passed through
            ``dc.perGameConversionAll(df, 1)``; any other value leaves the
            frame untouched.

    Returns:
        dict mapping column name -> ``df[column].mean()``. A column is
        treated as text (and skipped) when its first value is a ``str``.
        Empty columns are skipped as well, so an empty frame yields ``{}``
        instead of raising ``IndexError``.
    """
    if perGame == 1:
        df = dc.perGameConversionAll(df, 1)

    averages = {}
    for column in df:
        series = df[column]
        # An empty column has no first value to inspect and no usable mean.
        if series.empty:
            continue
        # Text columns are recognised by their first value only; isinstance
        # also covers str subclasses that an exact type comparison would miss.
        if not isinstance(series.values[0], str):
            averages[column] = series.mean()
    return averages

### The Marcel Model

In [17]:
def marcelModel(stat, playerId, firstDf, secondDf, thirdDf, firstAvg, secondAvg, thirdAvg):
    """Project one stat for one player from three seasons of data.

    Args:
        stat: column name of the stat to project (must exist in each frame
            and in each averages dict).
        playerId: index label used to look the player up with ``.loc``.
        firstDf, secondDf, thirdDf: per-season frames containing ``stat`` and
            an ``'IP'`` column. ``firstDf`` receives the heaviest weight (3),
            ``thirdDf`` the lightest (1).
        firstAvg, secondAvg, thirdAvg: dicts holding fallback values for
            ``stat`` and ``'IP'``, used when the player lookup in the matching
            season raises ``KeyError``.

    Returns:
        The projected stat: a blended per-IP rate multiplied by projected IP.
    """

    def seasonLine(seasonDf, seasonAvg):
        # (stat, IP) for the player in one season; any KeyError during the
        # lookup swaps in that season's fallback averages for both values.
        try:
            playerRow = seasonDf.loc[playerId]
            return playerRow.loc[stat], playerRow.loc['IP']
        except KeyError:
            return seasonAvg[stat], seasonAvg['IP']

    def leagueExpected(seasonDf, innings, weight):
        # Season-wide stat-to-IP ratio scaled by the player's IP and the
        # season weight.
        return seasonDf[stat].sum() / seasonDf['IP'].sum() * innings * weight

    # Resolve all three seasons before doing any arithmetic.
    stat1, ip1 = seasonLine(firstDf, firstAvg)
    stat2, ip2 = seasonLine(secondDf, secondAvg)
    stat3, ip3 = seasonLine(thirdDf, thirdAvg)

    # 3/2/1 weighting of the stat and of innings pitched.
    weightedStat = (stat1 * 3) + (stat2 * 2) + (stat3 * 1)
    weightedIP = (ip1 * 3) + (ip2 * 2) + (ip3 * 1)

    # Projected innings; the original comment labels the +60 baseline as the
    # starting-pitcher value.
    projectedIP = (ip1 * 0.5) + (ip2 * 0.1) + 60

    # Expected stat over the same innings at each season's overall rate.
    expectedStat = (
        leagueExpected(firstDf, ip1, 3)
        + leagueExpected(secondDf, ip2, 2)
        + leagueExpected(thirdDf, ip3, 1)
    )

    # Scale the expected rate to 900 IP, then blend it with the player's
    # weighted stat over (900 + weighted IP).
    # NOTE(review): the 900 constant and the trailing "/ 2" are reproduced as
    # written; their derivation is not visible here -- confirm.
    regressedStat = expectedStat / weightedIP * 900
    ratePerIP = (regressedStat + weightedStat) / (900 + weightedIP) / 2

    return ratePerIP * projectedIP

### Find Toronto's Pitching Roster

In [3]:
import modules.data_processing.data_query as dq
import modules.data_processing.data_cleaning as dc
import modules.get_rosters as rosters
import pandas as pd
import numpy as np

# Date window for the stats query; later cells derive the three seasons from
# its two endpoints.
dateRange = ["2015-04-01", "2017-12-01"]
# Team whose roster is projected.
teamName = "Toronto Blue Jays"

# Fetch the team's roster through the project helper. The bare name on the
# last line makes the notebook display the result.
roster = rosters.createTeamRosters(teamName)
roster

Unnamed: 0,0
0,Jacob Waguespack
1,Marcus Stroman
2,Thomas Pannone
3,Elvis Luciano
4,Matt Shoemaker
5,Tim Mayza
6,Aaron Sanchez
7,Clayton Richard
8,Julian Merryweather
9,Sean Reid-Foley


### Prepare the Marcel DataFrame

In [4]:
# League code; a later cell maps it onto the 'Lev' column to compute averages.
league = "al"

# Stats that will be projected.
stats = ["ERA", "WHIP", "SO9"]

# Columns requested from the source: context columns followed by the
# projected stats.
marcelStats = ["Name", "Age", "Lev", "Year", "G", "GS", "IP"] + stats

# Pull the requested columns for the whole date window and display the frame.
marcelDF = dq.query("bref", marcelStats, dateRange)
marcelDF

Unnamed: 0,Name,Age,Lev,Year,G,GS,IP,ERA,WHIP,SO9
1,David Aardsma,33,MLB-NL,2015,33,0,30.2,4.70,1.272,10.3
2,Fernando Abad,29,MLB-AL,2015,62,0,47.2,4.15,1.343,8.5
3,A.J. Achter,26,MLB-AL,2015,11,0,13.1,6.75,1.350,9.5
4,Austin Adams,28,MLB-AL,2015,28,0,33.1,3.78,1.500,6.2
5,Nathan Adcock,27,MLB-NL,2015,13,0,18.0,6.00,1.500,6.5
6,Jeremy Affeldt,36,MLB-NL,2015,52,0,35.1,5.86,1.613,5.3
7,Andrew Albers,29,MLB-AL,2015,1,0,2.2,3.38,1.125,3.4
8,Matt Albers,32,MLB-AL,2015,30,0,37.1,1.21,1.071,6.8
9,Al Alburquerque,29,MLB-AL,2015,67,0,62.0,4.21,1.548,8.4
10,Scott Alexander,25,MLB-AL,2015,4,0,6.0,4.50,1.333,4.5


### Split the DataFrame by league and into three seasons

In [6]:
# Turn the query window into integer seasons.
# NOTE(review): presumably dates == [first year, last year] -- confirm against
# dc.convertDateStringToInt.
dates = dc.convertDateStringToInt(dateRange)
firstDate = dates[1]        # latest season; passed first to marcelModel (weight 3)
secondDate = dates[1] - 1   # the season before the latest
thirdDate = dates[0]        # earliest season in the window

# Map the league code onto the value stored in the 'Lev' column. An unknown
# code fails here with a clear message instead of a NameError on `lev` below.
levByLeague = {'al': 'MLB-AL', 'nl': 'MLB-NL'}
if league not in levByLeague:
    raise ValueError("league must be 'al' or 'nl', got %r" % (league,))
lev = levByLeague[league]

# Averages come from the selected league only, one frame per season.
leagueDF = marcelDF[marcelDF['Lev'] == lev]
lgSeason1DF = leagueDF[leagueDF['Year'] == firstDate]
lgSeason2DF = leagueDF[leagueDF['Year'] == secondDate]
lgSeason3DF = leagueDF[leagueDF['Year'] == thirdDate]

firstAvg = leagueAverage(lgSeason1DF, 0)
secondAvg = leagueAverage(lgSeason2DF, 0)
thirdAvg = leagueAverage(lgSeason3DF, 0)

# Per-season frames across all leagues, indexed by player name so marcelModel
# can look players up with .loc. set_index returns a new frame, so the
# filtered slices of marcelDF are never modified in place.
season1DF = marcelDF[marcelDF['Year'] == firstDate].set_index("Name")
season2DF = marcelDF[marcelDF['Year'] == secondDate].set_index("Name")
season3DF = marcelDF[marcelDF['Year'] == thirdDate].set_index("Name")

# Player names are read from column 0 of the roster frame.
rosterNames = list(roster[0])

# Keep only IP plus the projected stats, and drop rows holding infinite or
# missing values so they cannot leak into the season totals.
statsPlus = ["IP"]
statsPlus.extend(stats)
stats1DF = season1DF[statsPlus].replace([np.inf, -np.inf], np.nan).dropna()
stats2DF = season2DF[statsPlus].replace([np.inf, -np.inf], np.nan).dropna()
stats3DF = season3DF[statsPlus].replace([np.inf, -np.inf], np.nan).dropna()
stats1DF

Unnamed: 0_level_0,IP,ERA,WHIP,SO9
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fernando Abad,43.2,3.30,1.237,7.6
Austin L. Adams,5.0,3.60,2.400,18.0
Tim Adleman,122.1,5.52,1.431,7.9
Andrew Albers,41.0,3.51,1.293,8.1
Matt Albers,61.0,1.62,0.852,9.3
Al Alburquerque,18.0,2.50,1.000,7.0
Raul Alcantara,24.0,7.13,1.417,4.5
Sandy Alcantara,8.1,4.32,1.800,10.8
Victor Alcantara,7.1,8.59,2.182,6.1
Scott Alexander,69.0,2.48,1.319,7.7


### Predict Stats of Toronto's Pitching Roster

In [18]:
# Empty result grid: one row per roster player, one column per projected stat.
predictDF = pd.DataFrame(index=rosterNames, columns=stats)

# Season frames and fallback averages, in the order marcelModel expects them.
seasonFrames = (stats1DF, stats2DF, stats3DF)
seasonAverages = (firstAvg, secondAvg, thirdAvg)

# Fill the grid one cell at a time.
for player in rosterNames:
    for stat in stats:
        projection = marcelModel(stat, player, *seasonFrames, *seasonAverages)
        predictDF.at[player, stat] = projection
predictDF

Unnamed: 0,ERA,WHIP,SO9
Jacob Waguespack,4.32572,1.2385,6.28622
Marcus Stroman,4.89905,1.47207,7.70994
Thomas Pannone,4.32572,1.2385,6.28622
Elvis Luciano,4.32572,1.2385,6.28622
Matt Shoemaker,3.87715,1.12518,6.06877
Tim Mayza,3.79836,1.07847,6.09143
Aaron Sanchez,3.35722,1.05431,5.18123
Clayton Richard,5.52968,1.65486,7.94909
Julian Merryweather,4.32572,1.2385,6.28622
Sean Reid-Foley,4.32572,1.2385,6.28622
