## Import required third party libraries

In [1]:
from re import I
import pandas as pd
from pandas.compat import F
import requests
import unicodedata

#### If you want to show all the columns and rows on the dataframe run the next cell

In [28]:
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

### Defining utilities functions

#### Function to convert unicode characters when retrieveing spanish letters

In [4]:
def convert_unicode_accented_char(text):
    return unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')

#### Function to convert innings quantity to thirds

In [5]:
def innings_to_thirds(ip):
    ip, thirds = divmod(float(ip),1)
    return int(ip) + ((thirds *10) / 3)

#### Function to calculate wOBA

In [2]:
def calculat_wOBA(df):
    wBB  = 0.670
    wHBP = 0.710
    w1B  = 0.900
    w2B  = 1.320
    w3B  = 1.720
    wHR  = 2.110

    hits = df.H - ((df['2B']) + df['3B'] + df.HR)

    numerator = ((wBB * df.BB) + (wHBP * df.HBP) + (w1B * hits) + (w2B * df['2B']) + (w3B * df['3B']) + (wHR * df.HR)) 
    denominator = (df.AB + (df.BB - df.IBB) + df.SF + df.HBP)  

    if(denominator == 0):
        return 0;
    wOBA = numerator / denominator
    
    return wOBA

### Function to get the ERA of the starters on LIDOM 
#### Per team, stage and season

In [6]:
def starters_era_per_team_stage_and_season(teamId, stageId, seasonYear):
    urlLIDOM = f"http://estadisticas.lidom.com/Equipo/EquipoLanzamiento?idTemporada="+seasonYear+"&idEtapa="+stageId+"&idEquipo="+teamId
    resLIDOM = requests.post(urlLIDOM, timeout=None).content


    df = pd.read_html(resLIDOM, attrs={'class': 'estadisticas', 'id': 'tbLanzamiento'})[0]
    df.drop('Unnamed: 0', axis=1, inplace=True)
    #print(df.info())

    columns = ['Jugador', 'ERA', 'WHIP', 'G', 'IP', 'R', 'ER', 'W', 'L','H', 'SO', 'BB', 'IBB', 'SV']
 
    StartersERA = df.loc[df.GS > 0, columns]
    StartersERA.loc[:,['IP']] = StartersERA.loc[:,['IP']].apply(lambda row : innings_to_thirds(row.IP), axis=1)
    StartersERA = StartersERA.append(StartersERA[columns[4:]].sum(), ignore_index=True).fillna('')


    StartersERA['WHIP'].iloc[-1] = round((StartersERA['H'].iloc[-1] + StartersERA['BB'].iloc[-1] + StartersERA['IBB'].iloc[-1]) / StartersERA['IP'].iloc[-1],3)
    StartersERA['ERA'].iloc[-1] = round((9 * StartersERA['ER'].iloc[-1]) / StartersERA['IP'].iloc[-1],3)
    
    return StartersERA

### Function to get the ERA of the relievers on LIDOM 
#### Per team, stage and season

##### This function needs a workaround to just retrieve the ERA of the appearence as a reliever (planned but, not down yet)

In [7]:
def relievers_era_per_team_stage_and_season(teamId, stageId, seasonYear):
    urlLIDOM = f"http://estadisticas.lidom.com/Equipo/EquipoLanzamiento?idTemporada="+seasonYear+"&idEtapa="+stageId+"&idEquipo="+teamId
    resLIDOM = requests.post(urlLIDOM, timeout=None).content


    df = pd.read_html(resLIDOM, attrs={'class': 'estadisticas', 'id': 'tbLanzamiento'})[0]
    df.drop('Unnamed: 0', axis=1, inplace=True)
    #print(df.info())
    columns = ['Jugador', 'ERA', 'WHIP', 'G', 'IP', 'R', 'ER', 'W', 'L','H', 'SO', 'BB', 'IBB', 'SV']

    RelieversERA = df[columns]
    RelieversERA = df.loc[df.GS == 0, columns]
    RelieversERA.loc[:,['IP']] = RelieversERA.loc[:,['IP']].apply(lambda row : innings_to_thirds(row.IP), axis=1)
    RelieversERA = RelieversERA.append(RelieversERA[columns[4:]].sum(), ignore_index=True).fillna('')
    
    RelieversERA['WHIP'].iloc[-1] = round((RelieversERA['H'].iloc[-1] + RelieversERA['BB'].iloc[-1] + RelieversERA['IBB'].iloc[-1]) / RelieversERA['IP'].iloc[-1],3)
    RelieversERA['ERA'].iloc[-1] = round((9 * RelieversERA['ER'].iloc[-1]) / RelieversERA['IP'].iloc[-1],3)
    
    return RelieversERA

### Function to get the ERA of LIDOM (Both: starters and relievers)
#### Per stage and season

In [9]:
def league_era(seasonId, stageId):
    urlLIDOM = f"http://estadisticas.lidom.com/Colectivo/ColectivoLanzamiento"
    
    params = {
              "idTemporada": seasonId,
              "idEtapa": stageId
              }
    
    resLIDOM = requests.post(urlLIDOM, params=params, timeout=None).content 
    
    df = pd.read_html(resLIDOM, attrs={'class': 'TablaFildeo'})[0]
    #df.drop('Unnamed: 0', axis=1, inplace=True)
    #df.drop('Equipo', axis=1, inplace=True)
    columns = ['Equipo','G','W','L','IP','ER', 'R']
    LeagueERA = df[columns]
    LeagueERA = LeagueERA.append(LeagueERA[columns[3:]].sum(), ignore_index = True).fillna('')
    
    ERA = ( 9 * LeagueERA['ER'].iloc[-1]) / LeagueERA['IP'].iloc[-1] 
    
    return ERA

In [15]:
league_era('2022', 'RR')

  LeagueERA = LeagueERA.append(LeagueERA[columns[3:]].sum(), ignore_index = True).fillna('')


3.205218617771509

### Function to get the stats of  a pitcher in LIDOM
#### Per stage, venue and season

In [21]:
def pitcher_stats_vs_team_per_stage_stadium_and_year(playerId, playerTeamId, season, stageId, stadiumId):
    urlLIDOM = f"http://estadisticas.lidom.com/Miembro/BoxcoreLanzamiento"
    params = {
              'idTemporada': season,
              'idEtapa': stageId,
              'idJugador': playerId,
              'idEstadio': stadiumId,
              'idEquipoLanzador': playerTeamId
            }

    resLIDOM = requests.post(urlLIDOM, timeout=None, params = params)

    df = pd.read_html(resLIDOM.content, attrs={'class': 'estadisticas', 'id': 'tblLanzamientos'})[0]
    #df.drop('Unnamed: 0', axis=1, inplace=True)
    #df.drop('Equipo', axis=1, inplace=True)
    df.loc[:, ['Oponente']] = df.loc[:,['Oponente']].apply(lambda row : convert_unicode_accented_char(row.Oponente), axis=1)
    
    return df

In [22]:
pitcher_stats_vs_team_per_stage_stadium_and_year(3897, '04','2022','SR','')

Unnamed: 0,Fecha,Oponente,G,W,L,SV,ERA,IP,H,R,ER,HR,BB,IBB,SO,WHIP
0,24/10/2022,Gigantes del Cibao,1,0,0,0,1.8,5.0,5,2,1,0,0,0,7,1.0


### Function to get the stats of  a batter in LIDOM
#### Per stage, team and season
##### note: we are taking a minimum of 20 at bats and ordering this by the calculated wOBA

In [39]:
def batting_stats_per_team_stage_and_season(teamId, stageId, seasonYear):
    urlLIDOM = f"http://estadisticas.lidom.com/Equipo/EquipoBateo?idTemporada="+seasonYear+"&idEtapa="+stageId+"&idEquipo="+teamId
    resLIDOM = requests.post(urlLIDOM, timeout=None).content


    df = pd.read_html(resLIDOM, attrs={'class': 'estadisticas', 'id': 'tbBateo'})[0]
    df.drop('Unnamed: 0', axis=1, inplace=True)
    #print(df.info())
    columns = ['Jugador', 'AVG', 'OBP', 'SLG', 'OPS', 'G', 'AB', 'R', 'H','2B', '3B', 'HR', 'RBI', 'BB', 'IBB', 'SO', 'LOB', 'SB', 'CS', 'HBP', 'SF', 'SH', 'DP']
    
    BattingStats = df.loc[df.AB > 20, columns]
    #BattingStats = df[columns] 
    #BattingStats = BattingStats.append(BattingStats[columns[5:]].sum(), ignore_index=True).fillna('')
    
    BattingStats['wOBA'] = BattingStats.loc[:,:].apply(lambda row : calculat_wOBA(row), axis=1)
    
    BattingStats = BattingStats.sort_values(by='wOBA', ascending=False, ignore_index=True)
    
    return BattingStats

In [40]:
batting_stats_per_team_stage_and_season('01','RR', '2022')

Unnamed: 0,Jugador,AVG,OBP,SLG,OPS,G,AB,R,H,2B,3B,HR,RBI,BB,IBB,SO,LOB,SB,CS,HBP,SF,SH,DP,wOBA
0,Geraldo Perdomo,0.311,0.403,0.41,0.813,16,61,10,19,4,1,0,6,9,0,9,16,1,2,1,1,0,0,0.365833
1,Zoilo Almonte,0.261,0.333,0.457,0.79,13,46,8,12,3,0,2,9,5,0,5,18,2,0,0,0,0,1,0.349608
2,Joe Hudson,0.2,0.385,0.333,0.718,12,30,4,6,1,0,1,4,9,0,12,7,0,0,0,0,1,0,0.334872
3,Jonathan Villar,0.238,0.333,0.381,0.714,12,42,4,10,3,0,1,5,5,1,11,17,0,0,1,0,0,0,0.330426
4,Aneury Tavarez,0.304,0.319,0.413,0.732,13,46,6,14,2,0,1,4,0,0,11,15,1,0,1,0,0,1,0.326809
5,Ramon Torres,0.286,0.375,0.333,0.708,9,21,2,6,1,0,0,1,3,0,2,9,0,2,0,0,0,1,0.32625
6,Juan Lagares,0.222,0.276,0.333,0.609,15,54,4,12,4,1,0,3,3,0,11,23,1,0,1,0,1,3,0.276207
7,Johan Camargo,0.192,0.283,0.288,0.572,16,52,5,10,3,1,0,5,4,0,10,21,0,0,3,1,0,1,0.264833
8,Jerar Encarnacion,0.2,0.289,0.25,0.539,12,40,4,8,2,0,0,6,4,0,15,21,2,0,1,0,0,0,0.254
9,Yairo Munoz,0.224,0.25,0.259,0.509,14,58,5,13,2,0,0,4,2,0,5,22,3,1,0,0,0,2,0.231333


### Function to get all pitchers stats
#### Per stage and season

In [43]:
def pitchers_stats_per_season_and_stage(season, stage):
    urlLIDOM = f"http://estadisticas.lidom.com/Lider/EstadisticasLanzadores"

    params = {"filtrar": False,
              "idTemporada": season,
              "idEtapa": stage}
    resLIDOM = requests.post(urlLIDOM, params = params, timeout = None).content

    df = pd.read_html(resLIDOM, attrs={'class': 'estadisticas', 'id': 'tbLanzamiento'})[0] 
    df.drop('Unnamed: 0', axis=1, inplace=True)
    
    df = df.sort_values(by='ERA', ignore_index = True)
    return df

In [44]:
pitchers_stats_per_season_and_stage('2022','RR')

Unnamed: 0,Jugador,Equipo,W,L,ERA,G,GS,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,WP,BK,WHIP
0,Junior Fernandez,Aguilas Cibaeñas,0,1,0.0,8,0,0,6.1,4,4,0,0,2,1,2,0,0,0,0.79
1,Richard Rodriguez,Aguilas Cibaeñas,0,0,0.0,7,0,0,7.0,4,0,0,0,0,0,9,0,0,0,0.57
2,Williams Jerez,Estrellas Orientales,1,0,0.0,6,0,0,5.1,3,0,0,0,1,0,4,0,0,0,0.75
3,Francis Martes,Aguilas Cibaeñas,0,0,0.0,1,0,0,1.0,1,0,0,0,2,0,1,0,1,0,3.0
4,Albert Abreu,Tigres del Licey,0,0,0.0,3,0,0,6.1,7,0,0,0,2,0,7,1,0,0,1.42
5,Marcos Diplan,Aguilas Cibaeñas,0,0,0.0,1,0,0,0.1,0,1,0,0,2,0,0,0,0,0,6.01
6,Jesus Liranzo,Tigres del Licey,0,0,0.0,3,0,0,3.0,1,0,0,0,1,0,3,0,0,0,0.67
7,Gregory Soto,Aguilas Cibaeñas,0,0,0.0,6,0,0,5.2,7,1,0,0,3,0,4,0,1,0,1.76
8,Raffi Vizcaíno,Estrellas Orientales,1,0,0.0,5,0,0,4.1,1,0,0,0,2,0,5,0,1,0,0.69
9,Oliver Ortega,Tigres del Licey,0,0,0.0,3,0,0,3.0,1,0,0,0,1,0,0,0,0,0,0.67


### Function to get the FIP constant of LIDOM
#### Per stage and season

In [14]:
def fip_constant_per_season_and_stage(season, stage):
    pitchersDF = pitchers_stats_per_season_and_stage(season, stage)

    FIPColumns = ['Jugador','HR', 'BB', 'HBP', 'SO', 'IP']
    pitchersDF = pitchersDF[FIPColumns]
   

    pitchersDF.loc[:,['IP']] = pitchersDF.loc[:,['IP']].apply(lambda row : innings_to_thirds(row.IP), axis=1)
    pitchersDF = pitchersDF.append(pitchersDF[FIPColumns[1:]].sum(), ignore_index = True).fillna('')

    lgERA = league_era(season, stage)

    numerador = ((13 * pitchersDF['HR'].iloc[-1]) + (3 * (pitchersDF['BB'].iloc[-1] + pitchersDF['HBP'].iloc[-1])) - (2 * (pitchersDF['SO'].iloc[-1]))) 
    denominador = pitchersDF['BB'].iloc[-1]

    FIPConstant = lgERA - (numerador / denominador)
    
    return FIPConstant

In [45]:
fip_constant_per_season_and_stage('2022', 'RR')

  pitchersDF = pitchersDF.append(pitchersDF[FIPColumns[1:]].sum(), ignore_index = True).fillna('')
  LeagueERA = LeagueERA.append(LeagueERA[columns[3:]].sum(), ignore_index = True).fillna('')


2.0033406834992085