In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_columns',1000)
pd.set_option('display.max_rows',1000)
import lxml
import html5lib
from urllib.request import urlopen
import time

In [2]:
def line_to_prob(line):
    prob_underdog = 100/(np.abs(line)+100) # this is the probability for the 
    add_term = ((1-np.sign(line))/2) # 0 if negative, 1 if positive
    mult_factor = np.sign(line) # -1 if negative, 1 if positive
    # if line is positive, team is underdog, give 0 + 1*prob_underdog
    # if line is negative, team is favoritesm give 1 + (-1)*prob_underdog
    imp_prob = add_term + mult_factor * prob_underdog 
    return(imp_prob)

In [3]:
# manually figure out what number in url corresponds to which team
# use the 3 letter abbrev from retrosheet for each team

oddsshark_num_to_team_dict = {}
oddsshark_num_to_team_dict[26995]='PHI'
oddsshark_num_to_team_dict[26996]='SDN'
oddsshark_num_to_team_dict[26997]='SFN'
oddsshark_num_to_team_dict[26998]='ANA'
oddsshark_num_to_team_dict[26999]='DET'
oddsshark_num_to_team_dict[27000]='CIN'
oddsshark_num_to_team_dict[27001]='NYA'
oddsshark_num_to_team_dict[27002]='TEX'
oddsshark_num_to_team_dict[27003]='TBA'
oddsshark_num_to_team_dict[27004]='COL'
oddsshark_num_to_team_dict[27005]='MIN'
oddsshark_num_to_team_dict[27006]='KCA'
oddsshark_num_to_team_dict[27007]='ARI'
oddsshark_num_to_team_dict[27008]='BAL'
oddsshark_num_to_team_dict[27009]='ATL'
oddsshark_num_to_team_dict[27010]='TOR'
oddsshark_num_to_team_dict[27011]='SEA'
oddsshark_num_to_team_dict[27012]='MIL'
oddsshark_num_to_team_dict[27013]='PIT'
oddsshark_num_to_team_dict[27014]='NYN'
oddsshark_num_to_team_dict[27015]='LAN'
oddsshark_num_to_team_dict[27016]='OAK'
oddsshark_num_to_team_dict[27017]='WAS'
oddsshark_num_to_team_dict[27018]='CHA'
oddsshark_num_to_team_dict[27019]='SLN'
oddsshark_num_to_team_dict[27020]='CHN'
oddsshark_num_to_team_dict[27021]='BOS'
oddsshark_num_to_team_dict[27022]='MIA'
oddsshark_num_to_team_dict[27023]='HOU'
oddsshark_num_to_team_dict[27024]='CLE'

In [4]:
for i in range(26995, 27025):
    team_name = oddsshark_num_to_team_dict[i]
    print(team_name)
    #for season in range(2019,2023):
    season = 2023
    try:
        print(season)
        url = 'https://www.oddsshark.com/stats/gamelog/baseball/mlb/'+str(i)+'?season='+str(season)
        df_temp = pd.read_html(url)[0]
        
        df_temp = df_temp[df_temp.Game=='REG']
        #print(df_temp.shape)
        df_temp['team_source'] = team_name
        df_temp['season'] = season
        df_temp['date_numeric'] = pd.to_datetime(df_temp.Date).astype(str).str.replace('-','')
        df_temp['game_no'] = np.arange(1,df_temp.shape[0]+1)
        df_temp['prob_implied'] = line_to_prob(df_temp['Line'])      
        
        next_game_date = np.concatenate((df_temp['date_numeric'].iloc[1:],[0]))
        previous_game_date = np.concatenate(([0], df_temp['date_numeric'].iloc[:-1]))
        
        game_1_dblheader = (df_temp.date_numeric.to_numpy()==next_game_date).astype(int)
        game_2_dblheader = (df_temp.date_numeric.to_numpy()==previous_game_date).astype(int)*2
        
        df_temp['dblheader_num'] = game_1_dblheader+game_2_dblheader        
        fname_out = 'OddShark/oddsshark_'+team_name+'_'+str(season)+'.csv'
        
        df_temp.to_csv(fname_out,index=False)
        time.sleep(.1)
    except:
        print(f"problem with: {season}")

PHI
2023
SDN
2023
SFN
2023
ANA
2023
DET
2023
CIN
2023
NYA
2023
TEX
2023
TBA
2023
COL
2023
MIN
2023
KCA
2023
ARI
2023
BAL
2023
ATL
2023
TOR
2023
SEA
2023
MIL
2023
PIT
2023
NYN
2023
LAN
2023
OAK
2023
WAS
2023
CHA
2023
SLN
2023
CHN
2023
BOS
2023
MIA
2023
HOU
2023
CLE
2023


In [7]:
df = pd.read_csv('OddShark/oddsshark_ANA_2023.csv')
df

Unnamed: 0,Date,Opponent,Game,Result,Score,Line,OU,Total,team_source,season,date_numeric,game_no,prob_implied,dblheader_num
0,"Mar 30, 2023",@ Oakland,REG,L,2-1,-205.0,U,7.0,ANA,2023,20230330,1,0.672131,0
1,"Apr 1, 2023",@ Oakland,REG,W,13-1,-170.0,O,7.5,ANA,2023,20230401,2,0.62963,0
2,"Apr 2, 2023",@ Oakland,REG,W,6-0,-150.0,U,7.5,ANA,2023,20230402,3,0.6,0
3,"Apr 3, 2023",@ Seattle,REG,W,7-3,122.0,O,7.0,ANA,2023,20230403,4,0.45045,0
4,"Apr 4, 2023",@ Seattle,REG,L,11-2,150.0,O,7.5,ANA,2023,20230404,5,0.4,0
5,"Apr 5, 2023",@ Seattle,REG,W,4-3,-170.0,P,7.0,ANA,2023,20230405,6,0.62963,0
6,"Apr 7, 2023",vs Toronto,REG,L,4-3,-145.0,U,9.0,ANA,2023,20230407,7,0.591837,0
7,"Apr 8, 2023",vs Toronto,REG,W,9-5,-130.0,O,9.5,ANA,2023,20230408,8,0.565217,0
8,"Apr 9, 2023",vs Toronto,REG,L,12-11,-132.0,O,10.0,ANA,2023,20230409,9,0.568966,0
9,"Apr 10, 2023",vs Washington,REG,L,6-4,-280.0,P,10.0,ANA,2023,20230410,10,0.736842,0


In [24]:
from datetime import datetime

dt = datetime.now()
t = f"{dt.year}-{dt.month}-28"
t

'2023-8-28'

In [17]:
df["Date"] = pd.to_datetime(df["Date"])

Unnamed: 0,Date,Opponent,Game,Result,Score,Line,OU,Total,team_source,season,date_numeric,game_no,prob_implied,dblheader_num
0,2023-03-30,@ Oakland,REG,L,2-1,-205.0,U,7.0,ANA,2023,20230330,1,0.672131,0
1,2023-04-01,@ Oakland,REG,W,13-1,-170.0,O,7.5,ANA,2023,20230401,2,0.62963,0
2,2023-04-02,@ Oakland,REG,W,6-0,-150.0,U,7.5,ANA,2023,20230402,3,0.6,0
3,2023-04-03,@ Seattle,REG,W,7-3,122.0,O,7.0,ANA,2023,20230403,4,0.45045,0
4,2023-04-04,@ Seattle,REG,L,11-2,150.0,O,7.5,ANA,2023,20230404,5,0.4,0
5,2023-04-05,@ Seattle,REG,W,4-3,-170.0,P,7.0,ANA,2023,20230405,6,0.62963,0
6,2023-04-07,vs Toronto,REG,L,4-3,-145.0,U,9.0,ANA,2023,20230407,7,0.591837,0
7,2023-04-08,vs Toronto,REG,W,9-5,-130.0,O,9.5,ANA,2023,20230408,8,0.565217,0
8,2023-04-09,vs Toronto,REG,L,12-11,-132.0,O,10.0,ANA,2023,20230409,9,0.568966,0
9,2023-04-10,vs Washington,REG,L,6-4,-280.0,P,10.0,ANA,2023,20230410,10,0.736842,0


In [26]:
df["Line"][df.Date == t]

131    125.0
Name: Line, dtype: float64

In [86]:
#Now lets grab all the odd lines for each team for yesterday

TEAMS = ['PHI','SDN','SFN','ANA','DET','CIN','NYA','TEX','TBA','COL','MIN','KCA','ARI','BAL','ATL','TOR','SEA','MIL','PIT','NYN','LAN','OAK','WAS'
,'CHA','SLN','CHN','BOS','MIA','HOU','CLE']


def grab_team_odds(date):
    for team in TEAMS:
        f = f"OddShark/oddsshark_{team}_2023.csv"
        df = pd.read_csv(f)

        df["Date"] = pd.to_datetime(df["Date"])
        
        Line = df["Line"][df.Date == date]
        
        if Line.dtype == 'float64':
            print(f"Team: {team}, Line: {int(Line)}")

In [87]:
grab_team_odds(t)

Team: PHI, Line: -140
Team: SDN, Line: -210
Team: SFN, Line: -129
Team: ANA, Line: 125
Team: DET, Line: -111
Team: CIN, Line: 119
Team: NYA, Line: 101
Team: TEX, Line: -127


  print(f"Team: {team}, Line: {int(Line)}")
  print(f"Team: {team}, Line: {int(Line)}")
  print(f"Team: {team}, Line: {int(Line)}")
  print(f"Team: {team}, Line: {int(Line)}")
  print(f"Team: {team}, Line: {int(Line)}")
  print(f"Team: {team}, Line: {int(Line)}")
  print(f"Team: {team}, Line: {int(Line)}")
  print(f"Team: {team}, Line: {int(Line)}")


TypeError: cannot convert the series to <class 'int'>