In [3]:
from logger import setup_logger
import pandas as pd
import json
import plotly.express as px
import hashlib
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import time

logger = setup_logger(__name__)

def generate_game_id(row):
    try:
        # Example: Use a combination of date, home team, and away team to generate a unique ID
        identifier = f"{row['date']}_{row['home']}_{row['away']}"
        return hashlib.md5(identifier.encode()).hexdigest()
    except Exception as e:
        logger.exception("Generate Game error")
# Function to convert the betting odds to integers while handling the signs
def convert_to_int(value):
    try:
        if value == 'EVEN':
            return 0
        if value.startswith('+'):
            return int(value[1:])
        elif value.startswith('-'):
            return int(value)
        else:
            return int(value)
    except Exception as e:
        logger.exception("Convert to int error")
        return -1
    
def concat_values(x, y, z=None):
    if z:
        return f"{x} {y} {z}"
    return f"{x} {y}"

In [4]:
# Configure ChromeOptions for headless browsing
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-extensions")
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")  # This line can be important in certain environments
# Initialize the Chrome WebDriver with the specified options
driver = webdriver.Chrome(options=options)

driver.get("https://www.bovada.lv/sports/football/nfl")

# wait for the page to load
time.sleep(10)
driver.implicitly_wait(10)
# get the HTML source
html = driver.page_source

# create a BeautifulSoup object
soup = BeautifulSoup(html, "html.parser")

# close the driver
driver.quit()

data = []
sections = soup.find_all("section", {"class":"coupon-content more-info"})#soup.find_all("section", {"class":"coupon-content more-info"})
for game in sections:
    try:
        item = str(game).split('>')
        info = [x.split('<')[0].strip() for x in item if not x.startswith("<")]
        data.append(info)
    except Exception as e:
        logger.exception("get data section error")
        pass

df = pd.DataFrame(data)

df["Home Spread"] = df.apply(lambda row: concat_values(row[10], row[11]), axis=1)
df["Away Spread"] = df.apply(lambda row: concat_values(row[12], row[13]), axis=1)
df["total_home"] = df.apply(lambda row: concat_values(row[16], row[17], row[18]), axis=1)
df["total_away"] = df.apply(lambda row: concat_values(row[19], row[20], row[21]), axis=1)

df.drop(columns = [3, 4, 5, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22], inplace=True)
columns = ["date", "time", "bets", "home", "away", "Home Win", "Away Win", "Home Spread", "Away Spread", "Total Over", "Total Under"]
df.columns = columns


df['bets'] = df['bets'].apply(lambda x: x[2:])
df["date"] = pd.to_datetime(df["date"], format="%m/%d/%y")
df["day"] = df['date'].dt.strftime('%A')
df.reset_index(inplace=True, drop=True)

# Applying the conversion to the 'win_home' and "Away Win" columns
df['Home Win'] = df['Home Win'].apply(convert_to_int)
df["Away Win"] = df["Away Win"].apply(convert_to_int)
#ranking
home = df[["home", 'Home Win']].rename(columns={'home': 'team', 'Home Win': 'odds'})
away = df[['away', "Away Win"]].rename(columns={'away': 'team', "Away Win": 'odds'})

combined = pd.concat([home, away]).sort_values('odds', ascending=False)
combined['index'] = combined.index
combined.index = range(0, 2*len(combined), 2)
df['points'] = None

# Iterating over the combined DataFrame to assign ranks
for i, x in combined.iterrows():
    df.at[x['index'], 'points'] = (i-len(combined))/2

current_df = df.sort_values('points', ascending=False)
current_df["game_id"] = current_df.apply(generate_game_id, axis=1)
current_df = current_df[['date', 'day', 'time', 'bets', 'home', 'away', 'points', 'Home Win', 'Away Win', 'Home Spread', 'Away Spread', 'Total Over', 'Total Under', 'game_id']]
                        

In [5]:
current_df

Unnamed: 0,date,day,time,bets,home,away,points,Home Win,Away Win,Home Spread,Away Spread,Total Over,Total Under,game_id
15,2023-12-11,Monday,8:15 PM,55,Tennessee Titans,Miami Dolphins,15.0,525,-800,+13.5 (-110),-13.5 (-110),O 47.0 (-110),U 47.0 (-110),230657f3a19e1132fb1a1c786dc9ad58
10,2023-12-10,Sunday,4:05 PM,53,Seattle Seahawks,San Francisco 49ers,14.0,430,-625,+10.5 (-105),-10.5 (-115),O 46.0 (-115),U 46.0 (-105),3d6b45fbd217de191d4551b68cdce15d
0,2023-12-04,Monday,8:15 PM,252,Cincinnati Bengals,Jacksonville Jaguars,13.0,355,-490,+10.0 (-115),-10.0 (-105),O 39.5 (-115),U 39.5 (-105),90ee7e9c76168a6fdeeaa7205bdfddf4
7,2023-12-10,Sunday,1:00 PM,55,Los Angeles Rams,Baltimore Ravens,12.0,270,-340,+7.0 (-105),-7.0 (-115),O 43.5 (-110),U 43.5 (-110),255a794f0ceb280c3899bd55d685e8f2
14,2023-12-11,Monday,8:15 PM,3,Green Bay Packers,New York Giants,11.0,-320,260,-6.5 (-110),+6.5 (-110),O 37.0 (-110),U 37.0 (-110),80ef7e5522759e0c8764cba33054aa13
1,2023-12-07,Thursday,8:15 PM,57,New England Patriots,Pittsburgh Steelers,10.0,225,-275,+6.0 (-110),-6.0 (-110),O 30.5 (-110),U 30.5 (-110),9ca8d84b58f5443b657fa697023b27c5
4,2023-12-10,Sunday,1:00 PM,55,Houston Texans,New York Jets,9.0,-270,220,-6.0 (-110),+6.0 (-110),O 35.5 (-115),U 35.5 (-105),b4230f41aff0b78341ff65a858563083
2,2023-12-10,Sunday,1:00 PM,55,Carolina Panthers,New Orleans Saints,8.0,200,-240,+5.0 (-110),-5.0 (-110),O 38.0 (-110),U 38.0 (-110),cc5353411b6ce893c5048de614a3e510
13,2023-12-10,Sunday,8:20 PM,55,Philadelphia Eagles,Dallas Cowboys,7.0,155,-180,+3.5 (-115),-3.5 (-105),O 52.5 (-110),U 52.5 (-110),c0a4aa479cb6038509b8dd1737a9811b
3,2023-12-10,Sunday,1:00 PM,55,Detroit Lions,Chicago Bears,6.0,-175,150,-3.0 (-120),+3.0 (EVEN),O 43.0 (-110),U 43.0 (-110),46fc51c470efdb43c2e0e9b51c8b1f71


In [6]:
current_df[["date", "game_id", "home", "away", "Home Win", "Away Win", "points"]]

Unnamed: 0,date,game_id,home,away,Home Win,Away Win,points
15,2023-12-11,230657f3a19e1132fb1a1c786dc9ad58,Tennessee Titans,Miami Dolphins,525,-800,15.0
10,2023-12-10,3d6b45fbd217de191d4551b68cdce15d,Seattle Seahawks,San Francisco 49ers,430,-625,14.0
0,2023-12-04,90ee7e9c76168a6fdeeaa7205bdfddf4,Cincinnati Bengals,Jacksonville Jaguars,355,-490,13.0
7,2023-12-10,255a794f0ceb280c3899bd55d685e8f2,Los Angeles Rams,Baltimore Ravens,270,-340,12.0
14,2023-12-11,80ef7e5522759e0c8764cba33054aa13,Green Bay Packers,New York Giants,-320,260,11.0
1,2023-12-07,9ca8d84b58f5443b657fa697023b27c5,New England Patriots,Pittsburgh Steelers,225,-275,10.0
4,2023-12-10,b4230f41aff0b78341ff65a858563083,Houston Texans,New York Jets,-270,220,9.0
2,2023-12-10,cc5353411b6ce893c5048de614a3e510,Carolina Panthers,New Orleans Saints,200,-240,8.0
13,2023-12-10,c0a4aa479cb6038509b8dd1737a9811b,Philadelphia Eagles,Dallas Cowboys,155,-180,7.0
3,2023-12-10,46fc51c470efdb43c2e0e9b51c8b1f71,Detroit Lions,Chicago Bears,-175,150,6.0


In [37]:
import json
import pandas as pd

def load_historical_data():
    plot_data = []
    with open('data_log.jsonl', 'r') as file:
        for line in file:
            entry = json.loads(line)
            datetime = entry['datetime']
            data = entry['data']
            for index, game_id in data['game_id'].items():
                home_team = data['home'][index]
                away_team = data['away'][index]
                home_win = data['Home Win'][index]
                away_win = data['Away Win'][index]

                # Determine which team has the lower win odds
                if home_win < away_win:
                    # Home team has lower odds, so it gets positive points
                    home_points = data['points'][index]
                    away_points = -data['points'][index]
                else:
                    # Away team has lower odds, so it gets positive points
                    home_points = -data['points'][index]
                    away_points = data['points'][index]

                plot_data.append({
                    'DateTime': datetime,
                    'Team': home_team,
                    'Win': home_win,
                    'Type': 'Home Win',
                    'points': home_points
                })
                plot_data.append({
                    'DateTime': datetime,
                    'Team': away_team,
                    'Win': away_win,
                    'Type': 'Away Win',
                    'points': away_points
                })
    df = pd.DataFrame(plot_data)
    return df


In [38]:
df = load_historical_data()
df['DateTime'] = pd.to_datetime(df['DateTime'])

# Getting the latest entry for each team
latest_entries = df.sort_values(by='DateTime').groupby('Team').last().reset_index()

# Sorting these entries by 'points'
sorted_teams = latest_entries.sort_values(by='points', ascending=False)['Team']

fig = px.line(df, x='DateTime', y='points', color='Team', line_group='Type')

# Reordering the legend
fig.update_layout(legend={'traceorder': 'normal'})
fig.data = tuple(sorted(fig.data, key=lambda trace: sorted_teams.tolist().index(trace.name)))

# Show the figure
fig.show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [27]:
import plotly.express as px
px.line(sorted_teams, x='DateTime', y='points', color='Team', line_group='Type')

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['Team'] but received: DateTime

In [17]:
with open('data_log.jsonl', 'r') as file:
    for line in file:
        entry = json.loads(line)
        print(entry['data']['points'])

{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'15': 15.0, '10': 14.0, '0': 13.0, '7': 12.0, '14': 11.0, '1': 10.0, '4': 9.0, '2': 8.0, '13': 7.0, '3': 6.0, '9': 5.0, '6': 4.0, '12': 3.0, '8': 2.0, '11': 1.0, '5': 0.0}
{'15': 15.0, '10': 14.0, '0': 13.0, '7': 12.0, '14': 11.0, '1': 10.0, '4': 9.0, '2': 8.0, '13': 7.0, '3': 6.0, '9': 5.0, '6': 4.0, '12': 3.0, '8': 2.0, '11': 1.0, '5': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{}
{'14': 14.0, '9': 13.0, '6': 12.0, '13': 11.0, '0': 10.0, '3': 9.0, '1': 8.0, '12': 7.0, '2': 6.0, '8': 5.0, '5': 4.0, '7': 3.0, '10': 2.0, '11': 1.0, '4': 0.0}
{'14': 14.0, '9': 13.0, '6': 12.0, '13': 11.0, '0': 10.0, '3': 9.0, '1': 8.0, '12': 7.0, '2': 6.0, '8': 5.0, '5': 4.0, '7': 3.0, '10': 2.0, '11': 1.0, '4': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{'0': 0.0}
{}
{'14': 14.0, '9': 13.0, '6': 12.0, '13': 11.0, '3': 10.0, '0':

In [42]:
def load_historical_data():
    plot_data = []
    with open('data_log.jsonl', 'r') as file:
        for line in file:
            entry = json.loads(line)
            datetime = entry['datetime']
            data = entry['data']
            for index, game_id in data['game_id'].items():
                home_team = data['home'][index]
                away_team = data['away'][index]
                home_win = data['Home Win'][index]
                away_win = data['Away Win'][index]

                # Determine which team has the lower win odds
                if home_win < away_win:
                    # Home team has lower odds, so it gets positive points
                    home_points = data['points'][index]
                    away_points = -data['points'][index]
                else:
                    # Away team has lower odds, so it gets positive points
                    home_points = -data['points'][index]
                    away_points = data['points'][index]

                plot_data.append({
                    'DateTime': datetime,
                    'Team': home_team,
                    'Win': home_win,
                    'Type': 'Home Win',
                    'points': home_points
                })
                plot_data.append({
                    'DateTime': datetime,
                    'Team': away_team,
                    'Win': away_win,
                    'Type': 'Away Win',
                    'points': away_points
                })
    df = pd.DataFrame(plot_data)
    return df

In [43]:
load_historical_data()

Unnamed: 0,DateTime,Team,Win,Type,points
0,2023-12-05T12:32:09.355434,Tennessee Titans,575,Home Win,-14.0
1,2023-12-05T12:32:09.355434,Miami Dolphins,-900,Away Win,14.0
2,2023-12-05T12:32:09.355434,Seattle Seahawks,425,Home Win,-13.0
3,2023-12-05T12:32:09.355434,San Francisco 49ers,-600,Away Win,13.0
4,2023-12-05T12:32:09.355434,Los Angeles Rams,270,Home Win,-12.0
5,2023-12-05T12:32:09.355434,Baltimore Ravens,-340,Away Win,12.0
6,2023-12-05T12:32:09.355434,Green Bay Packers,-310,Home Win,11.0
7,2023-12-05T12:32:09.355434,New York Giants,255,Away Win,-11.0
8,2023-12-05T12:32:09.355434,Houston Texans,-300,Home Win,10.0
9,2023-12-05T12:32:09.355434,New York Jets,250,Away Win,-10.0
