In [7]:
from logger import setup_logger
import pandas as pd
import json
import plotly.express as px
import plotly.graph_objects as go
import hashlib
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import time
import random

# Logger shared by every helper in this notebook; built by the project's
# setup_logger helper and keyed by this module's name.
logger = setup_logger(__name__)

# Anchor the reporting window on the moment the cell runs.
today = datetime.now()
weekday = today.weekday()  # 0 = Monday ... 6 = Sunday

# Days elapsed since the most recent Tuesday: 0 on a Tuesday, 6 on a Monday.
days_since_tuesday = (weekday - 1) % 7
# Days remaining until the closing Monday: 0 when today is already Monday.
days_until_monday = (7 - weekday) % 7

# The window runs from Tuesday through the following Monday, as plain dates.
start_date = (today - timedelta(days=days_since_tuesday)).date()
end_date = (today + timedelta(days=days_until_monday)).date()

def generate_game_id(row):
    """Derive an identifier for a game from its date and the two teams.

    The ``date``, ``home`` and ``away`` entries of ``row`` are joined with
    underscores and hashed with MD5; the hexadecimal digest is returned.

    Args:
        row: Any mapping-like object supporting ``row['date']``,
            ``row['home']`` and ``row['away']``.

    Returns:
        The 32-character hex digest, or ``None`` when anything goes wrong
        (the failure is logged with its traceback).
    """
    try:
        key = "{}_{}_{}".format(row['date'], row['home'], row['away'])
        digest = hashlib.md5(key.encode())
        return digest.hexdigest()
    except Exception:
        logger.exception("Generate Game error")
        return None

def convert_to_int(value):
    """Turn a moneyline string into an integer.

    Args:
        value: Odds text such as ``'+600'``, ``'-950'`` or ``'EVEN'``.

    Returns:
        ``0`` for ``'EVEN'``; otherwise the integer value of the text with a
        single leading ``'+'`` removed. If the text cannot be parsed (or is
        not a string), the error is logged and ``-1`` is returned.
    """
    try:
        if value == 'EVEN':
            return 0
        # Only a leading plus sign is stripped; a minus sign is part of the number.
        digits = value[1:] if value.startswith('+') else value
        return int(digits)
    except Exception:
        logger.exception("Convert to int error")
        return -1
    
def concat_values(x, y, z=None):
    """Join two or three values into one space-separated string.

    Args:
        x: First value.
        y: Second value.
        z: Optional third value; it is appended only when truthy, so
            ``None``, ``''`` and ``0`` are all left out.

    Returns:
        ``"x y"`` or ``"x y z"``.
    """
    if not z:
        return "{} {}".format(x, y)
    return "{} {} {}".format(x, y, z)


def _fetch_rendered_html(url):
    """Open ``url`` in headless Chrome and return the page source.

    The browser is shut down in a ``finally`` block, so a failed navigation
    or page-source read does not leave a Chrome process behind.
    """
    # Configure ChromeOptions for headless browsing
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")  # This line can be important in certain environments
    options.set_capability('goog:loggingPrefs', {'browser': 'SEVERE'})
    # Initialize the Chrome WebDriver with the specified options
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        # wait for the page to load
        time.sleep(10)
        driver.implicitly_wait(10)
        return driver.page_source
    finally:
        driver.quit()


def _section_text_fields(section):
    """Return the stripped text fragments sitting between the tags of one section."""
    chunks = str(section).split('>')
    # Chunks that start with '<' are bare tags; the rest are "text<next-tag".
    return [chunk.split('<')[0].strip() for chunk in chunks if not chunk.startswith("<")]


def get_data(start_date, end_date):
    """Scrape the Bovada NFL page and return one ranked row per listed game.

    Args:
        start_date: Start of the requested window; a date/datetime or a
            ``'YYYY-MM-DD'`` string.
        end_date: End of the requested window; same accepted forms.

    Returns:
        pandas.DataFrame sorted by ``points`` (descending) with the columns
        ``date, day, time, bets, home, away, points, Home Win, Away Win,
        Home Spread, Away Spread, Total Over, Total Under, game_id``.
        When the page yields no game sections, an empty frame with those
        columns is returned instead of failing on the positional columns.

    Raises:
        ValueError: If a string date does not match ``'%Y-%m-%d'``.

    NOTE(review): the date window is only logged and validated; the filter on
    the scraped dates is currently disabled, and the ``date`` / ``day``
    columns are overwritten with the placeholder strings ``'date'`` / ``'day'``
    (so ``game_id`` does not depend on the real date). Confirm whether that is
    intended before relying on those columns.
    """
    logger.info(f"fetching data for {start_date}-{end_date}")

    # Validate string dates before the slow browser round-trip.
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, '%Y-%m-%d')  # Adjust the format if needed
    if isinstance(end_date, str):
        end_date = datetime.strptime(end_date, '%Y-%m-%d')  # Adjust the format if needed

    html = _fetch_rendered_html("https://www.bovada.lv/sports/football/nfl")
    soup = BeautifulSoup(html, "html.parser")

    data = []
    for game in soup.find_all("section", {"class": "coupon-content more-info"}):
        try:
            data.append(_section_text_fields(game))
        except Exception:
            logger.exception("get data section error")

    final_columns = ['date', 'day', 'time', 'bets', 'home', 'away', 'points',
                     'Home Win', 'Away Win', 'Home Spread', 'Away Spread',
                     'Total Over', 'Total Under', 'game_id']
    if not data:
        # Nothing matched (page not loaded, markup changed, no games listed).
        logger.warning("no game sections found on the page")
        return pd.DataFrame(columns=final_columns)

    df = pd.DataFrame(data)

    # The fragments are addressed by position, so this layout is tied to the
    # page markup: spread pieces live in 10-13, total pieces in 16-21.
    df["Home Spread"] = df.apply(lambda row: concat_values(row[10], row[11]), axis=1)
    df["Away Spread"] = df.apply(lambda row: concat_values(row[12], row[13]), axis=1)
    df["total_home"] = df.apply(lambda row: concat_values(row[16], row[17], row[18]), axis=1)
    df["total_away"] = df.apply(lambda row: concat_values(row[19], row[20], row[21]), axis=1)
    # drop the raw pieces that were merged above plus the unused fragments
    df = df.drop(columns=[3, 4, 5, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22])
    df.columns = ["date", "time", "bets", "home", "away", "Home Win", "Away Win",
                  "Home Spread", "Away Spread", "Total Over", "Total Under"]

    # remove plus from bets (drops the first two characters of each value)
    df['bets'] = df['bets'].apply(lambda x: x[2:])

    # TODO(review): the window filter
    #   df[(df['date'] >= start_date) & (df['date'] <= end_date)]
    # is disabled, so both columns below are placeholders.
    df["day"] = 'day'
    df['date'] = 'date'
    df = df.reset_index(drop=True)

    # Moneylines as integers so they can be ordered.
    df['Home Win'] = df['Home Win'].apply(convert_to_int)
    df["Away Win"] = df["Away Win"].apply(convert_to_int)

    # Ranking: stack both sides of every game and order them by odds, highest first.
    home = df[["home", 'Home Win']].rename(columns={'home': 'team', 'Home Win': 'odds'})
    away = df[['away', "Away Win"]].rename(columns={'away': 'team', "Away Win": 'odds'})
    combined = pd.concat([home, away]).sort_values('odds', ascending=False)
    combined['index'] = combined.index  # remember which game each side belongs to
    # Re-label rows 0, 2, 4, ... so (i - len(combined)) / 2 equals the row's
    # position in the ordering minus the number of games.
    combined.index = range(0, 2 * len(combined), 2)
    df['points'] = None
    # Each game is written twice; the side appearing later in the ordering
    # (the lower odds) is the value that remains.
    for i, x in combined.iterrows():
        df.at[x['index'], 'points'] = (i - len(combined)) / 2
    current_df = df.sort_values('points', ascending=False)

    # add game id
    current_df["game_id"] = current_df.apply(generate_game_id, axis=1)
    # change column order
    return current_df[final_columns]


In [8]:
# Scrape the current odds; the Tuesday-to-Monday window computed above is passed through.
df = get_data(start_date, end_date)

In [9]:
# Show the frame returned by get_data (rows come back sorted by points, highest first).
df

Unnamed: 0,date,day,time,bets,home,away,points,Home Win,Away Win,Home Spread,Away Spread,Total Over,Total Under,game_id
9,date,day,4:05 PM,181,San Francisco 49ers,Arizona Cardinals,27.0,-950,600,-12.0 (-110),+12.0 (-110),O 48.5 (-110),U 48.5 (-110),d8dbf61c46d17b583ea28b2a563a4e35
26,date,day,4:30 PM,3,New York Giants,Philadelphia Eagles,26.0,360,-500,+10.5 (-110),-10.5 (-110),O 43.5 (-110),U 43.5 (-110),84dc0dd68b78e69f2fa106464d863c1c
25,date,day,1:00 PM,3,Las Vegas Raiders,Kansas City Chiefs,25.0,360,-500,+10.5 (-110),-10.5 (-110),O 42.0 (-110),U 42.0 (-110),7cadebd1adfcee11102f8301814d1b96
7,date,day,1:00 PM,171,New York Jets,Miami Dolphins,24.0,335,-450,+9.0 (-110),-9.0 (-110),O 36.5 (-115),U 36.5 (-105),42ce033e30b4d681daf8ba4a0b576830
5,date,day,1:00 PM,180,Kansas City Chiefs,New England Patriots,23.0,-420,315,-8.0 (-110),+8.0 (-110),O 37.5 (-110),U 37.5 (-110),5d74b63e52358a3f0e2912fe5b2223da
10,date,day,4:05 PM,184,Washington Commanders,Los Angeles Rams,22.0,250,-300,+6.5 (-110),-6.5 (-110),O 50.5 (-115),U 50.5 (-105),4c42f5e937b4d6a9f5f5128112d25d35
24,date,day,8:15 PM,3,New England Patriots,Denver Broncos,21.0,210,-250,+5.5 (-110),-5.5 (-110),O 36.0 (-110),U 36.0 (-110),97d77c1c90cee959e3a0cdc97a211232
6,date,day,1:00 PM,174,New York Giants,New Orleans Saints,20.0,205,-245,+5.5 (-110),-5.5 (-110),O 39.0 (-120),U 39.0 (EVEN),3e1a1d9e6b82f8fbf874ad1d54645680
2,date,day,8:15 PM,233,Denver Broncos,Detroit Lions,19.0,195,-235,+4.5 (-105),-4.5 (-115),O 48.0 (-110),U 48.0 (-110),36ab51f2916f5556a60e3e74a3f73896
17,date,day,1:00 PM,3,Green Bay Packers,Carolina Panthers,18.0,-230,190,-5.5 (-110),+5.5 (-110),O 36.0 (-110),U 36.0 (-110),14caada0dc6b529a73b9aee21e66b4a4
