In [55]:
from logger import setup_logger
import pandas as pd
import json
import plotly.express as px
import plotly.graph_objects as go
import hashlib
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import time
import random
import sqlite3
from utils.helper_functions import *
from datetime import datetime, timedelta

def get_data(start_date, end_date):

    # Configure ChromeOptions for headless browsing
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")  # This line can be important in certain environments
    options.set_capability('goog:loggingPrefs', {'browser': 'SEVERE'})
    # Initialize the Chrome WebDriver with the specified options
    driver = webdriver.Chrome(options=options)
    driver.get("https://www.bovada.lv/sports/football/nfl")
    # wait for the page to load
    time.sleep(10)
    driver.implicitly_wait(10)
    # get the HTML source
    html = driver.page_source
    # create a BeautifulSoup object
    soup = BeautifulSoup(html, "html.parser")
    # close the driver
    driver.quit()

    data = []
    sections = soup.find_all("section", {"class":"coupon-content more-info"})#soup.find_all("section", {"class":"coupon-content more-info"})
    for game in sections:
        try:
            item = str(game).split('>')
            info = [x.split('<')[0].strip() for x in item if not x.startswith("<")]
            data.append(info)
        except Exception as e:
            pass

    df = pd.DataFrame(data)

    df["Home Spread"] = df.apply(lambda row: concat_values(row[10], row[11]), axis=1)
    df["Away Spread"] = df.apply(lambda row: concat_values(row[12], row[13]), axis=1)
    df["total_home"] = df.apply(lambda row: concat_values(row[16], row[17], row[18]), axis=1)
    df["total_away"] = df.apply(lambda row: concat_values(row[19], row[20], row[21]), axis=1)
    #drop columns
    df.drop(columns = [3, 4, 5, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22], inplace=True)
    columns = ["date", "time", "bets", "home_team", "away_team", "home_win", "away_win", "home_spread", "away_spread", "total_over", "total_under"]
    df.columns = columns

    #remove plus from bets
    df['bets'] = df['bets'].apply(lambda x: x[2:])

    #date operations
    #filter data for date
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, '%Y-%m-%d')  # Adjust the format if needed
    if isinstance(end_date, str):
        end_date = datetime.strptime(end_date, '%Y-%m-%d')  # Adjust the format if needed
        # Ensure the 'date' column in df is of type datetime.date
    
    # Ensure the 'date' column in df is of type datetime
    df['date'] = pd.to_datetime(df['date'])

    df = df[(df['date'] >= start_date) & (df['date'] <= end_date)]
    #create day of the week column
    df["day"] = df['date'].dt.strftime('%A')
    #set back to string
    df['date'] = df['date'].dt.strftime('%Y-%m-%d')
    df.reset_index(inplace=True, drop=True)

    # Applying the conversion to the 'win_home' and "Away Win" columns
    df['home_win'] = df['home_win'].apply(convert_to_int)
    df["away_win"] = df["away_win"].apply(convert_to_int)
    #ranking
    home = df[["home_team", 'home_win']].rename(columns={'home_team': 'team', 'home_win': 'odds'})
    away = df[['away_team', "away_win"]].rename(columns={'away_team': 'team', "away_win": 'odds'})
    combined = pd.concat([home, away]).sort_values('odds', ascending=False)
    combined['index'] = combined.index
    combined.index = range(0, 2*len(combined), 2)
    df['points'] = None
    # Iterating over the combined DataFrame to assign ranks
    for i, x in combined.iterrows():
        df.at[x['index'], 'points'] = (i-len(combined))/2+1
    current_df = df.sort_values('points', ascending=False)
    #add game id
    current_df["game_id"] = current_df.apply(generate_game_id, axis=1)
    #change column order
    current_df = current_df[['date', 'day', 'time', 'bets', 'home_team', 'away_team', 'points', 'home_win', 'away_win', 'home_spread', 'away_spread', 'total_over', 'total_under', 'game_id']]
    log_data = current_df[['game_id', 'date', 'home_team', 'away_team', 'home_win', 'away_win', 'points']]
    log_data_if_changed(log_data)

    return current_df

def generate_matchups(df):
    # Ensure DateTime is properly formatted
    df['DateTime'] = pd.to_datetime(df['date'])

    # Sort the DataFrame by DateTime to get matchups from soonest to latest
    sorted_df = df.sort_values(by='DateTime')

    # Prepare data for the DataTable
    matchups_data = []

    team_conversion_dict = {
    "Houston Texans": "HOU",
    "New York Jets": "NYJ",
    "Denver Broncos": "DEN",
    "Baltimore Ravens": "BAL",
    "Jacksonville Jaguars": "JAX",
    "Philadelphia Eagles": "PHI",
    "New Orleans Saints": "NO",
    "Carolina Panthers": "CAR",
    "Las Vegas Raiders": "LV",
    "Cincinnati Bengals": "CIN",
    "Miami Dolphins": "MIA",
    "Buffalo Bills": "BUF",
    "Indianapolis Colts": "IND",
    "Minnesota Vikings": "MIN",
    "Washington Commanders": "WSH",
    "New York Giants": "NYG",
    "Detroit Lions": "DET",
    "Green Bay Packers": "GB",
    "New England Patriots": "NE",
    "Tennessee Titans": "TEN",
    "Dallas Cowboys": "DAL",
    "Atlanta Falcons": "ATL",
    "Chicago Bears": "CHI",
    "Arizona Cardinals": "ARI",
    "Los Angeles Chargers": "LAC",
    "Cleveland Browns": "CLE",
    "Los Angeles Rams": "LAR",
    "Seattle Seahawks": "SEA",
    "Tampa Bay Buccaneers": "TB",
    "Kansas City Chiefs": "KC"
    }

    for _, row in sorted_df.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']
        points = row['points']

        # Determine the favored team
        projected_winner = home_team if row['home_win'] < row['away_win'] else away_team

        # Add row data
        matchups_data.append({
            "game_id": f"{team_conversion_dict[home_team]}{team_conversion_dict[away_team]}",
            "matchup": f"{home_team} vs {away_team}",
            "time": row['DateTime'].strftime('%H:%M %p'),
            "projected_winner": projected_winner,
            "ranking": points
        })

    return matchups_data

def get_espn_expert_data():
    # Function to transform the game string
    def transform_game(game):
        teams = game.split(' at ')
        return teams[0] + teams[1]
    try:
        options = Options()
        options.add_argument("--headless")
        options.add_argument("--disable-extensions")
        options.add_argument("--disable-gpu")
        options.add_argument("--no-sandbox")  # This line can be important in certain environments
        options.set_capability('goog:loggingPrefs', {'browser': 'SEVERE'})
        # Initialize the Chrome WebDriver with the specified options
        driver = webdriver.Chrome(options=options)
        driver.get("https://www.espn.com/nfl/picks")
        #time.sleep(10)
        driver.implicitly_wait(10)
        # get the HTML source
        html = driver.page_source
        # create a BeautifulSoup object
        soup = BeautifulSoup(html, "html.parser")
        # close the driver
        driver.quit()

        week = soup.find('h1', class_='headline headline__h1 dib').get_text(strip=True).split('- ')[1]

        # Extract game details
        games = []
        game_rows = soup.select('.Table--fixed-left .Table__TBODY .Table__TR')
        for row in game_rows:
            game_info_element = row.select_one('.wrap-competition a')
            game_time_element = row.select_one('.competition-dates')
            if game_info_element and game_time_element:
                game_info = game_info_element.text
                game_time = game_time_element.text
                games.append((game_info, game_time))

        # Extract expert names
        experts = []
        expert_headers = soup.select('.Table__Scroller .Table__THEAD .Table__TH')
        for header in expert_headers:
            expert_name_element = header.select_one('div')
            if expert_name_element:
                expert_name = expert_name_element.text.strip()
                experts.append(expert_name)

        # Extract picks
        picks = []
        pick_rows = soup.select('.Table__Scroller .Table__TBODY .Table__TR')
        for row in pick_rows:
            pick_row = []
            pick_cells = row.select('.Table__TD')
            for cell in pick_cells:
                team_logo = cell.select_one('img')
                if team_logo:
                    # Extract the team abbreviation from the image URL
                    team = team_logo['src'].split('/')[-1].split('.')[0]
                else:
                    team = None
                pick_row.append(team)
            picks.append(pick_row)

        # Create DataFrame
        data = {'Game': [game[0] for game in games], 'Time': [game[1] for game in games]}
        for i, expert in enumerate(experts):
            data[expert] = [pick[i] for pick in picks]

        data['Game'].append(None)
        data['Time'].append(None)

        df = pd.DataFrame(data)
        df.dropna(subset=["Game"], inplace=True)

        df['week'] = week

        convert_dict = {
            "min": "Vikings", "phi": "Eagles", "bal": "Ravens", "det": "Lions", "mia": "Dolphins",
            "nyj": "Jets", "atl": "Falcons", "gb": "Packers", "hou" : "Texans", "lac": "Chargers",
            "buf": "Bills", "den": "Broncos", "kc": "Chiefs", "chi": "Bears", "sf": "49ers", "pit": "Steelers",
            "no": "Saints", "cin": "Bengals", "ne": "Patriots", "wsh": "Commanders", "ari": "Cardinals", 
            "lar": "Rams"
        }

        for ix, row in df.iterrows():
            values = row.to_list()[2:]
            values = [value for value in values if value is not None]
            values_len = len(values)
            values_dict = {}
            for value in values:
                if value not in values_dict.keys():
                    values_dict[value] = 1
                else:
                    values_dict[value] += 1
            #sorting
            values_dict = dict(sorted(values_dict.items(), key=lambda item: item[1], reverse=True))
            top_key = next(iter(values_dict))
            if top_key in convert_dict:
                converted_key = convert_dict[top_key]
            else:
                converted_key = top_key
            pct = int(values_dict[top_key]/values_len*100)
            message = f"{pct}% of experts chose {converted_key}"
            df.loc[ix, "pct"] = pct
            df.loc[ix, "message"] = message

        df["game_id"] = df["Game"].apply(transform_game)
        return df[["game_id", "week", "Game", "Time", "pct", "message"]]
    except Exception as e:
        logger.exception("get espn data")

In [53]:
today = datetime.now()
weekday = today.weekday()  # Monday is 0 and Sunday is 6

# Calculate the start date (Tuesday)
if weekday >= 1:  # If today is Tuesday or after
    start_date = today - timedelta(days=(weekday - 1))
else:  # If today is before Tuesday
    start_date = today - timedelta(days=(weekday + 6))

# Calculate the end date (Monday)
if weekday <= 0:  # If today is Monday
    end_date = today
else:  # If today is after Monday
    end_date = today + timedelta(days=(7 - weekday))

bovada_df = get_data(start_date, end_date)

In [54]:
bovada_df

Unnamed: 0,date,day,time,bets,home_team,away_team,points,home_win,away_win,home_spread,away_spread,total_over,total_under,game_id
2,2024-11-03,Sunday,1:00 PM,200,Denver Broncos,Baltimore Ravens,15.0,330,-440,+9.0 (-110),-9.0 (-110),O 46.0 (-110),U 46.0 (-110),dee922dd9d37ca61e2624d39fad58020
14,2024-11-04,Monday,8:15 PM,175,Tampa Bay Buccaneers,Kansas City Chiefs,14.0,320,-430,+8.5 (-110),-8.5 (-110),O 45.5 (-110),U 45.5 (-110),4768d238132aa5ce57a99eb5980feae9
10,2024-11-03,Sunday,4:05 PM,195,Jacksonville Jaguars,Philadelphia Eagles,13.0,285,-370,+7.5 (-110),-7.5 (-110),O 45.5 (-110),U 45.5 (-110),b8eed70630acf144be31d461e052311e
7,2024-11-03,Sunday,1:00 PM,177,New Orleans Saints,Carolina Panthers,12.0,-360,280,-7.5 (-110),+7.5 (-110),O 43.0 (-110),U 43.0 (-110),1d638b0b7f931903cf9f2b2bda275ec4
3,2024-11-03,Sunday,1:00 PM,202,Las Vegas Raiders,Cincinnati Bengals,11.0,270,-340,+7.0 (-110),-7.0 (-110),O 46.5 (-110),U 46.5 (-110),0b2256621b3719813da269c7b14a0e9a
5,2024-11-03,Sunday,1:00 PM,216,Miami Dolphins,Buffalo Bills,10.0,220,-270,+6.0 (-110),-6.0 (-110),O 49.0 (-115),U 49.0 (-105),0244b831e50a0bc34622ff46ab3350f0
13,2024-11-03,Sunday,8:20 PM,195,Indianapolis Colts,Minnesota Vikings,9.0,210,-250,+5.5 (-105),-5.5 (-115),O 47.0 (-105),U 47.0 (-115),daed56d5e0298a70ef18179c12cafa9c
8,2024-11-03,Sunday,1:00 PM,209,Washington Commanders,New York Giants,8.0,-200,170,-4.0 (-110),+4.0 (-110),O 44.5 (-110),U 44.5 (-110),a16b463fa2d8f654803ed6d20787ea86
11,2024-11-03,Sunday,4:25 PM,188,Detroit Lions,Green Bay Packers,7.0,-185,160,-3.5 (-105),+3.5 (-115),O 48.0 (-110),U 48.0 (-110),de64607af58b848a21c0a297520e1972
6,2024-11-03,Sunday,1:00 PM,174,New England Patriots,Tennessee Titans,6.0,160,-185,+3.5 (-115),-3.5 (-105),O 37.5 (-115),U 37.5 (-105),e9bf462e398b6aa0c5de2d38c7044217


In [56]:
matchup_df = pd.DataFrame(generate_matchups(bovada_df)).sort_values("ranking", ascending=False)

In [57]:
matchup_df

Unnamed: 0,game_id,matchup,time,projected_winner,ranking
1,DENBAL,Denver Broncos vs Baltimore Ravens,00:00 AM,Baltimore Ravens,15.0
14,TBKC,Tampa Bay Buccaneers vs Kansas City Chiefs,00:00 AM,Kansas City Chiefs,14.0
2,JAXPHI,Jacksonville Jaguars vs Philadelphia Eagles,00:00 AM,Philadelphia Eagles,13.0
3,NOCAR,New Orleans Saints vs Carolina Panthers,00:00 AM,New Orleans Saints,12.0
4,LVCIN,Las Vegas Raiders vs Cincinnati Bengals,00:00 AM,Cincinnati Bengals,11.0
5,MIABUF,Miami Dolphins vs Buffalo Bills,00:00 AM,Buffalo Bills,10.0
6,INDMIN,Indianapolis Colts vs Minnesota Vikings,00:00 AM,Minnesota Vikings,9.0
7,WSHNYG,Washington Commanders vs New York Giants,00:00 AM,Washington Commanders,8.0
8,DETGB,Detroit Lions vs Green Bay Packers,00:00 AM,Detroit Lions,7.0
9,NETEN,New England Patriots vs Tennessee Titans,00:00 AM,Tennessee Titans,6.0


In [48]:
expert_df = get_espn_expert_data()

In [49]:
expert_df.sort_values("pct", ascending=False)


Unnamed: 0,game_id,week,Game,Time,pct,message
2,MIABUF,Week 9,MIA at BUF,Sun 1:00PM,88.0,88% of experts chose Bills
3,LVCIN,Week 9,LV at CIN,Sun 1:00PM,88.0,88% of experts chose Bengals
6,WSHNYG,Week 9,WSH at NYG,Sun 1:00PM,88.0,88% of experts chose Commanders
7,NOCAR,Week 9,NO at CAR,Sun 1:00PM,88.0,88% of experts chose Saints
8,DENBAL,Week 9,DEN at BAL,Sun 1:00PM,88.0,88% of experts chose Ravens
9,JAXPHI,Week 9,JAX at PHI,Sun 4:05PM,88.0,88% of experts chose Eagles
1,DALATL,Week 9,DAL at ATL,Sun 1:00PM,77.0,77% of experts chose Falcons
10,CHIARI,Week 9,CHI at ARI,Sun 4:05PM,77.0,77% of experts chose Cardinals
13,INDMIN,Week 9,IND at MIN,Sun 8:20PM,77.0,77% of experts chose Vikings
14,TBKC,Week 9,TB at KC,Mon 8:15PM,77.0,77% of experts chose Chiefs


In [61]:
merged_df = pd.merge(matchup_df, expert_df, on="game_id")
merged_df.drop(columns=["game_id", "time", "pct", "matchup"], inplace=True)
merged_df["IngestTime"] = datetime.now().strftime("%m/%d %H:%M")
merged_df = merged_df[["IngestTime", "week", "Game", "Time", "projected_winner", "ranking", "message"]]


In [63]:
merged_df["ranking"] = merged_df["ranking"]+1

In [64]:
merged_df

Unnamed: 0,IngestTime,week,Game,Time,projected_winner,ranking,message
0,10/30 22:17,Week 9,DEN at BAL,Sun 1:00PM,Baltimore Ravens,16.0,88% of experts chose Ravens
1,10/30 22:17,Week 9,TB at KC,Mon 8:15PM,Kansas City Chiefs,15.0,77% of experts chose Chiefs
2,10/30 22:17,Week 9,JAX at PHI,Sun 4:05PM,Philadelphia Eagles,14.0,88% of experts chose Eagles
3,10/30 22:17,Week 9,NO at CAR,Sun 1:00PM,New Orleans Saints,13.0,88% of experts chose Saints
4,10/30 22:17,Week 9,LV at CIN,Sun 1:00PM,Cincinnati Bengals,12.0,88% of experts chose Bengals
5,10/30 22:17,Week 9,MIA at BUF,Sun 1:00PM,Buffalo Bills,11.0,88% of experts chose Bills
6,10/30 22:17,Week 9,IND at MIN,Sun 8:20PM,Minnesota Vikings,10.0,77% of experts chose Vikings
7,10/30 22:17,Week 9,WSH at NYG,Sun 1:00PM,Washington Commanders,9.0,88% of experts chose Commanders
8,10/30 22:17,Week 9,DET at GB,Sun 4:25PM,Detroit Lions,8.0,66% of experts chose Lions
9,10/30 22:17,Week 9,NE at TEN,Sun 1:00PM,Tennessee Titans,7.0,55% of experts chose Patriots


: 