In [1]:
from endpoints import SofaScore
from datetime import datetime, timedelta
from helpers import send_request, generate_date_list, generate_live_games, fetch_event_data
import pandas as pd
# import matplotlib.pyplot as plt


# Establish todays date
start_date = datetime.today().strftime('%Y-%m-%d')

# Establish an end date
end_date = '2023-01-01'

# Generate a list of dates
dates = generate_date_list(start_date,end_date)

# Create a list of endpoints from the list of dates and the Sofa base_endpoint
dated_endpoints = [SofaScore.BASE_ENDPOINT + date for date in dates]


In [2]:
from helpers import send_request, generate_date_list, generate_live_games, fetch_event_data

# store list of responses from the urls
responses = [response for response in await fetch_event_data(dated_endpoints) if response['events']]

# verify that the responses exist
print(responses[0])

{'events': [{'tournament': {'name': 'Premier League', 'slug': 'premier-league', 'category': {'name': 'England', 'slug': 'england', 'sport': {'name': 'Football', 'slug': 'football', 'id': 1}, 'id': 1, 'flag': 'england', 'alpha2': 'EN'}, 'uniqueTournament': {'name': 'Premier League', 'slug': 'premier-league', 'category': {'name': 'England', 'slug': 'england', 'sport': {'name': 'Football', 'slug': 'football', 'id': 1}, 'id': 1, 'flag': 'england', 'alpha2': 'EN'}, 'userCount': 0, 'id': 17, 'hasEventPlayerStatistics': True, 'crowdsourcingEnabled': False, 'hasPerformanceGraphFeature': False, 'displayInverseHomeAwayTeams': False}, 'priority': 599, 'id': 1}, 'season': {'name': 'Premier League 23/24', 'year': '23/24', 'editor': False, 'id': 52186}, 'roundInfo': {'round': 18}, 'customId': 'hsF', 'status': {'code': 100, 'description': 'Ended', 'type': 'finished'}, 'winnerCode': 3, 'homeTeam': {'name': 'Crystal Palace', 'slug': 'crystal-palace', 'shortName': 'Crystal Palace', 'sport': {'name': 'Fo

In [3]:
def extract_event_data(response_array):

    # Initialize an empty list to store the event data
    event_data = []

    for response in response_array:
        # Filter out events that are not postponed

        events = [event for event in response['events'] if event['status']['type'] not in ['postponed', 'canceled']]
        
        # Loop through each event
        for event in events:   
                    
            # sometime score dicts have multiple scores, so just take max value; they should be same. 
            score_key = max(event['homeScore'], key = event['homeScore'].get) if event['homeScore'] else None
            
            # if no score obj, then skip
            if score_key:
                # Create a dictionary to store the relevant data for each event
                struct = {
                    'tournament' : event['tournament']['name'],  # Name of the tournament
                    'region' : event['tournament']['category']['name'],  # Region of the tournament
                    'home_team' : event['homeTeam']['name'],  # Name of the home team
                    'away_team' : event['awayTeam']['name'],  # Name of the away team
                    'home_score': event['homeScore'][score_key],  # Current score of the home team
                    'away_score': event['awayScore'][score_key],  # Current score of the away team
                    'total'     : event['homeScore'][score_key] + event['awayScore'][score_key],  # Total score
                    'start_timeestamp' : event['startTimestamp'],  # Start timestamp of the event
                    # 'start_datetime' : convert_timestamp_to_est_date(event['startTimestamp'])  # Start datetime of the event in EST
                }
                
                # Append the dictionary to the list of event data
                event_data.append(struct)
    
    # Return the list of event data
    return event_data

In [4]:
event_data = extract_event_data(responses)

In [5]:
def calculate_winrate(event_data, threshold, **kwargs):
    """
    Credit to: https://github.com/rushcapital
    This function calculates the winrate of the events based on a given threshold.
    Parameters:
    event_data (list): A list of dictionaries containing event data.
    threshold (float): The threshold value to calculate the winrate.
    **kwargs: Additional filters to apply on the data.

    Returns:
    float: The calculated winrate.
    """

    # Convert the event data into a pandas DataFrame
    df = pd.DataFrame(event_data).drop_duplicates().reset_index(drop=True)
    
    # If there are additional filters, create a query string
    query = ' & '.join([f"{k} == '{v}'" for k, v in kwargs.items()]) if kwargs else None
    
    # Create the under and over queries based on the threshold and additional filters
    # The under query selects events where the total score is less than the threshold
    under_query = f"{query} & total < {threshold}" if query else f'total < {threshold}'
    
    # The over query selects events where the total score is more than the threshold
    over_query = f"{query} & total > {threshold}" if query else f'total > {threshold}'
    
    # Apply the under and over queries to the DataFrame
    under = df.query(under_query)
    over = df.query(over_query)
    
    # Calculate the winrate as the ratio of under events to the total number of events
    # If there are no events, the winrate is 0
    winrate = round(under.shape[0] / (under.shape[0] + over.shape[0]),3) if (under.shape[0] + over.shape[0]) != 0 else 0
    
    struct = {
        'total_games' : under.shape[0] + over.shape[0],
        'under_wins' : under.shape[0],
        'under_winrate' : winrate
    }

    return struct


In [6]:
# winrate_struct = calculate_winrate(event_data, threshold=3.5)
winrate_england_struct = calculate_winrate(event_data, threshold=3.5, region='England')
print(winrate_england_struct)


{'total_games': 3057, 'under_wins': 2087, 'under_winrate': 0.683}


In [8]:
def extract_regions(response_array):

        regions = []

        for response in response_array:
        # Filter out events that are not postponed

                events = [event for event in response['events'] if event['status']['type'] not in ['postponed', 'canceled']]
        
        # Loop through each event
                for event in events:
                        region = event['tournament']['category']['name']
                        if region in regions:
                                continue
                        else:
                                regions.append(region)
        
        return regions


list_of_regions = extract_regions(event_data)

for x in regions:
        print(x)

KeyError: 'events'