In [95]:
import pandas as pd
import requests
from zipfile import ZipFile
import json
from dotenv import load_dotenv
import os

load_dotenv()

ticketmaster_api_key = os.environ["TICKETMASTER_API_KEY"]
src_path = os.environ["src_path"]

In [50]:
def download_and_extract_matches_data():
    response_API = requests.get("https://www.thesportsdb.com/api/v1/json/3/searchteams.php?t=Arsenal") #https://api-football-standings.azharimm.site/leagues
    print(response_API.status_code)

    data = response_API.text
    #json.loads(data)
    print(data)

In [51]:
download_and_extract_matches_data()

200
{"teams":[{"idTeam":"133604","idESPN":"359","idAPIfootball":"42","intLoved":"8","strTeam":"Arsenal","strTeamAlternate":"Arsenal Football Club, AFC, Arsenal FC","strTeamShort":"ARS","intFormedYear":"1892","strSport":"Soccer","strLeague":"English Premier League","idLeague":"4328","strLeague2":"FA Cup","idLeague2":"4482","strLeague3":"EFL Cup","idLeague3":"4570","strLeague4":"UEFA Champions League","idLeague4":"4480","strLeague5":"","idLeague5":null,"strLeague6":"","idLeague6":null,"strLeague7":"","idLeague7":null,"strDivision":null,"idVenue":"15528","strStadium":"Emirates Stadium","strKeywords":"Gunners, Gooners","strRSS":"https:\/\/www.allarsenal.com\/feed\/","strLocation":"Holloway, London, England","intStadiumCapacity":"60338","strWebsite":"www.arsenal.com","strFacebook":"www.facebook.com\/Arsenal","strTwitter":"twitter.com\/arsenal","strInstagram":"instagram.com\/arsenal","strDescriptionEN":"Arsenal Football Club is a professional football club based in Islington, London, England

In [96]:
import sys
sys.path.append(src_path)

In [97]:
import openmeteo_requests
import requests_cache
from retry_requests import retry
from pathlib import Path
import json
import pandas as pd
"""
import sys
from pathlib import Path

# Add the src directory to the Python path
sys.path.append(str(Path.cwd().parent / 'src'))"""

from src.paths import RAW_DATA_weather_DIR

def download_and_extract_weather_data(start_date, end_date):
    """Downloads hourly weather data from the Open-Meteo API and saves it as a CSV

    Args:
        start_date (str): Start date in "YYYY-MM-DD" format.
        end_date (str): End date in "YYYY-MM-DD" format. 

    Returns:
        pd.DateFrame: A DataFrame containing hourly weather data with temperature values.
    """

    # Setup the Open-Meteo API client
    cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    # Make sure all required weather variables are listed here
    # The order of variables in hourly or daily is important to assign them correctly below
    url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
    params = {
        "latitude": 52.52,
        "longitude": 13.41,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ["temperature_2m"],
        "timeformat": "unixtime",
        "timezone": "Europe/London" 
    }

    try:
        # Feth data from the Open-Meteo API
        responses = openmeteo.weather_api(url, params=params)[0] # assuming single location

        # Process metadata
        print(f"Coordinates: {responses.Latitude()}°N {responses.Longitude()}°E")
        print(f"Elevation {responses.Elevation()} m asl")
        print(f"Timezone {responses.Timezone()} {responses.TimezoneAbbreviation()}")
        print(f"Timezone difference to GMT+0 {responses.UtcOffsetSeconds()} seconds")
        
        # Extract hourly data
        hourly = responses.hourly()
        hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()

        hourly_data = {"date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc="True"),
            end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc="True"),
            freq = pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        )}

        hourly_data["temperature_2m"] = hourly_temperature_2m

        # Convert to DataFrame and process timestamps
        hourly_dataframe  = pd.DataFrame(data=hourly_data)

        hourly_dataframe["date"] = pd.to_datetime(hourly_dataframe["date"]).dt.floor("h").dt.tz_localize(None)

        # Save to file
        file_path = RAW_DATA_weather_DIR/f"weather_data_{start_date}_to{end_date}.csv"
        hourly_dataframe.to_csv(file_path, index=False)
        print(f"Weather data to saved to {file_path}")

        return hourly_dataframe
    except Exception as e:
        print(f"Error downloading weather data : {e}")
        return pd.DataFrame()



In [98]:
weather_data = download_and_extract_weather_data('2024-01-01', "2024-01-07")
weather_data

Coordinates: 52.52000045776367°N 13.419998168945312°E
Elevation 38.0 m asl
Timezone b'Europe/London' None
Timezone difference to GMT+0 0 seconds
Error downloading weather data : 'WeatherApiResponse' object has no attribute 'hourly'


In [91]:
from src.paths import RAW_DATA_ticket_DIR
import requests

def download_and_extract_ticket_data(year:int, month:int, day:int):
    url = f"https://app.ticketmaster.com/discovery/v2/events.json?apikey={ticketmaster_api_key}"

    # Make GET Request
    try:
        response = requests.get(url)
        response.raise_for_status() # Raise HTTP Error for bad responses

        # Parse JSON Response
        data = response.json()

        # Save JSON File response to a file
        file_path = RAW_DATA_ticket_DIR / f"hourly_demand_{year}-{month:02d}-{day:02d}.json"
        with open(file_path, "w") as f:
            json.dump(data, f, indent=4)

        print(f"Data successfully fetched and saved to {file_path}")

        # Convert the response data to a DataFrame
        if 'response' in data and 'data' in data['response']:
            return pd.DataFrame(data['response']['data'])
        elif 'data' in data:
            return pd.DataFrame(data['data'])
        else:
            print("Unexpected data structure in API response.")
            return pd.DataFrame()
        
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return pd.DataFrame()


    

In [92]:
download_and_extract_ticket_data(2022, 12, 3)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/sanjaydevarajan/Desktop/ML Projects/PremierLeague_TicketDemand_Predictor/data/raw/ticket_raw_data/hourly_demand_2022-12-03.json'

In [90]:
requests_api = requests.get(f"https://app.ticketmaster.com/discovery/v2/venues.json?keyword=Old+Trafford&apikey={ticketmaster_api_key}")
requests_api.status_code
data = requests_api.text

json.loads(data)

{'_embedded': {'venues': [{'name': 'Old Trafford',
    'type': 'venue',
    'id': 'ZfqgVMyxjZBYPywzgVMyZa',
    'test': False,
    'url': 'https://www.eticketing.co.uk/tmsport',
    'locale': 'en-us',
    'timezone': 'Europe/London',
    'country': {'name': 'United Kingdom', 'countryCode': 'GB'},
    'upcomingEvents': {'_total': 0, '_filtered': 0},
    '_links': {'self': {'href': '/discovery/v2/venues/ZfqgVMyxjZBYPywzgVMyZa?locale=en-us'}}},
   {'name': 'Old Trafford Football Stadium',
    'type': 'venue',
    'id': 'Z598xZL4Z1FeF',
    'test': False,
    'url': 'https://www.ticketweb.co.uk/venue/old-trafford-football-stadium-manchester-tickets/oldtrafpih/901',
    'locale': 'en-us',
    'postalCode': 'M160RA',
    'timezone': 'Europe/London',
    'city': {'name': 'Manchester'},
    'country': {'name': 'United Kingdom', 'countryCode': 'GB'},
    'address': {'line1': 'Sir Matt Busby Way, Old Trafford'},
    'location': {'longitude': '-2.28834', 'latitude': '53.46179'},
    'upcomingEven

In [93]:
def merge_data(df1, df2):
    new_df = pd.merge(df1, df2, on="date", how="inner")
    return new_df



In [None]:
# Here merge all the dataframes, after extracting the data functions

In [None]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
