In [1]:
import requests
import json
import time
import os
from dotenv import load_dotenv
import pandas as pd



In [2]:
# Populate the process environment from a .env file (python-dotenv), then read
# the Sportradar API key from it. API_KEY is None when the variable is not set.
load_dotenv()
API_KEY = os.environ.get("SPORTRADAR_API_KEY")

In [3]:
# Base URL for the Sportradar Soccer API; every endpoint URL in this notebook
# is built by appending a path to it.
# The URL currently targets the "trial" access level -- adjust that segment
# when switching to a production key.
BASE_URL = "https://api.sportradar.com/soccer/trial/v4/en"

In [4]:
# Sportradar competition ID for the Premier League; used to look up its seasons.
PREMIER_LEAGUE_ID = "sr:competition:17"

In [5]:
# Data directories.
# NOTE(review): DATA_DIR is created relative to the current working directory,
# while RAW_DATA_DIR (where every CSV in this notebook is written) lives under
# the parent directory's "data" folder -- confirm both locations are intended.
DATA_DIR = "data"
RAW_DATA_DIR = os.path.join("..", DATA_DIR, "raw")

# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test followed by os.makedirs().
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(RAW_DATA_DIR, exist_ok=True)

In [6]:
#Function to fetch the latest Premier League (EPL) season
def get_latest_season():
    """Look up the most recent Premier League season known to the API.

    Requests the season list for ``PREMIER_LEAGUE_ID`` and treats the last
    entry of the ``seasons`` array as the latest season.

    Returns:
        str | None: The season ID (e.g. ``"sr:season:118689"``), or ``None``
        when the request fails, the API answers with a non-200 status, or the
        response carries no seasons.
    """
    url = f"{BASE_URL}/competitions/{PREMIER_LEAGUE_ID}/seasons.json?api_key={API_KEY}"
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f"❌ Failed to fetch season: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        print(f"❌ Failed to fetch season: {response.status_code}")
        return None

    seasons = response.json().get("seasons")
    if not seasons:
        # Missing key or empty list: indexing [-1] below would raise.
        print("❌ Key 'seasons' not found in the response.")
        return None

    # NOTE(review): assumes the API lists seasons oldest-first -- confirm.
    latest_season = seasons[-1]
    print(f"✅ Latest Season: {latest_season['name']} (ID: {latest_season['id']})")
    return latest_season['id']

In [7]:
#Function to fetch Arsenal's team ID
def get_teams(season_id):
    """Fetch a season's competitors, save them to CSV and return Arsenal's ID.

    All competitors are written to ``teams.csv`` inside ``RAW_DATA_DIR``,
    whether or not Arsenal is among them.

    Args:
        season_id: Sportradar season ID, e.g. ``"sr:season:118689"``.

    Returns:
        str | None: ID of the competitor whose name contains ``"Arsenal"``
        (the last one, should several match), or ``None`` when the request
        fails, the response has no ``season_competitors``, or no competitor
        matches.
    """
    # Pause before hitting the API; presumably a rate-limit guard -- TODO confirm.
    time.sleep(10)
    url = f"{BASE_URL}/seasons/{season_id}/competitors.json?api_key={API_KEY}"
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f"❌ Failed to fetch teams: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        print(f"❌ Failed to fetch teams: {response.status_code}")
        return None

    data = response.json()
    print("Raw JSON Response:")
    print(json.dumps(data, indent=4))  # Print nicely formatted JSON
    teams = data.get("season_competitors")  # Use the correct key
    if not teams:
        print("❌ Key 'season_competitors' not found in the response.")
        return None

    # Initialised up front so a season without Arsenal yields None instead of
    # an UnboundLocalError at the return statement.
    arsenal_id = None
    for team in teams:
        print(f"✅ Team: {team['name']} (ID: {team['id']})")
        if "Arsenal" in team["name"]:
            arsenal_id = team['id']

    pd.DataFrame(list(teams)).to_csv(os.path.join(RAW_DATA_DIR, "teams.csv"), index=False)  # Save to CSV in data dir

    if arsenal_id is None:
        print("❌ Arsenal not found among the season competitors.")
    return arsenal_id

In [8]:
#Function to fetch player statistics for Arsenal
def get_arsenal_players(team_id):
    """Fetch a competitor's squad, print it and save it to CSV.

    The squad is written to ``arsenal_players.csv`` inside ``RAW_DATA_DIR``.

    Args:
        team_id: Sportradar competitor ID of the team to look up.

    Returns:
        list | None: The ``players`` entries of the competitor profile, or
        ``None`` when the request fails or the response has no ``players`` key.
    """
    # Pause before hitting the API; presumably a rate-limit guard -- TODO confirm.
    time.sleep(10)
    url = f"{BASE_URL}/competitors/{team_id}/profile.json?api_key={API_KEY}"
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f"❌ Failed to fetch Arsenal squad: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        print(f"❌ Failed to fetch Arsenal squad: {response.status_code}")
        return None

    players = response.json().get("players")
    if players is None:
        # Report a missing key the same way the other fetchers do instead of
        # raising KeyError.
        print("❌ Key 'players' not found in the response.")
        return None

    print(f"✅ Arsenal Squad ({len(players)} players):")
    for player in players:
        print(f"  - {player['name']} (ID: {player['id']})")

    pd.DataFrame(list(players)).to_csv(os.path.join(RAW_DATA_DIR, "arsenal_players.csv"), index=False)  # Save to CSV
    return players

In [9]:
#Function to fetch match schedule for Arsenal
def get_arsenal_schedule(team_id):
    """Fetch a competitor's match schedule, preview it and save it to CSV.

    Every ``sport_event`` of the schedule is written to
    ``arsenal_schedule.csv`` inside ``RAW_DATA_DIR``; only the first five
    matches are printed.

    Args:
        team_id: Sportradar competitor ID of the team to look up.

    Returns:
        list | None: The ``sport_event`` entry of each schedule item, or
        ``None`` when the request fails or the response has no ``schedules``.
    """
    # Pause before hitting the API; presumably a rate-limit guard -- TODO confirm.
    time.sleep(10)
    url = f"{BASE_URL}/competitors/{team_id}/schedules.json?api_key={API_KEY}"
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f"❌ Failed to fetch Arsenal schedule: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        print(f"❌ Failed to fetch Arsenal schedule: {response.status_code}")
        return None

    data = response.json()
    print("\nRaw JSON Response (Arsenal Schedule):")
    print(json.dumps(data, indent=4))  # Print nicely formatted JSON
    schedules = data.get("schedules")  # Get the 'schedules' list
    if not schedules:
        print("❌ Key 'schedules' not found in the response.")
        return None

    matches = [schedule["sport_event"] for schedule in schedules]  # Extract 'sport_event' from each schedule
    print(f"✅ Arsenal Matches ({len(matches)} games):")
    for match in matches[:5]:  # Show first 5 matches
        print(
            f"  - {match['start_time']} | {match['competitors'][0]['name']} vs {match['competitors'][1]['name']}")

    # The CSV holds all matches, not just the five previewed above.
    pd.DataFrame(matches).to_csv(os.path.join(RAW_DATA_DIR, "arsenal_schedule.csv"), index=False)  # Save to CSV
    return matches

In [10]:
#Function to fetch Arsenal player statistics
def get_arsenal_stats(season_id, team_id):
    """Fetch a competitor's statistics for one season and save them to CSV.

    The response is written to ``arsenal_stats.csv`` inside ``RAW_DATA_DIR``.

    Args:
        season_id: Sportradar season ID.
        team_id: Sportradar competitor ID of the team to look up.

    Returns:
        dict | None: The decoded JSON response, or ``None`` when the request
        fails or the API answers with a non-200 status.
    """
    # Pause before hitting the API; presumably a rate-limit guard -- TODO confirm.
    time.sleep(10)
    url = f"{BASE_URL}/seasons/{season_id}/competitors/{team_id}/statistics.json?api_key={API_KEY}"
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f"❌ Failed to fetch Arsenal statistics: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        print(f"❌ Failed to fetch Arsenal statistics: {response.status_code}")
        return None

    data = response.json()
    print("✅ Arsenal Stats Retrieved Successfully")
    # NOTE(review): if the payload is a dict of nested dicts, pandas uses the
    # inner keys as the row index and index=False then drops those labels from
    # the CSV -- confirm the saved file keeps what downstream steps need.
    df = pd.DataFrame(data)
    df.to_csv(os.path.join(RAW_DATA_DIR, "arsenal_stats.csv"), index=False)  # Save to CSV
    return data

In [11]:
#Main function for data collection
def main():
    """Run the collection pipeline: season, then teams, then squad/schedule/stats.

    Stops quietly as soon as the season ID or Arsenal's team ID cannot be
    resolved, since every later request depends on them.
    """
    print("🔄 Fetching latest Premier League season...")
    current_season = get_latest_season()
    if current_season:
        print("\n🔄 Fetching Premier League teams...")
        club_id = get_teams(current_season)
        if club_id:
            print("\n🔄 Fetching Arsenal squad...")
            get_arsenal_players(club_id)

            print("\n🔄 Fetching Arsenal schedule...")
            get_arsenal_schedule(club_id)

            print("\n🔄 Fetching Arsenal statistics...")
            get_arsenal_stats(current_season, club_id)


# Start the pipeline only when this code is executed directly.
if __name__ == "__main__":
    main()

🔄 Fetching latest Premier League season...
✅ Latest Season: Premier League 24/25 (ID: sr:season:118689)

🔄 Fetching Premier League teams...
Raw JSON Response:
{
    "generated_at": "2025-03-27T17:54:12+00:00",
    "season_competitors": [
        {
            "id": "sr:competitor:3",
            "name": "Wolverhampton Wanderers",
            "short_name": "Wolverhampton",
            "abbreviation": "WOL",
            "gender": "male"
        },
        {
            "id": "sr:competitor:7",
            "name": "Crystal Palace",
            "short_name": "Crystal Palace",
            "abbreviation": "CRY",
            "gender": "male"
        },
        {
            "id": "sr:competitor:14",
            "name": "Nottingham Forest",
            "short_name": "Nottingham",
            "abbreviation": "NFO",
            "gender": "male"
        },
        {
            "id": "sr:competitor:17",
            "name": "Manchester City",
            "short_name": "Man City",
            "abbr