### Scraping/Downloading data from Various Sources

#### Lost Data Sources:
- ***FBref has changed its site, and scraping data from it is no longer possible***
- Tried multiple different sources, and corroborated this with reports online:
    - https://github.com/probberechts/soccerdata/issues/916

In [22]:
import soccerdata as sd

# Display the installed soccerdata version (last expression of the cell).
sd.__version__

'1.8.8'

In [None]:
import soccerdata as sd
import pandas as pd
from pathlib import Path

# Folder that receives every raw CSV written by this cell; created on demand.
OUT = Path("data/raw_csv")
OUT.mkdir(parents=True, exist_ok=True)

league = "ENG-Premier League"
season = 2024


def _preview_and_save(df, label, filename):
    """Print the shape of *df*, show its first rows, and write it to ``OUT / filename``.

    The index is written to the CSV on purpose (no ``index=False``).
    NOTE(review): soccerdata readers are documented to return frames whose
    index holds the identifying keys (league / season / game, and
    team / player for player stats) - confirm the exact levels per reader.
    Dropping the index would leave rows that cannot be identified.
    """
    print(f"{label}:", df.shape)
    display(df.head())
    df.to_csv(OUT / filename)


# FotMob
fm = sd.FotMob(leagues=league, seasons=season)
fm_sched = fm.read_schedule()
_preview_and_save(
    fm_sched,
    "FotMob schedule",
    f"fotmob__{league}__{season}__schedule.csv",
)

# Understat
us = sd.Understat(leagues=league, seasons=season)
us_player = us.read_player_season_stats()
_preview_and_save(
    us_player,
    "Understat player season",
    f"understat__{league}__{season}__player_season.csv",
)

us_team_match = us.read_team_match_stats()
_preview_and_save(
    us_team_match,
    "Understat team match",
    f"understat__{league}__{season}__team_match.csv",
)

In [27]:
# List the attribute names of `fm` that start with "read_".
list(filter(lambda name: name.startswith("read_"), dir(fm)))

['read_league_table',
 'read_leagues',
 'read_schedule',
 'read_seasons',
 'read_team_match_stats']

In [None]:
# Pull team-level match statistics through the existing `fm` reader, preview
# them, and save them under OUT.
fm_match_stats = fm.read_team_match_stats()
print("FotMob match_stats:", fm_match_stats.shape)
display(fm_match_stats.head())
# Write the index as well (no index=False).
# NOTE(review): soccerdata readers are documented to keep the identifying
# keys (league / season / game / ...) in the index - confirm the exact levels
# for this reader. Dropping the index would strip those keys from the CSV.
fm_match_stats.to_csv(OUT / f"fotmob__{league}__{season}__match_stats.csv")

In [1]:
import requests
import json
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Read the football-data.org API key from the FBO_APIKEY environment variable.
api_key = os.getenv("FBO_APIKEY")
if not api_key:
    # Make a missing key visible instead of silently passing None as the token.
    print("Warning: FBO_APIKEY is not set; sending the request without an auth token.")

uri = 'https://api.football-data.org/v4/matches'
headers = {'X-Auth-Token': api_key} if api_key else {}

# A timeout keeps the cell from hanging forever if the API does not respond.
response = requests.get(uri, headers=headers, timeout=30)
# Fail with a clear HTTP error (e.g. 403/429) rather than a KeyError on
# 'matches' when the API returns an error payload.
response.raise_for_status()

for match in response.json()['matches']:
    print(match)

{'area': {'id': 2077, 'name': 'Europe', 'code': 'EUR', 'flag': 'https://crests.football-data.org/EUR.svg'}, 'competition': {'id': 2001, 'name': 'UEFA Champions League', 'code': 'CL', 'type': 'CUP', 'emblem': 'https://crests.football-data.org/CL.png'}, 'season': {'id': 2454, 'startDate': '2025-09-16', 'endDate': '2026-05-30', 'currentMatchday': 7, 'winner': None}, 'id': 552001, 'utcDate': '2026-01-21T17:45:00Z', 'status': 'TIMED', 'matchday': 7, 'stage': 'LEAGUE_STAGE', 'group': None, 'lastUpdated': '2026-01-21T01:32:00Z', 'homeTeam': {'id': 611, 'name': 'Qarabağ Ağdam FK', 'shortName': 'Qarabağ Ağdam', 'tla': 'QAR', 'crest': 'https://crests.football-data.org/611.png'}, 'awayTeam': {'id': 19, 'name': 'Eintracht Frankfurt', 'shortName': 'Frankfurt', 'tla': 'SGE', 'crest': 'https://crests.football-data.org/19.png'}, 'score': {'winner': None, 'duration': 'REGULAR', 'fullTime': {'home': None, 'away': None}, 'halfTime': {'home': None, 'away': None}}, 'odds': {'msg': 'Activate Odds-Package in