### league list

In [5]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time

url = "https://www.fotmob.com"

# Setup browser
options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Remove this line if you want to see the browser
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

print("‚è≥ Loading page...")
driver.get(url)
time.sleep(5)

print("‚úÖ Page loaded.")
html = driver.page_source
print("HTML length:", len(html))

driver.quit()

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'html.parser')

main_content = soup.find('div', id='__next')
if main_content:
    print("‚úÖ Found main-content")
else:
    print("‚ùå Couldn't find main-content")


‚è≥ Loading page...
‚úÖ Page loaded.
HTML length: 692936
‚úÖ Found main-content


In [27]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time



In [28]:

def wait_and_click_leagues_button(driver):
    """Helper function to handle clicking the All leagues button"""
    try:
        # Wait for any loading overlays to disappear
        WebDriverWait(driver, 10).until_not(
            EC.presence_of_element_located((By.CLASS_NAME, "css-kabrls-SearchBoxCSS"))
        )
        
        # Find the button using different methods
        button_xpath = "//button[.//h2[contains(text(), 'All leagues')]]"
        button = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, button_xpath))
        )
        
        # Scroll to button
        driver.execute_script("arguments[0].scrollIntoView(true);", button)
        time.sleep(1)
        
        # Try multiple click methods
        try:
            # Try regular click
            button.click()
        except:
            try:
                # Try JavaScript click
                driver.execute_script("arguments[0].click();", button)
            except:
                # Try moving to element first
                from selenium.webdriver.common.action_chains import ActionChains
                actions = ActionChains(driver)
                actions.move_to_element(button).click().perform()
                
        time.sleep(2)  # Wait for animation
        return True
    except Exception as e:
        print(f"‚ùå Click failed: {str(e)}")
        return False

# Main scraping code
url = "https://www.fotmob.com"

options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("--window-size=1920,1080")
options.add_argument("--start-maximized")
# Add these options to handle overlays better
options.add_argument("--disable-notifications")
options.add_argument("--disable-popup-blocking")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

try:
    print("‚è≥ Loading page...")
    driver.get(url)
    
    # Wait for main content
    main_content = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "__next"))
    )
    print("‚úÖ Main content found")
    
    # Try to click the button
    if wait_and_click_leagues_button(driver):
        print("‚úÖ All leagues expanded")
        time.sleep(2)  # Wait for content to load
    else:
        print("‚ùå Could not expand leagues")
    
    html = driver.page_source
    
except Exception as e:
    print(f"‚ùå Error: {str(e)}")
    html = driver.page_source
finally:
    driver.quit()

# Parse the expanded HTML
soup = BeautifulSoup(html, 'html.parser')

# Verify content
main_content = soup.find('div', id='__next')
if main_content:
    print("‚úÖ Found main-content in parsed HTML")
    # Verify leagues are expanded
    leagues = main_content.find_all('div', class_='css-146405d-LeagueListContainerCSS')
    print(f"Found {len(leagues)} league containers")
else:
    print("‚ùå Couldn't find main-content in parsed HTML")

‚è≥ Loading page...
‚úÖ Main content found
‚ùå Click failed: Message: 

‚ùå Could not expand leagues
‚úÖ Found main-content in parsed HTML
Found 2 league containers


In [32]:

def scrape_all_leagues(soup):
    """
    Scrapes all leagues for each country from Fotmob
    """
    all_data = {
        "countries": []
    }
    
    # Find all country buttons/containers
    country_buttons = soup.find_all('button', class_='css-15d5019-LeagueListHeaderButton')
    
    for button in country_buttons:
        country_info = {
            'name': None,
            'code': None,
            'flag': None,
            'leagues': []
        }
        
        # Get country details
        button_text = button.find('div', class_='css-1ib7wom-ButtonText')
        if button_text:
            img = button_text.find('img')
            if img:
                country_info['name'] = button_text.find('span').text.strip()
                country_info['flag'] = img.get('src')
                country_info['code'] = img.get('src').split('/')[-1].split('.')[0]
        
        # Find the associated leagues container
        # It should be in a sibling div after the button
        leagues_container = button.find_next_sibling('div', class_='MuiCollapse-root')
        if leagues_container:
            league_links = leagues_container.find_all('a', class_='css-bka657-LeagueEntryCSS')
            for league in league_links:
                league_info = {
                    'name': league.find('span').text.strip() if league.find('span') else None,
                    'url': league.get('href'),
                    'logo': league.find('img').get('src') if league.find('img') else None
                }
                country_info['leagues'].append(league_info)
        
        # Only add countries that were successfully parsed
        if country_info['name']:
            all_data["countries"].append(country_info)
    
    return all_data

# Usage
leagues_data = scrape_all_leagues(soup)

# Print summary
print(f"‚úÖ Found {len(leagues_data['countries'])} countries")
total_leagues = sum(len(country['leagues']) for country in leagues_data['countries'])
print(f"‚úÖ Found {total_leagues} total leagues")

# Sample output of first country with its leagues
print("\nSample country data:")
if leagues_data['countries']:
    sample = leagues_data['countries'][0]
    print(json.dumps({
        'country': sample['name'],
        'leagues_count': len(sample['leagues']),
        'leagues': sample['leagues'][:10]  # Show first 2 leagues only
    }, indent=2))

# # Save to JSON
# with open("all_leagues_by_country.json", "w", encoding="utf-8") as f:
#     json.dump(leagues_data, f, indent=2, ensure_ascii=False)

‚úÖ Found 0 countries
‚úÖ Found 0 total leagues

Sample country data:


In [39]:
def scrape_leagues_from_file(html_content):
    """
    Scrapes all leagues from the HTML file content
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    
    all_data = {
        "countries": []
    }
    
    # Find all country sections
    country_buttons = soup.find_all('button', class_='css-15d5019-LeagueListHeaderButton')
    print(f"Found {len(country_buttons)} country buttons")
    
    for button in country_buttons:
        country_info = {
            'name': None,
            'code': None,
            'flag': None,
            'leagues': []
        }
        
        # Get country info from button
        button_text = button.find('div', class_='css-1ib7wom-ButtonText')
        if button_text:
            img = button_text.find('img')
            span = button_text.find('span')
            if img and span:
                country_info['name'] = span.text.strip()
                country_info['flag'] = img.get('src')
                country_info['code'] = img.get('src').split('/')[-1].split('.')[0]
        
        # Find associated leagues section
        # The leagues should be in a div after the button
        leagues_container = button.find_next_sibling('div')
        if leagues_container:
            league_links = leagues_container.find_all('a', class_='css-1m05q61-LeagueListItem')
            for link in league_links:
                name_span = link.select_one("span.css-1wv7cir-LeagueListItemText")
                league_info = {
                    'name': name_span.text.strip() if name_span else None,
                    'url': f"https://www.fotmob.com{link.get('href')}" if link.get('href', '').startswith('/') else link.get('href')
                }
                country_info['leagues'].append(league_info)
        
        # Only add countries that were successfully parsed and have leagues
        if country_info['name']:
            all_data["countries"].append(country_info)
    
    return all_data

# Read the HTML file
with open('allLeagues.txt', 'r', encoding='utf-8') as f:
    html_content = f.read()

# Parse leagues
leagues_data = scrape_leagues_from_file(html_content)

# Print summary
print(f"\n‚úÖ Found {len(leagues_data['countries'])} countries")
total_leagues = sum(len(country['leagues']) for country in leagues_data['countries'])
print(f"‚úÖ Found {total_leagues} total leagues")

# Sample output of first country
print("\nSample country data:")
if leagues_data['countries']:
    sample = leagues_data['countries'][0]
    print(json.dumps({
        'country': sample['name'],
        'code': sample['code'],
        'leagues_count': len(sample['leagues']),
        'leagues': sample['leagues'][:2]  # Show first 2 leagues only
    }, indent=2))

# # Save to JSON
# with open("all_leagues.json", "w", encoding="utf-8") as f:
#     json.dump(leagues_data, f, indent=2, ensure_ascii=False)

# print("\n‚úÖ Data saved to all_leagues.json")

Found 94 country buttons

‚úÖ Found 94 countries
‚úÖ Found 0 total leagues

Sample country data:
{
  "country": "International",
  "code": "int",
  "leagues_count": 0,
  "leagues": []
}


In [37]:
def parse_leagues_from_txt():
    """
    Parse international leagues from the text file and add to existing JSON
    """
    with open('leaguelist.txt', 'r', encoding='utf-8') as f:
        html_content = f.read()
    
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Load existing JSON
    with open('all_leagues.json', 'r', encoding='utf-8') as f:
        all_data = json.load(f)
    
    # Find International section in existing data
    international = next(
        (country for country in all_data['countries'] if country['name'] == 'International'),
        None
    )
    
    if not international:
        return
    
    # Parse leagues from the text content
    league_links = soup.find_all('a', class_='css-yx1h73-CountryLeagueEntryCSS')
    
    leagues = []
    for link in league_links:
        img = link.find('img')
        span = link.find('span')
        
        if span and img:
            league_info = {
                'name': span.text.strip(),
                'url': f"https://www.fotmob.com{link.get('href')}" if link.get('href', '').startswith('/') else link.get('href'),
                'logo': img.get('src'),
                'id': img.get('src', '').split('/')[-1].split('.')[0] if img.get('src') else None
            }
            leagues.append(league_info)
    
    # Update International leagues
    international['leagues'] = leagues
    
    # # Save updated JSON
    # with open('all_leagues.json', 'w', encoding='utf-8') as f:
    #     json.dump(all_data, f, indent=2, ensure_ascii=False)
    
    print(f"‚úÖ Added {len(leagues)} international leagues")
    print("\nSample leagues:")
    print(json.dumps(leagues[:3], indent=2))

# Run the update
parse_leagues_from_txt()

‚úÖ Added 98 international leagues

Sample leagues:
[
  {
    "name": "Champions League",
    "url": "https://www.fotmob.com/leagues/42/overview/champions-league",
    "logo": "https://images.fotmob.com/image_resources/logo/leaguelogo/dark/42.png",
    "id": "42"
  },
  {
    "name": "Europa League",
    "url": "https://www.fotmob.com/leagues/73/overview/europa-league",
    "logo": "https://images.fotmob.com/image_resources/logo/leaguelogo/dark/73.png",
    "id": "73"
  },
  {
    "name": "AFC Challenge League",
    "url": "https://www.fotmob.com/leagues/9470/overview/afc-challenge-league",
    "logo": "https://images.fotmob.com/image_resources/logo/leaguelogo/dark/9470.png",
    "id": "9470"
  }
]


In [33]:
soup.select("section.css-1v0mbdj-LeagueListSection")

[]

In [None]:
from bs4 import BeautifulSoup

# Load the uploaded file
file_path = "/mnt/data/allLeagues.txt"

with open(file_path, "r", encoding="utf-8") as f:
    html = f.read()

# Parse with BeautifulSoup
soup = BeautifulSoup(html, "html.parser")

# Find all country sections
country_sections = soup.select("section.css-1v0mbdj-LeagueListSection")

country_data = {}

for section in country_sections:
    # Extract country name
    header_button = section.select_one("button.css-15d5019-LeagueListHeaderButton")
    country_span = header_button.select_one("div.css-1ib7wom-ButtonText span") if header_button else None
    country_name = country_span.get_text(strip=True) if country_span else "Unknown"

    # Extract leagues inside the section
    leagues = []
    league_links = section.select("a.css-1m05q61-LeagueListItem")  # each league row
    for link in league_links:
        name_span = link.select_one("span.css-1wv7cir-LeagueListItemText")
        league_name = name_span.get_text(strip=True) if name_span else "Unnamed"
        league_href = link.get("href", "")
        leagues.append({
            "league_name": league_name,
            "league_url": f"https://www.fotmob.com{league_href}" if league_href.startswith("/") else league_href
        })

    if country_name != "Unknown" and leagues:
        country_data[country_name] = leagues

country_data.keys()  # Preview extracted country names as keys


In [25]:
main_content.find_all_next('div', class_='css-146405d-LeagueListContainerCSS')

[<div class="css-146405d-LeagueListContainerCSS enpfnav1"><div class="css-1djn4qn-LeagueListHeader enpfnav7"><h2 class="css-5f8hxj-LeagueListH2 enpfnav3">Top leagues</h2></div><div class="css-17janax-LeagueListGroupCSS enpfnav2"><a class="css-bka657-LeagueEntryCSS enpfnav4" href="/leagues/47/overview/premier-league"><img alt="" class="Image LeagueIcon ImageWithFallback" height="16" loading="lazy" src="https://images.fotmob.com/image_resources/logo/leaguelogo/dark/47.png" width="16"/><span>Premier League</span></a><a class="css-bka657-LeagueEntryCSS enpfnav4" href="/leagues/42/overview/champions-league"><img alt="" class="Image LeagueIcon ImageWithFallback" height="16" loading="lazy" src="https://images.fotmob.com/image_resources/logo/leaguelogo/dark/42.png" width="16"/><span>Champions League</span></a><a class="css-bka657-LeagueEntryCSS enpfnav4" href="/leagues/87/overview/laliga"><img alt="" class="Image LeagueIcon ImageWithFallback" height="16" loading="lazy" src="https://images.fotm

In [19]:
import json
def scrape_league_list(soup):
    """
    Scrapes all leagues and their details from Fotmob
    """
    leagues_data = {
        "top_leagues": [],
        "all_leagues": []
    }
    
    # Find all league container divs
    league_containers = soup.find_all('div', class_='css-146405d-LeagueListContainerCSS')
    
    for container in league_containers:
        # Check if it's top leagues section
        header = container.find('h2', class_='css-5f8hxj-LeagueListH2')
        if header and header.text == 'Top leagues':
            # Process top leagues
            leagues = container.find_all('a', class_='css-bka657-LeagueEntryCSS')
            for league in leagues:
                league_info = {
                    'name': league.find('span').text.strip(),
                    'url': league.get('href'),
                    # 'logo': league.find('img').get('src') if league.find('img') else None
                }
                leagues_data["top_leagues"].append(league_info)
                
    return leagues_data

# Usage
leagues = scrape_league_list(soup)

# Save to JSON
# with open("leagues_data.json", "w", encoding="utf-8") as f:
#     json.dump(leagues, f, indent=2, ensure_ascii=False)

print("‚úÖ Leagues data saved to leagues_data.json")
print("\nTop Leagues found:", len(leagues["top_leagues"]))
print("\nSample of leagues found:")
print(json.dumps(leagues["top_leagues"][:3], indent=2))


‚úÖ Leagues data saved to leagues_data.json

Top Leagues found: 10

Sample of leagues found:
[
  {
    "name": "Premier League",
    "url": "/leagues/47/overview/premier-league"
  },
  {
    "name": "Champions League",
    "url": "/leagues/42/overview/champions-league"
  },
  {
    "name": "LaLiga",
    "url": "/leagues/87/overview/laliga"
  }
]


In [44]:
def scrape_all_country_leagues(html_content):
    """
    Scrapes both country leagues and international leagues from HTML content
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    
    all_data = {
        "countries": [],
        # "timestamp": datetime.now().isoformat()
    }
    
    # Try multiple section selectors
    selectors = [
        "section.css-1v0mbdj-LeagueListSection",  # New layout
        "div.css-146405d-LeagueListContainerCSS", # Alternative layout
        "button.css-15d5019-LeagueListHeaderButton"  # Old layout
    ]
    
    country_sections = []
    for selector in selectors:
        if selector.startswith('section'):
            sections = soup.select(selector)
        else:
            sections = soup.find_all(selector.split('.')[0], class_=selector.split('.')[1])
        if sections:
            country_sections = sections
            print(f"‚úÖ Found {len(sections)} countries using selector: {selector}")
            break
    
    if not country_sections:
        print("‚ùå No country sections found with any selector")
        return all_data
    
    for section in country_sections:
        country_info = {
            'name': None,
            'code': None,
            'flag': None,
            'leagues': [],
            'total_leagues': 0
        }
        
        # Get country info
        header = section.select_one("button.css-15d5019-LeagueListHeaderButton")
        if header:
            button_text = header.select_one("div.css-1ib7wom-ButtonText")
            if button_text:
                img = button_text.find('img')
                span = button_text.find('span')
                if img and span:
                    country_info['name'] = span.text.strip()
                    country_info['flag'] = img.get('src')
                    country_info['code'] = img.get('src').split('/')[-1].split('.')[0]
                    print(f"üìç Processing country: {country_info['name']}")
        
        # Find leagues using multiple selectors
        league_selectors = [
            'css-1m05q61-LeagueListItem',
            'css-yx1h73-CountryLeagueEntryCSS',
            'css-bka657-LeagueEntryCSS'
        ]
        
        for selector in league_selectors:
            league_links = section.find_all('a', class_=selector)
            if league_links:
                for link in league_links:
                    name_span = link.select_one("span")
                    if name_span:
                        href = link.get('href', '')
                        league_info = {
                            'name': name_span.text.strip(),
                            'url': f"https://www.fotmob.com{href}" if href.startswith('/') else href,
                            'logo': link.find('img').get('src') if link.find('img') else None,
                            'id': href.split('/')[-2] if href and '/' in href else None
                        }
                        if league_info['name'] and league_info['url']:  # Only add if has name and URL
                            country_info['leagues'].append(league_info)
        
        country_info['total_leagues'] = len(country_info['leagues'])
        
        # Only add countries with valid data
        if country_info['name'] and country_info['leagues']:
            all_data["countries"].append(country_info)
            print(f"‚úÖ Added {country_info['total_leagues']} leagues for {country_info['name']}")
    
    return all_data

# Verify file and read content
try:
    print("\nüîç Reading HTML file...")
    with open('all_country_leagues.txt', 'r', encoding='utf-8') as f:
        html_content = f.read()
    print(f"‚úÖ Read {len(html_content):,} bytes from file")
    
    if len(html_content) < 1000:
        print("‚ö†Ô∏è Warning: File content seems too small")
except Exception as e:
    print(f"‚ùå Error reading file: {str(e)}")
    exit()

# Parse leagues
print("\nüîÑ Processing leagues...")
leagues_data = scrape_all_country_leagues(html_content)

# Print summary
print("\nüìä Summary:")
print(f"‚úÖ Found {len(leagues_data['countries'])} countries")
total_leagues = sum(country['total_leagues'] for country in leagues_data['countries'])
print(f"‚úÖ Found {total_leagues:,} total leagues")

# Save to JSON
output_file = "all_leagues_combined.json"
print(f"\nüíæ Saving to {output_file}...")
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(leagues_data, f, indent=2, ensure_ascii=False)
print(f"‚úÖ Data saved successfully")

# Print sample
if leagues_data['countries']:
    print("\nüìù Sample country data:")
    sample = leagues_data['countries'][0]
    print(json.dumps({
        'country': sample['name'],
        'code': sample['code'],
        'leagues_count': sample['total_leagues'],
        'sample_leagues': sample['leagues'][:2]
    }, indent=2))


üîç Reading HTML file...
‚úÖ Read 283,599 bytes from file

üîÑ Processing leagues...


‚úÖ Found 1 countries using selector: div.css-146405d-LeagueListContainerCSS
üìç Processing country: Russia
‚úÖ Added 478 leagues for Russia

üìä Summary:
‚úÖ Found 1 countries
‚úÖ Found 478 total leagues

üíæ Saving to all_leagues_combined.json...
‚úÖ Data saved successfully

üìù Sample country data:
{
  "country": "Russia",
  "code": "rus",
  "leagues_count": 478,
  "sample_leagues": [
    {
      "name": "Champions League",
      "url": "https://www.fotmob.com/leagues/42/overview/champions-league",
      "logo": "https://images.fotmob.com/image_resources/logo/leaguelogo/dark/42.png",
      "id": "overview"
    },
    {
      "name": "Europa League",
      "url": "https://www.fotmob.com/leagues/73/overview/europa-league",
      "logo": "https://images.fotmob.com/image_resources/logo/leaguelogo/dark/73.png",
      "id": "overview"
    }
  ]
}


### stats

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time

url = "https://www.fotmob.com/matches/chelsea-vs-nottingham-forest/2vn3ne#4506599:tab=stats"

# Setup browser
options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Remove this line if you want to see the browser
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

print("‚è≥ Loading page...")
driver.get(url)
time.sleep(5)

print("‚úÖ Page loaded.")
html = driver.page_source
print("HTML length:", len(html))

driver.quit()


‚è≥ Loading page...
‚úÖ Page loaded.
HTML length: 776647


In [2]:
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'html.parser')

main_content = soup.find('div', id='__next')
if main_content:
    print("‚úÖ Found #main-content")
else:
    print("‚ùå Couldn't find #main-content")


‚úÖ Found #main-content


In [24]:
match_css = main_content.find('div', class_='css-19auws2-MatchCSS edsvb150') if main_content else None
print("Found match_css:", bool(match_css))

inner_div = match_css.find('div') if match_css else None
left_column = inner_div.find('div', class_='css-10wb1x-Column-LeftColumn edsvb152') if inner_div else None

if left_column:
    print("‚úÖ Found left_column")
else:
    print("‚ùå Could not find left_column")


Found match_css: True
‚úÖ Found left_column


In [41]:
stats_section = left_column.find_all('section')
if not stats_section:
    print("‚ùå Stats section not found.")
    exit()

print("‚úÖ Stats section found.")

‚úÖ Stats section found.


In [42]:
stats_section

[<section><h1 style="position:absolute;left:-9999px">Nottingham Forest vs Chelsea (2025-05-25T15:00:00.000Z)</h1><div class="css-1g9gm8m-MFHeaderFullscreenCSS e1vkkyp0"><div class="css-1q3r0n9-MFHeaderTopBarMobileCSS e1pkr3n46"><div class="css-761wfz-MFHeaderButtonsCSS e1q00yv90"><button class="css-11ez6d8-FollowButtonCSS e45uhi50">Follow</button></div></div><div class="css-1jb4fga-HeaderTopBarDesktopCSS e1pkr3n45"><div class="css-1711rkn-GridContainer e1pkr3n40"><div class="css-ozxjdo-LeftGridItem e1pkr3n41"><button class="css-1600hk9-BackButton e1pkr3n44"><svg height="25px" version="1.1" viewbox="0 0 25 25" width="25px" xmlns="http://www.w3.org/2000/svg"><g fill="none" fill-rule="evenodd" id="Page-1" stroke="none" stroke-width="1"><g id="calendar-navigation"><circle cx="12.5" cy="12.5" fill="var(--DatePicker-DatepickerArrowButton-background)" fill-rule="nonzero" id="prefix__Ellipse_377" r="12.5"></circle><g id="prefix__ic_chevron_left" transform="translate(5.000000, 5.000000)"><polyg

In [124]:
soup.find_all('div', class_=lambda x: x and 'Score' in x)

[]

In [126]:
def scrape_match_details(soup):
    """
    Scrapes match details including date, time, teams, venue, and competition info
    """
    match_info = {}
    
    # Find header section
    header = soup.find('div', class_='css-1pf15hj-MFHeaderInfoBoxCSS')
    if not header:
        return match_info
    
    # Get date and time
    date_element = header.find('time')
    if date_element:
        match_info['datetime'] = date_element.get('datetime')
        match_info['formatted_date'] = date_element.text.strip()
    
    # Get competition info
    competition = header.find('div', class_='css-6k5lms-TournamentCSS')
    if competition:
        league_link = competition.find('a')
        if league_link:
            match_info['competition'] = {
                'name': league_link.text.strip(),
                'url': league_link.get('href'),
                # 'logo': competition.find('img')['src'] if competition.find('img') else None
            }
    
    
    # Get teams info (from the main content)
    
    # Get teams info from team markup elements
    team_sections = soup.find_all('div', class_=lambda x: x and 'TeamMarkup' in x)
    if len(team_sections) == 2:
        # Get team names (using desktop version of names)
        home_name = team_sections[0].find('span', class_=lambda x: x and 'TeamNameOnTabletUp' in x)
        away_name = team_sections[1].find('span', class_=lambda x: x and 'TeamNameOnTabletUp' in x)
        
        # Get team links and images
        home_link = team_sections[0].find_parent('a')
        away_link = team_sections[1].find_parent('a')
        
        home_img = team_sections[0].find('img')
        away_img = team_sections[1].find('img')
        
        match_info['teams'] = {
            'home': {
                'name': home_name.text.strip() if home_name else None,
                # 'url': home_link.get('href') if home_link else None,
                # 'logo': home_img.get('src') if home_img else None
            },
            'away': {
                'name': away_name.text.strip() if away_name else None,
                # 'url': away_link.get('href') if away_link else None,
                # 'logo': away_img.get('src') if away_img else None
            }
        }
        
        # Get score
        score_wrapper = soup.find('div', class_='css-1cf82ng-MFHeaderStatusWrapper')
        if score_wrapper:
            score_element = score_wrapper.find('span', class_=lambda x: x and 'Score' in x)
            if score_element:
                scores = score_element.text.strip().split(' - ')
                match_info['score'] = {
                    'home': scores[0] if len(scores) > 0 else None,
                    'away': scores[1] if len(scores) > 1 else None
                }
            
            # Get match status
            status_element = score_wrapper.find('span', class_=lambda x: x and 'StatusReason' in x)
            if status_element:
                match_info['status'] = status_element.text.strip()
    
    
    return match_info

# Usage:
match_details = scrape_match_details(soup)
print(json.dumps(match_details, indent=2))

{
  "datetime": "2025-05-25T15:00:00.000Z",
  "formatted_date": "Sun, May 25, 10:00 AM",
  "competition": {
    "name": "Premier League Round 38",
    "url": "/leagues/47/overview/premier-league"
  },
  "teams": {
    "home": {
      "name": "Nottingham Forest"
    },
    "away": {
      "name": "Chelsea"
    }
  },
  "score": {
    "home": "0",
    "away": "1"
  },
  "status": "Full time"
}


In [80]:
lf = left_column.find_all("li", class_=lambda c: c and "Stat" in c)

In [115]:
def scrape_stats(left_column):
    results = []

    # Correct class for stat blocks
    stat_blocks = left_column.find_all("li", class_=lambda c: c and "Stat" in c) # List item containing stats
    # print(f"{len(stat_blocks)} stat blocks found.")

    for i, block in enumerate(stat_blocks, 1):
        # print(f"\n--- Stat Block #{i} ---")
        # print(block.prettify())

        # Stat title comes from the <span> with class containing "StatTitle"
        title_tag = block.find("span", class_=lambda c: c and "StatTitle" in c)
        stat_title = title_tag.text.strip() if title_tag else None

        # Find all values (they‚Äôre wrapped in <span class="StatValue"> inside StatBox)
        value_tags = block.find_all("span", class_=lambda c: c and "StatValue" in c)
        home_value = value_tags[0].text.strip() if len(value_tags) > 0 else None
        away_value = value_tags[1].text.strip() if len(value_tags) > 1 else None

        # print(f"Extracted: {home_value} {stat_title} {away_value}")
        if home_value is not None or away_value is not None:
            results.append({
                stat_title: {
                    "home": home_value,
                    "away": away_value
                }
            })

    return results


In [95]:
import json
# Extract stats (assuming left_column is already defined)
stats = scrape_stats(left_column)

# Save to JSON
with open("match_stats.json", "w", encoding="utf-8") as f:
    json.dump(stats, f, indent=2, ensure_ascii=False)

# Print preview
# print(json.dumps(stats, indent=2, ensure_ascii=False))


46 stat blocks found.


In [117]:
print(json.dumps(scrape_stats(soup), indent=2))

[
  {
    "Expected goals (xG)": {
      "home": "1.20",
      "away": "1.09"
    }
  },
  {
    "Total shots": {
      "home": "10",
      "away": "6"
    }
  },
  {
    "Shots on target": {
      "home": "2",
      "away": "2"
    }
  },
  {
    "Big chances": {
      "home": "2",
      "away": "3"
    }
  },
  {
    "Big chances missed": {
      "home": "2",
      "away": "2"
    }
  },
  {
    "Accurate passes": {
      "home": "352 (84%)",
      "away": "327 (80%)"
    }
  },
  {
    "Fouls committed": {
      "home": "10",
      "away": "11"
    }
  },
  {
    "Corners": {
      "home": "7",
      "away": "4"
    }
  },
  {
    "Total shots": {
      "home": "10",
      "away": "6"
    }
  },
  {
    "Shots off target": {
      "home": "4",
      "away": "2"
    }
  },
  {
    "Shots on target": {
      "home": "2",
      "away": "2"
    }
  },
  {
    "Blocked shots": {
      "home": "4",
      "away": "2"
    }
  },
  {
    "Hit woodwork": {
      "home": "0",
      "away": "0"

### test

In [3]:
import json
from bs4 import BeautifulSoup
from get_match_stats import full_match_data

In [4]:
# Get all match data using the scrape_full_match_data function
match_data = full_match_data(soup)

# Save complete match data to JSON
with open("full_match_data.json", "w", encoding="utf-8") as f:
    json.dump(match_data, f, indent=2, ensure_ascii=False)

print("‚úÖ Data saved to full_match_data.json")


‚úÖ Data saved to full_match_data.json


In [None]:
# print(json.dumps(full_match_data(soup), indent=2, ensure_ascii=False))

{
  "match_details": {
    "teams": {
      "home": {
        "name": "Nottingham Forest",
        "url": "/teams/10203/overview/nottingham-forest",
        "logo": "https://images.fotmob.com/image_resources/logo/teamlogo/10203_small.png"
      },
      "away": {
        "name": "Chelsea",
        "url": "/teams/8455/overview/chelsea",
        "logo": "https://images.fotmob.com/image_resources/logo/teamlogo/8455_small.png"
      }
    },
    "score": {
      "home": "0",
      "away": "1"
    },
    "status": "Full time",
    "datetime": "2025-05-25T15:00:00.000Z",
    "formatted_date": "Sun, May 25, 10:00 AM",
    "competition": {
      "name": "Premier League Round 38",
      "url": "/leagues/47/overview/premier-league"
    }
  },
  "statistics": [
    {
      "Expected goals (xG)": {
        "home": "1.20",
        "away": "1.09"
      }
    },
    {
      "Total shots": {
        "home": "10",
        "away": "6"
      }
    },
    {
      "Shots on target": {
        "home": "2",
