In [None]:
# For Historical Data (la_liga_history.csv)
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Base URL for La Liga data on Fbref
base_url = "https://fbref.com/en/comps/12"

# Seasons to scrape data for
seasons = ["2023-2024", "2022-2023", "2021-2022", "2020-2021", "2019-2020"]

# Headers 
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

all_data = []

for season in seasons:
    print(f"Fetching data for season: {season}")
    season_url = f"{base_url}/{season}/{season}-La-Liga-Stats"

    # Request the page for the season's stats
    response = requests.get(season_url, headers=headers)
    time.sleep(random.uniform(10, 15)) 
    soup = BeautifulSoup(response.text, "html.parser")

    # Find the standings table to extract team links
    stats_table = soup.select_one('#stats_squads_standard_for')
    if stats_table:
        print(f"Stats table found for {season}!")
        squad_links = stats_table.select('a[href]')
        all_shooting_links = []

        # Extract links to team shooting stats
        for link in squad_links:
            href = link.get('href')
            if "/squads/" in href: 
                team_id = href.split('/')[3]
                team_name = href.split('/')[-1].split('-Stats')[0]
                shooting_link = (
                    f"https://fbref.com/en/squads/{team_id}/{season}/matchlogs/c12/shooting/{team_name}-Match-Logs-La-Liga"
                )
                all_shooting_links.append((shooting_link, team_name))

        # Scrape data from each team's shooting stats page
        for shooting_link, team_name in all_shooting_links:
            print(f"Fetching shooting data from: {shooting_link}")
            response = requests.get(shooting_link, headers=headers)
            time.sleep(10)  
            soup = BeautifulSoup(response.text, "html.parser")

            # Locate the shooting stats table
            shooting_stats = soup.select_one('#matchlogs_for')
            if shooting_stats:
                try:
                   
                    df = pd.read_html(str(shooting_stats))[0]

                    if isinstance(df.columns, pd.MultiIndex):
                        df.columns = [' '.join(col).strip() for col in df.columns]

                    # Clean column names
                    df.columns = df.columns.str.replace(r'^For [A-Za-zÀ-ÿ\s\-]+ ', '', regex=True)
                    df.columns = df.columns.str.replace(r'^Standard ', '', regex=True)

                    # Map and rename relevant columns
                    column_mapping = {
                        "Date": "Date",
                        "Time": "Time",
                        "Round": "Round",
                        "Day": "Day",
                        "Venue": "Venue",
                        "Result": "Result",
                        "GF": "GF",
                        "GA": "GA",
                        "Opponent": "Opponent",
                        "Gls": "Gls",
                        "Sh": "Sh",
                        "SoT": "SoT",
                        "SoT%": "SoT%",
                        "G/Sh": "G/Sh",
                        "G/SoT": "G/SoT",
                        "Dist": "Dist",
                        "FK": "FK",
                        "PK": "PK",
                        "PKatt": "PKatt",
                    }
                    df = df.rename(columns=column_mapping)

                    # Ensure all desired columns are present
                    for col in column_mapping.values():
                        if col not in df.columns:
                            df[col] = None  

                    df = df[list(column_mapping.values())]

                    df["Team"] = team_name
                    df["Season"] = season

                    all_data.append(df)

                except ValueError as e:
                    print(f"Error reading the table for {team_name}: {e}")
            else:
                print(f"Shooting table NOT found for {team_name} in {season}!")

# Combine all collected data into a single DataFrame
if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)
    print("All data combined successfully!")

    desired_columns = ['Team', 'Season', 'Date', 'Time', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA', 'Opponent', 'Gls', 'Sh', 'SoT', 'SoT%', 'G/Sh', 'G/SoT', 'Dist', 'FK', 'PK', 'PKatt']
    combined_df = combined_df[desired_columns]
    combined_df = combined_df.drop_duplicates()

    combined_df.to_csv("la_liga_history.csv", index=False)
    print("Success.")
else:
    print("Error.")

In [1]:
# For recent data (recent_la_liga.csv)
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# URL for La Liga 24/25 fixtures
url = "https://fbref.com/en/comps/12/2024-2025/2024-2025-La-Liga-Stats"

# Headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

# Request and parse the page
response = requests.get(url, headers=headers)
time.sleep(5)  # Avoid hitting rate limits
soup = BeautifulSoup(response.text, "html.parser")

# Initialize data storage
all_data = []
season = "2024-2025"  # Define the season

# Find the standings table to extract team links
stats_table = soup.select_one('#stats_squads_standard_for')
if stats_table:
    print(f"Stats table found for {season}!")
    squad_links = stats_table.select('a[href]')
    all_shooting_links = []

    # Extract links to team shooting stats
    for link in squad_links:
        href = link.get('href')
        if "/squads/" in href: 
            team_id = href.split('/')[3]
            team_name = href.split('/')[-1].split('-Stats')[0]
            shooting_link = (
                f"https://fbref.com/en/squads/{team_id}/{season}/matchlogs/c12/shooting/{team_name}-Match-Logs-La-Liga"
            )
            all_shooting_links.append((shooting_link, team_name))

    # Scrape data from each team's shooting stats page
    for shooting_link, team_name in all_shooting_links:
        print(f"Fetching shooting data from: {shooting_link}")
        response = requests.get(shooting_link, headers=headers)
        time.sleep(10)  
        soup = BeautifulSoup(response.text, "html.parser")

        # Locate the shooting stats table
        shooting_stats = soup.select_one('#matchlogs_for')
        if shooting_stats:
            try:
                df = pd.read_html(str(shooting_stats))[0]

                # Clean column names
                if isinstance(df.columns, pd.MultiIndex):
                    df.columns = [' '.join(col).strip() for col in df.columns]

                df.columns = df.columns.str.replace(r'^For [A-Za-zÀ-ÿ\s\-]+ ', '', regex=True)
                df.columns = df.columns.str.replace(r'^Standard ', '', regex=True)

                # Map and rename relevant columns
                column_mapping = {
                    "Date": "Date",
                    "Time": "Time",
                    "Round": "Round",
                    "Day": "Day",
                    "Venue": "Venue",
                    "Result": "Result",
                    "GF": "GF",
                    "GA": "GA",
                    "Opponent": "Opponent",
                    "Gls": "Gls",
                    "Sh": "Sh",
                    "SoT": "SoT",
                    "SoT%": "SoT%",
                    "G/Sh": "G/Sh",
                    "G/SoT": "G/SoT",
                    "Dist": "Dist",
                    "FK": "FK",
                    "PK": "PK",
                    "PKatt": "PKatt",
                }
                df = df.rename(columns=column_mapping)

                # Ensure all desired columns are present
                for col in column_mapping.values():
                    if col not in df.columns:
                        df[col] = None  

                df = df[list(column_mapping.values())]

                # Filter for specific matchweeks
                desired_rounds = [f"Matchweek {i}" for i in range(17, 23)]  # Matchweeks 17 to 22
                if "Round" in df.columns:
                    filtered_df = df[df["Round"].isin(desired_rounds)]

                    # Add team and season info
                    filtered_df["Team"] = team_name
                    filtered_df["Season"] = season

                    # Append filtered data to the combined dataset
                    all_data.append(filtered_df)

            except ValueError as e:
                print(f"Error reading the table for {team_name}: {e}")
        else:
            print(f"Shooting table NOT found for {team_name} in {season}!")

# Combine all collected data into a single DataFrame
if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)
    print("All data combined successfully!")

    desired_columns = ['Team', 'Season', 'Date', 'Time', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA', 'Opponent', 'Gls', 'Sh', 'SoT', 'SoT%', 'G/Sh', 'G/SoT', 'Dist', 'FK', 'PK', 'PKatt']
    combined_df = combined_df[desired_columns]
    combined_df = combined_df.drop_duplicates()

    combined_df.to_csv("la_liga_recent1.csv", index=False)
    print("Success.")
else:
    print("Error")

Stats table found for 2024-2025!
Fetching shooting data from: https://fbref.com/en/squads/8d6fd021/2024-2025/matchlogs/c12/shooting/Alaves-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/2b390eca/2024-2025/matchlogs/c12/shooting/Athletic-Club-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/db3b9613/2024-2025/matchlogs/c12/shooting/Atletico-Madrid-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/206d90db/2024-2025/matchlogs/c12/shooting/Barcelona-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/fc536746/2024-2025/matchlogs/c12/shooting/Real-Betis-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/f25da7fb/2024-2025/matchlogs/c12/shooting/Celta-Vigo-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/a8661628/2024-2025/matchlogs/c12/shooting/Espanyol-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/7848bd64/2024-2025/matchlogs/c12/shooting/Getafe-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/9024a00a/2024-2025/matchlogs/c12/shooting/Girona-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/0049d422/2024-2025/matchlogs/c12/shooting/Las-Palmas-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/7c6f2c78/2024-2025/matchlogs/c12/shooting/Leganes-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/2aa12281/2024-2025/matchlogs/c12/shooting/Mallorca-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/03c57e2b/2024-2025/matchlogs/c12/shooting/Osasuna-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/98e8af82/2024-2025/matchlogs/c12/shooting/Rayo-Vallecano-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/53a2f082/2024-2025/matchlogs/c12/shooting/Real-Madrid-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/e31d1cd9/2024-2025/matchlogs/c12/shooting/Real-Sociedad-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/ad2be733/2024-2025/matchlogs/c12/shooting/Sevilla-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/dcc91a7b/2024-2025/matchlogs/c12/shooting/Valencia-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/17859612/2024-2025/matchlogs/c12/shooting/Valladolid-Match-Logs-La-Liga


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


Fetching shooting data from: https://fbref.com/en/squads/2a8183b3/2024-2025/matchlogs/c12/shooting/Villarreal-Match-Logs-La-Liga
All data combined successfully!
Success.


  df = pd.read_html(str(shooting_stats))[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Team"] = team_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["Season"] = season


In [None]:
# Games for 24/25 season (la_liga_24_25.csv)
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# URL for La Liga 24/25 fixtures
url = "https://fbref.com/en/comps/12/2024-2025/schedule/2024-2025-La-Liga-Scores-and-Fixtures"

# Headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

# Request and parse the page
response = requests.get(url, headers=headers)
time.sleep(5)  # Avoid hitting rate limits
soup = BeautifulSoup(response.text, "html.parser")

# Locate the schedule table
stats_table = soup.select_one('#sched_2024-2025_12_1')

if stats_table:
    print("Schedule table found!")

    # Extract and clean the table data
    df = pd.read_html(str(stats_table))[0]
    desired_columns = ['Wk', 'Day', 'Date', 'Time', 'Venue', 'Home', 'Away']
    df = df[desired_columns].drop_duplicates()

    # Save the data to a CSV file
    df.to_csv("la_liga_24_25.csv", index=False)
    print("Success.")
else:
    print("Error.")