In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
from datetime import datetime

# Scrape the La Liga 2023/24 fixture list from 90min.com into a DataFrame.
#
# Each fixtures table on the page is read as a flat sequence of <p> cells that
# is consumed three at a time: date, kick-off time, fixture string.

# Upper bound (seconds) for the HTTP request so a stalled connection cannot
# hang the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# Cell texts that mark a table's header row rather than a match.
HEADER_LABELS = ('Date', 'Time', 'Fixture')

# Tokens that separate the home team from the away team, tried in this order.
# ' - ' is what remains of a score such as '0-2' once its digits are removed.
TEAM_SEPARATORS = (' - ', ' v ', ' P-P ')


def _format_date(date_text):
    """Convert a 'DD/MM/YYYY' string to 'YYYY-MM-DD'.

    Text that is not in the expected format is returned unchanged, so that
    the row is kept rather than dropped.
    """
    try:
        return datetime.strptime(date_text, '%d/%m/%Y').strftime('%Y-%m-%d')
    except ValueError:
        return date_text


def _split_fixture(fixture_string):
    """Split a fixture string into a ``(home_team, away_team)`` tuple.

    All digits are removed first so that an embedded score collapses to a
    bare ' - ' separator. NOTE: this also removes digits that belong to a
    club name.

    Only the first occurrence of a separator is used, so a string containing
    the separator more than once no longer fails to unpack. If no known
    separator is present, both teams are reported as 'TBD'.
    """
    fixture_no_scores = ''.join(char for char in fixture_string if not char.isdigit())
    for separator in TEAM_SEPARATORS:
        if separator in fixture_no_scores:
            home_team, away_team = fixture_no_scores.split(separator, 1)
            return home_team.strip(), away_team.strip()
    return 'TBD', 'TBD'


# Fetch the webpage content; fail loudly on an HTTP error instead of silently
# parsing an error page into an empty result.
fixtures_url = "https://www.90min.com/posts/la-liga-fixtures-2023-24-season"
data = requests.get(fixtures_url, timeout=REQUEST_TIMEOUT_SECONDS)
data.raise_for_status()
soup = BeautifulSoup(data.text, 'html.parser')

# Select all tables with the specified class
fixtures_tables = soup.select('table.table_1yarp85')

all_matches = []

# Process each table separately
for fixture in fixtures_tables:
    cell_texts = [cell.get_text(strip=True) for cell in fixture.find_all('p')]

    # Walk the cells in (date, time, fixture) triples. zip() stops at the
    # shortest slice, so an incomplete trailing group is ignored instead of
    # raising IndexError.
    for date_text, kickoff_time, fixture_string in zip(
        cell_texts[0::3], cell_texts[1::3], cell_texts[2::3]
    ):
        # Skip headers like 'Date', 'Time', and 'Fixture'
        if date_text in HEADER_LABELS:
            continue

        home_team, away_team = _split_fixture(fixture_string)
        all_matches.append((_format_date(date_text), kickoff_time, home_team, away_team))

# Create a DataFrame with column names
matches_fixtures_df = pd.DataFrame(all_matches, columns=['Date', 'Time', 'Team', 'Opponent'])

In [2]:
# Normalise the column labels to lower case.
matches_fixtures_df.columns = matches_fixtures_df.columns.str.lower()

In [3]:
# Write the fixtures to a CSV file in the current working directory.
matches_fixtures_df.to_csv(path_or_buf="matches_fixtures.csv")

In [4]:
# Show the fixtures table as this cell's output.
matches_fixtures_df

Unnamed: 0,date,time,team,opponent
0,2023-08-11,18:30,Almeria,Rayo Vallecano
1,2023-08-11,21:00,Sevilla,Valencia
2,2023-08-12,16:00,Real Sociedad,Girona
3,2023-08-12,18:30,Las Palmas,Mallorca
4,2023-08-12,20:30,Athletic Club,Real Madrid
...,...,...,...,...
376,2024-05-26,TBC,Osasuna,Villarreal
377,2024-05-26,TBC,Rayo Vallecano,Athletic Club
378,2024-05-26,TBC,Real Madrid,Real Betis
379,2024-05-26,TBC,Real Sociedad,Atletico Madrid
