In [1]:
import pandas as pd

In [2]:
year = 2006

url = f'https://www.transfermarkt.com/laliga/gesamtspielplan/wettbewerb/ES1?saison_id={year}&spieltagVon=1&spieltagBis=38'
filename = f'laliga_fixtures_{year}_{year + 1 % 100}.csv'

# url = 'https://www.transfermarkt.com/laliga/gesamtspielplan/wettbewerb/ES1?saison_id=2019&spieltagVon=1&spieltagBis=38'
# filename = 'laliga_fixtures_2019_20.csv'

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

URL = url
HEADERS = {"User-Agent": "Mozilla/5.0"}

def scrape_laliga_fixtures(url):
    response = requests.get(url, headers=HEADERS)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    all_matches = []

    # Each Matchday is inside a div with class "box"
    for box in soup.select("div.box"):
        matchday_header = box.select_one(".content-box-headline")
        if not matchday_header:
            continue
        matchday = matchday_header.get_text(strip=True)

        # Each row of matches is in <tr>, skipping those that are just time separators
        for row in box.select("tbody tr"):
            # Skip rows without actual match (like "bg_blau_20" which are just separators)
            if "bg_blau_20" in row.get("class", []):
                continue

            cells = row.find_all("td")
            if len(cells) < 7:
                continue

            date = cells[0].get_text(strip=True)
            time = cells[1].get_text(strip=True)
            home_team = cells[2].get_text(" ", strip=True)
            result = cells[4].get_text(strip=True)
            away_team = cells[6].get_text(" ", strip=True)

            match_data = {
                "Matchday": matchday,
                "Date": date,
                "Time": time,
                "Home": home_team,
                "Result": result,
                "Away": away_team
            }
            all_matches.append(match_data)

    return pd.DataFrame(all_matches)


if __name__ == "__main__":
    df = scrape_laliga_fixtures(URL)
    print(df.head(20))  # show first 20 matches
    df.to_csv(filename, index=False)


      Matchday        Date      Time                 Home Result  \
0   1.Matchday  Sat8/26/06  10:00 PM             Valencia    2:1   
1   1.Matchday  Sun8/27/06   7:00 PM             Espanyol    0:1   
2   1.Matchday                             Dep. La Coruña    3:2   
3   1.Matchday                                 CA Osasuna    0:2   
4   1.Matchday                                Real Madrid    0:0   
5   1.Matchday                               Recr. Huelva    1:1   
6   1.Matchday                                     Racing    0:1   
7   1.Matchday               9:00 PM        Athletic Club    1:1   
8   1.Matchday  Mon8/28/06   9:00 PM        Celta de Vigo    2:3   
9   1.Matchday  Tue8/29/06   9:00 PM           Sevilla FC    4:0   
10  2.Matchday   Sat9/9/06   8:00 PM       (4.) Barcelona    3:0   
11  2.Matchday              10:00 PM        (6.) Atlético    0:1   
12  2.Matchday  Sun9/10/06   7:00 PM     (16.) Real Betis    3:0   
13  2.Matchday                                (2

In [4]:
df

Unnamed: 0,Matchday,Date,Time,Home,Result,Away
0,1.Matchday,Sat8/26/06,10:00 PM,Valencia,2:1,Real Betis
1,1.Matchday,Sun8/27/06,7:00 PM,Espanyol,0:1,Gimnàstic
2,1.Matchday,,,Dep. La Coruña,3:2,Real Zaragoza
3,1.Matchday,,,CA Osasuna,0:2,Getafe
4,1.Matchday,,,Real Madrid,0:0,Villarreal
...,...,...,...,...,...,...
375,38.Matchday,,9:00 PM,(20.) Gimnàstic,1:5,Barcelona (2.)
376,38.Matchday,,,(13.) CA Osasuna,1:2,Atlético (7.)
377,38.Matchday,,,(1.) Real Madrid,3:1,RCD Mallorca (11.)
378,38.Matchday,,,(8.) Recr. Huelva,1:1,Real Zaragoza (5.)


In [7]:
df_name = pd.read_html('https://fbref.com/en/comps/12/2024-2025/schedule/2024-2025-La-Liga-Scores-and-Fixtures'
                       , attrs={"id": "sched_2024-2025_12_1"})[0]

In [8]:
df_name

Unnamed: 0,Wk,Day,Date,Time,Home,xG,Score,xG.1,Away,Attendance,Venue,Referee,Match Report,Notes
0,1.0,Thu,2024-08-15,19:00,Athletic Club,0.3,1–1,0.8,Getafe,47845.0,San Mamés,Alejandro Muñíz,Match Report,
1,1.0,Thu,2024-08-15,21:30,Betis,1.4,1–1,1.6,Girona,54084.0,Estadio Benito Villamarín,Miguel Ángel Ortiz Arias,Match Report,
2,1.0,Fri,2024-08-16,19:00,Celta Vigo,0.8,2–1,1.6,Alavés,22477.0,Estadio Abanca Balaídos,Alejandro Quintero,Match Report,
3,1.0,Fri,2024-08-16,20:30,Las Palmas,1.4,2–2,1.8,Sevilla,24843.0,Estadio de Gran Canaria,Francisco Hernández,Match Report,
4,1.0,Sat,2024-08-17,19:00,Osasuna,1.7,1–1,1.0,Leganés,19561.0,Estadio El Sadar,Juan Pulido,Match Report,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
421,38.0,Sat,2025-05-24,21:00,Alavés,2.0,1–1,1.6,Osasuna,19274.0,Estadio de Mendizorroza,Guillermo Cuadra,Match Report,
422,38.0,Sat,2025-05-24,21:00,Getafe,0.4,1–2,1.5,Celta Vigo,12862.0,Coliseum Alfonso Pérez,Juan Martínez,Match Report,
423,38.0,Sun,2025-05-25,14:00,Girona,0.0,0–4,2.7,Atlético Madrid,11546.0,Estadi Municipal de Montilivi,Jesús Gil,Match Report,
424,38.0,Sun,2025-05-25,16:15,Villarreal,1.1,4–2,1.5,Sevilla,17758.0,Estadio de la Cerámica,Javier Alberola,Match Report,


In [9]:
df_name['Wk'].isnull().sum()

np.int64(46)

In [15]:
df_name = df_name.dropna(subset=['Wk'])

In [17]:
df_name.shape

(380, 14)