# Web Scraping through Transfermarkt to Get Team Names

## Importing Libraries

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import os
import time

## Setup

In [2]:
# Build the Chrome options object shared by every Selenium session in this notebook.
options = Options()
for _chrome_flag in (
    "--headless=new",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    # Adjacent string literals are joined into one user-agent argument.
    (
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
    ),
):
    options.add_argument(_chrome_flag)
del _chrome_flag  # keep the loop variable out of the notebook namespace

def get_soup(url, wait_seconds=5):
    """Load *url* in a fresh Chrome session and return the parsed page.

    Args:
        url: Address of the page to open.
        wait_seconds: Fixed pause after navigation before the page source
            is read. Defaults to 5, the value previously hard-coded.

    Returns:
        A BeautifulSoup object built from the browser's page source using
        the "html.parser" backend.
    """
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        driver.get(url)
        time.sleep(wait_seconds)
        html = driver.page_source
    finally:
        # Always shut the browser down, even when navigation fails, so a
        # failed request does not leave a Chrome process running.
        driver.quit()
    return BeautifulSoup(html, "html.parser")

# Per-league URL components: each entry maps a league key to the "slug" and
# "code" values that are interpolated into the league page URL.
LEAGUE_INFO = {
    league_key: {"slug": slug, "code": code}
    for league_key, slug, code in (
        ("premier_league", "premier-league", "GB1"),
        ("serie_a", "serie-a", "IT1"),
        ("la_liga", "laliga", "ES1"),
        ("bundesliga", "bundesliga", "L1"),
        ("ligue_1", "ligue-1", "FR1"),
    )
}

# Root of every URL requested by this notebook.
BASE_URL = "https://www.transfermarkt.com"

## Scraping

In [None]:
# Directory (relative to the working directory) that receives one CSV per league.
OUTPUT_DIR = "../../../data/teams/raw"
# DataFrame.to_csv does not create missing parent directories, so make sure
# the target exists before the first league is written.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for league_name, info in LEAGUE_INFO.items():
    print(f"\n=== Scraping clubs for {league_name.replace('_', ' ').title()} ===")
    league_url = f"{BASE_URL}/{info['slug']}/startseite/wettbewerb/{info['code']}"
    soup = get_soup(league_url)

    # The club list is read from the first <table class="items"> on the page.
    table = soup.find("table", class_="items")
    rows = table.find_all("tr", class_=["odd", "even"]) if table is not None else []

    clubs = []
    for row in rows:
        # The club name is taken from the first link inside a "hauptlink" cell.
        name_tag = row.select_one("td.hauptlink a")
        if name_tag:
            clubs.append({"Club Name": name_tag.text.strip()})

    if not clubs:
        # Surface an empty result instead of silently writing an empty file.
        print(f"Warning: no clubs found at {league_url}")

    # Passing the column explicitly keeps the CSV header present even when
    # nothing was scraped, so the file stays readable downstream.
    df = pd.DataFrame(clubs, columns=["Club Name"])
    output_path = f"{OUTPUT_DIR}/{league_name}_teams.csv"
    df.to_csv(output_path, index=False)
    print(f"Saved to {output_path}")


=== Scraping clubs for Premier League ===
Saved to ../../data/teams/raw/premier_league_teams.csv

=== Scraping clubs for Serie A ===
Saved to ../../data/teams/raw/serie_a_teams.csv

=== Scraping clubs for La Liga ===
Saved to ../../data/teams/raw/la_liga_teams.csv

=== Scraping clubs for Bundesliga ===
Saved to ../../data/teams/raw/bundesliga_teams.csv

=== Scraping clubs for Ligue 1 ===
Saved to ../../data/teams/raw/ligue_1_teams.csv
