In [2]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import time

# --- Scraper configuration ---------------------------------------------------
BASE_URL = "https://www.transfermarkt.co.uk"
LIST_URL = BASE_URL + "/spieler-statistik/wertvollstespieler/marktwertetop?page={page}"
FIRST_PAGE, LAST_PAGE = 1, 20     # inclusive range of listing pages to scrape
REQUEST_DELAY_SECONDS = 60        # pause between consecutive requests
REQUEST_TIMEOUT_SECONDS = 30      # without a timeout a stalled connection blocks forever
OUTPUT_COLUMNS = ['Name', 'Age', 'Club', 'Market Value',
                  'Profile URL', 'Club URL', 'Page URL']

# Request headers are identical for every page, so build them once.
HEADERS = {
    'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/44.0.2403.157 Safari/537.36'),
    'Accept-Language': 'en-US,en;q=0.5'
}

# Patterns used for every row; compiled once instead of once per row.
CLUB_HREF_RE = re.compile(r'/(verein|startseite)/')
VALUE_HREF_RE = re.compile(r'marktwerterlauf')
VALUE_TEXT_RE = re.compile(r'€\d+\.?\d*[mk]?', re.IGNORECASE)


def _fetch_table_rows(url, page):
    """Download one listing page and return the <tr> rows of its 'items' table.

    Returns an empty list (after printing the reason) when the request fails,
    the status code is not 200, or the page has no table with class 'items',
    so one bad page never aborts the whole run.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return []
    print(f"Page {page} loaded successfully.")

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='items')
    if table is None:
        print("No data table found on this page.")
        return []
    return table.find_all('tr')[1:]  # Skip header


def _parse_player_row(row, page_url):
    """Turn one table row into a record dict keyed by OUTPUT_COLUMNS.

    Returns None for rows with fewer than 4 <td> cells (treated as non-player
    rows). Raises ValueError when the row has no usable player link.
    Fields that cannot be found are reported as the string 'N/A'.
    """
    cells = row.find_all('td')
    if len(cells) < 4:
        return None

    # Player name and profile URL
    name_cell = row.find('td', class_='hauptlink')
    name_link = name_cell.find('a') if name_cell is not None else None
    if name_link is None or not name_link.get('href'):
        raise ValueError("player link not found")
    # Prefer the link's title attribute; fall back to its visible text.
    name = (name_link.get('title') or name_link.get_text(strip=True)).strip()
    if not name:
        raise ValueError("player name not found")
    profile_url = BASE_URL + name_link['href']

    # Age: first centred cell holding a plausible age.
    age = 'N/A'
    for cell in row.find_all('td', class_='zentriert'):
        # NOTE(review): the row's first cell appears to be the rank column on
        # this ranking table - confirm against the page markup. It is skipped
        # so that ranks 15-45 cannot be mistaken for ages.
        if cell is cells[0]:
            continue
        age_text = cell.get_text(strip=True)
        # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
        if age_text.isdecimal() and 15 <= int(age_text) <= 45:
            age = int(age_text)
            break

    # Club and club URL
    club_name, club_url = 'N/A', 'N/A'
    club_link = row.find('a', href=CLUB_HREF_RE)
    if club_link is not None:
        club_name = club_link.get('title', '').strip()
        club_url = BASE_URL + club_link.get('href')

    # Market value: prefer the dedicated link, otherwise any cell whose text
    # looks like a euro amount.
    market_value = 'N/A'
    for cell in cells:
        value_link = cell.find('a', href=VALUE_HREF_RE)
        if value_link is not None:
            market_value = value_link.get_text(strip=True)
            break
        cell_text = cell.get_text(strip=True)
        if VALUE_TEXT_RE.search(cell_text):
            market_value = cell_text
            break

    return {
        'Name': name,
        'Age': age,
        'Club': club_name,
        'Market Value': market_value,
        'Profile URL': profile_url,
        'Club URL': club_url,
        'Page URL': page_url,
    }


# --- Scrape every page, collecting one record per player ---------------------
records = []
for page in range(FIRST_PAGE, LAST_PAGE + 1):
    url = LIST_URL.format(page=page)
    print(f"Scraping: {url}")

    # Pause between requests only; there is nothing to wait for before the first.
    if page > FIRST_PAGE:
        time.sleep(REQUEST_DELAY_SECONDS)

    for row in _fetch_table_rows(url, page):
        try:
            record = _parse_player_row(row, url)
        except Exception as e:
            # Best effort: report the malformed row and keep going.
            print(f"Error parsing row: {e}")
            continue
        if record is not None:
            records.append(record)

# Build the frame once (growing it with concat per page is quadratic); the
# explicit columns keep the header intact even when nothing was scraped.
df_combined = pd.DataFrame(records, columns=OUTPUT_COLUMNS)

# Save results
df_combined.to_csv('transfermarkt_players_flat.csv', index=False)
df_combined.to_json('transfermarkt_players_flat.json', orient='records', indent=2)

print("Scraping completed.")
print(df_combined.head())
print(df_combined.tail())

Scraping: https://www.transfermarkt.co.uk/spieler-statistik/wertvollstespieler/marktwertetop?page=1
Page 1 loaded successfully.
Scraping: https://www.transfermarkt.co.uk/spieler-statistik/wertvollstespieler/marktwertetop?page=2
Page 2 loaded successfully.
Scraping: https://www.transfermarkt.co.uk/spieler-statistik/wertvollstespieler/marktwertetop?page=3
Page 3 loaded successfully.
Scraping: https://www.transfermarkt.co.uk/spieler-statistik/wertvollstespieler/marktwertetop?page=4
Page 4 loaded successfully.
Scraping: https://www.transfermarkt.co.uk/spieler-statistik/wertvollstespieler/marktwertetop?page=5
Page 5 loaded successfully.
Scraping: https://www.transfermarkt.co.uk/spieler-statistik/wertvollstespieler/marktwertetop?page=6
Page 6 loaded successfully.
Scraping: https://www.transfermarkt.co.uk/spieler-statistik/wertvollstespieler/marktwertetop?page=7
Page 7 loaded successfully.
Scraping: https://www.transfermarkt.co.uk/spieler-statistik/wertvollstespieler/marktwertetop?page=8
Page

In [3]:
# Reload the exported CSV from disk to check what the scraper actually wrote.
proof = pd.read_csv(filepath_or_buffer='transfermarkt_players_flat.csv')

# Show the first five rows (head()'s default count, spelled out).
proof.head(n=5)

Unnamed: 0,Name,Age,Club,Market Value,Profile URL,Club URL,Page URL
0,Lamine Yamal,17,FC Barcelona,€200.00m,https://www.transfermarkt.co.uk/lamine-yamal/p...,https://www.transfermarkt.co.uk/fc-barcelona/s...,https://www.transfermarkt.co.uk/spieler-statis...
1,Jude Bellingham,22,Real Madrid,€180.00m,https://www.transfermarkt.co.uk/jude-bellingha...,https://www.transfermarkt.co.uk/real-madrid/st...,https://www.transfermarkt.co.uk/spieler-statis...
2,Erling Haaland,24,Manchester City,€180.00m,https://www.transfermarkt.co.uk/erling-haaland...,https://www.transfermarkt.co.uk/manchester-cit...,https://www.transfermarkt.co.uk/spieler-statis...
3,Kylian Mbappé,26,Real Madrid,€180.00m,https://www.transfermarkt.co.uk/kylian-mbappe/...,https://www.transfermarkt.co.uk/real-madrid/st...,https://www.transfermarkt.co.uk/spieler-statis...
4,Vinicius Junior,24,Real Madrid,€170.00m,https://www.transfermarkt.co.uk/vinicius-junio...,https://www.transfermarkt.co.uk/real-madrid/st...,https://www.transfermarkt.co.uk/spieler-statis...


In [4]:
# Show the last five rows of the reloaded CSV (tail()'s default count, spelled out).
proof.tail(n=5)

Unnamed: 0,Name,Age,Club,Market Value,Profile URL,Club URL,Page URL
495,Konstantinos Karetsas,17,KRC Genk,€20.00m,https://www.transfermarkt.co.uk/konstantinos-k...,https://www.transfermarkt.co.uk/krc-genk/start...,https://www.transfermarkt.co.uk/spieler-statis...
496,Facundo Buonanotte,20,Brighton & Hove Albion,€20.00m,https://www.transfermarkt.co.uk/facundo-buonan...,https://www.transfermarkt.co.uk/brighton-amp-h...,https://www.transfermarkt.co.uk/spieler-statis...
497,Wesley,21,CR Flamengo,€20.00m,https://www.transfermarkt.co.uk/wesley/profil/...,https://www.transfermarkt.co.uk/flamengo-rio-d...,https://www.transfermarkt.co.uk/spieler-statis...
498,Chris Rigg,18,Sunderland AFC,€20.00m,https://www.transfermarkt.co.uk/chris-rigg/pro...,https://www.transfermarkt.co.uk/afc-sunderland...,https://www.transfermarkt.co.uk/spieler-statis...
499,Vitor Roque,20,Sociedade Esportiva Palmeiras,€20.00m,https://www.transfermarkt.co.uk/vitor-roque/pr...,https://www.transfermarkt.co.uk/se-palmeiras-s...,https://www.transfermarkt.co.uk/spieler-statis...
