In [1]:
import logging
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pyautogui
import pandas as pd

In [2]:


# Root logger setup: INFO and above go to a UTF-8 encoded log file,
# each record prefixed with its timestamp and level.
logging.basicConfig(
    encoding="utf-8",
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    filename="scrape_players.log",
)

# XPath of the player rows in the results table, and of the pagination link
# followed to reach the next page.
# NOTE(review): the pagination link is selected by position (second-to-last
# list item), not by label; on the final page that slot may hold a different
# link -- confirm against the live markup.
_ROW_XPATH = '//*[@id="yw1"]/table/tbody/tr'
_NEXT_XPATH = "//ul[@class='tm-pagination']/li[last()-1]/a"


def _parse_market_value(text):
    """Convert a market-value label such as "€200.00m" to millions (float).

    Leading non-digit characters (the currency symbol) are ignored. The
    suffixes "m", "k", "Th." and "bn" are understood, case-insensitively.

    Raises:
        ValueError: if the label has no recognised suffix or no valid number.
    """
    label = text.strip()
    first_digit = next((i for i, ch in enumerate(label) if ch.isdigit()), len(label))
    number = label[first_digit:].lower()

    if number.endswith("bn"):
        return float(number[:-2]) * 1000
    if number.endswith("th."):
        return float(number[:-3]) / 1000
    if number.endswith("m"):
        return float(number[:-1])
    if number.endswith("k"):
        return float(number[:-1]) / 1000
    raise ValueError(f"Unrecognised market value: {text!r}")


def _extract_player(row):
    """Read one table-row element into a player record.

    Returns:
        [name, position, age (int), nationalities joined by ", ",
         club title, market value in millions (float)]

    Raises:
        Whatever the element lookups or the int/float conversions raise when
        a cell is missing or malformed; the caller logs it and skips the row.
    """
    name = row.find_element(By.XPATH, "./td[2]/table/tbody/tr[1]/td[2]/a").text
    position = row.find_element(By.XPATH, "./td[2]/table/tbody/tr[2]/td").text
    age = int(row.find_element(By.XPATH, "./td[3]").text)

    # A player can have several flags; each flag's title is a country name.
    nat_box = row.find_element(By.XPATH, "./td[4]")
    flags = nat_box.find_elements(By.CLASS_NAME, "flaggenrahmen")
    nationalities = ", ".join(flag.get_attribute("title") for flag in flags)

    club = row.find_element(By.XPATH, "./td[5]/a/img").get_attribute("title")
    value = _parse_market_value(row.find_element(By.XPATH, "./td[6]/a").text)

    return [name, position, age, nationalities, club, value]


def scrape_players(driver, max_players=None):
    """
    Scrapes player data from the website and logs progress.

    Every page is scraped before the pagination link is looked up, so the
    final page (which has no further "next" link) is included. Rows that
    cannot be parsed are logged and skipped. If moving to the next page
    fails, the error is logged and the players collected so far are returned.

    Parameters:
        driver: Selenium WebDriver instance, already on the first results page
        max_players: Optional maximum number of players to scrape
            (None or 0 means no limit)

    Returns:
        A list of player records, each
        [name, position, age, nationalities, club, market value in millions]
    """
    players = []
    wait = WebDriverWait(driver, 10)

    # Pagination control
    def get_next_button():
        try:
            return wait.until(
                EC.element_to_be_clickable((By.XPATH, _NEXT_XPATH)),
                message="Next button not clickable"
            )
        except Exception as e:
            logging.warning("Next button not found or not clickable: %s", e)
            return None

    page = 0
    while True:
        page += 1
        logging.info("Scraping page %d...", page)

        # Wait for the table itself rather than relying on the pagination
        # link as an implicit "page has loaded" signal.
        try:
            rows = wait.until(
                EC.presence_of_all_elements_located((By.XPATH, _ROW_XPATH)),
                message="Player rows not found"
            )
        except Exception as e:
            logging.warning("No player rows found on page %d: %s", page, e)
            rows = []

        for num, row in enumerate(rows, start=1):
            try:
                player = _extract_player(row)
            except Exception as e:
                logging.error("Error processing player %d on page %d: %s", num, page, e)
                continue

            players.append(player)

            # Stop as soon as the requested number of players is reached
            if max_players and len(players) >= max_players:
                logging.info("Reached max_players limit: %d players scraped.", max_players)
                return players

        # Move to next page, if there is one
        next_button = get_next_button()
        if next_button is None:
            break
        try:
            next_button.click()
            # The old link going stale signals that the page content was replaced
            wait.until(EC.staleness_of(next_button))
        except Exception as e:
            # Keep what has been collected instead of losing it to a late failure
            logging.error("Could not advance past page %d: %s", page, e)
            break

    logging.info("Scraping completed. Total players scraped: %d", len(players))
    return players


In [5]:



# Run settings for this scrape
START_URL = "https://www.transfermarkt.com/spieler-statistik/wertvollstespieler/marktwertetop"
PAGE_LOAD_DELAY_S = 6  # fixed pause between opening the page and the click below
# Absolute screen coordinates clicked once the page is open. They depend on the
# screen and window layout; call pyautogui.position() to find the point on yours.
# NOTE(review): presumably this dismisses a cookie/consent overlay -- confirm.
CLICK_POSITION = (466, 556)
MAX_PLAYERS = 500
OUTPUT_CSV = "players_mv_adv_2024.csv"
PLAYER_COLUMNS = ["Name", "Position", "Age", "Nationalities", "Club", "Market_value_(m€)"]

# Initialize WebDriver (e.g., ChromeDriver)
driver = webdriver.Chrome()
try:
    # Navigate to the desired page
    driver.get(START_URL)

    time.sleep(PAGE_LOAD_DELAY_S)
    pyautogui.moveTo(*CLICK_POSITION)
    pyautogui.click()

    # Scrape players
    all_players = scrape_players(driver, max_players=MAX_PLAYERS)
finally:
    # Close the WebDriver even if navigation or scraping raised,
    # so no browser process is left behind
    driver.quit()

# Output results
players_df = pd.DataFrame(all_players, columns=PLAYER_COLUMNS)
players_df.to_csv(OUTPUT_CSV)


In [6]:
# Display the DataFrame of scraped players built from all_players
players_df

Unnamed: 0,Name,Position,Age,Nationalities,Club,Market_value_(m€)
0,Erling Haaland,Centre-Forward,24,Norway,Manchester City,200.0
1,Vinicius Junior,Left Winger,24,"Brazil, Spain",Real Madrid,200.0
2,Jude Bellingham,Attacking Midfield,21,"England, Ireland",Real Madrid,180.0
3,Kylian Mbappé,Centre-Forward,25,"France, Cameroon",Real Madrid,180.0
4,Lamine Yamal,Right Winger,17,"Spain, Equatorial Guinea",FC Barcelona,150.0
...,...,...,...,...,...,...
495,Antoine Semenyo,Right Winger,24,"Ghana, England",AFC Bournemouth,20.0
496,Martin Baturina,Attacking Midfield,21,Croatia,GNK Dinamo Zagreb,20.0
497,Leonardo Balerdi,Centre-Back,25,"Argentina, Italy",Olympique Marseille,20.0
498,Strahinja Pavlović,Centre-Back,23,Serbia,AC Milan,20.0
