In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
import numpy as np

In [None]:
# Base URL of the page
base_url = "https://www.transfermarkt.com/spieler-statistik/wertvollstespieler/marktwertetop?land_id=0&ausrichtung=alle&spielerposition_id=alle&altersklasse=alle&jahrgang=0&kontinent_id=0"

# Headers to mimic a web browser
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
}

# Function to extract player data from a page
def extract_players_data(page_number):
    # Construct the URL for the current page
    url = f"{base_url}&page={page_number}"

    # Make an HTTP request to get the page content
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        # Parse the page content with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all rows with player data
        players_data = []
        player_rows = soup.find_all('tr', class_=['odd', 'even'])
        for row in player_rows:
            # Extract the name and profile link
            name_tag = row.find('a', title=True, href=True)
            if name_tag:
                name = name_tag['title']  # Player's name
                profile_href = name_tag['href']  # Profile link

                # Extract the player ID from the URL using a regex
                match = re.search(r'/spieler/(\d+)', profile_href)
                player_id = match.group(1) if match else None

                # Extract nationalities
                nationality_cell = row.find_all('td', class_='zentriert')[2]
                nationalities = []
                if nationality_cell:
                    flags = nationality_cell.find_all('img', title=True)
                    nationalities = [flag['title'] for flag in flags]

                # Add to the players data list
                players_data.append({
                    'name': name,
                    'player_id': player_id,
                    'nationalities': nationalities
                })

        return players_data
    else:
        print(f"Error accessing page {page_number}: {response.status_code}")
        return []

# Loop through the first 10 pages
all_players_data = []
for page_number in range(1, 11):  # Pages 1 to 10
    page_data = extract_players_data(page_number)
    all_players_data.extend(page_data)

# Display data for the first 10 players
for player in all_players_data[:10]:
    print(player)


{'name': 'Erling Haaland', 'player_id': '418560', 'nationalities': ['Norway']}
{'name': 'Vinicius Junior', 'player_id': '371998', 'nationalities': ['Brazil', 'Spain']}
{'name': 'Jude Bellingham', 'player_id': '581678', 'nationalities': ['England', 'Ireland']}
{'name': 'Kylian Mbappé', 'player_id': '342229', 'nationalities': ['France', 'Cameroon']}
{'name': 'Lamine Yamal', 'player_id': '937958', 'nationalities': ['Spain', 'Equatorial Guinea']}
{'name': 'Phil Foden', 'player_id': '406635', 'nationalities': ['England']}
{'name': 'Bukayo Saka', 'player_id': '433177', 'nationalities': ['England', 'Nigeria']}
{'name': 'Florian Wirtz', 'player_id': '598577', 'nationalities': ['Germany']}
{'name': 'Jamal Musiala', 'player_id': '580195', 'nationalities': ['Germany', 'England']}
{'name': 'Federico Valverde', 'player_id': '369081', 'nationalities': ['Uruguay', 'Spain']}


In [None]:
# Función para obtener datos de mercado de un jugador
def market_values(jugador_id):
    url = f"https://www.transfermarkt.com/ceapi/marketValueDevelopment/graph/{jugador_id}"

    # Cabeceras para simular una solicitud de navegador
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    response = requests.get(url, headers=headers)

    # Verificar si la solicitud fue exitosa
    if response.status_code == 200:
        # Parsear el JSON de la respuesta
        try:
            datos = response.json()
            if "list" in datos:
                return datos["list"]
            else:
                print(f"No se encontraron datos de mercado para el jugador con ID {jugador_id}.")
                return []
        except Exception as e:
            print(f"Error al parsear los datos para el jugador con ID {jugador_id}: {e}")
            return []
    else:
        print(f"Error al obtener datos para el jugador con ID {jugador_id}. Código de estado: {response.status_code}")
        return []



def position(player_id, player_name):
    # URL template for the player profile
    url = f"https://www.transfermarkt.com/{player_name}/profil/spieler/{player_id}"

    # User-Agent header to mimic a real browser
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # Send the request with headers
    response = requests.get(url, headers=headers)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the "Position" label and its following bold span
        position_label = soup.find('span', text="Position:")

        if position_label:
            # Get the next sibling which should be the bold span with the position text
            position_tag = position_label.find_next('span', {'class': 'info-table__content info-table__content--bold'})

            if position_tag:
                # Extract the position text
                position_text = position_tag.get_text(strip=True)
                return position_text
        return None
    else:
        # Handle errors if the request fails
        print(f"Error: Received status code {response.status_code}")
        return None



In [None]:
# Lista para almacenar los datos de todos los jugadores
datos_jugadores = []

for jugador in all_players_data:

    jugador_id = jugador["player_id"]
    jugador_nombre = jugador["name"]
    nationalities = jugador["nationalities"]

    datos_mercado = market_values(jugador_id)
    positon = position(jugador_id,jugador_nombre)
    for dato in datos_mercado:
        # Extraer las columnas necesarias y renombrar
        datos_jugadores.append({
            "ID": jugador_id,
            "Player": jugador_nombre,
            "Age": dato.get("age", None),
            "Nationality": nationalities,
            "Position": positon,
            "Market Value": dato.get("y", None),
            "Date": dato.get("datum_mw", None),
            "Club": dato.get("verein", None)
        })

# Crear un DataFrame con los datos obtenidos
df = pd.DataFrame(datos_jugadores)

  position_label = soup.find('span', text="Position:")


In [None]:
df.head()

Unnamed: 0,ID,Player,Age,Nationality,Position,Market Value,Date,Club
0,418560,Erling Haaland,16,[Norway],Attack - Centre-Forward,200000,"Dec 18, 2016",Bryne FK
1,418560,Erling Haaland,17,[Norway],Attack - Centre-Forward,300000,"Dec 23, 2017",Molde FK
2,418560,Erling Haaland,18,[Norway],Attack - Centre-Forward,2000000,"Sep 10, 2018",Molde FK
3,418560,Erling Haaland,18,[Norway],Attack - Centre-Forward,5000000,"Dec 30, 2018",Molde FK
4,418560,Erling Haaland,18,[Norway],Attack - Centre-Forward,5000000,"Jun 3, 2019",Red Bull Salzburg


In [None]:
# Convert the 'Date' column to datetime
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Function to determine the season based on the player's market value date
def get_season(date):
    # If the month is between August (8) and December (12), it's the current season
    if date.month >= 8:
        return f"{str(date.year)[2:]}/{str(date.year + 1)[2:]}"  # Format as "YY/YY"
    # If the month is between January (1) and May (5), it's the previous season
    elif date.month <= 5:
        return f"{str(date.year - 1)[2:]}/{str(date.year)[2:]}"  # Format as "YY/YY"
    return np.nan  # Return NaN for invalid dates

# Apply the function to create a 'Season' column
df['Season'] = df['Date'].apply(get_season)

# Group by Player, Season, and other relevant columns, including Nationalities
df_season = df.groupby(['ID', 'Player', 'Age', 'Season'], as_index=False).agg(
    {
        'Market Value': 'mean',
        'Club': 'first',  # Choose the first club for each player-season combination
        'Nationality': 'first',  # Take the first occurrence of Nationalities
        'Position': 'last'
    }
)

# Round the 'Market Value' column to the nearest hundred thousand
df_season['Market Value'] = df_season['Market Value'].round(-5)


In [None]:
df_season.head()

Unnamed: 0,ID,Player,Age,Season,Market Value,Club,Nationality,Position
0,1005649,Murillo,20,22/23,500000.0,Sport Club Corinthians Paulista,[Brazil],Defender - Centre-Back
1,1005649,Murillo,21,23/24,22800000.0,Nottingham Forest,[Brazil],Defender - Centre-Back
2,1005649,Murillo,22,24/25,40000000.0,Nottingham Forest,[Brazil],Defender - Centre-Back
3,1056993,Estêvão,16,23/24,10000000.0,SE Palmeiras U17,[Brazil],Attack - Right Winger
4,1056993,Estêvão,17,24/25,40000000.0,Sociedade Esportiva Palmeiras,[Brazil],Attack - Right Winger


In [None]:
def get_injury_stats(player_id, player_name):
    # Construct the URL for the player's injury stats page
    url = f"https://www.transfermarkt.com/{player_name}/verletzungen/spieler/{player_id}/plus/1"

    # Send a GET request to the URL
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # Raise an error for bad HTTP responses

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Check if the player has no injury data (e.g., "No entries available" is displayed)
    empty_message = soup.find('span', {'class': 'empty'})
    if empty_message and 'No entries available' in empty_message.text:
        return {}  # Return empty dictionary if no injury data is available

    # Find the second responsive table
    table = soup.find_all('div', {'class': 'responsive-table'})[1].find('table', {'class': 'items'})

    # Initialize an empty dictionary to store injury stats
    injury_stats = {}

    # Process the rows in the table body
    for row in table.find('tbody').find_all('tr'):
        cols = row.find_all('td')

        # Extract the season, days injured, number of injuries, and games missed
        season_data = cols[0].text.strip()
        days_injured = cols[1].text.strip()
        injuries = cols[2].text.strip()
        games_missed = cols[3].text.strip()

        # Store the data in a dictionary with the season as the key
        injury_stats[season_data] = {
            'days_injured': days_injured,
            'injuries': injuries,
            'games_missed': games_missed
        }

    return injury_stats


In [None]:
# Initialize new columns for the injury data
df_season['days_injured'] = 0
df_season['injuries'] = 0
df_season['games_missed'] = 0

# Create a dictionary to store the injury data per player
player_injury_data = {}

# Loop through the DataFrame and get injury data for each player
for player_id, player_name in zip(df_season['ID'], df_season['Player']):
    # Check if we've already extracted injury data for this player
    if player_id not in player_injury_data:
        # Extract injury data for the current player (only once per player)
        injury_data = get_injury_stats(player_id, player_name)

        # Store the injury data in the dictionary (this can be empty if no injury data)
        player_injury_data[player_id] = injury_data

    # Now that we have the injury data, loop through the DataFrame rows for the current player and season
    for index, row in df_season[df_season['ID'] == player_id].iterrows():
        season = row['Season']

        # If injury data exists for this season, update the DataFrame
        if season in player_injury_data[player_id]:
            df_season.loc[index, 'days_injured'] = player_injury_data[player_id][season].get('days_injured', 0)
            df_season.loc[index, 'injuries'] = player_injury_data[player_id][season].get('injuries', 0)
            df_season.loc[index, 'games_missed'] = player_injury_data[player_id][season].get('games_missed', 0)
        else:
            # If no injury data exists for this season, leave the values as 0 (default behavior)
            df_season.loc[index, 'days_injured'] = 0
            df_season.loc[index, 'injuries'] = 0
            df_season.loc[index, 'games_missed'] = 0

# The DataFrame df_season is now updated with injury data for each player and season, or 0 if no data is found


  df_season.loc[index, 'days_injured'] = player_injury_data[player_id][season].get('days_injured', 0)
  df_season.loc[index, 'injuries'] = player_injury_data[player_id][season].get('injuries', 0)
  df_season.loc[index, 'games_missed'] = player_injury_data[player_id][season].get('games_missed', 0)


In [None]:
# Replace ' days' with an empty string and handle NaN values
df_season['days_injured'] = df_season['days_injured'].str.replace(' days', '').replace('nan', '0').fillna(0)

# Convert to integers, errors='coerce' will convert invalid parsing to NaN
df_season['days_injured'] = pd.to_numeric(df_season['days_injured'], errors='coerce').astype('Int64')

df_season.head()

Unnamed: 0,ID,Player,Age,Season,Market Value,Club,Nationality,Position,days_injured,injuries,games_missed
0,1005649,Murillo,20,22/23,500000.0,Sport Club Corinthians Paulista,[Brazil],Defender - Centre-Back,0,0,0
1,1005649,Murillo,21,23/24,22800000.0,Nottingham Forest,[Brazil],Defender - Centre-Back,0,0,0
2,1005649,Murillo,22,24/25,40000000.0,Nottingham Forest,[Brazil],Defender - Centre-Back,0,0,0
3,1056993,Estêvão,16,23/24,10000000.0,SE Palmeiras U17,[Brazil],Attack - Right Winger,0,0,0
4,1056993,Estêvão,17,24/25,40000000.0,Sociedade Esportiva Palmeiras,[Brazil],Attack - Right Winger,45,2,10


In [None]:
# Function to scrape stats for each player per season
def get_player_stats_per_season(player_id, player_name, season):
    # Construct the URL for the player's stats page
    url = f"https://www.transfermarkt.com/{player_name}/leistungsdaten/spieler/{player_id}/plus/1?saison={season}"
    print(f"Fetching URL: {url}")  # Debugging: Print URL being fetched

    # Initialize dictionaries to store stats
    comps = {
            'Premier League': {'appearances': 0, 'goals': 0, 'assists': 0, 'own_goals': 0,'substitutions_on': 0, 'substitutions_off': 0,
                              'yellow_cards': 0, 'second_yellow_cards': 0,'red_cards': 0, 'penalty_goals': 0,'minutes_per_goal': 0, 'minutes_played': 0},
            'LaLiga': {'appearances': 0, 'goals': 0, 'assists': 0, 'own_goals': 0,'substitutions_on': 0, 'substitutions_off': 0,
                              'yellow_cards': 0, 'second_yellow_cards': 0,'red_cards': 0, 'penalty_goals': 0,'minutes_per_goal': 0, 'minutes_played': 0},
            'Bundesliga': {'appearances': 0, 'goals': 0, 'assists': 0, 'own_goals': 0,'substitutions_on': 0, 'substitutions_off': 0,
                              'yellow_cards': 0, 'second_yellow_cards': 0,'red_cards': 0, 'penalty_goals': 0,'minutes_per_goal': 0, 'minutes_played': 0},
            'Serie A': {'appearances': 0, 'goals': 0, 'assists': 0, 'own_goals': 0,'substitutions_on': 0, 'substitutions_off': 0,
                              'yellow_cards': 0, 'second_yellow_cards': 0,'red_cards': 0, 'penalty_goals': 0,'minutes_per_goal': 0, 'minutes_played': 0},
            'Ligue 1': {'appearances': 0, 'goals': 0, 'assists': 0, 'own_goals': 0,'substitutions_on': 0, 'substitutions_off': 0,
                              'yellow_cards': 0, 'second_yellow_cards': 0,'red_cards': 0, 'penalty_goals': 0,'minutes_per_goal': 0, 'minutes_played': 0},
            'UEFA Champions League': {'appearances': 0, 'goals': 0, 'assists': 0, 'own_goals': 0,'substitutions_on': 0, 'substitutions_off': 0,
                              'yellow_cards': 0, 'second_yellow_cards': 0,'red_cards': 0, 'penalty_goals': 0,'minutes_per_goal': 0, 'minutes_played': 0},
            'Europa League': {'appearances': 0, 'goals': 0, 'assists': 0, 'own_goals': 0,'substitutions_on': 0, 'substitutions_off': 0,
                              'yellow_cards': 0, 'second_yellow_cards': 0,'red_cards': 0, 'penalty_goals': 0,'minutes_per_goal': 0, 'minutes_played': 0},
            'Other Competitions': {'appearances': 0, 'goals': 0, 'assists': 0, 'own_goals': 0,'substitutions_on': 0, 'substitutions_off': 0,
                              'yellow_cards': 0, 'second_yellow_cards': 0,'red_cards': 0, 'penalty_goals': 0,'minutes_per_goal': 0, 'minutes_played': 0}
    }

    # Send a GET request to the URL
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for bad HTTP responses
        print(f"Successfully fetched data for player: {player_name}, season: {season}")  # Debugging
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {url}\nError: {e}")
        return {}

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Locate the table
    table = soup.find('table', {'class': 'items'})
    if table is None:
        print(f"No stats table found for player: {player_name}, season: {season}")
        return comps  # Return empty stats if table is not found

    print(f"Stats table found for player: {player_name}, season: {season}")  # Debugging

    # Process the rows in the table body
    for row in table.find('tbody').find_all('tr'):
        cols = row.find_all('td')
        if len(cols) < 14:  # Ensure the row has enough columns
            print(f"Skipping row with insufficient columns: {len(cols)}")
            continue

        # Extract competition name
        competition_name = cols[1].text.strip()
        print(f"Processing competition: {competition_name}")  # Debugging

        # Extract stats
        appearances = cols[2].text.strip()
        goals = cols[3].text.strip()
        assists = cols[4].text.strip()
        own_goals = cols[5].text.strip()
        substitutions_on = cols[6].text.strip()
        substitutions_off = cols[7].text.strip()
        yellow_cards = cols[8].text.strip()
        second_yellow_cards = cols[9].text.strip()
        red_cards = cols[10].text.strip()
        penalty_goals = cols[11].text.strip()
        minutes_per_goal = cols[12].text.strip().replace("'", "")
        minutes_played = cols[13].text.strip().replace("'", "")

        # Debugging: Print extracted stats
        print(f"Extracted stats for {competition_name}: Appearances={appearances}, Goals={goals}, Assists={assists}")

        # Ensure the competition exists in the dictionary
        if competition_name not in comps:
            print(f"Competition not found in dictionary, categorizing as 'Other Competitions'")
            competition_name = 'Other Competitions'

        # Update the stats for the competition
        comps[competition_name]['appearances'] += int(appearances) if appearances.isdigit() else 0
        comps[competition_name]['goals'] += int(goals) if goals.isdigit() else 0
        comps[competition_name]['assists'] += int(assists) if assists.isdigit() else 0
        comps[competition_name]['own_goals'] += int(own_goals) if own_goals.isdigit() else 0
        comps[competition_name]['substitutions_on'] += int(substitutions_on) if substitutions_on.isdigit() else 0
        comps[competition_name]['substitutions_off'] += int(substitutions_off) if substitutions_off.isdigit() else 0
        comps[competition_name]['yellow_cards'] += int(yellow_cards) if yellow_cards.isdigit() else 0
        comps[competition_name]['second_yellow_cards'] += int(second_yellow_cards) if second_yellow_cards.isdigit() else 0
        comps[competition_name]['red_cards'] += int(red_cards) if red_cards.isdigit() else 0
        comps[competition_name]['penalty_goals'] += int(penalty_goals) if penalty_goals.isdigit() else 0
        comps[competition_name]['minutes_per_goal'] += int(minutes_per_goal) if minutes_per_goal.isdigit() else 0
        comps[competition_name]['minutes_played'] += int(minutes_played) if minutes_played.isdigit() else 0

    print(f"Completed processing for player: {player_name}, season: {season}")  # Debugging
    return comps



# Function to extract the first year from the season string in 'YY/YY' format
def extract_first_year(season_str):
    first_year_short = season_str.split('/')[0]
    first_year_full = int('20' + first_year_short)
    return first_year_full


# Function to add necessary columns dynamically to the DataFrame
def add_columns_for_comps(df, comps):
    for comp, stats in comps.items():
        for stat in stats.keys():
            # Construct column name in the format "competition_stat"
            column_name = f"{comp.lower().replace(' ', '_')}_{stat}"
            if column_name not in df.columns:
                df[column_name] = 0  # Initialize the column with 0
    return df

# Function to update a DataFrame with the stats
def update_dataframe_with_stats(df):
    for index, row in df.iterrows():
        player_id = row['ID']
        player_name = row['Player']
        season = extract_first_year(row['Season'])

        # Get stats for the player in the season
        comps = get_player_stats_per_season(player_id, player_name, season)

        # Update DataFrame with stats for each competition
        for comp, stats in comps.items():
            for stat, value in stats.items():
                # Construct column name and update value
                column_name = f"{comp.lower().replace(' ', '_')}_{stat}"
                df.at[index, column_name] = value

In [None]:
# Update the dataframe with stats
update_dataframe_with_stats(df_season)

Fetching URL: https://www.transfermarkt.com/Murillo/leistungsdaten/spieler/1005649/plus/1?saison=2022
Successfully fetched data for player: Murillo, season: 2022
Stats table found for player: Murillo, season: 2022
Processing competition: Série A
Extracted stats for Série A: Appearances=13, Goals=-, Assists=-
Competition not found in dictionary, categorizing as 'Other Competitions'
Processing competition: Copa do Brasil
Extracted stats for Copa do Brasil: Appearances=7, Goals=-, Assists=-
Competition not found in dictionary, categorizing as 'Other Competitions'
Processing competition: Libertadores
Extracted stats for Libertadores: Appearances=4, Goals=-, Assists=-
Competition not found in dictionary, categorizing as 'Other Competitions'
Processing competition: Copinha
Extracted stats for Copinha: Appearances=4, Goals=-, Assists=-
Competition not found in dictionary, categorizing as 'Other Competitions'
Processing competition: Copa Sudamericana
Extracted stats for Copa Sudamericana: Appe

  df.at[index, column_name] = value
  df.at[index, column_name] = value
  df.at[index, column_name] = value
  df.at[index, column_name] = value


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Successfully fetched data for player: Cole Palmer, season: 2023
Stats table found for player: Cole Palmer, season: 2023
Processing competition: Premier League
Extracted stats for Premier League: Appearances=34, Goals=22, Assists=11
Processing competition: FA Cup
Extracted stats for FA Cup: Appearances=6, Goals=1, Assists=2
Competition not found in dictionary, categorizing as 'Other Competitions'
Processing competition: EFL Cup
Extracted stats for EFL Cup: Appearances=6, Goals=2, Assists=2
Competition not found in dictionary, categorizing as 'Other Competitions'
Processing competition: UEFA Super Cup
Extracted stats for UEFA Super Cup: Appearances=1, Goals=1, Assists=-
Competition not found in dictionary, categorizing as 'Other Competitions'
Processing competition: Community Shield
Extracted stats for Community Shield: Appearances=1, Goals=1, Assists=-
Competition not found in dictionary, categorizing as 'Other Competition

In [None]:
df_season.head()

Unnamed: 0,ID,Player,Age,Season,Market Value,Club,Nationality,Position,days_injured,injuries,...,other_competitions_assists,other_competitions_own_goals,other_competitions_substitutions_on,other_competitions_substitutions_off,other_competitions_yellow_cards,other_competitions_second_yellow_cards,other_competitions_red_cards,other_competitions_penalty_goals,other_competitions_minutes_per_goal,other_competitions_minutes_played
0,1005649,Murillo,20,22/23,500000.0,Sport Club Corinthians Paulista,[Brazil],Defender - Centre-Back,0,0,...,0.0,1.0,1.0,3.0,7.0,0.0,0.0,0.0,0.0,1544.0
1,1005649,Murillo,21,23/24,22800000.0,Nottingham Forest,[Brazil],Defender - Centre-Back,0,0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,372.0
2,1005649,Murillo,22,24/25,40000000.0,Nottingham Forest,[Brazil],Defender - Centre-Back,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1056993,Estêvão,16,23/24,10000000.0,SE Palmeiras U17,[Brazil],Attack - Right Winger,0,0,...,12.0,0.0,12.0,30.0,8.0,0.0,0.0,6.0,819.0,1025.0
4,1056993,Estêvão,17,24/25,40000000.0,Sociedade Esportiva Palmeiras,[Brazil],Attack - Right Winger,45,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
df_season.shape

(2264, 107)

In [None]:
df_season.to_csv('df_season.csv', index=False)

In [None]:
# prompt: get the unique players in the column player of df_season

unique_players = df_season['Player'].unique()
len(unique_players)

250