In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import unicodedata
import matplotlib.pyplot as plt

In [2]:
#function to parse team page on Transfermarket
def get_player_stats(url):
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    table_body = soup.select_one('#yw1 > table > tbody')
    rows = table_body.find_all('tr', class_=['odd', 'even'])
    
    players = []
    tbody = soup.select_one('#yw1 > table > tbody')
    rows = tbody.find_all('tr', class_=['odd', 'even'])
    
    for row in rows:
        cols = row.find_all('td')
        name_tag = cols[1].select_one('table > tbody > tr:nth-child(1) > td.hauptlink > div:nth-child(1) > span > a')
        position_tag = cols[1].select_one('table > tbody > tr:nth-child(1) > td.zentriert.rueckennummer.bg_Torwart')
        country_tag = cols[3].find('img')
        
        player = {
            'Player': name_tag['title'].strip() if name_tag else '',
            'Position': position_tag.text.strip() if position_tag else '',
            'Kit Number': cols[2].text.strip(),
            'Nationality': country_tag['alt'].strip() if country_tag else '',
            'In Squad': cols[4].text.strip(),
            'Appearances': cols[5].text.strip(),
            'Goals': cols[6].text.strip(),
            'Assists': cols[7].text.strip(),
            'Yellow Cards': cols[8].text.strip(),
            'Second Yellows': cols[9].text.strip(),
            'Straight Reds': cols[10].text.strip(),
            'Substituted On': cols[11].text.strip(),
            'Substituted Off': cols[12].text.strip(),
            'PPG': cols[13].text.strip(),
            'Minutes Played': cols[14].text.strip()
        }
        
        #handle "Not used during this season" cases
        if 'Not used during this season' in cols[5].text.strip():
            player.update({
                'Appearances': '0',
                'Goals': '0',
                'Assists': '0',
                'Yellow Cards': '0',
                'Second Yellows': '0',
                'Straight Reds': '0',
                'Substituted On': '0',
                'Substituted Off': '0',
                'PPG': '0',
                'Minutes Played': '0'
            })
        
        players.append(player)
    
    return players

#example URL for Real Madrid 2023 season
url = 'https://www.transfermarkt.com/real-madrid/leistungsdaten/verein/418/plus/1?reldata=%262023'
player_stats = get_player_stats(url)

#convert to DataFrame
df = pd.DataFrame(player_stats)
df.head(30)

Unnamed: 0,Player,Position,Kit Number,Nationality,In Squad,Appearances,Goals,Assists,Yellow Cards,Second Yellows,Straight Reds,Substituted On,Substituted Off,PPG,Minutes Played
0,,,,,Goalkeeper,31,,8,5,-,-,-,-,-,-
1,,,,,Goalkeeper,24,,55,31,-,-,2,-,-,-
2,,,,,Goalkeeper,28,,50,20,-,-,2,-,-,1
3,,,,,Goalkeeper,21,,8,Not used during this season,-,-,-,-,-,-
4,,,,,Goalkeeper,21,,3,Not used during this season,-,-,-,-,-,-
5,,,,,Goalkeeper,19,,17,Not used during this season,-,-,-,-,-,-
6,,,,,Goalkeeper,18,,21,Not used during this season,-,-,-,-,-,-
7,,,,,Centre-Back,25,,15,13,-,-,-,-,-,7
8,,,,,Centre-Back,30,,50,48,2,3,8,-,-,4
9,,,,,Left-Back,28,,44,37,1,-,6,-,-,4


In [6]:
#iterable version I can use for different teams and different seasons
def get_player_stats_redo(base_url, team_name, team_id, season_year):
    headers = {'User-Agent': 'Mozilla/5.0'}
    url = f"{base_url}/{team_name}/leistungsdaten/verein/{team_id}/plus/1?reldata=%26{season_year}"
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    match = re.search(r'reldata=%26(\d{4})', url)
    if match:
        year = int(match.group(1))
        season = f"{year}/{year + 1}"
    else:
        season = 'Unknown'

    #take the club name from the page header or URL (assuming it's in the URL for simplicity)
    club_name = soup.find('h1').text.strip() if soup.find('h1') else 'Unknown'

    
    players = []
    tbody = soup.select_one('#yw1 > table > tbody')
    rows = tbody.find_all('tr', class_=['odd', 'even'])
    
    for row in rows:
        cols = row.find_all('td')
        
        #taking the player name from the specific span class and a tag
        name_tag = row.select_one('span.hide-for-small > a')
        player_name = name_tag.text.strip() if name_tag else ''
            
        #taking the position
        position = cols[4].text.strip() if len(cols) > 4 else ''
        
        #taking the kit number
        kit_number = cols[0].text.strip() if len(cols) > 0 else ''
        
        #taking the nationality using the correct selector
        nationality_tag = row.select_one('td:nth-child(4) > img')
        nationality = nationality_tag['title'].strip() if nationality_tag else ''

        age = cols[5].text.strip() if len(cols) > 5 else ''
        
        #taking other statistics
        in_squad = cols[7].text.strip() if len(cols) > 7 else ''
        appearances = cols[8].text.strip() if len(cols) > 8 else ''
        goals = cols[9].text.strip() if len(cols) > 9 else ''
        assists = cols[10].text.strip() if len(cols) > 10 else ''
        yellow_cards = cols[11].text.strip() if len(cols) > 11 else ''
        second_yellows = cols[12].text.strip() if len(cols) > 12 else ''
        straight_reds = cols[13].text.strip() if len(cols) > 13 else ''
        substituted_on = cols[14].text.strip() if len(cols) > 14 else ''
        substituted_off = cols[15].text.strip() if len(cols) > 14 else ''
        ppg = cols[16].text.strip() if len(cols) > 14 else ''
        minutes_played = cols[17].text.strip() if len(cols) > 14 else ''
        
        #handle "Not used during this season" cases
        if 'Not used during this season' in appearances:
            appearances = '0'
            goals = '0'
            assists = '0'
            yellow_cards = '0'
            second_yellows = '0'
            straight_reds = '0'
            substituted_on = '0'
            substituted_off = '0'
            ppg = '0'
            minutes_played = '0'
        
        player = {
            'Player': player_name,
            'Age': age,
            'Position': position,
            'Kit Number': kit_number,
            'Nationality': nationality,
            'In Squad': in_squad,
            'Appearances': appearances,
            'Goals': goals,
            'Assists': assists,
            'Yellow Cards': yellow_cards,
            'Second Yellows': second_yellows,
            'Straight Reds': straight_reds,
            'Substituted On': substituted_on,
            'Substituted Off': substituted_off,
            'PPG': ppg,
            'Minutes Played': minutes_played,
            'Club': club_name,
            'Season': season
        }
        
        players.append(player)
    
    return players

In [7]:
#example usage for Real Madrid 2023 season
base_url = 'https://www.transfermarkt.com'
team_name = 'real-madrid'
team_id = 418
season_year = 2022
player_stats = get_player_stats_redo(base_url, team_name, team_id, season_year)

#convert to DataFrame
df = pd.DataFrame(player_stats)
#to check results
df.head(50)

Unnamed: 0,Player,Age,Position,Kit Number,Nationality,In Squad,Appearances,Goals,Assists,Yellow Cards,Second Yellows,Straight Reds,Substituted On,Substituted Off,PPG,Minutes Played,Club,Season
0,Thibaut Courtois,30,Goalkeeper,1,Belgium,50,49,-,-,1,-,-,-,-,2.14,4.470',Real Madrid,2022/2023
1,Andriy Lunin,23,Goalkeeper,13,Ukraine,59,12,-,-,-,-,-,-,-,2.17,1.080',Real Madrid,2022/2023
2,Lucas Cañizares,20,Goalkeeper,30,Spain,11,0,0,0,0,0,0,0,0,0.0,0,Real Madrid,2022/2023
3,Mario de Luis,20,Goalkeeper,43,Spain,1,0,0,0,0,0,0,0,0,0.0,0,Real Madrid,2022/2023
4,Luis López,21,Goalkeeper,26,Spain,52,0,0,0,0,0,0,0,0,0.0,0,Real Madrid,2022/2023
5,Diego Piñeiro,18,Goalkeeper,38,Spain,1,0,0,0,0,0,0,0,0,0.0,0,Real Madrid,2022/2023
6,Fran González,17,Goalkeeper,44,Spain,1,0,0,0,0,0,0,0,0,0.0,0,Real Madrid,2022/2023
7,Éder Militão,24,Centre-Back,3,Brazil,55,51,7,1,9,-,-,3,6,2.12,4.231',Real Madrid,2022/2023
8,Antonio Rüdiger,29,Centre-Back,22,Germany,58,53,2,-,2,-,-,14,2,2.08,3.848',Real Madrid,2022/2023
9,Ferland Mendy,27,Left-Back,23,France,36,28,-,1,4,-,-,3,13,2.25,2.064',Real Madrid,2022/2023


In [5]:
#function to get a team for multiple years
def collect_data_for_multiple_seasons(base_url, team_name, team_id, season_years):
    all_players = []
    
    for year in season_years:
        player_stats = get_player_stats_redo(base_url, team_name, team_id, year)
        all_players.extend(player_stats)
    
    #convert to DataFrame
    df = pd.DataFrame(all_players)
    
    #save to CSV
    df.to_csv(f'{team_name}_multiple_seasons_stats.csv', index=False)
    return df

In [8]:
#to check results
base_url = 'https://www.transfermarkt.com'
team_name = 'fc-barcelona'
team_id = 131
season_years = [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]  # List of specific seasons you want to collect data for

df = collect_data_for_multiple_seasons(base_url, team_name, team_id, season_years)
df.head(100)

Unnamed: 0,Player,Age,Position,Kit Number,Nationality,In Squad,Appearances,Goals,Assists,Yellow Cards,Second Yellows,Straight Reds,Substituted On,Substituted Off,PPG,Minutes Played,Club,Season
0,Rubén Miño,21,Goalkeeper,31,Spain,8,1,-,-,-,-,-,-,-,000,90',FC Barcelona,2010/2011
1,Oier Olazábal,20,Goalkeeper,38,Spain,7,0,0,0,0,0,0,0,0,0,0,FC Barcelona,2010/2011
2,Víctor Valdés,28,Goalkeeper,1,Spain,54,44,-,-,8,-,-,-,-,2.45,3.960',FC Barcelona,2010/2011
3,José Manuel Pinto,34,Goalkeeper,13,Spain,55,17,-,-,2,-,1,-,-,2.24,1.560',FC Barcelona,2010/2011
4,Marc Bartra,19,Centre-Back,32,Spain,9,5,1,-,1,-,-,1,-,2.60,416',FC Barcelona,2010/2011
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Alexis Sánchez,23,Centre-Forward,9,Chile,52,46,11,14,6,-,-,18,17,2.35,2.623',FC Barcelona,2012/2013
96,Cristian Tello,20,Left Winger,37,Spain,50,34,8,6,-,-,-,18,8,2.50,1.633',FC Barcelona,2012/2013
97,Pedro,24,Right Winger,17,Spain,53,45,10,11,6,-,-,6,22,2.20,3.414',FC Barcelona,2012/2013
98,Gerard Deulofeu,18,Centre-Forward,27,Spain,7,4,-,-,-,-,-,4,-,2.50,62',FC Barcelona,2012/2013


In [11]:
#Getting league tables so I have a list of teams to collect data for as well as their ranking
# Define the URL
url = "https://www.transfermarkt.com/laliga/tabelle/wettbewerb/ES1?saison_id=2023"

# take the year from the URL using regex
match = re.search(r'saison_id=(\d{4})', url)
if match:
    year = int(match.group(1))
    season = f"{year}/{year + 1}"
else:
    raise ValueError("Year not found in the URL")

# Set up headers to mimic a browser visit
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Fetch the HTML content with headers
response = requests.get(url, headers=headers)
response.raise_for_status()  # Ensure we notice bad responses

# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find the table containing the team data
table = soup.select_one('#yw1 > table > tbody')

# take the required data
team_data = []

if table:
    rows = table.find_all('tr')
    for row in rows:
        try:
            # Team name and ID
            team_anchor = row.select_one('td.no-border-links.hauptlink > a')
            team_name = team_anchor['title']
            
            # take team ID from the href attribute using regex
            team_href = team_anchor['href']
            team_id_match = re.search(r'/verein/(\d+)/', team_href)
            team_id = team_id_match.group(1) if team_id_match else 'N/A'

            # take the team link name
            team_link_name = team_href.split('/')[1]

            # Team placement
            team_placement = row.select_one('td.rechts.hauptlink').text.strip()

            team_data.append({
                'Team Name': team_name,
                'Team ID': team_id,
                'Placement': team_placement,
                'Season': season,
                'Team Link Name': team_link_name
            })
        except AttributeError as e:
            print(f"Error taking data for a row: {e}")

# Convert the data to a DataFrame
placement_id_df = pd.DataFrame(team_data)

In [12]:
#Testing previous cell
placement_id_df.head(20)

Unnamed: 0,Team Name,Team ID,Placement,Season,Team Link Name
0,Real Madrid,418,1,2023/2024,real-madrid
1,FC Barcelona,131,2,2023/2024,fc-barcelona
2,Girona FC,12321,3,2023/2024,fc-girona
3,Atlético de Madrid,13,4,2023/2024,atletico-madrid
4,Athletic Bilbao,621,5,2023/2024,athletic-bilbao
5,Real Sociedad,681,6,2023/2024,real-sociedad-san-sebastian
6,Real Betis Balompié,150,7,2023/2024,real-betis-sevilla
7,Villarreal CF,1050,8,2023/2024,fc-villarreal
8,Valencia CF,1049,9,2023/2024,fc-valencia
9,Deportivo Alavés,1108,10,2023/2024,deportivo-alaves


In [13]:
##same as previous function but loop for all the season I need
#define the base URL and headers
base_url = "https://www.transfermarkt.com/laliga/tabelle/wettbewerb/ES1?saison_id={year}"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

#initialize an empty DataFrame to store all the data
laliga_standings = pd.DataFrame(columns=['Team Name', 'Team ID', 'Placement', 'Season', 'Team Link Name'])

#loop over each year from 2023 to 2008
for year in range(2023, 2007, -1):
    url = base_url.format(year=year)
    
    #take the season string
    season = f"{year}/{year + 1}"
    
    #get the HTML content with headers
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # Ensure we notice bad responses
    
    #parse the HTML content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')
    
    #find the table containing the team data
    table = soup.select_one('#yw1 > table > tbody')
    
    #take the required data
    team_data = []
    
    if table:
        rows = table.find_all('tr')
        for row in rows:
            try:
                #team name and ID
                team_anchor = row.select_one('td.no-border-links.hauptlink > a')
                team_name = team_anchor['title']
                
                #take team ID from the href attribute using regex
                team_href = team_anchor['href']
                team_id_match = re.search(r'/verein/(\d+)/', team_href)
                team_id = team_id_match.group(1) if team_id_match else 'N/A'
    
                #take the team link name
                team_link_name = team_href.split('/')[1]
    
                #team placement
                team_placement = row.select_one('td.rechts.hauptlink').text.strip()
    
                team_data.append({
                    'Team Name': team_name,
                    'Team ID': team_id,
                    'Placement': team_placement,
                    'Season': season,
                    'Team Link Name': team_link_name
                })
            except AttributeError as e:
                print(f"Error extacting data for a row: {e}")
    
    #convert the data to a DataFrame and append to the main DataFrame
    year_df = pd.DataFrame(team_data)
    laliga_standings = pd.concat([laliga_standings, year_df], ignore_index=True)

In [14]:
#testing previous cell
laliga_standings.tail(60)

Unnamed: 0,Team Name,Team ID,Placement,Season,Team Link Name
260,FC Barcelona,131,1,2010/2011,fc-barcelona
261,Real Madrid,418,2,2010/2011,real-madrid
262,Valencia CF,1049,3,2010/2011,fc-valencia
263,Villarreal CF,1050,4,2010/2011,fc-villarreal
264,Sevilla FC,368,5,2010/2011,fc-sevilla
265,Athletic Bilbao,621,6,2010/2011,athletic-bilbao
266,Atlético de Madrid,13,7,2010/2011,atletico-madrid
267,RCD Espanyol Barcelona,714,8,2010/2011,espanyol-barcelona
268,CA Osasuna,331,9,2010/2011,ca-osasuna
269,Sporting Gijón,2448,10,2010/2011,sporting-gijon


In [15]:
#define the base URL and headers
base_url = "https://www.transfermarkt.com/{league}/tabelle/wettbewerb/{competition}?saison_id={year}"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

#list of leagues and their respective competition codes
leagues = {
    'laliga': 'ES1',
    'premier-league': 'GB1',
    'bundesliga': 'L1',
    'serie-a': 'IT1',
    'ligue-1': 'FR1'
}

#initialize a dictionary to store DataFrames for each league
league_dfs = {league: pd.DataFrame(columns=[
    'Team Name', 'Team ID', 'Placement', 'Season', 'Team Link Name', 'League', 'Champions League', 'Domestic Cup']) 
    for league in leagues}

#loop over each league
for league, competition in leagues.items():
    #loop over each year from 2023 to 2008
    for year in range(2023, 2007, -1):
        url = base_url.format(league=league, competition=competition, year=year)
        
        #take the season string
        season = f"{year}/{year + 1}"
        
        #fetch the HTML content with headers
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Ensure we notice bad responses
        
        #parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')
        
        #find the table containing the team data
        table = soup.select_one('#yw1 > table > tbody')
        
        #take the required data
        team_data = []
        
        if table:
            rows = table.find_all('tr')
            for row in rows:
                try:
                    #team name and ID
                    team_anchor = row.select_one('td.no-border-links.hauptlink > a')
                    team_name = team_anchor['title']
                    
                    #take team ID from the href attribute using regex
                    team_href = team_anchor['href']
                    team_id_match = re.search(r'/verein/(\d+)/', team_href)
                    team_id = team_id_match.group(1) if team_id_match else 'N/A'
        
                    #take the team link name
                    team_link_name = team_href.split('/')[1]
        
                    #team placement
                    team_placement = row.select_one('td.rechts.hauptlink').text.strip()
        
                    team_data.append({
                        'Team Name': team_name,
                        'Team ID': team_id,
                        'Placement': team_placement,
                        'Season': season,
                        'Team Link Name': team_link_name,
                        'League': league,
                        'Champions League': 0,  # Default to 0
                        'Domestic Cup': 0       # Default to 0
                    })
                except AttributeError as e:
                    print(f"Error extracting data for a row: {e}")
        
        #convert the data to a DataFrame and append to the league's DataFrame
        year_df = pd.DataFrame(team_data)
        league_dfs[league] = pd.concat([league_dfs[league], year_df], ignore_index=True)

In [17]:
#testing previous cell
league_dfs['ligue-1'].tail(60)

Unnamed: 0,Team Name,Team ID,Placement,Season,Team Link Name,League,Champions League,Domestic Cup
258,LOSC Lille,1082,1,2010/2011,losc-lille,ligue-1,0,0
259,Olympique Marseille,244,2,2010/2011,olympique-marseille,ligue-1,0,0
260,Olympique Lyon,1041,3,2010/2011,olympique-lyon,ligue-1,0,0
261,Paris Saint-Germain,583,4,2010/2011,fc-paris-saint-germain,ligue-1,0,0
262,FC Sochaux-Montbéliard,750,5,2010/2011,fc-sochaux-montbeliard,ligue-1,0,0
263,Stade Rennais FC,273,6,2010/2011,fc-stade-rennes,ligue-1,0,0
264,FC Girondins Bordeaux,40,7,2010/2011,fc-girondins-bordeaux,ligue-1,0,0
265,FC Toulouse,415,8,2010/2011,fc-toulouse,ligue-1,0,0
266,AJ Auxerre,290,9,2010/2011,aj-auxerre,ligue-1,0,0
267,AS Saint-Étienne,618,10,2010/2011,as-saint-etienne,ligue-1,0,0


In [18]:
#define the base URL for player data scraping
player_data_base_url = 'https://www.transfermarkt.com'

# Function to get player stats
def get_player_stats_comprehensive(player_data_base_url, team_name, team_id, season_year, placement, league, champions_league, domestic_cup):
    headers = {'User-Agent': 'Mozilla/5.0'}
    url = f"{player_data_base_url}/{team_name}/leistungsdaten/verein/{team_id}/plus/1?reldata=%26{season_year}"
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    match = re.search(r'reldata=%26(\d{4})', url)
    if match:
        year = int(match.group(1))
        season = f"{year}/{year + 1}"
    else:
        season = 'Unknown'

    club_name = soup.find('h1').text.strip() if soup.find('h1') else 'Unknown'
    
    players = []
    tbody = soup.select_one('#yw1 > table > tbody')
    
    if not tbody:
        print(f"No table body found for URL: {url}")
        return players
    
    rows = tbody.find_all('tr', class_=['odd', 'even'])
    
    for row in rows:
        cols = row.find_all('td')
        
        name_tag = row.select_one('span.hide-for-small > a')
        player_name = name_tag.text.strip() if name_tag else ''
            
        position = cols[4].text.strip() if len(cols) > 4 else ''
        kit_number = cols[0].text.strip() if len(cols) > 0 else ''
        nationality_tag = row.select_one('td:nth-child(4) > img')
        nationality = nationality_tag['title'].strip() if nationality_tag else ''

        age = cols[5].text.strip() if len(cols) > 5 else ''
        in_squad = cols[7].text.strip() if len(cols) > 7 else ''
        appearances = cols[8].text.strip() if len(cols) > 8 else ''
        goals = cols[9].text.strip() if len(cols) > 9 else ''
        assists = cols[10].text.strip() if len(cols) > 10 else ''
        yellow_cards = cols[11].text.strip() if len(cols) > 11 else ''
        second_yellows = cols[12].text.strip() if len(cols) > 12 else ''
        straight_reds = cols[13].text.strip() if len(cols) > 13 else ''
        substituted_on = cols[14].text.strip() if len(cols) > 14 else ''
        substituted_off = cols[15].text.strip() if len(cols) > 14 else ''
        ppg = cols[16].text.strip() if len(cols) > 14 else ''
        minutes_played = cols[17].text.strip() if len(cols) > 14 else ''
        
        if 'Not used during this season' in appearances:
            appearances = '0'
            goals = '0'
            assists = '0'
            yellow_cards = '0'
            second_yellows = '0'
            straight_reds = '0'
            substituted_on = '0'
            substituted_off = '0'
            ppg = '0'
            minutes_played = '0'
        
        player = {
            'Player': player_name,
            'Age': age,
            'Position': position,
            'Kit Number': kit_number,
            'Nationality': nationality,
            'In Squad': in_squad,
            'Appearances': appearances,
            'Goals': goals,
            'Assists': assists,
            'Yellow Cards': yellow_cards,
            'Second Yellows': second_yellows,
            'Straight Reds': straight_reds,
            'Substituted On': substituted_on,
            'Substituted Off': substituted_off,
            'PPG': ppg,
            'Minutes Played': minutes_played,
            'Club': club_name, 
            'Season': season,
            'Team': team_name,
            'Placement': placement,
            'League': league,
            'Champions League': champions_league,
            'Domestic Cup': domestic_cup
        }
        
        players.append(player)
    
    return players

#initialize a comprehensive df to store all player data
comprehensive_df = pd.DataFrame(columns=[
    'Player', 'Age', 'Position', 'Kit Number', 'Nationality', 'In Squad', 'Appearances', 'Goals', 
    'Assists', 'Yellow Cards', 'Second Yellows', 'Straight Reds', 'Substituted On', 'Substituted Off', 
    'PPG', 'Minutes Played', 'Club', 'Season', 'Team', 'Placement', 'League', 'Champions League', 'Domestic Cup'
])

#loop over each league df to get player data
for league, df in league_dfs.items():
    grouped_df = df.groupby('Season')
    for season, group in grouped_df:
        for index, row in group.iterrows():
            team_name = row['Team Link Name']
            team_id = row['Team ID']
            season_year = int(season.split('/')[0])
            champions_league = row['Champions League']
            domestic_cup = row['Domestic Cup']
            placement = row['Placement']
            player_stats = get_player_stats_comprehensive(
                player_data_base_url, team_name, team_id, season_year, placement, league, champions_league, domestic_cup
            )
            comprehensive_df = pd.concat([comprehensive_df, pd.DataFrame(player_stats)], ignore_index=True)

In [19]:
#save the df to a CSV file
comprehensive_df.to_csv('comprehensive_df.csv', index=False)

In [20]:
comprehensive_df.describe()

Unnamed: 0,Player,Age,Position,Kit Number,Nationality,In Squad,Appearances,Goals,Assists,Yellow Cards,...,Substituted Off,PPG,Minutes Played,Club,Season,Team,Placement,League,Champions League,Domestic Cup
count,58045.0,58045,58045,58045,58045,58045,58045,58045,58045,58045,...,58045,58045,58045,58045,58045,58045,58045,58045,58045,58045
unique,16160.0,34,16,100,166,67,66,61,34,24,...,41,282,4421,191,16,191,20,5,1,1
top,,19,Centre-Back,-,Spain,1,0,-,-,-,...,-,0,0,Udinese Calcio,2018/2019,udinese-calcio,16,serie-a,0,0
freq,1463.0,4267,10721,9720,7830,4121,7679,27144,26687,16638,...,15092,11323,7679,724,3915,724,2995,13681,58045,58045


In [28]:
###Some data cleaning
#Replace '-' with 0 so we can make features numerical later
comprehensive_df.replace('-', '0', inplace=True)

#getting rid of the the period and using a comma because so we can properly treat the value as a number
#comment out for rerun as this will give an error if successfully made into float
#comprehensive_df['Minutes Played'] = comprehensive_df['Minutes Played'].str.replace('.', ',')

#Change it to a float so its numerical
#comment out for rerun as this will give an error if successfully made into float
#comprehensive_df['Minutes Played'] = comprehensive_df['Minutes Played'].str.replace("'", "").str.replace(",", "").astype(float)

In [25]:
comprehensive_df.head()

Unnamed: 0,Player,Age,Position,Kit Number,Nationality,In Squad,Appearances,Goals,Assists,Yellow Cards,...,Substituted Off,PPG,Minutes Played,Club,Season,Team,Placement,League,Champions League,Domestic Cup
0,Oier Olazábal,18,Goalkeeper,0,Spain,2,1,0,0,0,...,0,0.0,90.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,0,0
1,Víctor Valdés,26,Goalkeeper,1,Spain,52,49,0,0,3,...,0,2.35,4410.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,0,0
2,José Manuel Pinto,32,Goalkeeper,13,Spain,51,11,0,0,1,...,0,2.18,990.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,0,0
3,Albert Jorquera,29,Goalkeeper,25,Spain,20,1,0,0,0,...,0,0.0,90.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,0,0
4,Andreu Fontàs,18,Centre-Back,0,Spain,1,0,0,0,0,...,0,0.0,0.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,0,0


In [29]:
#updating Champions league winner column
champions_league_winners = {
    '2023/2024': 'real-madrid',
    '2022/2023': 'manchester-city',
    '2021/2022': 'real-madrid',
    '2020/2021': 'fc-chelsea',
    '2019/2020': 'fc-bayern-munchen',
    '2018/2019': 'fc-liverpool',
    '2017/2018': 'real-madrid',
    '2016/2017': 'real-madrid',
    '2015/2016': 'real-madrid',
    '2014/2015': 'fc-barcelona',
    '2013/2014': 'real-madrid',
    '2012/2013': 'fc-bayern-munchen',
    '2011/2012': 'fc-chelsea',
    '2010/2011': 'fc-barcelona',
    '2009/2010': 'inter-mailand',
    '2008/2009': 'fc-barcelona'
}

#function to update the Champions League column
def update_champions_league_winners(df, winners_dict):
    for season, winning_team in winners_dict.items():
        #update the Champions League column
        df.loc[(df['Season'] == season) & (df['Team'] == winning_team), 'Champions League'] = 1
    return df

comprehensive_df = update_champions_league_winners(comprehensive_df, champions_league_winners)

In [32]:
#incremental check

def search_team_season_records(df, team_name, season_year):
    """
    Search for player records of a specific team in a specific season.
    
    Parameters:
    df (pd.DataFrame): The DataFrame to search within.
    team_name (str): The name of the team.
    season_year (str): The season year (e.g., '2022/2023').
    
    Returns:
    pd.DataFrame: Filtered DataFrame containing records for the specified team and season.
    """
    #filter the df
    records = df[(df['Team'] == team_name) & (df['Season'] == season_year)]
    
    #display
    print(records)
    
    return records

#test
team_name = 'fc-chelsea'
season_year = '2020/2021'
team_season_records = search_team_season_records(comprehensive_df, team_name, season_year)

                    Player Age            Position Kit Number    Nationality  \
20090                       25          Goalkeeper          1          Spain   
20091        Edouard Mendy  28          Goalkeeper         16        Senegal   
20092        Nathan Baxter  21          Goalkeeper         31        England   
20093          Karlo Ziger  19          Goalkeeper         40        Croatia   
20094       Jamal Blackman  26          Goalkeeper          0        England   
20095      Willy Caballero  38          Goalkeeper         13      Argentina   
20096  Andreas Christensen  24         Centre-Back          4        Denmark   
20097        Fikayo Tomori  22         Centre-Back         14        England   
20098          Reece James  20          Right-Back         24        England   
20099      Tino Livramento  17          Right-Back         57        England   
20100         Ben Chilwell  23           Left-Back         21        England   
20101      Antonio Rüdiger  27         C

In [37]:
#if you wish to specifically verify UCL Winners
def search_champions_league_winners(df):
    """
    Search for player records where the 'Champions League' column is 1.
    
    Parameters:
    df (pd.DataFrame): The DataFrame to search within.
    
    Returns:
    pd.DataFrame: Filtered DataFrame containing records where the 'Champions League' column is 1.
    """
    #filter the df
    records = df[df['Champions League'] == 1]
    
    
    return records

#test
champions_league_winners_df = search_champions_league_winners(comprehensive_df)

champions_league_winners_df.head(50)

Unnamed: 0,Player,Age,Position,Kit Number,Nationality,In Squad,Appearances,Goals,Assists,Yellow Cards,...,Substituted Off,PPG,Minutes Played,Club,Season,Team,Placement,League,Champions League,Domestic Cup
0,Oier Olazábal,18,Goalkeeper,0,Spain,2,1,0,0,0,...,0,0.0,90.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,0
1,Víctor Valdés,26,Goalkeeper,1,Spain,52,49,0,0,3,...,0,2.35,4410.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,0
2,José Manuel Pinto,32,Goalkeeper,13,Spain,51,11,0,0,1,...,0,2.18,990.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,0
3,Albert Jorquera,29,Goalkeeper,25,Spain,20,1,0,0,0,...,0,0.0,90.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,0
4,Andreu Fontàs,18,Centre-Back,0,Spain,1,0,0,0,0,...,0,0.0,0.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,0
5,Alberto Botía,19,Centre-Back,0,Spain,5,1,0,0,0,...,0,1.0,27.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,0
6,Martín Cáceres,21,Centre-Back,2,Uruguay,51,23,0,0,3,...,0,2.13,1523.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,0
7,Marc Muniesa,16,Centre-Back,0,Spain,2,1,0,0,0,...,0,0.0,31.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,0
8,Gerard Piqué,21,Centre-Back,3,Spain,50,45,3,1,7,...,1,2.27,3932.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,0
9,Rafa Márquez,29,Centre-Back,4,Mexico,41,37,3,3,7,...,8,2.49,3020.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,0


In [38]:
#domestic cup winners
fa_cup_winners = {
    '2023/2024': 'manchester-united',
    '2022/2023': 'manchester-city',
    '2021/2022': 'fc-liverpool',
    '2020/2021': 'leicester-city',
    '2019/2020': 'fc-arsenal',
    '2018/2019': 'manchester-city',
    '2017/2018': 'fc-chelsea',
    '2016/2017': 'fc-arsenal',
    '2015/2016': 'manchester-united',
    '2014/2015': 'fc-arsenal',
    '2013/2014': 'fc-arsenal',
    '2012/2013': 'wigan-athletic',
    '2011/2012': 'fc-chelsea',
    '2010/2011': 'manchester-city',
    '2009/2010': 'fc-chelsea',
    '2008/2009': 'fc-chelsea'
}

dfb_pokal_winners = {
    '2023/2024': 'bayer-04-leverkusen',
    '2022/2023': 'rasenballsport-leipzig',
    '2021/2022': 'rasenballsport-leipzig',
    '2020/2021': 'borussia-dortmund',
    '2019/2020': 'fc-bayern-munchen',
    '2018/2019': 'fc-bayern-munchen',
    '2017/2018': 'eintracht-frankfurt',
    '2016/2017': 'borussia-dortmund',
    '2015/2016': 'fc-bayern-munchen',
    '2014/2015': 'vfl-wolfsburg',
    '2013/2014': 'fc-bayern-munchen',
    '2012/2013': 'fc-bayern-munchen',
    '2011/2012': 'borussia-dortmund',
    '2010/2011': 'fc-schalke-04',
    '2009/2010': 'fc-bayern-munchen',
    '2008/2009': 'sv-werder-bremen'
}

copa_del_rey_winners = {
    '2023/2024': 'athletic-bilbao',
    '2022/2023': 'real-madrid',
    '2021/2022': 'real-betis-sevilla',
    '2020/2021': 'fc-barcelona',
    '2019/2020': 'real-sociedad-san-sebastian',
    '2018/2019': 'fc-valencia',
    '2017/2018': 'fc-barcelona',
    '2016/2017': 'fc-barcelona',
    '2015/2016': 'fc-barcelona',
    '2014/2015': 'fc-barcelona',
    '2013/2014': 'real-madrid',
    '2012/2013': 'atletico-madrid',
    '2011/2012': 'fc-barcelona',
    '2010/2011': 'real-madrid',
    '2009/2010': 'fc-sevilla',
    '2008/2009': 'fc-barcelona'
}

coupe_de_france_winners = {
    '2023/2024': 'fc-paris-saint-germain',
    '2022/2023': 'fc-toulouse',
    '2021/2022': 'fc-nantes',
    '2020/2021': 'fc-paris-saint-germain',
    '2019/2020': 'fc-paris-saint-germain',
    '2018/2019': 'fc-stade-rennes',
    '2017/2018': 'fc-paris-saint-germain',
    '2016/2017': 'fc-paris-saint-germain',
    '2015/2016': 'fc-paris-saint-germain',
    '2014/2015': 'fc-paris-saint-germain',
    '2013/2014': 'ea-guingamp',
    '2012/2013': 'fc-girondins-bordeaux',
    '2011/2012': 'olympique-lyon',
    '2010/2011': 'losc-lille',
    '2009/2010': 'fc-paris-saint-germain',
    '2008/2009': 'ea-guingamp'
}

coppa_italia_winners = {
    '2023/2024': 'juventus-turin',
    '2022/2023': 'inter-mailand',
    '2021/2022': 'inter-mailand',
    '2020/2021': 'juventus-turin',
    '2019/2020': 'ssc-neapel',
    '2018/2019': 'lazio-rom',
    '2017/2018': 'juventus-turin',
    '2016/2017': 'juventus-turin',
    '2015/2016': 'juventus-turin',
    '2014/2015': 'juventus-turin',
    '2013/2014': 'ssc-neapel',
    '2012/2013': 'lazio-rom',
    '2011/2012': 'ssc-neapel',
    '2010/2011': 'inter-mailand',
    '2009/2010': 'inter-mailand',
    '2008/2009': 'lazio-rom'
}

#function to update the domestic cup column based on winners dictionaries
def update_domestic_cup_winners(df, winners_dict, column_name='Domestic Cup'):
    for season, winning_team in winners_dict.items():
        #update the Domestic Cup column
        df.loc[(df['Season'] == season) & (df['Team'] == winning_team), column_name] = 1
    return df


#assume ucl_winners_df is your df
#make sure the domestic cup column exists
comprehensive_df['Domestic Cup'] = 0

#update the Domestic Cup column for each competition
comprehensive_df = update_domestic_cup_winners(comprehensive_df, coppa_italia_winners)
comprehensive_df = update_domestic_cup_winners(comprehensive_df, coupe_de_france_winners)
comprehensive_df = update_domestic_cup_winners(comprehensive_df, copa_del_rey_winners)
comprehensive_df = update_domestic_cup_winners(comprehensive_df, dfb_pokal_winners)
comprehensive_df = update_domestic_cup_winners(comprehensive_df, fa_cup_winners)

#display the updated DataFrame
print(comprehensive_df[comprehensive_df['Domestic Cup'] == 1])

                  Player Age        Position Kit Number Nationality In Squad  \
0          Oier Olazábal  18      Goalkeeper          0       Spain        2   
1          Víctor Valdés  26      Goalkeeper          1       Spain       52   
2      José Manuel Pinto  32      Goalkeeper         13       Spain       51   
3        Albert Jorquera  29      Goalkeeper         25       Spain       20   
4          Andreu Fontàs  18     Centre-Back          0       Spain        1   
...                  ...  ..             ...        ...         ...      ...   
57379      Gonçalo Ramos  22  Centre-Forward          9    Portugal       50   
57380    Bradley Barcola  20     Left Winger         29      France       45   
57381  Randal Kolo Muani  24  Centre-Forward         23      France       46   
57382      Marco Asensio  27    Right Winger         11       Spain       37   
57383                     21  Centre-Forward         44      France        3   

      Appearances Goals Assists Yellow 

In [39]:
#display unique positions in the position column
unique_positions = comprehensive_df['Position'].unique()
print(unique_positions)

['Goalkeeper' 'Centre-Back' 'Left-Back' 'Right-Back' 'Defensive Midfield'
 'Central Midfield' 'Attacking Midfield' 'Right Winger' 'Left Winger'
 'Centre-Forward' 'Second Striker' 'Left Midfield' 'Right Midfield'
 'Midfield' 'Defender' 'Attack']


In [48]:
comprehensive_df['TOTY'] = 0
comprehensive_df['TOTY_Nominee'] = 0

def normalize_string(s):
    """
    Normalize a string by removing accents and converting to lowercase.
    """
    #decompose the unicode string into its base characters and accents
    s = unicodedata.normalize('NFKD', s)
    
    #encode to ASCII to remove accents then decode back to string
    s = s.encode('ASCII', 'ignore').decode('ASCII')
    
    #convert to lowercase
    s = s.lower()
    
    return s

#normalize all player names in the df and update the player column
comprehensive_df['Player'] = comprehensive_df['Player'].apply(normalize_string)

In [52]:
toty_players = {
'2023/2024': [
        ('alisson', 'fc-liverpool'), 
        ('jeremie frimpong', 'bayer-04-leverkusen'), 
        ('ruben dias', 'manchester-city'), 
        ('virgil van dijk', 'fc-liverpool'), 
        ('theo hernandez', 'ac-mailand'), 
        ('rodri', 'manchester-city'), 
        ('jude bellingham', 'real-madrid'), 
        ('kevin de bruyne', 'manchester-city'), 
        ('lionel messi', 'inter-miami'), 
        ('kylian mbappe', 'fc-paris-saint-germain'), 
        ('erling haaland', 'manchester-city')
    ],
    '2022/2023': [
        ('thibaut courtois', 'real-madrid'), 
        ('achraf hakimi', 'fc-paris-saint-germain'), 
        ('virgil van dijk', 'fc-liverpool'), 
        ('eder militao', 'real-madrid'), 
        ('theo hernandez', 'ac-mailand'), 
        ('luka modric', 'real-madrid'), 
        ('kevin de bruyne', 'manchester-city'), 
        ('jude bellingham', 'borussia-dortmund'), 
        ('kylian mbappe', 'fc-paris-saint-germain'), 
        ('lionel messi', 'fc-paris-saint-germain'), 
        ('karim benzema', 'real-madrid')
    ],
    '2021/2022': [
        ('gianluigi donnaruma', 'fc-paris-saint-germain'), 
        ('achraf hakimi', 'fc-paris-saint-germain'), 
        ('marquinhos', 'fc-paris-saint-germain'), 
        ('ruben dias', 'manchester-city'), 
        ('joao cancelo', 'manchester-city'), 
        ('jorginho', 'fc-chelsea'), 
        ('kante', 'fc-chelsea'), 
        ('kevin de bruyne', 'manchester-city'), 
        ('kylian mbappe', 'fc-paris-saint-germain'), 
        ('robert lewandowski', 'fc-bayern-munchen'), 
        ('lionel messi', 'fc-paris-saint-germain')
    ],
    '2020/2021': [
        ('manuel neuer', 'fc-bayern-munchen'), 
        ('trent alexander-arnold', 'fc-liverpool'), 
        ('virgil van dijk', 'fc-liverpool'), 
        ('sergio ramos', 'real-madrid'), 
        ('alphonso davies', 'fc-bayern-munchen'), 
        ('joshua kimmich', 'fc-bayern-munchen'), 
        ('bruno fernandez', 'manchester-united'), 
        ('kevin de bruyne', 'manchester-city'), 
        ('kylian mbappe', 'fc-paris-saint-germain'), 
        ('robert lewandowski', 'fc-bayern-munchen'), 
        ('cristiano ronaldo', 'juventus-turin')
    ],
    '2019/2020': [
        ('alisson', 'fc-liverpool'), 
        ('trent alexander-arnold', 'fc-liverpool'), 
        ('virgil van dijk', 'fc-liverpool'), 
        ('matthijs de ligt', 'juventus-turin'), 
        ('andrew robertson', 'fc-liverpool'), 
        ('kevin de bruyne', 'manchester-city'), 
        ('frenkie de jong', 'fc-barcelona'), 
        ('kante', 'fc-chelsea'), 
        ('kylian mbappe', 'fc-paris-saint-germain'), 
        ('lionel messi', 'fc-barcelona'), 
        ('sadio mane', 'fc-liverpool')
    ],
    '2018/2019': [
        ('david de gea', 'manchester-united'), 
        ('virgil van dijk', 'fc-liverpool'), 
        ('raphael varane', 'real-madrid'), 
        ('sergio ramos', 'real-madrid'), 
        ('marcelo', 'real-madrid'), 
        ('luka modric', 'real-madrid'), 
        ('kante', 'fc-chelsea'), 
        ('kevin de bruyne', 'manchester-city'), 
        ('kylian mbappe', 'fc-paris-saint-germain'), 
        ('cristiano ronaldo', 'juventus-turin'), 
        ('lionel messi', 'fc-barcelona')
    ],
    '2017/2018': [
        ('david de gea', 'manchester-united'), 
        ('dani alves', 'fc-paris-saint-germain'), 
        ('leonardo bonucci', 'ac-mailand'), 
        ('sergio ramos', 'real-madrid'), 
        ('marcelo', 'real-madrid'), 
        ('kevin de bruyne', 'manchester-city'), 
        ('kante', 'fc-chelsea'), 
        ('luka modric', 'real-madrid'), 
        ('lionel messi', 'fc-barcelona'), 
        ('harry kane', 'tottenham-hotspur'), 
        ('cristiano ronaldo', 'real-madrid')
    ],
    '2016/2017': [
        ('manuel neuer', 'fc-bayern-munchen'), 
        ('dani alves', 'juventus-turin'), 
        ('gerard pique', 'fc-barcelona'), 
        ('sergio ramos', 'real-madrid'), 
        ('marcelo', 'real-madrid'), 
        ('luka modric', 'real-madrid'), 
        ('andres iniesta', 'fc-barcelona'), 
        ('toni kroos', 'real-madrid'), 
        ('lionel messi', 'fc-barcelona'), 
        ('luis suarez', 'fc-barcelona'), 
        ('cristiano ronaldo', 'real-madrid')
    ],
    '2015/2016': [
        ('manuel neuer', 'fc-bayern-munchen'), 
        ('dani alves', 'fc-barcelona'), 
        ('thiago silva', 'fc-paris-saint-germain'), 
        ('sergio ramos', 'real-madrid'), 
        ('marcelo', 'real-madrid'), 
        ('paul pogba', 'juventus-turin'), 
        ('andres iniesta', 'fc-barcelona'), 
        ('luka modric', 'real-madrid'), 
        ('lionel messi', 'fc-barcelona'), 
        ('neymar', 'fc-barcelona'), 
        ('cristiano ronaldo', 'real-madrid')
    ],
    '2014/2015': [
        ('manuel neuer', 'fc-bayern-munchen'), 
        ('philipp lahm', 'fc-bayern-munchen'), 
        ('thiago silva', 'fc-paris-saint-germain'), 
        ('david luiz', 'fc-paris-saint-germain'), 
        ('sergio ramos', 'real-madrid'), 
        ('angel di maria', 'manchester-united'), 
        ('andres iniesta', 'fc-barcelona'), 
        ('toni kroos', 'real-madrid'), 
        ('arjen robben', 'fc-bayern-munchen'), 
        ('lionel messi', 'fc-barcelona'), 
        ('cristiano ronaldo', 'real-madrid')
    ],
    '2013/2014': [
        ('manuel neuer', 'fc-bayern-munchen'), 
        ('dani alves', 'fc-barcelona'), 
        ('thiago silva', 'fc-paris-saint-germain'), 
        ('sergio ramos', 'real-madrid'), 
        ('philipp lahm', 'fc-bayern-munchen'), 
        ('xavi', 'fc-barcelona'), 
        ('andres iniesta', 'fc-barcelona'), 
        ('franck ribery', 'fc-bayern-munchen'), 
        ('lionel messi', 'fc-barcelona'), 
        ('zlatan ibrahimovic', 'fc-paris-saint-germain'), 
        ('cristiano ronaldo', 'real-madrid')
    ],
    '2012/2013': [
        ('iker casillas', 'real-madrid'), 
        ('dani alves', 'fc-barcelona'), 
        ('sergio ramos', 'real-madrid'), 
        ('gerard pique', 'fc-barcelona'), 
        ('marcelo', 'real-madrid'), 
        ('xavi', 'fc-barcelona'), 
        ('andres iniesta', 'fc-barcelona'), 
        ('xabi alonso', 'real-madrid'), 
        ('lionel messi', 'fc-barcelona'), 
        ('radamel falcao', 'atletico-madrid'), 
        ('cristiano ronaldo', 'real-madrid')
    ],
    '2011/2012': [
        ('iker casillas', 'real-madrid'), 
        ('dani alves', 'fc-barcelona'), 
        ('nemanja vidic', 'manchester-united'), 
        ('gerard pique', 'fc-barcelona'), 
        ('sergio ramos', 'real-madrid'), 
        ('xavi', 'fc-barcelona'), 
        ('andres iniesta', 'fc-barcelona'), 
        ('xabi alonso', 'real-madrid'), 
        ('lionel messi', 'fc-barcelona'), 
        ('wayne rooney', 'manchester-united'), 
        ('cristiano ronaldo', 'real-madrid')
    ],
    '2010/2011': [
        ('iker casillas', 'real-madrid'), 
        ('maicon', 'inter-mailand'), 
        ('lucio', 'inter-mailand'), 
        ('gerard pique', 'fc-barcelona'), 
        ('sergio ramos', 'real-madrid'), 
        ('wesley sneijder', 'inter-mailand'), 
        ('xavi', 'fc-barcelona'), 
        ('andres iniesta', 'fc-barcelona'), 
        ('lionel messi', 'fc-barcelona'), 
        ('david villa', 'fc-barcelona'), 
        ('cristiano ronaldo', 'real-madrid')
    ],
    '2009/2010': [
        ('iker casillas', 'real-madrid'), 
        ('dani alves', 'fc-barcelona'), 
        ('john terry', 'fc-chelsea'), 
        ('nemanja vidic', 'manchester-united'), 
        ('patrice evra', 'manchester-united'), 
        ('xavi', 'fc-barcelona'), 
        ('andres iniesta', 'fc-barcelona'), 
        ('steven gerrard', 'liverpool'), 
        ('lionel messi', 'fc-barcelona'), 
        ('fernando torres', 'fc-liverpool'), 
        ('cristiano ronaldo', 'real-madrid')
    ],
    '2008/2009': [
        ('iker casillas', 'real-madrid'), 
        ('sergio ramos', 'real-madrid'), 
        ('john terry', 'fc-chelsea'), 
        ('rio ferdinand', 'manchester-united'), 
        ('carles puyol', 'fc-barcelona'), 
        ('xavi', 'fc-barcelona'), 
        ('kaka', 'ac-mailand'), 
        ('steven gerrard', 'liverpool'), 
        ('lionel messi', 'fc-barcelona'), 
        ('fernando torres', 'fc-liverpool'), 
        ('cristiano ronaldo', 'real-madrid')
    ]
}

In [59]:
toty_nominees = {
'2023/2024': [
        ("Alisson", "Goalkeeper"),
        ("Ederson", "Goalkeeper"),
        ("Brice Samba", "Goalkeeper"),
        ("Gregor Kobel", "Goalkeeper"),
        ("Wojciech Szczęsny", "Goalkeeper"),
        ("Mike Maignan", "Goalkeeper"),
        ("Jan Oblak", "Goalkeeper"),
        ("Marc-Andre ter Stegen", "Goalkeeper"),
        ("William Saliba", "Defender"),
        ("Trent Alexander-Arnold", "Defender"),
        ("Virgil van Dijk", "Defender"),
        ("Ruben Dias", "Defender"),
        ("John Stones", "Defender"),
        ("Dante", "Defender"),
        ("Marquinhos", "Defender"),
        ("Kim Min Jae", "Defender"),
        ("Mats Hummels", "Defender"),
        ("Lucas Martínez Quarta", "Defender"),
        ("Alessandro Bastoni", "Defender"),
        ("Bremer", "Defender"),
        ("Theo Hernández", "Defender"),
        ("Giovanni Di Lorenzo", "Defender"),
        ("Nicolás Otamendi", "Defender"),
        ("Jules Koundé", "Defender"),
        ("Jesús Navas", "Defender"),
        ("Gayà", "Defender"),
        ("Jonathan Clauss", "Defender"),
        ("Jeremie Frimpong", "Defender"),
        ("Grimaldo", "Defender"),
        ("Federico Dimarco", "Defender"),
        ("Martin Ødegaard", "Midfielder"),
        ("Declan Rice", "Midfielder"),
        ("Kaoru Mitoma", "Midfielder"),
        ("Bernardo Silva", "Midfielder"),
        ("Kevin De Bruyne", "Midfielder"),
        ("Rodri", "Midfielder"),
        ("Bruno Guimarães", "Midfielder"),
        ("Jarrod Bowen", "Midfielder"),
        ("Alexandr Golovin", "Midfielder"),
        ("Florian Wirtz", "Midfielder"),
        ("Granit Xhaka", "Midfielder"),
        ("Jamal Musiala", "Midfielder"),
        ("Leroy Sané", "Midfielder"),
        ("Julian Brandt", "Midfielder"),
        ("Vincenzo Grifo", "Midfielder"),
        ("Nicolò Barella", "Midfielder"),
        ("Federico Chiesa", "Midfielder"),
        ("Adrien Rabiot", "Midfielder"),
        ("Riyad Mahrez", "Midfielder"),
        ("Sergej Milinković-Savić", "Midfielder"),
        ("Pedri", "Midfielder"),
        ("İlkay Gündoğan", "Midfielder"),
        ("Aleix García", "Midfielder"),
        ("Jude Bellingham", "Midfielder"),
        ("Toni Kroos", "Midfielder"),
        ("Luka Modrić", "Midfielder"),
        ("Aurélien Tchouaméni", "Midfielder"),
        ("Federico Valverde", "Midfielder"),
        ("Bukayo Saka", "Forward"),
        ("Mohamed Salah", "Forward"),
        ("Jack Grealish", "Forward"),
        ("Erling Haaland", "Forward"),
        ("Heung Min Son", "Forward"),
        ("Ousmane Dembélé", "Forward"),
        ("Lee Kang In", "Forward"),
        ("Kylian Mbappé", "Forward"),
        ("Randal Kolo Muani", "Forward"),
        ("Harry Kane", "Forward"),
        ("Loïs Openda", "Forward"),
        ("Serhou Guirassy", "Forward"),
        ("Lautaro Martínez", "Forward"),
        ("Rafael Leão", "Forward"),
        ("Khvicha Kvaratskhelia", "Forward"),
        ("Victor Osimhen", "Forward"),
        ("Domenico Berardi", "Forward"),
        ("Mehdi Taremi", "Forward"),
        ("Karim Benzema", "Forward"),
        ("Cristiano Ronaldo", "Forward"),
        ("Antoine Griezmann", "Forward"),
        ("Morata", "Forward"),
        ("Robert Lewandowski", "Forward"),
        ("Vini Jr.", "Forward"),
        ("Takefusa Kubo", "Forward"),
        ("Gerard Moreno", "Forward"),
        ("Lionel Messi", "Forward")
    ],


'2022/2023' : [
    ("Thibaut Courtois", "Goalkeeper"),
    ("Gregor Kobel", "Goalkeeper"),
    ("Mike Maignan", "Goalkeeper"),
    ("Alisson", "Goalkeeper"),
    ("Ederson", "Goalkeeper"),
    ("Wojciech Szczęsny", "Goalkeeper"),
    ("Kevin Trapp", "Goalkeeper"),
    ("Yassine Bounou", "Goalkeeper"),
    ("Hugo Lloris", "Goalkeeper"),
    ("Emiliano Martínez", "Goalkeeper"),
    ("Marcos Acuña", "Defender"),
    ("Marquinhos", "Defender"),
    ("Cristiano Biraghi", "Defender"),
    ("João Cancelo", "Defender"),
    ("Jonathan Clauss", "Defender"),
    ("Thiago Silva", "Defender"),
    ("Alphonso Davies", "Defender"),
    ("Jeremie Frimpong", "Defender"),
    ("Éder Militão", "Defender"),
    ("Rúben Dias", "Defender"),
    ("Grimaldo", "Defender"),
    ("Achraf Hakimi", "Defender"),
    ("Theo Hernández", "Defender"),
    ("Reece James", "Defender"),
    ("Kalidou Koulibaly", "Defender"),
    ("Nicolás Otamendi", "Defender"),
    ("Gleison Bremer", "Defender"),
    ("Niklas Süle", "Defender"),
    ("Fikayo Tomori", "Defender"),
    ("Kieran Trippier", "Defender"),
    ("Virgil van Dijk", "Defender"),
    ("Joško Gvardiol", "Defender"),
    ("Jules Koundé", "Defender"),
    ("Cristian Romero", "Defender"),
    ("Dayot Upamecano", "Defender"),
    ("Nicolò Barella", "Midfielder"),
    ("Jude Bellingham", "Midfielder"),
    ("Steven Berghuis", "Midfielder"),
    ("Marcelo Brozović", "Midfielder"),
    ("Bernardo Silva", "Midfielder"),
    ("Kevin De Bruyne", "Midfielder"),
    ("Moussa Diaby", "Midfielder"),
    ("Nabil Fekir", "Midfielder"),
    ("Seko Fofana", "Midfielder"),
    ("Pedri", "Midfielder"),
    ("Vincenzo Grifo", "Midfielder"),
    ("Rodri", "Midfielder"),
    ("Daichi Kamada", "Midfielder"),
    ("Joshua Kimmich", "Midfielder"),
    ("Filip Kostić", "Midfielder"),
    ("Toni Kroos", "Midfielder"),
    ("Merino", "Midfielder"),
    ("Sergej Milinković-Savić", "Midfielder"),
    ("Luka Modrić", "Midfielder"),
    ("Martin Ødegaard", "Midfielder"),
    ("Parejo", "Midfielder"),
    ("Lorenzo Pellegrini", "Midfielder"),
    ("Declan Rice", "Midfielder"),
    ("Bukayo Saka", "Midfielder"),
    ("Aurélien Tchouaméni", "Midfielder"),
    ("Sandro Tonali", "Midfielder"),
    ("Federico Valverde", "Midfielder"),
    ("Casemiro", "Midfielder"),
    ("Sofyan Amrabat", "Midfielder"),
    ("Bruno Fernandes", "Midfielder"),
    ("Ritsu Doan", "Midfielder"),
    ("Enzo Fernández", "Midfielder"),
    ("Ivan Perišić", "Midfielder"),
    ("Adrian Rabiot", "Midfielder"),
    ("Iago Aspas", "Forward"),
    ("Wissam Ben Yedder", "Forward"),
    ("Karim Benzema", "Forward"),
    ("Rafael Leão", "Forward"),
    ("Neymar Jr.", "Forward"),
    ("Vinicius Jr.", "Forward"),
    ("Ousmane Dembélé", "Forward"),
    ("João Félix", "Forward"),
    ("Gabriel Jesus", "Forward"),
    ("Phil Foden", "Forward"),
    ("Cody Gakpo", "Forward"),
    ("Erling Haaland", "Forward"),
    ("Borja Iglesias", "Forward"),
    ("Ciro Immobile", "Forward"),
    ("Harry Kane", "Forward"),
    ("Randal Kolo Muani", "Forward"),
    ("Dejan Kulusevski", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Sadio Mané", "Forward"),
    ("Lautaro Martínez", "Forward"),
    ("Kylian Mbappé", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Christopher Nkunku", "Forward"),
    ("Darwin Núñez", "Forward"),
    ("Victor Osimhen", "Forward"),
    ("Mohamed Salah", "Forward"),
    ("Heung Min Son", "Forward"),
    ("Martin Terrier", "Forward"),
    ("Dušan Vlahović", "Forward"),
    ("Olivier Giroud", "Forward"),
    ("Antoine Griezmann", "Forward")
],


'2021/2022' : [
    ("Thibaut Courtois", "Goalkeeper"),
    ("Gianluigi Donnarumma", "Goalkeeper"),
    ("Ederson", "Goalkeeper"),
    ("Mike Maignan", "Goalkeeper"),
    ("Emiliano Martínez", "Goalkeeper"),
    ("Édouard Mendy", "Goalkeeper"),
    ("Jan Oblak", "Goalkeeper"),
    ("David Alaba", "Defender"),
    ("Trent Alexander-Arnold", "Defender"),
    ("César Azpilicueta", "Defender"),
    ("Leonardo Bonucci", "Defender"),
    ("Giorgio Chiellini", "Defender"),
    ("Alphonso Davies", "Defender"),
    ("Christian Günter", "Defender"),
    ("Achraf Hakimi", "Defender"),
    ("Theo Hernández", "Defender"),
    ("Mats Hummels", "Defender"),
    ("Jesús Navas", "Defender"),
    ("João Cancelo", "Defender"),
    ("Jordi Alba", "Defender"),
    ("Simon Kjær", "Defender"),
    ("Jules Koundé", "Defender"),
    ("Marquinhos", "Defender"),
    ("Reinildo", "Defender"),
    ("Cristian Romero", "Defender"),
    ("Rúben Dias", "Defender"),
    ("Antonio Rüdiger", "Defender"),
    ("Luke Shaw", "Defender"),
    ("Milan Škriniar", "Defender"),
    ("Leonardo Spinazzola", "Defender"),
    ("Kieran Trippier", "Defender"),
    ("Kyle Walker", "Defender"),
    ("Nicolò Barella", "Midfielder"),
    ("Jude Bellingham", "Midfielder"),
    ("Bruno Fernandes", "Midfielder"),
    ("Casemiro", "Midfielder"),
    ("Dani Olmo", "Midfielder"),
    ("Kevin De Bruyne", "Midfielder"),
    ("Luis Díaz", "Midfielder"),
    ("Fabinho", "Midfielder"),
    ("Nabil Fekir", "Midfielder"),
    ("Phil Foden", "Midfielder"),
    ("Leon Goretzka", "Midfielder"),
    ("Jorginho", "Midfielder"),
    ("N'Golo Kanté", "Midfielder"),
    ("Joshua Kimmich", "Midfielder"),
    ("Filip Kostić", "Midfielder"),
    ("Manuel Locatelli", "Midfielder"),
    ("Lucas Paquetá", "Midfielder"),
    ("Marcos Llorente", "Midfielder"),
    ("Luka Modrić", "Midfielder"),
    ("Mason Mount", "Midfielder"),
    ("Thomas Müller", "Midfielder"),
    ("Pedri", "Midfielder"),
    ("Declan Rice", "Midfielder"),
    ("Heung Min Son", "Midfielder"),
    ("Marco Verratti", "Midfielder"),
    ("Florian Wirtz", "Midfielder"),
    ("Karim Benzema", "Forward"),
    ("Federico Chiesa", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Jonathan David", "Forward"),
    ("Gerard Moreno", "Forward"),
    ("Jack Grealish", "Forward"),
    ("Erling Haaland", "Forward"),
    ("Ciro Immobile", "Forward"),
    ("Lorenzo Insigne", "Forward"),
    ("Harry Kane", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Romelu Lukaku", "Forward"),
    ("Lautaro Martínez", "Forward"),
    ("Kylian Mbappé", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Neymar Jr", "Forward"),
    ("Oyarzabal", "Forward"),
    ("Dimitri Payet", "Forward"),
    ("Mohamed Salah", "Forward"),
    ("Luis Suárez", "Forward"),
    ("Dušan Tadić", "Forward"),
    ("Dušan Vlahović", "Forward")
],



'2020/2021' : [
    ("Manuel Neuer", "Goalkeeper"),
    ("Alisson Becker", "Goalkeeper"),
    ("Ederson", "Goalkeeper"),
    ("Thibaut Courtois", "Goalkeeper"),
    ("Steve Mandanda", "Goalkeeper"),
    ("Lukáš Hrádecký", "Goalkeeper"),
    ("Keylor Navas", "Goalkeeper"),
    ("Virgil van Dijk", "Defender"),
    ("Alphonso Davies", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("Trent Alexander-Arnold", "Defender"),
    ("Theo Hernández", "Defender"),
    ("Andrew Robertson", "Defender"),
    ("Marquinhos", "Defender"),
    ("Presnel Kimpembe", "Defender"),
    ("Thiago Silva", "Defender"),
    ("Angeliño", "Defender"),
    ("Kevin De Bruyne", "Midfielder"),
    ("Bruno Fernandes", "Midfielder"),
    ("Joshua Kimmich", "Midfielder"),
    ("Heung Min Son", "Midfielder"),
    ("Thiago", "Midfielder"),
    ("Jordan Henderson", "Midfielder"),
    ("Renato Sanches", "Midfielder"),
    ("Alejandro Gómez", "Midfielder"),
    ("Toni Kroos", "Midfielder"),
    ("Houssem Aouar", "Midfielder"),
    ("Cristiano Ronaldo", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Kylian Mbappé", "Forward"),
    ("Neymar Jr", "Forward"),
    ("Erling Braut Haaland", "Forward"),
    ("Sadio Mané", "Forward"),
    ("Francesco Caputo", "Forward"),
    ("Mohamed Salah", "Forward"),
    ("Danny Ings", "Forward")
],



'2019/2020' : [
    ("Jan Oblak", "Goalkeeper"),
    ("Marc-André ter Stegen", "Goalkeeper"),
    ("Alisson", "Goalkeeper"),
    ("Ederson", "Goalkeeper"),
    ("André Onana", "Goalkeeper"),
    ("Virgil van Dijk", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("Kalidou Koulibaly", "Defender"),
    ("Thiago Silva", "Defender"),
    ("Mats Hummels", "Defender"),
    ("Jan Vertonghen", "Defender"),
    ("Aymeric Laporte", "Defender"),
    ("Jordi Alba", "Defender"),
    ("Marquinhos", "Defender"),
    ("Leonardo Bonucci", "Defender"),
    ("Milan Škriniar", "Defender"),
    ("Joshua Kimmich", "Defender"),
    ("Matthijs de Ligt", "Defender"),
    ("Alex Sandro", "Defender"),
    ("Andrew Robertson", "Defender"),
    ("José María Giménez", "Defender"),
    ("Trent Alexander-Arnold", "Defender"),
    ("Nicolás Tagliafico", "Defender"),
    ("Mohamed Salah", "Forward"),
    ("N'Golo Kanté", "Midfielder"),
    ("Paulo Dybala", "Midfielder"),
    ("Marco Reus", "Midfielder"),
    ("Christian Eriksen", "Midfielder"),
    ("Sadio Mané", "Forward"),
    ("David Silva", "Midfielder"),
    ("Raheem Sterling", "Forward"),
    ("Bernardo Silva", "Forward"),
    ("Marco Verratti", "Midfielder"),
    ("Fabinho", "Midfielder"),
    ("Frenkie de Jong", "Midfielder"),
    ("Hakim Ziyech", "Midfielder"),
    ("Jadon Sancho", "Midfielder"),
    ("Georginio Wijnaldum", "Midfielder"),
    ("Dušan Tadic", "Midfielder"),
    ("Kai Havertz", "Midfielder"),
    ("Jordan Henderson", "Midfielder"),
    ("Kevin De Bruyne", "Midfielder"),
    ("Lionel Messi", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Kylian Mbappé", "Forward"),
    ("Neymar Jr", "Forward"),
    ("Sadio Mané", "Forward"),
    ("Mohamed Salah", "Forward"),
    ("Eden Hazard", "Forward"),
    ("Harry Kane", "Forward"),
    ("Sergio Agüero", "Forward"),
    ("Pierre-Emerick Aubameyang", "Forward"),
    ("Karim Benzema", "Forward"),
    ("Heung Min Son", "Forward"),
    ("Bernardo Silva", "Forward"),
    ("Roberto Firmino", "Forward")
],


'2018/2019' : [
    "Alisson Becker", "Goalkeeper",
    "Thibaut Courtois", "Goalkeeper",
    "David de Gea", "Goalkeeper",
    "Hugo Lloris", "Goalkeeper",
    "Jan Oblak", "Goalkeeper",
    "Jordi Alba", "Defender",
    "Dani Carvajal", "Defender",
    "Giorgio Chiellini", "Defender",
    "Diego Godín", "Defender",
    "Lucas Hernández", "Defender",
    "Mats Hummels", "Defender",
    "Joshua Kimmich", "Defender",
    "Kalidou Koulibaly", "Defender",
    "Dejan Lovren", "Defender",
    "Kostas Manolas", "Defender",
    "Marcelo", "Defender",
    "Sergio Ramos", "Defender",
    "Thiago Silva", "Defender",
    "Kieran Trippier", "Defender",
    "Samuel Umtiti", "Defender",
    "Virgil van Dijk", "Defender",
    "Raphaël Varane", "Defender",
    "Jan Vertonghen", "Defender",
    "Šime Vrsaljko", "Defender",
    "Kyle Walker", "Defender",
    "Sergio Busquets", "Midfielder",
    "Casemiro", "Midfielder",
    "Kevin De Bruyne", "Midfielder",
    "Fernandinho", "Midfielder",
    "Isco", "Midfielder",
    "N’Golo Kanté", "Midfielder",
    "Toni Kroos", "Midfielder",
    "Blaise Matuidi", "Midfielder",
    "Luka Modrić", "Midfielder",
    "Ivan Perišić", "Midfielder",
    "Paul Pogba", "Midfielder",
    "Ivan Rakitić", "Midfielder",
    "Marco Reus", "Midfielder",
    "David Silva", "Midfielder",
    "Sergio Agüero", "Forward",
    "Gareth Bale", "Forward",
    "Edinson Cavani", "Forward",
    "Antoine Griezmann", "Forward",
    "Eden Hazard", "Forward",
    "Harry Kane", "Forward",
    "Robert Lewandowski", "Forward",
    "Mario Mandžukić", "Forward",
    "Sadio Mané", "Forward",
    "Kylian Mbappé", "Forward",
    "Lionel Messi", "Forward",
    "Neymar Jr", "Forward",
    "Cristiano Ronaldo", "Forward",
    "Mohamed Salah", "Forward",
    "Luis Suárez", "Forward"
],

'2018/2019' : [
    ("Alisson Becker", "Goalkeeper"),
    ("Thibaut Courtois", "Goalkeeper"),
    ("David de Gea", "Goalkeeper"),
    ("Hugo Lloris", "Goalkeeper"),
    ("Jan Oblak", "Goalkeeper"),
    ("Jordi Alba", "Defender"),
    ("Dani Carvajal", "Defender"),
    ("Giorgio Chiellini", "Defender"),
    ("Diego Godín", "Defender"),
    ("Lucas Hernández", "Defender"),
    ("Mats Hummels", "Defender"),
    ("Joshua Kimmich", "Defender"),
    ("Kalidou Koulibaly", "Defender"),
    ("Dejan Lovren", "Defender"),
    ("Kostas Manolas", "Defender"),
    ("Marcelo", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("Thiago Silva", "Defender"),
    ("Kieran Trippier", "Defender"),
    ("Samuel Umtiti", "Defender"),
    ("Virgil van Dijk", "Defender"),
    ("Raphaël Varane", "Defender"),
    ("Jan Vertonghen", "Defender"),
    ("Šime Vrsaljko", "Defender"),
    ("Kyle Walker", "Defender"),
    ("Sergio Busquets", "Midfielder"),
    ("Casemiro", "Midfielder"),
    ("Kevin De Bruyne", "Midfielder"),
    ("Fernandinho", "Midfielder"),
    ("Isco", "Midfielder"),
    ("N’Golo Kanté", "Midfielder"),
    ("Toni Kroos", "Midfielder"),
    ("Blaise Matuidi", "Midfielder"),
    ("Luka Modrić", "Midfielder"),
    ("Ivan Perišić", "Midfielder"),
    ("Paul Pogba", "Midfielder"),
    ("Ivan Rakitić", "Midfielder"),
    ("Marco Reus", "Midfielder"),
    ("David Silva", "Midfielder"),
    ("Sergio Agüero", "Forward"),
    ("Gareth Bale", "Forward"),
    ("Edinson Cavani", "Forward"),
    ("Antoine Griezmann", "Forward"),
    ("Eden Hazard", "Forward"),
    ("Harry Kane", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Mario Mandžukić", "Forward"),
    ("Sadio Mané", "Forward"),
    ("Kylian Mbappé", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Neymar Jr", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Mohamed Salah", "Forward"),
    ("Luis Suárez", "Forward")
],


'2016/2017' : [
    ("Gianluigi Buffon", "Goalkeeper"),
    ("David De Gea", "Goalkeeper"),
    ("Keylor Navas", "Goalkeeper"),
    ("Manuel Neuer", "Goalkeeper"),
    ("Jan Oblak", "Goalkeeper"),
    ("Jordi Alba", "Defender"),
    ("David Alaba", "Defender"),
    ("Jerome Boateng", "Defender"),
    ("Leonardo Bonucci", "Defender"),
    ("Dani Carvajal", "Defender"),
    ("Giorgio Chiellini", "Defender"),
    ("Diego Godin", "Defender"),
    ("Mats Hummels", "Defender"),
    ("Philipp Lahm", "Defender"),
    ("Pepe", "Defender"),
    ("Gerard Pique", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("Thiago Silva", "Defender"),
    ("Raphael Varane", "Defender"),
    ("Sergio Busquets", "Midfielder"),
    ("Kevin De Bruyne", "Midfielder"),
    ("Eden Hazard", "Midfielder"),
    ("Andres Iniesta", "Midfielder"),
    ("N’Golo Kante", "Midfielder"),
    ("Toni Kroos", "Midfielder"),
    ("Luka Modric", "Midfielder"),
    ("Mesut Ozil", "Midfielder"),
    ("Dimitri Payet", "Midfielder"),
    ("Paul Pogba", "Midfielder"),
    ("Ivan Rakitic", "Midfielder"),
    ("David Silva", "Midfielder"),
    ("Marco Verratti", "Midfielder"),
    ("Arturo Vidal", "Midfielder"),
    ("Sergio Aguero", "Forward"),
    ("Gareth Bale", "Forward"),
    ("Karim Benzema", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Paulo Dybala", "Forward"),
    ("Antoine Griezmann", "Forward"),
    ("Gonzalo Higuain", "Forward"),
    ("Zlatan Ibrahimovic", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Thomas Muller", "Forward"),
    ("Neymar", "Forward"),
    ("Alexis Sanchez", "Forward"),
    ("Luis Suarez", "Forward"),
    ("Jamie Vardy", "Forward")
],


'2015/2016' : [
    ("Gianluigi Buffon", "Goalkeeper"),
    ("Iker Casillas", "Goalkeeper"),
    ("David De Gea", "Goalkeeper"),
    ("Keylor Navas", "Goalkeeper"),
    ("Manuel Neuer", "Goalkeeper"),
    ("David Alaba", "Defender"),
    ("Jordi Alba", "Defender"),
    ("Daniel Alves", "Defender"),
    ("Jérôme Boateng", "Defender"),
    ("Daniel Carvajal", "Defender"),
    ("Giorgio Chiellini", "Defender"),
    ("David Luiz", "Defender"),
    ("Diego Godín", "Defender"),
    ("Mats Hummels", "Defender"),
    ("Branislav Ivanovic", "Defender"),
    ("Vincent Kompany", "Defender"),
    ("Philipp Lahm", "Defender"),
    ("Marcelo", "Defender"),
    ("Javier Mascherano", "Defender"),
    ("Pepe", "Defender"),
    ("Gerard Piqué", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("Thiago Silva", "Defender"),
    ("John Terry", "Defender"),
    ("Raphaël Varane", "Defender"),
    ("Thiago Alcantara", "Midfielder"),
    ("Xabi Alonso", "Midfielder"),
    ("Sergio Busquets", "Midfielder"),
    ("Eden Hazard", "Midfielder"),
    ("Andrés Iniesta", "Midfielder"),
    ("Toni Kroos", "Midfielder"),
    ("Luka Modric", "Midfielder"),
    ("Andrea Pirlo", "Midfielder"),
    ("Paul Pogba", "Midfielder"),
    ("Ivan Rakitic", "Midfielder"),
    ("James Rodríguez", "Midfielder"),
    ("David Silva", "Midfielder"),
    ("Yaya Touré", "Midfielder"),
    ("Marco Verratti", "Midfielder"),
    ("Arturo Vidal", "Midfielder"),
    ("Sergio Agüero", "Forward"),
    ("Gareth Bale", "Forward"),
    ("Karim Benzema", "Forward"),
    ("Douglas Costa", "Forward"),
    ("Zlatan Ibrahimovic", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Thomas Müller", "Forward"),
    ("Neymar Jr.", "Forward"),
    ("Arjen Robben", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Wayne Rooney", "Forward"),
    ("Alexis Sánchez", "Forward"),
    ("Luis Suárez", "Forward"),
    ("Carlos Tevez", "Forward")
],


'2014/2015' : [
    ("Gianluigi Buffon", "Goalkeeper"),
    ("Iker Casillas", "Goalkeeper"),
    ("David De Gea", "Goalkeeper"),
    ("Keylor Navas", "Goalkeeper"),
    ("Manuel Neuer", "Goalkeeper"),
    ("David Alaba", "Defender"),
    ("Jordi Alba", "Defender"),
    ("Dani Alves", "Defender"),
    ("Jérôme Boateng", "Defender"),
    ("Dani Carvajal", "Defender"),
    ("David Luiz", "Defender"),
    ("Filipe Luís", "Defender"),
    ("Diego Godín", "Defender"),
    ("Mats Hummels", "Defender"),
    ("Branislav Ivanović", "Defender"),
    ("Vincent Kompany", "Defender"),
    ("Philipp Lahm", "Defender"),
    ("Marcelo", "Defender"),
    ("Javier Mascherano", "Defender"),
    ("Pepe", "Defender"),
    ("Gerard Piqué", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("Thiago Silva", "Defender"),
    ("Raphaël Varane", "Defender"),
    ("Pablo Zabaleta", "Defender"),
    ("Xabi Alonso", "Midfielder"),
    ("Ángel Di María", "Midfielder"),
    ("Cesc Fàbregas", "Midfielder"),
    ("Eden Hazard", "Midfielder"),
    ("Xavi Hernández", "Midfielder"),
    ("Andrés Iniesta", "Midfielder"),
    ("Toni Kroos", "Midfielder"),
    ("Luka Modrić", "Midfielder"),
    ("Mesut Özil", "Midfielder"),
    ("Andrea Pirlo", "Midfielder"),
    ("Paul Pogba", "Midfielder"),
    ("James Rodríguez", "Midfielder"),
    ("Bastian Schweinsteiger", "Midfielder"),
    ("Yaya Touré", "Midfielder"),
    ("Arturo Vidal", "Midfielder"),
    ("Sergio Agüero", "Forward"),
    ("Gareth Bale", "Forward"),
    ("Karim Benzema", "Forward"),
    ("Diego Costa", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Zlatan Ibrahimović", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Thomas Müller", "Forward"),
    ("Neymar", "Forward"),
    ("Marco Reus", "Forward"),
    ("Franck Ribéry", "Forward"),
    ("Arjen Robben", "Forward"),
    ("Wayne Rooney", "Forward"),
    ("Luis Suárez", "Forward")
],


'2013/2014' : [
    ("Gianluigi Buffon", "Goalkeeper"),
    ("Iker Casillas", "Goalkeeper"),
    ("Petr Cech", "Goalkeeper"),
    ("Manuel Neuer", "Goalkeeper"),
    ("Víctor Valdés", "Goalkeeper"),
    ("David Alaba", "Defender"),
    ("Jordi Alba", "Defender"),
    ("Dani Alves", "Defender"),
    ("Leighton Baines", "Defender"),
    ("Jérôme Boateng", "Defender"),
    ("Ashley Cole", "Defender"),
    ("Dante", "Defender"),
    ("Mats Hummels", "Defender"),
    ("Branislav Ivanovic", "Defender"),
    ("Philipp Lahm", "Defender"),
    ("David Luiz", "Defender"),
    ("Vincent Kompany", "Defender"),
    ("Marcelo", "Defender"),
    ("Pepe", "Defender"),
    ("Gerard Piqué", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("Thiago Silva", "Defender"),
    ("Raphaël Varane", "Defender"),
    ("Nemanja Vidic", "Defender"),
    ("Pablo Zabaleta", "Defender"),
    ("Xabi Alonso", "Midfielder"),
    ("Gareth Bale", "Midfielder"),
    ("Sergio Busquets", "Midfielder"),
    ("Steven Gerrard", "Midfielder"),
    ("Andrés Iniesta", "Midfielder"),
    ("Isco", "Midfielder"),
    ("Mesut Özil", "Midfielder"),
    ("Andrea Pirlo", "Midfielder"),
    ("Marco Reus", "Midfielder"),
    ("Franck Ribéry", "Midfielder"),
    ("Arjen Robben", "Midfielder"),
    ("Bastian Schweinsteiger", "Midfielder"),
    ("Yaya Touré", "Midfielder"),
    ("Arturo Vidal", "Midfielder"),
    ("Xavi", "Midfielder"),
    ("Sergio Agüero", "Forward"),
    ("Mario Balotelli", "Forward"),
    ("Edinson Cavani", "Forward"),
    ("Diego Costa", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Didier Drogba", "Forward"),
    ("Radamel Falcao", "Forward"),
    ("Zlatan Ibrahimovic", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Mario Mandzukic", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Neymar", "Forward"),
    ("Robin van Persie", "Forward"),
    ("Wayne Rooney", "Forward"),
    ("Luis Suarez", "Forward")
],


'2012/2013' : [
    ("Gianluigi Buffon", "Goalkeeper"),
    ("Iker Casillas", "Goalkeeper"),
    ("Petr Cech", "Goalkeeper"),
    ("Joe Hart", "Goalkeeper"),
    ("Manuel Neuer", "Goalkeeper"),
    ("Jordi Alba", "Defender"),
    ("Gareth Bale", "Defender"),
    ("Giorgio Chiellini", "Defender"),
    ("Ashley Cole", "Defender"),
    ("Dani Alves", "Defender"),
    ("David Luiz", "Defender"),
    ("Patrice Evra", "Defender"),
    ("Rio Ferdinand", "Defender"),
    ("Mats Hummels", "Defender"),
    ("Branislav Ivanovic", "Defender"),
    ("Vincent Kompany", "Defender"),
    ("Philipp Lahm", "Defender"),
    ("Marcelo", "Defender"),
    ("Javier Mascherano", "Defender"),
    ("Pepe", "Defender"),
    ("Gerard Piqué", "Defender"),
    ("Carles Puyol", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("John Terry", "Defender"),
    ("Thiago Silva", "Defender"),
    ("Xabi Alonso", "Midfielder"),
    ("Sergio Busquets", "Midfielder"),
    ("Cesc Fabregas", "Midfielder"),
    ("Steven Gerrard", "Midfielder"),
    ("Eden Hazard", "Midfielder"),
    ("Andrés Iniesta", "Midfielder"),
    ("Frank Lampard", "Midfielder"),
    ("Luka Modric", "Midfielder"),
    ("Mesut Özil", "Midfielder"),
    ("Andrea Pirlo", "Midfielder"),
    ("Franck Ribéry", "Midfielder"),
    ("David Silva", "Midfielder"),
    ("Bastian Schweinsteiger", "Midfielder"),
    ("Yaya Touré", "Midfielder"),
    ("Xavi Hernández", "Midfielder"),
    ("Sergio Agüero", "Forward"),
    ("Mario Balotelli", "Forward"),
    ("Karim Benzema", "Forward"),
    ("Edinson Cavani", "Forward"),
    ("Didier Drogba", "Forward"),
    ("Samuel Eto’o", "Forward"),
    ("Radamel Falcao", "Forward"),
    ("Mario Gomez", "Forward"),
    ("Zlatan Ibrahimovic", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Neymar", "Forward"),
    ("Robin van Persie", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Wayne Rooney", "Forward"),
    ("Luis Suarez", "Forward")
],


'2011/2012' : [
    ("Gianluigi Buffon", "Goalkeeper"),
    ("Iker Casillas", "Goalkeeper"),
    ("Petr Cech", "Goalkeeper"),
    ("Joe Hart", "Goalkeeper"),
    ("Manuel Neuer", "Goalkeeper"),
    ("Jordi Alba", "Defender"),
    ("Gareth Bale", "Defender"),
    ("Giorgio Chiellini", "Defender"),
    ("Ashley Cole", "Defender"),
    ("Dani Alves", "Defender"),
    ("David Luiz", "Defender"),
    ("Patrice Evra", "Defender"),
    ("Rio Ferdinand", "Defender"),
    ("Mats Hummels", "Defender"),
    ("Branislav Ivanovic", "Defender"),
    ("Vincent Kompany", "Defender"),
    ("Philipp Lahm", "Defender"),
    ("Marcelo", "Defender"),
    ("Javier Mascherano", "Defender"),
    ("Pepe", "Defender"),
    ("Gerard Piqué", "Defender"),
    ("Carles Puyol", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("John Terry", "Defender"),
    ("Thiago Silva", "Defender"),
    ("Xabi Alonso", "Midfielder"),
    ("Sergio Busquets", "Midfielder"),
    ("Cesc Fabregas", "Midfielder"),
    ("Steven Gerrard", "Midfielder"),
    ("Eden Hazard", "Midfielder"),
    ("Andrés Iniesta", "Midfielder"),
    ("Frank Lampard", "Midfielder"),
    ("Luka Modric", "Midfielder"),
    ("Mesut Özil", "Midfielder"),
    ("Andrea Pirlo", "Midfielder"),
    ("Franck Ribéry", "Midfielder"),
    ("David Silva", "Midfielder"),
    ("Bastian Schweinsteiger", "Midfielder"),
    ("Yaya Touré", "Midfielder"),
    ("Xavi Hernández", "Midfielder"),
    ("Sergio Agüero", "Forward"),
    ("Mario Balotelli", "Forward"),
    ("Karim Benzema", "Forward"),
    ("Edinson Cavani", "Forward"),
    ("Didier Drogba", "Forward"),
    ("Samuel Eto’o", "Forward"),
    ("Radamel Falcao", "Forward"),
    ("Mario Gomez", "Forward"),
    ("Zlatan Ibrahimovic", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Neymar", "Forward"),
    ("Robin van Persie", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Wayne Rooney", "Forward"),
    ("Luis Suarez", "Forward")
],


'2010/2011' : [
    ("Gianluigi Buffon", "Goalkeeper"),
    ("Iker Casillas", "Goalkeeper"),
    ("Petr Cech", "Goalkeeper"),
    ("Manuel Neuer", "Goalkeeper"),
    ("Víctor Valdés", "Goalkeeper"),
    ("Dani Alves", "Defender"),
    ("Gareth Bale", "Defender"),
    ("Jordi Alba", "Defender"),
    ("Ashley Cole", "Defender"),
    ("David Luiz", "Defender"),
    ("Filipe Luís", "Defender"),
    ("Diego Godín", "Defender"),
    ("Branislav Ivanović", "Defender"),
    ("Vincent Kompany", "Defender"),
    ("Philipp Lahm", "Defender"),
    ("David Alaba", "Defender"),
    ("Javier Mascherano", "Defender"),
    ("Pepe", "Defender"),
    ("Gerard Piqué", "Defender"),
    ("Carles Puyol", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("Thiago Silva", "Defender"),
    ("John Terry", "Defender"),
    ("Raphaël Varane", "Defender"),
    ("Nemanja Vidić", "Defender"),
    ("Xabi Alonso", "Midfielder"),
    ("Gareth Bale", "Midfielder"),
    ("Sergio Busquets", "Midfielder"),
    ("Steven Gerrard", "Midfielder"),
    ("Andrés Iniesta", "Midfielder"),
    ("Isco", "Midfielder"),
    ("Mesut Özil", "Midfielder"),
    ("Andrea Pirlo", "Midfielder"),
    ("Marco Reus", "Midfielder"),
    ("Franck Ribéry", "Midfielder"),
    ("Arjen Robben", "Midfielder"),
    ("Bastian Schweinsteiger", "Midfielder"),
    ("Yaya Touré", "Midfielder"),
    ("Arturo Vidal", "Midfielder"),
    ("Xavi Hernández", "Midfielder"),
    ("Sergio Agüero", "Forward"),
    ("Mario Balotelli", "Forward"),
    ("Edinson Cavani", "Forward"),
    ("Diego Costa", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Didier Drogba", "Forward"),
    ("Radamel Falcao", "Forward"),
    ("Zlatan Ibrahimović", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Neymar", "Forward"),
    ("Robin van Persie", "Forward"),
    ("Wayne Rooney", "Forward"),
    ("Luis Suárez", "Forward")
],


'2009/2010' : [
    ("Gianluigi Buffon", "Goalkeeper"),
    ("Iker Casillas", "Goalkeeper"),
    ("Petr Cech", "Goalkeeper"),
    ("Joe Hart", "Goalkeeper"),
    ("Manuel Neuer", "Goalkeeper"),
    ("Dani Alves", "Defender"),
    ("Gareth Bale", "Defender"),
    ("Jordi Alba", "Defender"),
    ("Ashley Cole", "Defender"),
    ("David Luiz", "Defender"),
    ("Filipe Luís", "Defender"),
    ("Diego Godín", "Defender"),
    ("Branislav Ivanović", "Defender"),
    ("Vincent Kompany", "Defender"),
    ("Philipp Lahm", "Defender"),
    ("David Alaba", "Defender"),
    ("Javier Mascherano", "Defender"),
    ("Pepe", "Defender"),
    ("Gerard Piqué", "Defender"),
    ("Carles Puyol", "Defender"),
    ("Sergio Ramos", "Defender"),
    ("Thiago Silva", "Defender"),
    ("John Terry", "Defender"),
    ("Raphaël Varane", "Defender"),
    ("Nemanja Vidić", "Defender"),
    ("Xabi Alonso", "Midfielder"),
    ("Gareth Bale", "Midfielder"),
    ("Sergio Busquets", "Midfielder"),
    ("Steven Gerrard", "Midfielder"),
    ("Andrés Iniesta", "Midfielder"),
    ("Isco", "Midfielder"),
    ("Mesut Özil", "Midfielder"),
    ("Andrea Pirlo", "Midfielder"),
    ("Marco Reus", "Midfielder"),
    ("Franck Ribéry", "Midfielder"),
    ("Arjen Robben", "Midfielder"),
    ("Bastian Schweinsteiger", "Midfielder"),
    ("Yaya Touré", "Midfielder"),
    ("Arturo Vidal", "Midfielder"),
    ("Xavi Hernández", "Midfielder"),
    ("Sergio Agüero", "Forward"),
    ("Mario Balotelli", "Forward"),
    ("Edinson Cavani", "Forward"),
    ("Diego Costa", "Forward"),
    ("Cristiano Ronaldo", "Forward"),
    ("Didier Drogba", "Forward"),
    ("Radamel Falcao", "Forward"),
    ("Zlatan Ibrahimović", "Forward"),
    ("Robert Lewandowski", "Forward"),
    ("Lionel Messi", "Forward"),
    ("Neymar", "Forward"),
    ("Robin van Persie", "Forward"),
    ("Wayne Rooney", "Forward"),
    ("Luis Suárez", "Forward")
],


'2008/2009' : [
    ("Gianluigi Buffon", "Goalkeeper"),
    ("Iker Casillas", "Goalkeeper"),
    ("Petr Cech", "Goalkeeper"),
    ("Edwin van der Sar", "Goalkeeper"),
    ("Pepe Reina", "Goalkeeper"),
    ("Daniel Alves", "Defender"),
    ("Patrice Evra", "Defender"),
    ("Rio Ferdinand", "Defender"),
    ("Philipp Lahm", "Defender"),
    ("Paolo Maldini", "Defender"),
    ("Alessandro Nesta", "Defender"),
    ("Carles Puyol", "Defender"),
    ("John Terry", "Defender"),
    ("Nemanja Vidić", "Defender"),
    ("Andrea Pirlo", "Midfielder"),
    ("Cesc Fàbregas", "Midfielder"),
    ("Steven Gerrard", "Midfielder"),
    ("Andrés Iniesta", "Midfielder"),
    ("Frank Lampard", "Midfielder"),
    ("Lionel Messi", "Midfielder"),
    ("Xavi", "Midfielder"),
    ("Cristiano Ronaldo", "Forward"),
    ("Samuel Eto'o", "Forward"),
    ("Didier Drogba", "Forward"),
    ("Zlatan Ibrahimović", "Forward"),
    ("Thierry Henry", "Forward"),
    ("David Villa", "Forward"),
    ("Fernando Torres", "Forward")
]

}


In [57]:
def normalize_player_names(toty_nominees):
    normalized_toty_nominees = {}
    
    for season, players in toty_nominees.items():
        normalized_players = []
        
        for player, position in players:
            normalized_player = (normalize_string(player), position)
            normalized_players.append(normalized_player)
        
        normalized_toty_nominees[season] = normalized_players
    
    return normalized_toty_nominees

In [58]:
normalized_toty_nominees = normalize_player_names(toty_nominees)
print(normalized_toty_nominees)

{'2023/2024': [('alisson', 'Goalkeeper'), ('ederson', 'Goalkeeper'), ('brice samba', 'Goalkeeper'), ('gregor kobel', 'Goalkeeper'), ('wojciech szczesny', 'Goalkeeper'), ('mike maignan', 'Goalkeeper'), ('jan oblak', 'Goalkeeper'), ('marc-andre ter stegen', 'Goalkeeper'), ('william saliba', 'Defender'), ('trent alexander-arnold', 'Defender'), ('virgil van dijk', 'Defender'), ('ruben dias', 'Defender'), ('john stones', 'Defender'), ('dante', 'Defender'), ('marquinhos', 'Defender'), ('kim min jae', 'Defender'), ('mats hummels', 'Defender'), ('lucas martinez quarta', 'Defender'), ('alessandro bastoni', 'Defender'), ('bremer', 'Defender'), ('theo hernandez', 'Defender'), ('giovanni di lorenzo', 'Defender'), ('nicolas otamendi', 'Defender'), ('jules kounde', 'Defender'), ('jesus navas', 'Defender'), ('gaya', 'Defender'), ('jonathan clauss', 'Defender'), ('jeremie frimpong', 'Defender'), ('grimaldo', 'Defender'), ('federico dimarco', 'Defender'), ('martin degaard', 'Midfielder'), ('declan rice

In [60]:
#loop through the toty_nominees dictionary and update the TOTY column
for season, players in toty_nominees.items():
    for player, team in players:
        #normalize player name
        player_normalized = normalize_string(player)

        #update the TOTY column where season, player name, and team name match
        mask = (
            (comprehensive_df['Season'] == season) & 
            (comprehensive_df['Player'].apply(normalize_string) == player_normalized)
        )
        comprehensive_df.loc[mask, 'TOTY_Nominee'] = 1


In [63]:
#function to update the TOTY column
def update_toty_column(df, toty_dict):
    for season, players in toty_dict.items():
        for player, team in players:
            normalized_player = normalize_string(player)
            df.loc[(df['Season'] == season) & 
                   (df['Player'] == normalized_player) & 
                   (df['Team'] == team), 'TOTY'] = 1
    return df

#update the TOTY column based on the dictionary
comprehensive_df = update_toty_column(comprehensive_df, toty_players)

In [64]:
comprehensive_df.head(50)

Unnamed: 0,Player,Age,Position,Kit Number,Nationality,In Squad,Appearances,Goals,Assists,Yellow Cards,...,Minutes Played,Club,Season,Team,Placement,League,Champions League,Domestic Cup,TOTY,TOTY_Nominee
0,oier olazabal,18,Goalkeeper,0,Spain,2,1,0,0,0,...,90.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
1,victor valdes,26,Goalkeeper,1,Spain,52,49,0,0,3,...,4410.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
2,jose manuel pinto,32,Goalkeeper,13,Spain,51,11,0,0,1,...,990.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
3,albert jorquera,29,Goalkeeper,25,Spain,20,1,0,0,0,...,90.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
4,andreu fontas,18,Centre-Back,0,Spain,1,0,0,0,0,...,0.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
5,alberto botia,19,Centre-Back,0,Spain,5,1,0,0,0,...,27.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
6,martin caceres,21,Centre-Back,2,Uruguay,51,23,0,0,3,...,1523.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
7,marc muniesa,16,Centre-Back,0,Spain,2,1,0,0,0,...,31.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
8,gerard pique,21,Centre-Back,3,Spain,50,45,3,1,7,...,3932.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
9,rafa marquez,29,Centre-Back,4,Mexico,41,37,3,3,7,...,3020.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0


In [79]:
comprehensive_df.to_csv('comprehensive_df.csv', index=False)

In [68]:
#replace "Not in squad during this season" with 0
comprehensive_df['Appearances'] = comprehensive_df['Appearances'].replace("Not in squad during this season", 0)

#convert the appearances column to numeric
comprehensive_df['Appearances'] = pd.to_numeric(comprehensive_df['Appearances'])

In [72]:
#convert specified columns to numeric values
numeric_columns = ['Age', 'In Squad', 'Goals', 'Assists', 'Yellow Cards', 'Second Yellows', 'Straight Reds', 'Substituted On', 'Substituted Off', 'PPG', 'Placement']

for col in numeric_columns:
    comprehensive_df[col] = pd.to_numeric(comprehensive_df[col], errors='coerce')

In [73]:
comprehensive_vague_positions_df = comprehensive_df.copy()

#function to categorize positions
def categorize_position(position):
    if "Back" in position or "Defender" in position:
        return "Defender"
    elif "Midfield" in position:
        return "Midfielder"
    elif "Goalkeeper" in position:
        return "Goalkeeper"
    else:
        return "Attacker"

#apply the function to the position column
comprehensive_vague_positions_df['Position'] = comprehensive_vague_positions_df['Position'].apply(categorize_position)

In [74]:
comprehensive_vague_positions_df.head(20)

Unnamed: 0,Player,Age,Position,Kit Number,Nationality,In Squad,Appearances,Goals,Assists,Yellow Cards,...,Minutes Played,Club,Season,Team,Placement,League,Champions League,Domestic Cup,TOTY,TOTY_Nominee
0,oier olazabal,18,Goalkeeper,0,Spain,2,1,0,0,0,...,90.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
1,victor valdes,26,Goalkeeper,1,Spain,52,49,0,0,3,...,4410.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
2,jose manuel pinto,32,Goalkeeper,13,Spain,51,11,0,0,1,...,990.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
3,albert jorquera,29,Goalkeeper,25,Spain,20,1,0,0,0,...,90.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
4,andreu fontas,18,Defender,0,Spain,1,0,0,0,0,...,0.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
5,alberto botia,19,Defender,0,Spain,5,1,0,0,0,...,27.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
6,martin caceres,21,Defender,2,Uruguay,51,23,0,0,3,...,1523.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
7,marc muniesa,16,Defender,0,Spain,2,1,0,0,0,...,31.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
8,gerard pique,21,Defender,3,Spain,50,45,3,1,7,...,3932.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0
9,rafa marquez,29,Defender,4,Mexico,41,37,3,3,7,...,3020.0,FC Barcelona,2008/2009,fc-barcelona,1,laliga,1,1,0,0


In [82]:
comprehensive_vague_positions_df.dtypes

Player               object
Age                   int64
Position             object
Kit Number           object
Nationality          object
In Squad              int64
Appearances           int64
Goals                 int64
Assists               int64
Yellow Cards          int64
Second Yellows        int64
Straight Reds         int64
Substituted On        int64
Substituted Off       int64
PPG                 float64
Minutes Played      float64
Club                 object
Season               object
Team                 object
Placement             int64
League               object
Champions League     object
Domestic Cup          int64
TOTY                  int64
TOTY_Nominee          int64
dtype: object

In [80]:
#check for NaN values in the df
print("NaN values in each column:")
print(comprehensive_vague_positions_df.isnull().sum())

NaN values in each column:
Player                 0
Age                    0
Position               0
Kit Number             0
Nationality            0
In Squad               0
Appearances            0
Goals                  0
Assists                0
Yellow Cards           0
Second Yellows         0
Straight Reds          0
Substituted On         0
Substituted Off        0
PPG                 2371
Minutes Played         0
Club                   0
Season                 0
Team                   0
Placement              0
League                 0
Champions League       0
Domestic Cup           0
TOTY                   0
TOTY_Nominee           0
dtype: int64


In [89]:
def update_player_name(appearances, goals, assists, placement, season_str, new_player_name):
    # Locate the row that matches the criteria
    row_index = comprehensive_vague_positions_df[
        (comprehensive_vague_positions_df['Appearances'] == appearances) & 
        (comprehensive_vague_positions_df['Goals'] == goals) & 
        (comprehensive_vague_positions_df['Assists'] == assists) & 
        (comprehensive_vague_positions_df['Placement'] == placement) & 
        (comprehensive_vague_positions_df['Season'] == season_str)
    ].index
    
    # Check if the row exists
    if not row_index.empty:
        # Update the 'Player' feature for the matching row
        comprehensive_vague_positions_df.loc[row_index, 'Player'] = new_player_name
        print(f"Updated player name to {new_player_name} for season {season_str}.")
    else:
        print(f"No matching entry found for season {season_str} with the given criteria.")

#test
update_player_name(
    appearances=26, 
    goals=2, 
    assists=5, 
    placement=1, 
    season_str='2022/2023', 
    new_player_name='joao cancelo'
)

Updated player name to joao cancelo for season 2022/2023.


In [93]:
#function to find players based on input criteria
def find_players(appearances, goals, assists, placement, season, df):
    #filter the df based on the input criteria
    filtered_df = df[
        (df['Appearances'] == appearances) & 
        (df['Goals'] == goals) & 
        (df['Assists'] == assists) & 
        (df['Placement'] == placement) & 
        (df['Season'] == season)
    ]
    
    #return the filtered df with all original columns
    return filtered_df

# Example usage
example_appearances = 48
example_goals = 11
example_assists = 11
example_placement = 2
example_season = '2023/2024'

result = find_players(example_appearances, example_goals, example_assists, example_placement, example_season, comprehensive_vague_positions_df)
result

Unnamed: 0,Player,Age,Position,Kit Number,Nationality,In Squad,Appearances,Goals,Assists,Yellow Cards,...,Minutes Played,Club,Season,Team,Placement,League,Champions League,Domestic Cup,TOTY,TOTY_Nominee
22469,martin degaard,24,Midfielder,8,Norway,49,48,11,11,2,...,4053.0,Arsenal FC,2023/2024,fc-arsenal,2,premier-league,0,0,0,1


In [94]:
comprehensive_vague_positions_df.to_csv('comprehensive_vague_positions_df.csv', index=False)