# Notebook to update manually edited player_country data

In [65]:
import pandas as pd
from unidecode import unidecode
from pathlib import Path

get ids of unmatched players from processed data file.

In [66]:
processed_df = pd.read_csv('../data/processed/euro24_players_country_draft.csv')

In [67]:
processed_df[processed_df['url'].isna()]

Unnamed: 0,player_id,Players,Price,Total pts,Selected,MD pts,Pts per €,Pts per MD,PotM pts,Goals,...,Name,Country,url,Active,Years Played,Position_y,Clubs Played,IOC,index,name_deaccented
123,123,G. Sudakov,6.5,0,0,0,0,0,0,0,...,,,,,,,,,123,
230,230,Tosun,6.0,0,0,0,0,0,0,0,...,,,,,,,,,230,
238,238,V. Tsygankov,6.0,0,0,0,0,0,0,0,...,,,,,,,,,238,
348,348,J. Brunn Larsen,5.5,0,0,0,0,0,0,0,...,,,,,,,,,348,
358,358,O. Pikhalonok,5.5,0,0,0,0,0,0,0,...,,,,,,,,,358,
578,578,G. Tsitaishvili,5.0,0,0,0,0,0,0,0,...,,,,,,,,,578,
586,586,Uçan,5.0,0,0,0,0,0,0,0,...,,,,,,,,,586,
593,593,I. Konoplianka,5.0,0,0,0,0,0,0,0,...,,,,,,,,,593,
651,651,S. Lobjanidze,4.5,0,0,0,0,0,0,0,...,,,,,,,,,651,
655,655,V. Vladimer,4.5,0,0,0,0,0,0,0,...,,,,,,,,,655,


In [68]:
unmatched_player_id_lst = processed_df[processed_df['url'].isna()]['player_id'].tolist()

update value in clean data, using fbref_url column to join with collected player data in the `data/processed/players` directory.

In [69]:
clean_df = pd.read_csv('../data/clean/euro24_players_country_LOOKUP.csv')

In [70]:
unmatched_player_filt = clean_df['player_id'].isin(unmatched_player_id_lst)
clean_df[unmatched_player_filt]

Unnamed: 0,player_id,Players,Price,img_url,Next Match,Position_fnt,Name_fbref,Country,Position_fbref,Clubs Played,Years Played,fbref_url,name_deaccented
123,123,G. Sudakov,6.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,ROU v UKR,MID,Heorhiy Sudakov,UKR,MF,Shakhtar Donetsk,2020-2024,/en/players/a793f5a8/Heorhiy-Sudakov,
230,230,Tosun,6.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,FWD,Cenk Tosun,TUR,FW,Besiktas,2009-2024,/en/players/9a104dee/Cenk-Tosun,Cenk Tosun
238,238,V. Tsygankov,6.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,ROU v UKR,FWD,Viktor Tsyhankov,UKR,"FW, MF",Girona,2016-2024,/en/players/f7e87032/Viktor-Tsyhankov,Viktor Tsyhankov
348,348,J. Brunn Larsen,5.5,https://gaming.uefa.com/en/eurofantasy/static-...,SVN v DEN,MID,Jacob Bruun Larsen,DEN,,,,/en/players/4e204552/Jacob-Bruun-Larsen,
358,358,O. Pikhalonok,5.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,ROU v UKR,MID,Oleksandr Pikhalyonok,UKR,,"SC Dnipro-1, Ukraine, FC Mariupol, Shakhtar Do...",,/en/players/04e2af21/Oleksandr-Pikhalyonok,
578,578,G. Tsitaishvili,5.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,,GEO,,,,/en/players/69986380/Heorhii-Tsitaishvili,
586,586,Uçan,5.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,,TUR,,,,/en/players/fe154b26/Salih-Ucan,
593,593,I. Konoplianka,5.0,https://gaming.uefa.com/en/eurofantasy/static-...,ROU v UKR,MID,Yevhen Konoplyanka,UKR,"FW,MF","Ukraine, CFR Cluj, Dnipro Dnipropetrovsk, Scha...",2007-2024,/en/players/d9a5d930/Yevhen-Konoplyanka,Yevhen Konoplyanka
651,651,S. Lobjanidze,4.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,,GEO,,,,/en/players/84d6ce13/Saba-Lobzhanidze,
655,655,V. Vladimer,4.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,,GEO,,,,/en/players/98b5ed87/Vladimer-Mamuchashvili,


In [71]:
def read_active_players_in_countries(country_lst, verbose=False):
    main_dir = Path.cwd().resolve().parents[0]
    out_path = main_dir / "data" / "processed" / "players"

    res = {}
    for cnt_code in country_lst:
        if verbose:
            print(f"Reading player data for country {cnt_code}")
        try:
            res[cnt_code] = pd.read_csv(out_path / f"{cnt_code}_players.csv")
            res[cnt_code].dropna(subset=['Name'], inplace=True)
            res[cnt_code]['name_deaccented'] = res[cnt_code]['Name'].apply(lambda x: unidecode(x))
        except Exception as e:
            print(f"\t{e}")
    return res

cnt_codes_Lst = clean_df[unmatched_player_filt]['Country'].unique().tolist()
print(cnt_codes_Lst)
country_df_map = read_active_players_in_countries(cnt_codes_Lst, verbose=True)
# concatenate all the dataframes
country_df = pd.concat(country_df_map.values(), ignore_index=True)

['UKR', 'TUR', 'DEN', 'GEO', 'GER']
Reading player data for country UKR
Reading player data for country TUR
Reading player data for country DEN
Reading player data for country GEO
Reading player data for country GER


In [72]:
country_df.head()

Unnamed: 0,Name,url,Active,Years Played,Position,Clubs Played,IOC,name_deaccented
0,Abdulla Abdullaiev,/en/players/d57bf319/Abdulla-Abdullaiev,True,2022-2024,FW,FK Metalist 1925 Kharkiv,UKR,Abdulla Abdullaiev
1,Yehor Abramov,/en/players/b7290e03/Yehor-Abramov,True,2022-2023,MF,,UKR,Yehor Abramov
2,Volodymyr Adamyuk,/en/players/eeba2b90/Volodymyr-Adamyuk,True,2015-2024,DF,"SC Dnipro-1, FK L'viv, FC Dnipro, Veres Rivne,...",UKR,Volodymyr Adamyuk
3,Daniil Alefirenko,/en/players/05ccb628/Daniil-Alefirenko,True,2020-2024,MF,"Zorya Luhansk, Chornomorets Odesa",UKR,Daniil Alefirenko
4,Akhmed Alibekov,/en/players/9deaa88c/Akhmed-Alibekov,True,2017-2023,MF,"Slovan Liberec, FC Lviv, Dynamo Kyiv, Zorya Lu...",UKR,Akhmed Alibekov


In [73]:
join_df = pd.merge(clean_df[unmatched_player_filt], country_df, how='left', left_on='fbref_url', right_on='url')
join_df

Unnamed: 0,player_id,Players,Price,img_url,Next Match,Position_fnt,Name_fbref,Country,Position_fbref,Clubs Played_x,...,fbref_url,name_deaccented_x,Name,url,Active,Years Played_y,Position,Clubs Played_y,IOC,name_deaccented_y
0,123,G. Sudakov,6.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,ROU v UKR,MID,Heorhiy Sudakov,UKR,MF,Shakhtar Donetsk,...,/en/players/a793f5a8/Heorhiy-Sudakov,,Heorhiy Sudakov,/en/players/a793f5a8/Heorhiy-Sudakov,True,2020-2024,MF,"Shakhtar Donetsk, Ukraine",UKR,Heorhiy Sudakov
1,230,Tosun,6.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,FWD,Cenk Tosun,TUR,FW,Besiktas,...,/en/players/9a104dee/Cenk-Tosun,Cenk Tosun,Cenk Tosun,/en/players/9a104dee/Cenk-Tosun,True,2009-2024,"FW,MF","Beşiktaş, Türkiye, Gaziantepspor, Everton, Cry...",TUR,Cenk Tosun
2,238,V. Tsygankov,6.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,ROU v UKR,FWD,Viktor Tsyhankov,UKR,"FW, MF",Girona,...,/en/players/f7e87032/Viktor-Tsyhankov,Viktor Tsyhankov,Viktor Tsyhankov,/en/players/f7e87032/Viktor-Tsyhankov,True,2016-2024,"FW,MF","Girona, Ukraine, Dynamo Kyiv",UKR,Viktor Tsyhankov
3,348,J. Brunn Larsen,5.5,https://gaming.uefa.com/en/eurofantasy/static-...,SVN v DEN,MID,Jacob Bruun Larsen,DEN,,,...,/en/players/4e204552/Jacob-Bruun-Larsen,,Jacob Bruun Larsen,/en/players/4e204552/Jacob-Bruun-Larsen,True,2016-2024,"FW,MF","Burnley, Denmark, Hoffenheim, Borussia Dortmun...",DEN,Jacob Bruun Larsen
4,358,O. Pikhalonok,5.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,ROU v UKR,MID,Oleksandr Pikhalyonok,UKR,,"SC Dnipro-1, Ukraine, FC Mariupol, Shakhtar Do...",...,/en/players/04e2af21/Oleksandr-Pikhalyonok,,Oleksandr Pikhalyonok,/en/players/04e2af21/Oleksandr-Pikhalyonok,True,2016-2024,MF,"SC Dnipro-1, Ukraine, FC Mariupol, Shakhtar Do...",UKR,Oleksandr Pikhalyonok
5,578,G. Tsitaishvili,5.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,,GEO,,,...,/en/players/69986380/Heorhii-Tsitaishvili,,Heorhii Tsitaishvili,/en/players/69986380/Heorhii-Tsitaishvili,True,2018-2024,MF,"Georgia, Dynamo Kyiv, Lech Poznań, Chornomoret...",GEO,Heorhii Tsitaishvili
6,586,Uçan,5.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,,TUR,,,...,/en/players/fe154b26/Salih-Ucan,,Salih Uçan,/en/players/fe154b26/Salih-Ucan,True,2012-2024,MF,"Beşiktaş, Türkiye, Alanyaspor, Fenerbahçe, Sio...",TUR,Salih Ucan
7,593,I. Konoplianka,5.0,https://gaming.uefa.com/en/eurofantasy/static-...,ROU v UKR,MID,Yevhen Konoplyanka,UKR,"FW,MF","Ukraine, CFR Cluj, Dnipro Dnipropetrovsk, Scha...",...,/en/players/d9a5d930/Yevhen-Konoplyanka,Yevhen Konoplyanka,Yevhen Konoplyanka,/en/players/d9a5d930/Yevhen-Konoplyanka,True,2007-2024,"FW,MF","Ukraine, CFR Cluj, Dnipro Dnipropetrovsk, Scha...",UKR,Yevhen Konoplyanka
8,651,S. Lobjanidze,4.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,,GEO,,,...,/en/players/84d6ce13/Saba-Lobzhanidze,,Saba Lobzhanidze,/en/players/84d6ce13/Saba-Lobzhanidze,True,2017-2024,"FW,MF","Georgia, Atlanta United, Randers, Hatayspor, A...",GEO,Saba Lobzhanidze
9,655,V. Vladimer,4.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,,GEO,,,...,/en/players/98b5ed87/Vladimer-Mamuchashvili,,Vladimer Mamuchashvili,/en/players/98b5ed87/Vladimer-Mamuchashvili,True,2021-2024,MF,Georgia,GEO,Vladimer Mamuchashvili


In [74]:
print(f"{join_df['url'].isna().sum()} players not matched")

0 players not matched


In [75]:
# prepare join_df for update
df_right = join_df[['fbref_url', 'Name', 'Position', 'Clubs Played_y', 'Years Played_y', 'name_deaccented_y', 'player_id']]
df_right = df_right.rename(
    columns={
        'Name': 'Name_fbref', 
        'Position': 'Position_fbref', 
        'Clubs Played_y': 'Clubs Played',
        'Years Played_y': 'years_played', 
        'name_deaccented_y': 'name_deaccented'
    }
)
df_right.set_index('player_id', inplace=True)

In [76]:
df_right

Unnamed: 0_level_0,fbref_url,Name_fbref,Position_fbref,Clubs Played,years_played,name_deaccented
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
123,/en/players/a793f5a8/Heorhiy-Sudakov,Heorhiy Sudakov,MF,"Shakhtar Donetsk, Ukraine",2020-2024,Heorhiy Sudakov
230,/en/players/9a104dee/Cenk-Tosun,Cenk Tosun,"FW,MF","Beşiktaş, Türkiye, Gaziantepspor, Everton, Cry...",2009-2024,Cenk Tosun
238,/en/players/f7e87032/Viktor-Tsyhankov,Viktor Tsyhankov,"FW,MF","Girona, Ukraine, Dynamo Kyiv",2016-2024,Viktor Tsyhankov
348,/en/players/4e204552/Jacob-Bruun-Larsen,Jacob Bruun Larsen,"FW,MF","Burnley, Denmark, Hoffenheim, Borussia Dortmun...",2016-2024,Jacob Bruun Larsen
358,/en/players/04e2af21/Oleksandr-Pikhalyonok,Oleksandr Pikhalyonok,MF,"SC Dnipro-1, Ukraine, FC Mariupol, Shakhtar Do...",2016-2024,Oleksandr Pikhalyonok
578,/en/players/69986380/Heorhii-Tsitaishvili,Heorhii Tsitaishvili,MF,"Georgia, Dynamo Kyiv, Lech Poznań, Chornomoret...",2018-2024,Heorhii Tsitaishvili
586,/en/players/fe154b26/Salih-Ucan,Salih Uçan,MF,"Beşiktaş, Türkiye, Alanyaspor, Fenerbahçe, Sio...",2012-2024,Salih Ucan
593,/en/players/d9a5d930/Yevhen-Konoplyanka,Yevhen Konoplyanka,"FW,MF","Ukraine, CFR Cluj, Dnipro Dnipropetrovsk, Scha...",2007-2024,Yevhen Konoplyanka
651,/en/players/84d6ce13/Saba-Lobzhanidze,Saba Lobzhanidze,"FW,MF","Georgia, Atlanta United, Randers, Hatayspor, A...",2017-2024,Saba Lobzhanidze
655,/en/players/98b5ed87/Vladimer-Mamuchashvili,Vladimer Mamuchashvili,MF,Georgia,2021-2024,Vladimer Mamuchashvili


In [77]:
# update clean df using join_df
clean_df.update(df_right, overwrite=True)

In [79]:
clean_df.head()

Unnamed: 0,player_id,Players,Price,img_url,Next Match,Position_fnt,Name_fbref,Country,Position_fbref,Clubs Played,Years Played,fbref_url,name_deaccented
0,0,K. Mbappé,11.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,AUT v FRA,FWD,Kylian Mbappé,FRA,"FW,MF","Paris Saint-Germain, France, Monaco",2015-2024,/en/players/42fd9c7f/Kylian-Mbappe,Kylian Mbappe
1,1,H. Kane,11.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,SRB v ENG,FWD,Harry Kane,ENG,FW,"Tottenham Hotspur, England, Bayern Munich, Mil...",2010-2024,/en/players/21a66f6a/Harry-Kane,Harry Kane
2,2,C. Ronaldo,10.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,POR v CZE,FWD,Cristiano Ronaldo,POR,"FW,MF","Real Madrid, Portugal, Manchester United, Al-N...",2002-2024,/en/players/dea698d9/Cristiano-Ronaldo,Cristiano Ronaldo
3,3,J. Bellingham,9.5,https://gaming.uefa.com/en/eurofantasy/static-...,SRB v ENG,MID,Jude Bellingham,ENG,MF,"Real Madrid, England, Dortmund, Birmingham City",2019-2024,/en/players/57d88cf9/Jude-Bellingham,Jude Bellingham
4,4,K. De Bruyne,9.5,https://gaming.uefa.com/en/eurofantasy/static-...,BEL v SVK,MID,Kevin De Bruyne,BEL,"FW,MF","Manchester City, Belgium, Genk, Wolfsburg, Wer...",2008-2024,/en/players/e46012d4/Kevin-De-Bruyne,Kevin De Bruyne


In [80]:
clean_df.loc[unmatched_player_filt]

Unnamed: 0,player_id,Players,Price,img_url,Next Match,Position_fnt,Name_fbref,Country,Position_fbref,Clubs Played,Years Played,fbref_url,name_deaccented
123,123,G. Sudakov,6.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,ROU v UKR,MID,Heorhiy Sudakov,UKR,MF,"Shakhtar Donetsk, Ukraine",2020-2024,/en/players/a793f5a8/Heorhiy-Sudakov,Heorhiy Sudakov
230,230,Tosun,6.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,FWD,Cenk Tosun,TUR,"FW,MF","Beşiktaş, Türkiye, Gaziantepspor, Everton, Cry...",2009-2024,/en/players/9a104dee/Cenk-Tosun,Cenk Tosun
238,238,V. Tsygankov,6.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,ROU v UKR,FWD,Viktor Tsyhankov,UKR,"FW,MF","Girona, Ukraine, Dynamo Kyiv",2016-2024,/en/players/f7e87032/Viktor-Tsyhankov,Viktor Tsyhankov
348,348,J. Brunn Larsen,5.5,https://gaming.uefa.com/en/eurofantasy/static-...,SVN v DEN,MID,Jacob Bruun Larsen,DEN,"FW,MF","Burnley, Denmark, Hoffenheim, Borussia Dortmun...",,/en/players/4e204552/Jacob-Bruun-Larsen,Jacob Bruun Larsen
358,358,O. Pikhalonok,5.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,ROU v UKR,MID,Oleksandr Pikhalyonok,UKR,MF,"SC Dnipro-1, Ukraine, FC Mariupol, Shakhtar Do...",,/en/players/04e2af21/Oleksandr-Pikhalyonok,Oleksandr Pikhalyonok
578,578,G. Tsitaishvili,5.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,Heorhii Tsitaishvili,GEO,MF,"Georgia, Dynamo Kyiv, Lech Poznań, Chornomoret...",,/en/players/69986380/Heorhii-Tsitaishvili,Heorhii Tsitaishvili
586,586,Uçan,5.0,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,Salih Uçan,TUR,MF,"Beşiktaş, Türkiye, Alanyaspor, Fenerbahçe, Sio...",,/en/players/fe154b26/Salih-Ucan,Salih Ucan
593,593,I. Konoplianka,5.0,https://gaming.uefa.com/en/eurofantasy/static-...,ROU v UKR,MID,Yevhen Konoplyanka,UKR,"FW,MF","Ukraine, CFR Cluj, Dnipro Dnipropetrovsk, Scha...",2007-2024,/en/players/d9a5d930/Yevhen-Konoplyanka,Yevhen Konoplyanka
651,651,S. Lobjanidze,4.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,Saba Lobzhanidze,GEO,"FW,MF","Georgia, Atlanta United, Randers, Hatayspor, A...",,/en/players/84d6ce13/Saba-Lobzhanidze,Saba Lobzhanidze
655,655,V. Vladimer,4.5,https://img.uefa.com/imgml/TP/players/3/2024/3...,TUR v GEO,MID,Vladimer Mamuchashvili,GEO,MF,Georgia,,/en/players/98b5ed87/Vladimer-Mamuchashvili,Vladimer Mamuchashvili


In [81]:
clean_df.to_csv('../data/clean/euro24_players_country_LOOKUP_v2.csv', index=False)