## Golden Boy Award
The Golden Boy Award is given to the most promosing player under 21 years. The winner is determined based on the amount of votes given by football journalists around the world. The winners from the previous years are:

- 2018: M. de Ligt
- 2019: J. Felix
- 2020: E. Haaland
- 2021: Pedri
- 2022: Gavi
- 2023: t.b.a.

## Young Players with the Largest Potential Deficit and Financial Analysis

Players sorted based on the difference between the potential score and current score.

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import figure, show, output_file
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.transform import linear_cmap
from bokeh.palettes import Viridis256

In [2]:
folder_name = "./archive"
file_directory = os.listdir(folder_name)

In [3]:
# required_columns = {'ID', 
#                     'Flag', 
#                     'Potential', 
#                     'Nationality', 
#                     'Weak Foot', 
#                     'LongPassing', 
#                     'FKAccuracy', 
#                     'Acceleration', 
#                     'LongShots', 
#                     'Interceptions', 
#                     'GKKicking', 
#                     'Preferred Foot', 
#                     'Curve', 
#                     'Jersey Number', 
#                     'ShortPassing', 
#                     'Jumping', 
#                     'Balance', 
#                     'Body Type', 
#                     'SlidingTackle', 
#                     'Work Rate', 
#                     'Photo', 
#                     'StandingTackle', 
#                     'Value', 
#                     'Wage', 
#                     'Position', 
#                     'Overall', 
#                     'Club Logo', 
#                     'Loaned From', 
#                     'Aggression', 
#                     'Dribbling', 
#                     'Contract Valid Until', 
#                     'Reactions', 
#                     'Strength', 
#                     'International Reputation', 
#                     'Volleys', 
#                     'GKReflexes', 
#                     'Best Overall Rating', 
#                     'Stamina', 
#                     'Club', 
#                     'Best Position', 
#                     'Agility', 
#                     'Release Clause', 
#                     'Name', 
#                     'Real Face', 
#                     'GKHandling', 
#                     'GKDiving', 
#                     'Penalties', 
#                     'HeadingAccuracy', 
#                     'Height', 
#                     'Skill Moves', 
#                     'Weight', 
#                     'Positioning', 
#                     'Crossing', 
#                     'ShotPower', 
#                     'Finishing', 
#                     'Special', 
#                     'Marking', 
#                     'GKPositioning', 
#                     'Vision', 
#                     'Joined', 
#                     'Age', 
#                     'BallControl', 
#                     'Composure', 
#                     'SprintSpeed', 
#                     'DefensiveAwareness'}

In [4]:
required_columns = {'ID', 
                    'Potential', 
                    'Nationality', 
                    'Preferred Foot', 
                    'Value', 
                    'Wage', 
                    'Position', 
                    'Overall', 
                    'Contract Valid Until', 
                    'Club', 
                    'Release Clause', 
                    'Name', 
                    'Joined', 
                    'Age'
                   }


df_lst = []

for filename in file_directory:
    fifa_version = filename.split("_")[0]
    
    df_temp = pd.read_csv(f"{folder_name}/{filename}")
    if required_columns.issubset(set(df_temp.columns)):
        df_lst.append(filename)   

        
print(df_lst)

['FIFA18_official_data.csv', 'FIFA19_official_data.csv', 'FIFA20_official_data.csv', 'FIFA21_official_data.csv', 'FIFA22_official_data.csv', 'FIFA23_official_data.csv']


In [5]:
def pot_fig(df_21):
    # Create a Bokeh ColumnDataSource from the DataFrame
    source = ColumnDataSource(df_21)

    # Define a colormap for the Age values
    mapper = linear_cmap(field_name='Age', palette=Viridis256, low=df_21['Age'].min(), high=df_21['Age'].max())

    # Create a figure
    p = figure(title="Player potential growth", x_axis_label="Overall score", y_axis_label="Potential")

    # Add scatter glyphs
    scatter = p.circle(x="Overall", y="Potential", source=source, size=10, color=mapper, legend_field="Age")

    # Add a horizontal line for mean Potential
    mean_line = df_21["Potential"].mean()
    p.line([df_21["Overall"].min(), df_21["Overall"].max()], [mean_line, mean_line], color="red", line_dash="dashed", legend_label="Mean Potential")

    # Add a HoverTool
    hover = HoverTool()
    hover.tooltips = [("Name", "@Name"), ("Age", "@Age"), ("Potential", "@Potential"), ("Currently", "@Overall")]
    p.add_tools(hover)

    # Add legend to the top left corner
    p.legend.title = "Age"
    p.legend.label_text_font_size = "10pt"
    p.legend.location = "top_left"

    # Specify the output HTML file
    output_file(f"fifa{str(version)}.html")

    # Save and show the plot
    return show(p)


def val_fig(df_val):
    # Create a Bokeh ColumnDataSource from the DataFrame
    source = ColumnDataSource(df_val)

    # Define a colormap for the 'Difference' values
    mapper = linear_cmap(field_name='Age', palette=Viridis256, low=df_val['Age'].min(), high=df_val['Age'].max())

    # Create a figure
    p = figure(title="Valuation to potential of U-21 players", x_axis_label="Relative value to release clause ratio", y_axis_label="Difference between potential and current")

    # Add scatter glyphs
    scatter = p.circle(x="Value Difference Percentage", y="Difference", source=source, size=10, color=mapper, legend_field="Age")

    # Add a horizontal line for mean Difference
    mean_line = df_val["Difference"].mean()
    p.line([df_val["Value Difference Percentage"].min(), df_val["Value Difference Percentage"].max()], [mean_line, mean_line], color="red", line_dash="dashed", legend_label="Mean Difference")

    # Add a HoverTool
    hover = HoverTool()
    hover.tooltips = [("Name", "@{Name}"), ("Value", "@Value"), ("Age", "@Age"), ("Overall score", "@Overall"), ("Potential", "@Potential")]
    p.add_tools(hover)

    # Add legend to the top left corner
    p.legend.title = "Age"
    p.legend.label_text_font_size = "10pt"
    p.legend.location = "top_left"

    # Specify the output HTML file
    output_file(f"fifa_val_to_pot{str(version)}.html")

    # Save and show the plot
    return show(p)

In [6]:
for filename in df_lst:
    version = filename[4:6]
    df = pd.read_csv(f"./archive/FIFA{str(version)}_official_data.csv", usecols=required_columns).dropna()
    df_21 = df[(df["Age"] < 21)]# & (df["Potential"] >= 85)]
    df_21["Difference"] = df_21["Potential"] - df_21["Overall"]
    ranked_diff = df_21.sort_values("Difference", ascending=False)

    df_val = ranked_diff
    df_val["Value"] = ranked_diff["Value"].replace({"€": "", "K":"*1e3", "M":"*1e6"}, regex=True).map(pd.eval).astype(int)
    df_val["Release Clause"] = ranked_diff["Release Clause"].replace({"€": "", "K":"*1e3", "M":"*1e6"}, regex=True).map(pd.eval).astype(int)
    df_val["Wage"] = ranked_diff["Wage"].replace({"€": "", "K":"*1e3", "M":"*1e6"}, regex=True).map(pd.eval).astype(int)
    df_val["Value Difference"] = ranked_diff["Release Clause"] - ranked_diff["Value"]
    df_val["Value Difference Percentage"] = ranked_diff["Release Clause"]/(ranked_diff["Value"])
    
    pot_fig(df_21)
    val_fig(df_val)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_21["Difference"] = df_21["Potential"] - df_21["Overall"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_21["Difference"] = df_21["Potential"] - df_21["Overall"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_21["Difference"] = df_21["Potential"] - df_21["Overall"]
A value is trying to be s