In [107]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("fifa23.csv", sep=';', encoding='utf-8')
df_field_players = df[df['Best Position'].str.contains('GK') == False]


In [108]:
stats = ['Crossing', 'Finishing', 'Heading Accuracy', 'Short Passing', 'Volleys',
    'Dribbling', 'Curve', 'Freekick Accuracy', 'LongPassing', 'BallControl',
    'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance',
    'Shot Power', 'Jumping', 'Stamina', 'Strength', 'Long Shots',
    'Aggression', 'Interceptions', 'Positioning', 'Vision',
    'Composure', 'Marking', 'Standing Tackle', 'Sliding Tackle']

for col in stats:
    df_field_players[col] = df_field_players[col] / df['Overall']

features = [
    'Height(in cm)', 'Weight(in kg)',
    'Crossing', 'Finishing', 'Heading Accuracy', 'Short Passing', 'Volleys',
    'Dribbling', 'Curve', 'Freekick Accuracy', 'LongPassing', 'BallControl',
    'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance',
    'Shot Power', 'Jumping', 'Stamina', 'Strength', 'Long Shots',
    'Aggression', 'Interceptions', 'Positioning', 'Vision',
    'Composure', 'Marking', 'Standing Tackle', 'Sliding Tackle'
]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [109]:
position_groups = {
    'CB': 'DEF', 'LB': 'DEF', 'RB': 'DEF', 'LWB': 'DEF', 'RWB': 'DEF',
    'CDM': 'MID', 'CM': 'MID', 'CAM': 'MID', 'LM': 'MID', 'RM': 'MID',
    'ST': 'FWD', 'CF': 'FWD', 'LW': 'FWD', 'RW': 'FWD'
}

df_field_players['Position Group'] = df_field_players['Best Position'].map(position_groups)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## Defensas

In [110]:
df_defenders = df_field_players[df_field_players['Position Group'] == 'DEF'].copy()

print(f"Total defenders: {df_defenders.shape[0]}")

defense_features = [
    'Height(in cm)', 'Weight(in kg)',
    'Crossing', 'Heading Accuracy', 'Short Passing',
    'Dribbling', 'LongPassing', 'BallControl',
    'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance',
    'Jumping', 'Stamina', 'Strength',
    'Aggression', 'Interceptions', 'Positioning',
    'Composure', 'Marking', 'Standing Tackle', 'Sliding Tackle'
]

X_defenders = df_defenders[defense_features].dropna()

scaler = StandardScaler()
X_defenders_scaled = scaler.fit_transform(X_defenders)

kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
kmeans.fit(X_defenders_scaled)

df_defenders['cluster'] = kmeans.labels_

cluster_means = df_defenders.groupby('cluster')[defense_features].mean()
print(cluster_means)


Total defenders: 6273
         Height(in cm)  Weight(in kg)  Crossing  Heading Accuracy  \
cluster                                                             
0           184.569775      77.924436  0.772126          0.970801   
1           189.170897      82.683587  0.564193          1.012869   
2           185.986882      78.422805  0.557086          0.974701   
3           178.100924      72.318408  0.963614          0.822647   
4           177.100760      70.941065  0.921653          0.796184   

         Short Passing  Dribbling  LongPassing  BallControl  Acceleration  \
cluster                                                                     
0             0.958343   0.839535     0.895835     0.918024      0.958454   
1             0.901447   0.672451     0.829791     0.830181      0.723861   
2             0.811345   0.620383     0.682849     0.754252      1.004760   
3             0.952262   0.947087     0.868876     0.956607      1.072662   
4             0.882283   0.93150

In [111]:
archetype_names = {
    0: "Ball-Playing CB",
    1: "Physical Tower CB",
    2: "Aggressive Fast CB",
    3: "Mix Full-Back",
    4: "Attacking Wing-Back"
}

df_defenders['archetype'] = df_defenders['cluster'].map(archetype_names)

for archetype, group in df_defenders.groupby('archetype'):
    print(f"\n🔹 {archetype}")
    display(group[['Known As', 'Best Position', 'Overall', 'Height(in cm)']].head(5))


🔹 Aggressive Fast CB


Unnamed: 0,Known As,Best Position,Overall,Height(in cm)
308,Heberto Gaúcho,CB,81,181
543,Ibañez,CB,79,185
844,J. Aidoo,CB,78,184
1269,Antonaldinho,CB,76,188
1319,Igor,CB,76,185



🔹 Attacking Wing-Back


Unnamed: 0,Known As,Best Position,Overall,Height(in cm)
1209,L. Advíncula,RB,76,180
1851,R. Henry,LWB,75,170
1926,Luis Pérez,RB,74,174
2055,R. Cannon,RWB,74,180
2074,F. Sacko,RB,74,179



🔹 Ball-Playing CB


Unnamed: 0,Known As,Best Position,Overall,Height(in cm)
25,Marquinhos,CB,88,183
46,D. Alaba,CB,86,180
91,L. Hernández,CB,84,184
129,J. Koundé,CB,84,181
133,Éder Militão,CB,84,186



🔹 Mix Full-Back


Unnamed: 0,Known As,Best Position,Overall,Height(in cm)
24,João Cancelo,LB,88,182
39,A. Robertson,LB,87,178
69,K. Walker,RB,85,183
71,Jordi Alba,LB,85,170
77,M. Acuña,LB,85,172



🔹 Physical Tower CB


Unnamed: 0,Known As,Best Position,Overall,Height(in cm)
9,V. van Dijk,CB,90,193
20,Rúben Dias,CB,88,187
30,A. Rüdiger,CB,87,190
40,K. Koulibaly,CB,87,187
50,Thiago Silva,CB,86,183


In [112]:
radar_features = [f for f in defense_features if f not in ['Height(in cm)', 'Weight(in kg)']]

fig = sp.make_subplots(
    rows=2, cols=3,  # 2 filas x 3 columnas (la última quedará vacía)
    specs=[[{'type': 'polar'}, {'type': 'polar'}, {'type': 'polar'}],
           [{'type': 'polar'}, {'type': 'polar'}, None]],
    subplot_titles=list(archetype_names.values())
)

row_col_map = [(1,1), (1,2), (1,3), (2,1), (2,2)]

for i, (cluster_id, archetype) in enumerate(archetype_names.items()):
    row, col = row_col_map[i]
    fig.add_trace(
        go.Scatterpolar(
            r=cluster_means.loc[cluster_id, radar_features].values,
            theta=radar_features,
            fill='toself',
            name=archetype,
        ),
        row=row, col=col
    )

fig.update_layout(
    height=800, width=1200,
    title_text="FIFA23 Defender Archetypes (Radar Charts)",
    showlegend=False
)

fig.show()

## Mediocentros


In [113]:
df_midfielders = df_field_players[df_field_players['Position Group'] == 'MID'].copy()

print(f"Total midfielders: {df_midfielders.shape[0]}")

midfield_features = [
    'Height(in cm)', 'Weight(in kg)',
    'Crossing', 'Finishing', 'Heading Accuracy', 'Short Passing', 'Volleys',
    'Dribbling','Curve','Freekick Accuracy', 'LongPassing', 'BallControl',
    'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance',
    'Jumping', 'Stamina', 'Strength','Long Shots',
    'Aggression', 'Interceptions', 'Positioning', 'Vision',
    'Composure', 'Marking', 'Standing Tackle', 'Sliding Tackle'
]

X_midfielders = df_midfielders[midfield_features].dropna()

scaler = StandardScaler()
X_midfielders_scaled = scaler.fit_transform(X_midfielders)


kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
kmeans.fit(X_midfielders_scaled)

df_midfielders['cluster'] = kmeans.labels_

cluster_means = df_midfielders.groupby('cluster')[midfield_features].mean()
print(cluster_means)


Total midfielders: 7058
         Height(in cm)  Weight(in kg)  Crossing  Finishing  Heading Accuracy  \
cluster                                                                        
0           176.794399      70.759027  0.933574   0.930306          0.709247   
1           181.160889      75.130804  0.918524   0.869726          0.823792   
2           181.272081      74.837563  0.752762   0.675077          0.856674   
3           176.096467      69.503085  0.863460   0.818207          0.781583   
4           175.514957      69.266382  0.906118   0.915162          0.686120   

         Short Passing   Volleys  Dribbling     Curve  Freekick Accuracy  ...  \
cluster                                                                   ...   
0             1.004064  0.855993   1.040336  0.964757           0.908211  ...   
1             1.033834  0.818292   0.985538  0.940150           0.891473  ...   
2             1.025079  0.601332   0.906574  0.710399           0.684608  ...   
3         

In [114]:
archetype_names = {
    0: "Attacking Midfielder",
    1: "Box-to-Box Midfielder",
    2: "Holding Midfielder",
    3: "Mobile Playmaker",
    4: "Attacking Wide Midfielder"
}

df_midfielders['archetype'] = df_midfielders['cluster'].map(archetype_names)

for archetype, group in df_midfielders.groupby('archetype'):
    print(f"\n🔹 {archetype}")
    display(group[['Known As', 'Best Position', 'Overall', 'Short Passing']].head(5))


🔹 Attacking Midfielder


Unnamed: 0,Known As,Best Position,Overall,Short Passing
0,L. Messi,CAM,91,1.0
15,S. Mané,LM,89,0.94382
23,Bernardo Silva,CAM,88,1.011364
49,P. Dybala,CAM,86,1.0
56,K. Coman,LM,86,0.930233



🔹 Attacking Wide Midfielder


Unnamed: 0,Known As,Best Position,Overall,Short Passing
126,M. Diaby,RM,84,0.952381
244,C. Pulisic,RM,82,0.939024
369,A. Saint-Maximin,CAM,81,0.938272
415,N. González,LM,80,1.0
537,S. Chukwueze,LM,79,0.924051



🔹 Box-to-Box Midfielder


Unnamed: 0,Known As,Best Position,Overall,Short Passing
3,K. De Bruyne,CM,91,1.021978
17,J. Kimmich,CDM,89,0.977528
28,T. Kroos,CM,88,1.056818
29,L. Modrić,CM,88,1.034091
34,T. Müller,CAM,87,0.977011



🔹 Holding Midfielder


Unnamed: 0,Known As,Best Position,Overall,Short Passing
13,Casemiro,CDM,89,0.94382
19,N. Kanté,CDM,89,0.921348
31,Rodri,CDM,87,0.988506
32,Fabinho,CDM,87,0.965517
72,Sergio Busquets,CDM,85,1.023529



🔹 Mobile Playmaker


Unnamed: 0,Known As,Best Position,Overall,Short Passing
36,M. Verratti,CM,87,1.034483
82,Pedri,CM,85,1.023529
141,A. Davies,LM,84,0.97619
233,N. Mazraoui,RM,82,1.012195
360,S. Lobotka,CM,81,1.049383


In [115]:
radar_features = [f for f in defense_features if f not in ['Height(in cm)', 'Weight(in kg)']]

fig = sp.make_subplots(
    rows=2, cols=3,
    specs=[[{'type': 'polar'}, {'type': 'polar'}, {'type': 'polar'}],
           [{'type': 'polar'}, {'type': 'polar'}, None]],
    subplot_titles=list(archetype_names.values())
)

row_col_map = [(1,1), (1,2), (1,3), (2,1), (2,2)]

for i, (cluster_id, archetype) in enumerate(archetype_names.items()):
    row, col = row_col_map[i]
    fig.add_trace(
        go.Scatterpolar(
            r=cluster_means.loc[cluster_id, radar_features].values,
            theta=radar_features,
            fill='toself',
            name=archetype,
        ),
        row=row, col=col
    )

fig.update_layout(
    height=800, width=1200,
    title_text="FIFA23 Midfielder Archetypes (Radar Charts)",
    showlegend=False
)

fig.show()

## Delanteros

In [119]:
df_forwards = df_field_players[df_field_players['Position Group'] == 'FWD'].copy()

print(f"Total forwards: {df_forwards.shape[0]}")

forward_features = [
    'Height(in cm)', 'Weight(in kg)',
    'Crossing', 'Finishing', 'Heading Accuracy', 'Short Passing', 'Volleys',
    'Dribbling','Curve','Freekick Accuracy', 'LongPassing', 'BallControl',
    'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance',
    'Jumping', 'Stamina', 'Strength','Long Shots',
    'Positioning', 'Vision'
]

X_forwards = df_forwards[forward_features].dropna()

scaler = StandardScaler()
X_forwards_scaled = scaler.fit_transform(X_forwards)


kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
kmeans.fit(X_forwards_scaled)

df_forwards['cluster'] = kmeans.labels_

cluster_means = df_forwards.groupby('cluster')[forward_features].mean()
cluster_means.to_csv("Delanteros.csv")
print(cluster_means)


Total forwards: 3147
         Height(in cm)  Weight(in kg)  Crossing  Finishing  Heading Accuracy  \
cluster                                                                        
0           174.853598      68.890819  0.928117   0.947800          0.703205   
1           181.486674      75.943221  0.856949   1.011295          0.945083   
2           188.992683      83.836585  0.678244   1.033331          1.059985   
3           185.660811      79.186486  0.602329   1.035259          1.017969   
4           179.666211      73.257182  0.753157   1.013428          0.915026   

         Short Passing   Volleys  Dribbling     Curve  Freekick Accuracy  ...  \
cluster                                                                   ...   
0             0.943352  0.854173   1.052271  0.916910           0.811358  ...   
1             0.951984  0.934738   0.996945  0.889308           0.814633  ...   
2             0.913015  0.953298   0.925381  0.774844           0.728813  ...   
3            

In [121]:
archetype_names = {
    0: "Explosive Winger",
    1: "Complete Forward",
    2: "Target Man",
    3: "Poacher",
    4: "Second Striker"
}

df_forwards['archetype'] = df_forwards['cluster'].map(archetype_names)

for archetype, group in df_forwards.groupby('archetype'):
    print(f"\n🔹 {archetype}")
    display(group[['Known As', 'Best Position', 'Overall', 'Short Passing']].head(5))


🔹 Complete Forward


Unnamed: 0,Known As,Best Position,Overall,Short Passing
1,K. Benzema,CF,91,0.978022
4,K. Mbappé,ST,91,0.934066
5,M. Salah,RW,90,0.933333
12,H. Son,LW,89,0.94382
59,L. Martínez,ST,86,0.883721



🔹 Explosive Winger


Unnamed: 0,Known As,Best Position,Overall,Short Passing
11,Neymar Jr,LW,89,0.955056
47,R. Sterling,LW,86,0.965116
48,R. Mahrez,RW,86,0.965116
63,Vinícius Jr.,LW,86,0.872093
95,D. Mertens,CF,84,1.0



🔹 Poacher


Unnamed: 0,Known As,Best Position,Overall,Short Passing
166,V. Osimhen,ST,83,0.86747
474,D. Calvert-Lewin,ST,80,0.8875
494,A. Belotti,ST,80,0.85
539,Beto,ST,79,0.848101
545,B. Dia,ST,79,0.936709



🔹 Second Striker


Unnamed: 0,Known As,Best Position,Overall,Short Passing
754,J. Martínez,ST,78,0.858974
1009,P. Daka,ST,77,0.883117
1230,E. Dennis,ST,76,0.855263
1258,E. Boateng,ST,76,0.868421
1460,A. Elis,ST,75,0.866667



🔹 Target Man


Unnamed: 0,Known As,Best Position,Overall,Short Passing
2,R. Lewandowski,ST,91,0.923077
8,Cristiano Ronaldo,ST,90,0.888889
10,H. Kane,ST,89,0.94382
21,E. Haaland,ST,88,0.840909
44,C. Immobile,ST,86,0.918605


In [122]:
radar_features = [f for f in forward_features if f not in ['Height(in cm)', 'Weight(in kg)']]

fig = sp.make_subplots(
    rows=2, cols=3,
    specs=[[{'type': 'polar'}, {'type': 'polar'}, {'type': 'polar'}],
           [{'type': 'polar'}, {'type': 'polar'}, None]],
    subplot_titles=list(archetype_names.values())
)

row_col_map = [(1,1), (1,2), (1,3), (2,1), (2,2)]

for i, (cluster_id, archetype) in enumerate(archetype_names.items()):
    row, col = row_col_map[i]
    fig.add_trace(
        go.Scatterpolar(
            r=cluster_means.loc[cluster_id, radar_features].values,
            theta=radar_features,
            fill='toself',
            name=archetype,
        ),
        row=row, col=col
    )

fig.update_layout(
    height=800, width=1200,
    title_text="FIFA23 Forward Archetypes (Radar Charts)",
    showlegend=False
)

fig.show()