In [112]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans
import plotly.express as px
import plotly.io as pio
# Make "iframe" the default Plotly renderer, so every later fig.show() call
# in this notebook uses it unless a renderer is passed explicitly.
pio.renderers.default = "iframe"

In [113]:
# Load the player table; the path is resolved relative to the notebook's
# working directory.
players_csv = "../data/players_dataset_with_fixed_wage_and_overall_ranges.csv"
df = pd.read_csv(players_csv)

In [114]:
def map_positions(position):
    """Collapse a detailed position code into a broad playing role.

    Parameters
    ----------
    position : object
        A position code such as 'GK', 'CB' or 'ST'. Matching is exact and
        case-sensitive.

    Returns
    -------
    str
        'GOALKEEPER', 'DEFENDER', 'MIDFIELDER' or 'FORWARD' when the code is
        listed below, otherwise 'OTHER' (this includes missing values).
    """
    # Each role is paired with the codes that belong to it; groups are
    # checked from top to bottom and the first hit wins.
    role_groups = (
        ('GOALKEEPER', ('GK',)),
        ('DEFENDER', ('CB', 'RB', 'LB', 'RWB', 'LWB')),
        ('MIDFIELDER', ('CM', 'CDM', 'CAM', 'RM', 'LM')),
        ('FORWARD', ('ST', 'CF', 'RF', 'LF', 'RW', 'LW')),
    )
    for role, codes in role_groups:
        if position in codes:
            return role
    return 'OTHER'

# Derive the broad role of every player from the detailed POSITION code.
df['BROAD_POSITION'] = df['POSITION'].map(map_positions)

In [115]:
# Keep an independent snapshot of the frame (with string BROAD_POSITION
# labels) before any derived table is encoded for clustering.
original_df = df.copy(deep=True)

In [116]:
# Working table for clustering: only the two feature columns, copied so
# that later edits do not touch df.
feature_columns = ['WAGE', 'BROAD_POSITION']
wage_pos = df.loc[:, feature_columns].copy()

In [117]:
# Learn the set of broad-position labels, then overwrite the column with
# their integer codes so it can be fed to the numeric pipeline below.
label_encoder = LabelEncoder().fit(wage_pos['BROAD_POSITION'])
wage_pos['BROAD_POSITION'] = label_encoder.transform(wage_pos['BROAD_POSITION'])

In [118]:
# Standardise WAGE and the encoded position so both features enter the
# clustering on a comparable scale.
scaler = StandardScaler().fit(wage_pos)
wage_pos_scaled = scaler.transform(wage_pos)

In [119]:
# Cluster the scaled (wage, encoded position) features with k-means.
n_clusters = 4
# n_init is pinned explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4 (1.2/1.3 emit a FutureWarning when it is left unset), so
# relying on the default makes the clustering depend on the installed version.
kmeans_players = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
# One cluster id per row of wage_pos_scaled, in row order.
labels = kmeans_players.fit_predict(wage_pos_scaled)





In [120]:
# Show the labels learned by the encoder. The position of a label in this
# array is the integer code stored in wage_pos['BROAD_POSITION'].
label_encoder.classes_

array(['DEFENDER', 'FORWARD', 'GOALKEEPER', 'MIDFIELDER'], dtype=object)

In [121]:
# Attach the k-means assignment of each player to the working table.
wage_pos = wage_pos.assign(Cluster_Label=labels)

In [122]:
# Cluster ids are nominal categories, not magnitudes. Plotly Express colours
# numeric columns on a continuous scale, so the ids are cast to strings on a
# plotting copy to get one discrete colour (and legend entry) per cluster.
cluster_plot_df = wage_pos.assign(Cluster_Label=wage_pos['Cluster_Label'].astype(str))

# No y column is given, so wages are plotted against the row index.
# When the cells run top to bottom, BROAD_POSITION still holds the integer
# codes from label_encoder here, so the symbol legend shows those codes.
fig_clusters = px.scatter(cluster_plot_df, x='WAGE', color='Cluster_Label', symbol='BROAD_POSITION',
                          labels={'WAGE': 'Wage', 'Cluster_Label': 'Cluster Label'},
                          title='Player Clusters Based on Wage and Position (KMeans)')
fig_clusters.show()

In [123]:
# Map each integer code produced by label_encoder back to its position name.
# classes_[i] is the label encoded as i, so enumerate() yields the inverse
# mapping. (Zipping the string column with itself only mapped names to
# themselves, which turns integer-coded values into NaN when used with .map.)
position_mapping = dict(enumerate(label_encoder.classes_))

In [124]:
# Replace every BROAD_POSITION value with its entry in position_mapping;
# values that have no matching key become NaN.
wage_pos = wage_pos.assign(
    BROAD_POSITION=wage_pos['BROAD_POSITION'].map(position_mapping)
)

In [125]:
# Reference plot: the same wage axis, but coloured and marked by the broad
# position stored in the untouched snapshot rather than by k-means cluster.
original_plot_options = {
    'x': 'WAGE',
    'color': 'BROAD_POSITION',
    'symbol': 'BROAD_POSITION',
    'labels': {'WAGE': 'Wage', 'BROAD_POSITION': 'Original Position'},
    'title': 'Player Clusters Based on Wage and Position (Original Labels)',
}
fig_original_labels = px.scatter(original_df, **original_plot_options)
fig_original_labels.show()