In [62]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import warnings
# Suppress every warning category for the remainder of the kernel session.
warnings.simplefilter('ignore')

# Read the input CSV (path is relative to the notebook's working directory)
# into the frame that the following cells operate on.
file_path = r'./final_output.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [63]:
# Cluster the rows of `data` on their numeric columns and build, for every
# team, the list of other teams that landed in the same cluster.

# Feature matrix: numeric columns only. Columns that are entirely missing are
# dropped first: their mean is NaN, so mean-imputation cannot fill them and the
# NaNs would be passed on to scaling and then to KMeans.
numeric_features = data.select_dtypes(include='number').dropna(axis=1, how='all')
# Mean-impute by assignment instead of `inplace=True` on a derived frame.
numeric_features = numeric_features.fillna(numeric_features.mean())

# Standardise the features before clustering.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(numeric_features)

num_clusters = 5
# NOTE(review): `n_init` is left at the installed scikit-learn default, which
# is not the same in every release - pin it explicitly if cluster assignments
# must be reproducible across environments.
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
data['Cluster'] = kmeans.fit_predict(scaled_features)

# cluster label -> list of team names assigned to that cluster
cluster_teams = data.groupby('Cluster')['team'].apply(list).to_dict()

# team -> every other team sharing its cluster (empty list for a singleton)
teams_in_same_cluster = {
    team: [other_team for other_team in teams if other_team != team]
    for teams in cluster_teams.values()
    for team in teams
}

# Store the labels as strings; string columns are excluded by
# `select_dtypes(include='number')` above if this cell is run again.
data['Cluster'] = data['Cluster'].astype(str)

In [64]:
# Flatten the team -> cluster-mates mapping into (team, cluster_team) records,
# one record per pair.
pair_records = []
for team_name, mates in teams_in_same_cluster.items():
    for mate in mates:
        pair_records.append((team_name, mate))

teams_in_same_cluster_df = pd.DataFrame(
    pair_records,
    columns=["team", "cluster_team"],
)

# Left-join the pairs onto the team data: a team's row is repeated once per
# cluster-mate, and teams without any pair keep a single row.
merged_data = pd.merge(data, teams_in_same_cluster_df, how="left", on="team")

# Also attach the full list of cluster-mates to every row.
merged_data["cluster_members"] = merged_data["team"].map(teams_in_same_cluster)

# Write the result to disk without the index column.
merged_data.to_csv("last_output.csv", index=False)

In [65]:
# Display the merged team / cluster-mate frame as this cell's output.
merged_data


Unnamed: 0,team,normal_shots,normal_goals,normal_xg,standard_shots,standard_goals,standard_xg,slow_shots,slow_goals,slow_xg,...,shots pg,discipline,possession,pass%,aerialswon,rating,Market_Value,Cluster,cluster_team,cluster_members
0,arsenal,138.0,9.0,12.423194,48.0,8.0,6.212941,27.0,0.0,1.420184,...,13.8,443.0,55.3,86.4,13.7,6.79,€1.15bn,2,chelsea,"[chelsea, liverpool, newcastle_united]"
1,arsenal,138.0,9.0,12.423194,48.0,8.0,6.212941,27.0,0.0,1.420184,...,13.8,443.0,55.3,86.4,13.7,6.79,€1.15bn,2,liverpool,"[chelsea, liverpool, newcastle_united]"
2,arsenal,138.0,9.0,12.423194,48.0,8.0,6.212941,27.0,0.0,1.420184,...,13.8,443.0,55.3,86.4,13.7,6.79,€1.15bn,2,newcastle_united,"[chelsea, liverpool, newcastle_united]"
3,aston_villa,147.0,21.0,18.685291,56.0,6.0,6.755267,24.0,0.0,2.148478,...,12.8,432.0,50.5,85.7,9.5,6.57,€614.95m,0,bournemouth,"[bournemouth, brighton, crystal_palace, everto..."
4,aston_villa,147.0,21.0,18.685291,56.0,6.0,6.755267,24.0,0.0,2.148478,...,12.8,432.0,50.5,85.7,9.5,6.57,€614.95m,0,brighton,"[bournemouth, brighton, crystal_palace, everto..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,west_ham,214.0,22.0,27.723723,75.0,9.0,8.230252,38.0,5.0,5.528700,...,14.1,482.0,46.5,81.5,15.5,6.55,€439.30m,3,wolverhampton_wanderers,"[brentford, ipswich, leicester, wolverhampton_..."
87,wolverhampton_wanderers,164.0,23.0,21.753979,92.0,20.0,13.265654,23.0,1.0,1.282015,...,10.9,471.0,47.2,81.2,11.8,6.53,€388.30m,3,brentford,"[brentford, ipswich, leicester, west_ham]"
88,wolverhampton_wanderers,164.0,23.0,21.753979,92.0,20.0,13.265654,23.0,1.0,1.282015,...,10.9,471.0,47.2,81.2,11.8,6.53,€388.30m,3,ipswich,"[brentford, ipswich, leicester, west_ham]"
89,wolverhampton_wanderers,164.0,23.0,21.753979,92.0,20.0,13.265654,23.0,1.0,1.282015,...,10.9,471.0,47.2,81.2,11.8,6.53,€388.30m,3,leicester,"[brentford, ipswich, leicester, west_ham]"
