In [10]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

test


In [11]:
data = pd.read_csv('../data/2023-24/cleaned_players.csv')

df = pd.DataFrame(data)

In [12]:
print(df.columns)

Index(['first_name', 'second_name', 'goals_scored', 'assists', 'total_points',
       'minutes', 'goals_conceded', 'creativity', 'influence', 'threat',
       'bonus', 'bps', 'ict_index', 'clean_sheets', 'red_cards',
       'yellow_cards', 'selected_by_percent', 'now_cost', 'element_type'],
      dtype='object')


In [13]:
# Combinez certaines caractéristiques en une seule chaîne de texte
# Combine all columns into a single 'Caracteristiques' column
df['Caracteristiques'] = (
    df['goals_scored'].astype(str) + ' ' +
    df['assists'].astype(str) + 'b ' +
    df['total_points'].astype(str) + 'p ' +
    df['minutes'].astype(str) + 'cj ' +
    df['goals_conceded'].astype(str) + 'gc ' +
    df['creativity'].astype(str) + 'cr ' +
    df['influence'].astype(str) + 'inf ' +
    df['threat'].astype(str) + 'th ' +
    df['bonus'].astype(str) + 'bns ' +
    df['bps'].astype(str) + 'bps ' +
    df['ict_index'].astype(str) + 'ict ' +
    df['clean_sheets'].astype(str) + 'cs ' +
    df['red_cards'].astype(str) + 'rc ' +
    df['yellow_cards'].astype(str) + 'yc ' +
    df['selected_by_percent'].astype(str) + 'sbp ' +
    df['now_cost'].astype(str) + 'nc ' +
    df['element_type'].astype(str) + 'et'
)

In [14]:
# Utilisez TF-IDF pour créer une matrice de caractéristiques
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Caracteristiques'])

In [15]:
# Calcul de la similarité cosinus entre les joueurs
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [16]:
# Display the cosine similarity matrix
print(cosine_sim)

[[1.         0.51363111 0.20633454 ... 0.5765664  0.71901553 0.04825981]
 [0.51363111 1.         0.10761826 ... 0.54656044 0.5351279  0.04574825]
 [0.20633454 0.10761826 1.         ... 0.1567325  0.18358668 0.14489915]
 ...
 [0.5765664  0.54656044 0.1567325  ... 1.         0.6006972  0.10967889]
 [0.71901553 0.5351279  0.18358668 ... 0.6006972  1.         0.05027961]
 [0.04825981 0.04574825 0.14489915 ... 0.10967889 0.05027961 1.        ]]


In [17]:
# Fonction pour obtenir des recommandations
def get_recommendations(player, cosine_similarities, df):
    name_parts = player.split()
     # Assuming you want to match both first_name and second_name
    first_name_match = df['first_name'] == name_parts[0]
    second_name_match = df['second_name'] == name_parts[1]

    # Retrieve the index of the row where both first and second names match
    player_index = df.index[(first_name_match) & (second_name_match)]
    print(player_index[0]) 
     
    similar_players = list(enumerate(cosine_similarities[player_index[0]]))
    similar_players = sorted(similar_players, key=lambda x: x[1], reverse=True)
    similar_players = similar_players[1:4]  # Exclude the player itself
    similar_player_indices = [x[0] for x in similar_players]
    print(similar_player_indices)
    return df.iloc[similar_player_indices]

In [18]:
# Exemple d'utilisation
user_choice = 'Folarin Balogun'
recommendations = get_recommendations(user_choice, cosine_sim, df)
#print(f"Recommandations pour {user_choice}: {', '.join(recommendations)}")
print(recommendations)

0
[423, 649, 714]
    first_name      second_name  goals_scored  assists  total_points  minutes  \
423      Louie           Watson             0        0             0        0   
649     Tanguy  Ndombélé Alvaro             0        0             0        0   
714      Bruno    Cavaco Jordão             0        0             0        0   

     goals_conceded  creativity  influence  threat  bonus  bps  ict_index  \
423               0         0.0        0.0     0.0      0    0        0.0   
649               0         0.0        0.0     0.0      0    0        0.0   
714               0         0.0        0.0     0.0      0    0        0.0   

     clean_sheets  red_cards  yellow_cards  selected_by_percent  now_cost  \
423             0          0             0                  0.2        44   
649             0          0             0                  0.2        44   
714             0          0             0                  0.2        44   

    element_type                       

In [19]:
def get_recommendations(player, cosine_similarities, df, num_recommendations=5):
    name_parts = player.split()
    
    # Assuming you want to match both first_name and second_name
    first_name_match = df['first_name'] == name_parts[0]
    second_name_match = df['second_name'] == name_parts[1]

    # Retrieve the index of the row where both first and second names match
    player_index = df.index[(first_name_match) & (second_name_match)][0]

    # Get the element_type of the target player
    target_position = df.loc[player_index, 'element_type']

    # Calculate cosine similarities for the target player
    similar_players = list(enumerate(cosine_similarities[player_index]))
    
    # Sort the players by similarity in descending order
    similar_players = sorted(similar_players, key=lambda x: x[1], reverse=True)
    
    # Exclude the target player itself
    similar_players = similar_players[1:]

    # Filter similar players based on the same position (element_type)
    similar_player_indices = [x[0] for x in similar_players if df.loc[x[0], 'element_type'] == target_position]
    
    # Take only the top num_recommendations recommendations
    similar_player_indices = similar_player_indices[:num_recommendations]
    
    # Return the DataFrame with recommended players
    return df.iloc[similar_player_indices]


In [20]:

player_name = "Folarin Balogun"
recommendations = get_recommendations(player_name, cosine_sim, df)
print(recommendations)


    first_name second_name  goals_scored  assists  total_points  minutes  \
581        Sam    Surridge             0        0             0        0   
667       Dane    Scarlett             0        0             0        0   
561   Emmanuel      Dennis             0        0             0        0   
583      Hwang       Ui-jo             0        0             0        0   
496      Shola   Shoretire             0        0             0        0   

     goals_conceded  creativity  influence  threat  bonus  bps  ict_index  \
581               0         0.0        0.0     0.0      0    0        0.0   
667               0         0.0        0.0     0.0      0    0        0.0   
561               0         0.0        0.0     0.0      0    0        0.0   
583               0         0.0        0.0     0.0      0    0        0.0   
496               0         0.0        0.0     0.0      0    0        0.0   

     clean_sheets  red_cards  yellow_cards  selected_by_percent  now_cost  \
581

In [22]:

# Example of usage
user_choice = 'Max	Aarons'
recommendations = get_recommendations(user_choice, cosine_sim, df, num_recommendations=5)

# Print the top 5 recommendations
print(f"Top 5 Recommendations for {user_choice}:")
print(recommendations)



Top 5 Recommendations for Max	Aarons:
    first_name      second_name  goals_scored  assists  total_points  minutes  \
94       Milos           Kerkez             0        0            13      853   
484     Victor         Lindelöf             0        1            20      673   
114       Rico            Henry             0        1            13      401   
449      Rúben  Gato Alves Dias             0        0            27      735   
107     Nathan          Collins             1        1            39      909   

     goals_conceded  creativity  influence  threat  bonus  bps  ict_index  \
94               24        64.9      118.4    20.0      0   87       20.3   
484               9        85.3       99.2    16.0      0  120       20.1   
114               5        67.9       72.8    17.0      0   74       15.7   
449               6        45.0      142.8    69.0      1  137       25.8   
107              12        36.8      242.6    93.0      3  205       37.4   

     clean_s