# Système de recommandation Pierre

## L'objectif est de créer un système de recommandation de joueurs de Fantasy Premier League (FPL) à partir de leurs performances passées, stockées dans le dossier `data` du dépôt Git.

In [2]:
# Import des librairies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import re
import datetime
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Load every CSV file from the data folder into one DataFrame.
path = r'../data/'

# glob returns matches in an arbitrary, filesystem-dependent order; sorting makes
# the concatenated row order (and therefore the resulting index) reproducible.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    # Fail with an explicit message instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(f"No CSV files found in {path!r}")

# Lazily read each file; pd.concat consumes the generator once.
df_from_each_file = (pd.read_csv(f) for f in all_files)
df = pd.concat(df_from_each_file, ignore_index=True)
print(df.shape)
df.head()

(97052, 96)


Unnamed: 0,season_x,name,position,team_x,assists,bonus,bps,clean_sheets,creativity,element,...,ep_this,clean_sheets_per_90,expected_goals,expected_goal_involvements,penalties_order,now_cost_rank_type,value_season,creativity_rank_type,cost_change_start_fall,form_rank_type
0,2016-17,Aaron Cresswell,DEF,,0.0,0.0,0.0,0.0,0.0,454.0,...,,,,,,,,,,
1,2016-17,Aaron Lennon,MID,,0.0,0.0,6.0,0.0,0.3,142.0,...,,,,,,,,,,
2,2016-17,Aaron Ramsey,MID,,0.0,0.0,5.0,0.0,4.9,16.0,...,,,,,,,,,,
3,2016-17,Abdoulaye Doucouré,MID,,0.0,0.0,0.0,0.0,0.0,482.0,...,,,,,,,,,,
4,2016-17,Adam Forshaw,MID,,0.0,0.0,3.0,0.0,1.3,286.0,...,,,,,,,,,,


In [5]:
# Keep only rows with at least 900 minutes played (the equivalent of 10 full
# matches) so that the statistics describe players who actually play.
# NOTE(review): the printed shapes show 97052 rows shrinking to 100 — presumably
# most rows are per-gameweek records that can never reach 900 minutes; confirm
# that this is the intended population.
df = df.loc[df['minutes'].ge(900)]
print(df.shape)
df.head()

(100, 96)


Unnamed: 0,season_x,name,position,team_x,assists,bonus,bps,clean_sheets,creativity,element,...,ep_this,clean_sheets_per_90,expected_goals,expected_goal_involvements,penalties_order,now_cost_rank_type,value_season,creativity_rank_type,cost_change_start_fall,form_rank_type
96326,,Bukayo Saka,MID,,6.0,8.0,264.0,4.0,424.0,,...,4.1,0.38,4.15,7.48,1.0,5.0,8.3,3.0,-1.0,23.0
96327,,William Saliba,DEF,,1.0,5.0,250.0,5.0,53.8,,...,5.8,0.42,0.7,1.16,,9.0,10.2,58.0,-4.0,12.0
96332,,Benjamin White,DEF,,1.0,7.0,236.0,5.0,176.2,,...,3.0,0.48,0.56,1.38,,6.0,9.1,11.0,-3.0,51.0
96336,,Declan Rice,MID,,1.0,5.0,217.0,5.0,151.3,,...,5.8,0.44,0.9,1.64,,77.0,8.5,50.0,1.0,18.0
96346,,Matty Cash,DEF,,1.0,4.0,202.0,3.0,92.4,,...,2.2,0.26,3.43,4.74,,15.0,8.7,32.0,-7.0,39.0


In [8]:
# Keep only the identification and performance columns used in the rest of the notebook.
# .copy() detaches the result from the previously filtered frame, so the column
# assignments made in later cells (df['...'] = ...) write to an independent
# DataFrame instead of a slice (avoids SettingWithCopy issues, which would
# otherwise be hidden by the global warnings filter).
df = df[['name', 'team', 'position', 'total_points', 'minutes', 'goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat', 'ict_index']].copy()
df.head()

Unnamed: 0,name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index
96326,Bukayo Saka,Arsenal,MID,71.0,945.0,4.0,6.0,4.0,10.0,0.0,0.0,1.0,0.0,0.0,8.0,264.0,338.2,424.0,351.0,111.3
96327,William Saliba,Arsenal,DEF,55.0,1080.0,1.0,1.0,5.0,10.0,0.0,0.0,0.0,0.0,0.0,5.0,250.0,249.8,53.8,44.0,34.9
96332,Benjamin White,Arsenal,DEF,53.0,947.0,1.0,1.0,5.0,9.0,0.0,0.0,2.0,0.0,0.0,7.0,236.0,229.0,176.2,77.0,48.2
96336,Declan Rice,Arsenal,MID,46.0,1023.0,2.0,1.0,5.0,9.0,0.0,0.0,0.0,0.0,0.0,5.0,217.0,263.8,151.3,99.0,51.5
96346,Matty Cash,Aston Villa,DEF,45.0,1023.0,2.0,1.0,3.0,15.0,0.0,0.0,3.0,0.0,0.0,4.0,202.0,245.4,92.4,264.0,60.3


## Règles de la FPL

La Fantasy Premier League (FPL) est un jeu en ligne populaire qui permet aux fans de football de créer leur propre équipe de joueurs de la Premier League anglaise et de gagner des points en fonction des performances réelles de ces joueurs au cours de la saison.

1. *Composition de l'équipe :*
   - Vous disposez d'un budget fixe pour sélectionner 15 joueurs (2 gardiens, 5 défenseurs, 5 milieux de terrain, et 3 attaquants).
   - Vous pouvez avoir un maximum de 3 joueurs de la même équipe.

2. *Budget :*
   - Vous devez respecter un budget de 100 M£ pour sélectionner votre équipe.

3. *Capitaine et vice-capitaine :*
   - Vous choisissez un capitaine qui gagne le double de points pour la journée sélectionnée.
   - Si votre capitaine ne joue pas, le vice-capitaine reçoit le double de points à sa place.

4. *Transferts :*
   - Vous avez un transfert gratuit chaque semaine.

5. *Points :*
   - Les joueurs gagnent des points pour des actions telles que marquer des buts, effectuer des passes décisives, ne pas encaisser de but (« clean sheet », pour les gardiens et défenseurs), etc.
   - Les joueurs peuvent perdre des points pour des actions négatives comme les cartons jaunes, les cartons rouges, etc.

6. *Blessures et suspensions :*
   - Les joueurs blessés ou suspendus peuvent être remplacés sans coûter de points de transfert, à condition que le remplaçant soit dans la même catégorie de joueur.


In [9]:
# Add the number of points scored per 90 minutes played
# (total_points divided by the number of 90-minute units in `minutes`).
df['points_per_match'] = df['total_points'].div(df['minutes'].div(90))
df.head()

Unnamed: 0,name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,penalties_saved,...,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,points_per_match
96326,Bukayo Saka,Arsenal,MID,71.0,945.0,4.0,6.0,4.0,10.0,0.0,...,1.0,0.0,0.0,8.0,264.0,338.2,424.0,351.0,111.3,6.761905
96327,William Saliba,Arsenal,DEF,55.0,1080.0,1.0,1.0,5.0,10.0,0.0,...,0.0,0.0,0.0,5.0,250.0,249.8,53.8,44.0,34.9,4.583333
96332,Benjamin White,Arsenal,DEF,53.0,947.0,1.0,1.0,5.0,9.0,0.0,...,2.0,0.0,0.0,7.0,236.0,229.0,176.2,77.0,48.2,5.036959
96336,Declan Rice,Arsenal,MID,46.0,1023.0,2.0,1.0,5.0,9.0,0.0,...,0.0,0.0,0.0,5.0,217.0,263.8,151.3,99.0,51.5,4.046921
96346,Matty Cash,Aston Villa,DEF,45.0,1023.0,2.0,1.0,3.0,15.0,0.0,...,3.0,0.0,0.0,4.0,202.0,245.4,92.4,264.0,60.3,3.958944


In [11]:
# Goal: build a full squad within a 100M£ budget. The squad is made of
# 2 goalkeepers, 5 defenders, 5 midfielders and 3 forwards (`position` column).
# A per-player "price" column is derived here for the 15-player selection.
# NOTE(review): points_per_match is total_points / (minutes / 90), so this
# expression simplifies to 9000 / minutes — it depends only on playing time and
# is not an actual player cost. Confirm whether a real price column from the
# raw data should be used instead.
df['price_per_player'] = df['total_points'].rdiv(100).mul(df['points_per_match'])
df.head()

Unnamed: 0,name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,penalties_saved,...,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,points_per_match,price_per_player
96326,Bukayo Saka,Arsenal,MID,71.0,945.0,4.0,6.0,4.0,10.0,0.0,...,0.0,0.0,8.0,264.0,338.2,424.0,351.0,111.3,6.761905,9.52381
96327,William Saliba,Arsenal,DEF,55.0,1080.0,1.0,1.0,5.0,10.0,0.0,...,0.0,0.0,5.0,250.0,249.8,53.8,44.0,34.9,4.583333,8.333333
96332,Benjamin White,Arsenal,DEF,53.0,947.0,1.0,1.0,5.0,9.0,0.0,...,0.0,0.0,7.0,236.0,229.0,176.2,77.0,48.2,5.036959,9.503696
96336,Declan Rice,Arsenal,MID,46.0,1023.0,2.0,1.0,5.0,9.0,0.0,...,0.0,0.0,5.0,217.0,263.8,151.3,99.0,51.5,4.046921,8.797654
96346,Matty Cash,Aston Villa,DEF,45.0,1023.0,2.0,1.0,3.0,15.0,0.0,...,0.0,0.0,4.0,202.0,245.4,92.4,264.0,60.3,3.958944,8.797654


In [12]:
# Recompute the points-per-90-minutes column from total_points and minutes.
# NOTE(review): this repeats the expression of an earlier cell on the same
# columns; it appears redundant — confirm before removing.
minutes_as_matches = df['minutes'].div(90)
df['points_per_match'] = df['total_points'].div(minutes_as_matches)
del minutes_as_matches  # keep the notebook namespace unchanged
df.head()

Unnamed: 0,name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,penalties_saved,...,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,points_per_match,price_per_player
96326,Bukayo Saka,Arsenal,MID,71.0,945.0,4.0,6.0,4.0,10.0,0.0,...,0.0,0.0,8.0,264.0,338.2,424.0,351.0,111.3,6.761905,9.52381
96327,William Saliba,Arsenal,DEF,55.0,1080.0,1.0,1.0,5.0,10.0,0.0,...,0.0,0.0,5.0,250.0,249.8,53.8,44.0,34.9,4.583333,8.333333
96332,Benjamin White,Arsenal,DEF,53.0,947.0,1.0,1.0,5.0,9.0,0.0,...,0.0,0.0,7.0,236.0,229.0,176.2,77.0,48.2,5.036959,9.503696
96336,Declan Rice,Arsenal,MID,46.0,1023.0,2.0,1.0,5.0,9.0,0.0,...,0.0,0.0,5.0,217.0,263.8,151.3,99.0,51.5,4.046921,8.797654
96346,Matty Cash,Aston Villa,DEF,45.0,1023.0,2.0,1.0,3.0,15.0,0.0,...,0.0,0.0,4.0,202.0,245.4,92.4,264.0,60.3,3.958944,8.797654


In [15]:
# First pass: build a mini-team with one goalkeeper, one defender, one
# midfielder and one forward.
# Goalkeepers are ranked by ascending price_per_player; the top row is the one
# treated as the best value pick.
df_gk = (
    df.loc[df['position'] == 'GKP']
    .sort_values(by='price_per_player')
)
df_gk.head()

Unnamed: 0,name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,penalties_saved,...,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,points_per_match,price_per_player
97040,José Malheiro de Sá,Wolves,GKP,36.0,1080.0,0.0,0.0,1.0,20.0,0.0,...,0.0,42.0,4.0,205.0,314.8,0.0,0.0,31.5,3.0,8.333333
96981,Guglielmo Vicario,Spurs,GKP,47.0,1080.0,0.0,0.0,4.0,15.0,0.0,...,0.0,39.0,5.0,251.0,332.4,0.0,0.0,33.2,3.916667,8.333333
96929,Wes Foderingham,Sheffield Utd,GKP,26.0,1080.0,0.0,0.0,0.0,31.0,0.0,...,0.0,57.0,4.0,219.0,400.8,0.0,0.0,40.2,2.166667,8.333333
96847,Nick Pope,Newcastle,GKP,47.0,1080.0,0.0,0.0,5.0,13.0,0.0,...,0.0,36.0,1.0,236.0,269.4,0.0,4.0,27.3,3.916667,8.333333
96816,André Onana,Man Utd,GKP,43.0,1080.0,0.0,0.0,4.0,16.0,0.0,...,0.0,45.0,2.0,233.0,342.4,0.2,0.0,34.3,3.583333,8.333333


In [16]:
# Take the first row of the sorted goalkeeper frame as the selected goalkeeper
# (a Series holding that player's columns).
gk = df_gk.iloc[0, :]
gk

name                José Malheiro de Sá
team                             Wolves
position                            GKP
total_points                       36.0
minutes                          1080.0
goals_scored                        0.0
assists                             0.0
clean_sheets                        1.0
goals_conceded                     20.0
penalties_saved                     0.0
penalties_missed                    0.0
yellow_cards                        1.0
red_cards                           0.0
saves                              42.0
bonus                               4.0
bps                               205.0
influence                         314.8
creativity                          0.0
threat                              0.0
ict_index                          31.5
points_per_match                    3.0
price_per_player               8.333333
Name: 97040, dtype: object

In [19]:
# Defenders ranked by ascending price_per_player; the first row is the pick.
df_def = df[df['position'].eq('DEF')].sort_values('price_per_player', ascending=True)
defender = df_def.iloc[0, :]
defender

name                William Saliba
team                       Arsenal
position                       DEF
total_points                  55.0
minutes                     1080.0
goals_scored                   1.0
assists                        1.0
clean_sheets                   5.0
goals_conceded                10.0
penalties_saved                0.0
penalties_missed               0.0
yellow_cards                   0.0
red_cards                      0.0
saves                          0.0
bonus                          5.0
bps                          250.0
influence                    249.8
creativity                    53.8
threat                        44.0
ict_index                     34.9
points_per_match          4.583333
price_per_player          8.333333
Name: 96327, dtype: object

In [20]:
# Midfielders ranked by ascending price_per_player; the first row is the pick.
df_mid = (
    df.loc[df['position'] == 'MID']
    .sort_values(by='price_per_player')
)
midfielder = df_mid.iloc[0, :]
midfielder

name                Bruno Borges Fernandes
team                               Man Utd
position                               MID
total_points                          55.0
minutes                             1080.0
goals_scored                           3.0
assists                                2.0
clean_sheets                           4.0
goals_conceded                        16.0
penalties_saved                        0.0
penalties_missed                       0.0
yellow_cards                           4.0
red_cards                              0.0
saves                                  0.0
bonus                                 10.0
bps                                  218.0
influence                            343.8
creativity                           483.8
threat                               257.0
ict_index                            108.4
points_per_match                  4.583333
price_per_player                  8.333333
Name: 96789, dtype: object

In [21]:
# Forwards ranked by ascending price_per_player; the first row is the pick.
df_fwd = df[df['position'].eq('FWD')].sort_values(by='price_per_player', ascending=True)
forward = df_fwd.iloc[0, :]
forward

name                Dominic Solanke
team                    Bournemouth
position                        FWD
total_points                   56.0
minutes                      1068.0
goals_scored                    6.0
assists                         1.0
clean_sheets                    2.0
goals_conceded                 25.0
penalties_saved                 0.0
penalties_missed                0.0
yellow_cards                    1.0
red_cards                       0.0
saves                           0.0
bonus                           6.0
bps                           169.0
influence                     278.2
creativity                    154.7
threat                        424.0
ict_index                      85.9
points_per_match           4.719101
price_per_player           8.426966
Name: 96401, dtype: object

In [22]:
# Assemble the four first picks (goalkeeper, defender, midfielder, forward)
# into a single frame, one row per player.
team = pd.DataFrame(
    data=[gk, defender, midfielder, forward],
)
team

Unnamed: 0,name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,penalties_saved,...,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,points_per_match,price_per_player
97040,José Malheiro de Sá,Wolves,GKP,36.0,1080.0,0.0,0.0,1.0,20.0,0.0,...,0.0,42.0,4.0,205.0,314.8,0.0,0.0,31.5,3.0,8.333333
96327,William Saliba,Arsenal,DEF,55.0,1080.0,1.0,1.0,5.0,10.0,0.0,...,0.0,0.0,5.0,250.0,249.8,53.8,44.0,34.9,4.583333,8.333333
96789,Bruno Borges Fernandes,Man Utd,MID,55.0,1080.0,3.0,2.0,4.0,16.0,0.0,...,0.0,0.0,10.0,218.0,343.8,483.8,257.0,108.4,4.583333,8.333333
96401,Dominic Solanke,Bournemouth,FWD,56.0,1068.0,6.0,1.0,2.0,25.0,0.0,...,0.0,0.0,6.0,169.0,278.2,154.7,424.0,85.9,4.719101,8.426966


In [23]:
# Add a per-player 'price' column to the team frame.
# NOTE(review): total_points / points_per_match equals minutes / 90, so this
# value is (minutes / 90) * 100 (1200.0 for a player with 1080 minutes). It is
# not a monetary price and is not summed over the team — confirm the intent.
team['price'] = team['total_points'].div(team['points_per_match']).mul(100)
team

Unnamed: 0,name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,penalties_saved,...,saves,bonus,bps,influence,creativity,threat,ict_index,points_per_match,price_per_player,price
97040,José Malheiro de Sá,Wolves,GKP,36.0,1080.0,0.0,0.0,1.0,20.0,0.0,...,42.0,4.0,205.0,314.8,0.0,0.0,31.5,3.0,8.333333,1200.0
96327,William Saliba,Arsenal,DEF,55.0,1080.0,1.0,1.0,5.0,10.0,0.0,...,0.0,5.0,250.0,249.8,53.8,44.0,34.9,4.583333,8.333333,1200.0
96789,Bruno Borges Fernandes,Man Utd,MID,55.0,1080.0,3.0,2.0,4.0,16.0,0.0,...,0.0,10.0,218.0,343.8,483.8,257.0,108.4,4.583333,8.333333,1200.0
96401,Dominic Solanke,Bournemouth,FWD,56.0,1068.0,6.0,1.0,2.0,25.0,0.0,...,0.0,6.0,169.0,278.2,154.7,424.0,85.9,4.719101,8.426966,1186.666667


In [24]:
# Add a 'points' column to the team frame.
# It is a per-player copy of total_points; no team-level sum is computed here.
team['points'] = team.loc[:, 'total_points']
team

Unnamed: 0,name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,penalties_saved,...,bonus,bps,influence,creativity,threat,ict_index,points_per_match,price_per_player,price,points
97040,José Malheiro de Sá,Wolves,GKP,36.0,1080.0,0.0,0.0,1.0,20.0,0.0,...,4.0,205.0,314.8,0.0,0.0,31.5,3.0,8.333333,1200.0,36.0
96327,William Saliba,Arsenal,DEF,55.0,1080.0,1.0,1.0,5.0,10.0,0.0,...,5.0,250.0,249.8,53.8,44.0,34.9,4.583333,8.333333,1200.0,55.0
96789,Bruno Borges Fernandes,Man Utd,MID,55.0,1080.0,3.0,2.0,4.0,16.0,0.0,...,10.0,218.0,343.8,483.8,257.0,108.4,4.583333,8.333333,1200.0,55.0
96401,Dominic Solanke,Bournemouth,FWD,56.0,1068.0,6.0,1.0,2.0,25.0,0.0,...,6.0,169.0,278.2,154.7,424.0,85.9,4.719101,8.426966,1186.666667,56.0


In [25]:
# Preparation for completing the squad.

# Position slots.
# NOTE(review): this list holds 14 entries with a single 'GKP', whereas the
# squad described in this notebook has 15 players including 2 goalkeepers —
# confirm whether a second 'GKP' is missing.
positions = ['GKP'] + ['DEF'] * 5 + ['MID'] * 5 + ['FWD'] * 3

# Names of the players already selected (the four first picks).
selected_players = [pick['name'] for pick in (gk, defender, midfielder, forward)]
selected_players


['José Malheiro de Sá',
 'William Saliba',
 'Bruno Borges Fernandes',
 'Dominic Solanke']

In [33]:
# Completing the squad.
# Helper that picks the next not-yet-selected player for a position.
def find_best_player(df, position, selected=None):
    """Pick the first unselected player of ``position`` by ascending price_per_player.

    Parameters
    ----------
    df : pandas.DataFrame
        Player table with at least the columns 'name', 'position' and
        'price_per_player'.
    position : str
        Value of the 'position' column to filter on (e.g. 'GKP', 'DEF').
    selected : list of str, optional
        Names already picked. The chosen name is appended to this list in
        place. When omitted, the notebook-level ``selected_players`` list is
        used, which is the original behaviour.

    Returns
    -------
    pandas.DataFrame or None
        A one-row frame holding the chosen player, or None when every player
        of that position is already selected (or none exists).
    """
    if selected is None:
        # Resolved at call time so the current notebook-level list is used.
        selected = selected_players

    candidates = df[df['position'] == position].sort_values(
        by=['price_per_player'], ascending=True
    )
    for row_number, player in enumerate(candidates['name']):
        if player not in selected:
            selected.append(player)
            # Select by position so exactly one row is returned, even if
            # several rows share the same name.
            return candidates.iloc[[row_number]]
    return None

# Each call appends the chosen player's name to selected_players as a side
# effect, so the order of the calls determines who gets picked.
# Only the value of the last call is displayed by the notebook; the frames
# returned by the earlier calls are discarded.
# One additional defender.
find_best_player(df, 'DEF')
# Four additional midfielders.
find_best_player(df, 'MID')
find_best_player(df, 'MID')
find_best_player(df, 'MID')
find_best_player(df, 'MID')
# Three additional forwards; the last one is the cell's displayed result.
find_best_player(df, 'FWD')
find_best_player(df, 'FWD')
find_best_player(df, 'FWD')

Unnamed: 0,name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,penalties_saved,...,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,points_per_match,price_per_player
96726,Carlton Morris,Luton,FWD,45.0,1010.0,3.0,1.0,1.0,21.0,0.0,...,0.0,0.0,7.0,119.0,153.0,108.3,306.0,56.5,4.009901,8.910891


In [34]:
# Rebuild the full 15-player squad from scratch: 2 goalkeepers, 5 defenders,
# 5 midfielders and 3 forwards, picked in that order.
# The list of already-selected names is emptied first; while the new list is
# being built, find_best_player still appends names to that emptied list.
# NOTE(review): after this statement selected_players is rebound to a list of
# one-row DataFrames instead of names, so later calls to find_best_player would
# compare names against DataFrames — presumably unintended; confirm.
selected_players.clear()
selected_players = [
    find_best_player(df, squad_position)
    for squad_position in ['GKP'] * 2 + ['DEF'] * 5 + ['MID'] * 5 + ['FWD'] * 3
]
selected_players

[                      name    team position  total_points  minutes  \
 97040  José Malheiro de Sá  Wolves      GKP          36.0   1080.0   
 
        goals_scored  assists  clean_sheets  goals_conceded  penalties_saved  \
 97040           0.0      0.0           1.0            20.0              0.0   
 
        ...  red_cards  saves  bonus    bps  influence  creativity  threat  \
 97040  ...        0.0   42.0    4.0  205.0      314.8         0.0     0.0   
 
        ict_index  points_per_match  price_per_player  
 97040       31.5               3.0          8.333333  
 
 [1 rows x 22 columns],
                     name   team position  total_points  minutes  goals_scored  \
 96981  Guglielmo Vicario  Spurs      GKP          47.0   1080.0           0.0   
 
        assists  clean_sheets  goals_conceded  penalties_saved  ...  red_cards  \
 96981      0.0           4.0            15.0              0.0  ...        0.0   
 
        saves  bonus    bps  influence  creativity  threat  ict_in