In [425]:
import pandas as pd
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [426]:
# Chargement et prétraitement des données
def load_and_preprocess_data(file_path):
    original_data = pd.read_csv(file_path, low_memory=False)
    data = original_data.copy()
    data.drop_duplicates(inplace=True)
    data.dropna(inplace=True)
    return data

In [451]:
# Ingénierie des caractéristiques
def feature_engineering(data):
    selected_features = ['name', 'position', 'team_x', 'value', 'goals_scored', 'assists', 'clean_sheets','total_points', 'yellow_cards', 'red_cards', 'total_points']
    data = data[selected_features].copy()
    data['total_cards'] = data['yellow_cards'] + data['red_cards']
    return data

In [452]:
# Division des données en ensembles d'entraînement et de test
def split_data(data):
    X = data.drop(columns=['goals_scored', 'assists', 'total_points'])
    y = data[['goals_scored', 'assists', 'total_points']]
    X_encoded = pd.get_dummies(X)
    X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)
    dtrain = xgb.DMatrix(data=X_train, label=y_train, enable_categorical=True)
    dtest = xgb.DMatrix(data=X_test, label=y_test, enable_categorical=True)
    return dtrain, dtest, y_test

In [453]:
# Entraînement du modèle 
def train_XGBoost(dtrain):
    params = {'objective': 'reg:squarederror'}
    model = xgb.train(params, dtrain)
    return model

In [454]:
# Prédiction des performances des joueurs pour l'ensemble de test
def predict_player_performance(model, player_features, budget):
    player_features_array = player_features.values.reshape(1, -1)
    dmatrix = xgb.DMatrix(player_features_array)
    predicted_performance = model.predict(dmatrix)
    total_cost = 0  # Adapter cette logique en fonction de votre nouvelle approche
    if total_cost > budget:
        return "Le joueur sélectionné dépasse le budget."
    else:
        return predicted_performance

In [455]:
# Recommander les meilleurs joueurs disponibles en respectant les contraintes
def recommend_players(model, data, budget, team_constraints, captain=True):
    available_players = data[(data['value'] <= budget) & (data.groupby('team_x')['name'].transform('count') <= team_constraints)]
    predicted_performances = model.predict(available_players.drop(columns=['goals_scored', 'assists']))
    available_players['predicted_goals'] = predicted_performances[:, 0]
    available_players['predicted_assists'] = predicted_performances[:, 1]
    available_players['total_predicted_points'] = 2 * available_players['predicted_goals'] + available_players['predicted_assists']
    sorted_players = available_players.sort_values(by='total_predicted_points', ascending=False)
    captain_player = sorted_players.iloc[0]
    vice_captain_player = sorted_players.iloc[1]
    return sorted_players, captain_player, vice_captain_player

In [456]:
# Fonction pour effectuer un transfert tout en respectant les contraintes
def make_transfer(current_team, new_player, budget, team_constraints):
    if new_player['value'] <= budget and current_team.groupby('team_x')['name'].transform('count').max() < team_constraints:
        updated_team = current_team.append(new_player)
        updated_budget = budget - new_player['value']
        return updated_team, updated_budget
    else:
        return current_team, budget

In [457]:
# Fonction pour effectuer des transferts hebdomadaires
def weekly_transfers(current_team, budget, team_constraints, transfer_player, new_player):
    updated_team, updated_budget = make_transfer(current_team, new_player, budget, team_constraints)
    return updated_team, updated_budget

In [458]:
# Fonction pour évaluer l'algorithme en utilisant des métriques
def evaluate_algorithm(predictions, actual):
    correct_predictions = predictions == actual
    accuracy = correct_predictions.mean()
    return accuracy

In [459]:
# Chemin vers le fichier CSV
file_path = "/Users/melusinecaillard/RSPL/data/cleaned_merged_seasons.csv"

# Chargement et prétraitement des données
data = load_and_preprocess_data(file_path)
data['name'] = data['name'].astype('category')
data['position'] = data['position'].astype('category')
data['team_x'] = data['team_x'].astype('category')
data = feature_engineering(data)

In [460]:
print(data.head())

                     name position         team_x  value  goals_scored  \
19852      Aaron Connolly      FWD       Brighton     55             0   
19853     Aaron Cresswell      DEF       West Ham     50             0   
19854          Aaron Mooy      MID       Brighton     50             0   
19855      Aaron Ramsdale       GK  Sheffield Utd     50             0   
19856  Abdoulaye Doucouré      MID        Everton     55             0   

       assists  clean_sheets  total_points  yellow_cards  red_cards  \
19852        0             0             1             0          0   
19853        0             0             1             0          0   
19854        0             0             0             0          0   
19855        0             0             1             0          0   
19856        0             1             3             0          0   

       total_points  total_cards  
19852             1            0  
19853             1            0  
19854             0    

In [461]:
print(data.dtypes)

name            category
position        category
team_x          category
value              int64
goals_scored       int64
assists            int64
clean_sheets       int64
total_points       int64
yellow_cards       int64
red_cards          int64
total_points       int64
total_cards        int64
dtype: object


In [462]:
print(data.info())

<class 'pandas.core.frame.DataFrame'>
Index: 76317 entries, 19852 to 96168
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   name          76317 non-null  category
 1   position      76317 non-null  category
 2   team_x        76317 non-null  category
 3   value         76317 non-null  int64   
 4   goals_scored  76317 non-null  int64   
 5   assists       76317 non-null  int64   
 6   clean_sheets  76317 non-null  int64   
 7   total_points  76317 non-null  int64   
 8   yellow_cards  76317 non-null  int64   
 9   red_cards     76317 non-null  int64   
 10  total_points  76317 non-null  int64   
 11  total_cards   76317 non-null  int64   
dtypes: category(3), int64(9)
memory usage: 6.2 MB
None


In [463]:
# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_test = split_data(data)

In [464]:
# Entraînement du modèle de régression XGBoost
model = train_XGBoost(X_train)

In [465]:
# Prédiction des performances des joueurs pour l'ensemble de test
y_pred = model.predict(X_test)

In [466]:
# Évaluation de la performance du modèle
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 1.6439157448406119


In [467]:
print(player_features_example)

<xgboost.core.DMatrix object at 0x12d393810>


In [468]:
# Exemple de joueur à prédire
player_features_example = dtest

# Prédiction des performances du joueur
predicted_performance_example = model.predict(player_features_example)

print("Prédiction de performances du joueur:", predicted_performance_example)


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/melusinecaillard/RSPL/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3548, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/75/4vn9ylqs28df9mqtqpgtm9p40000gn/T/ipykernel_8266/2822279563.py", line 5, in <module>
    predicted_performance_example = model.predict(player_features_example)
                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/melusinecaillard/RSPL/venv/lib/python3.11/site-packages/xgboost/core.py", line 2271, in predict
    fn = data.feature_names
  File "/Users/melusinecaillard/RSPL/venv/lib/python3.11/site-packages/xgboost/core.py", line 2969, in _validate_features
ValueError: feature_names mismatch: ['value', 'clean_sheets', 'yellow_cards', 'red_cards', 'total_cards', 'name_Aaron Connolly', 'name_Aaron Cresswell', 'name_Aaron Hickey', 'name_Aaron Lennon', 'name_Aaron Mooy', 'name_Aaron Ramsdale', 'name_Aaron Ramsey', 'name

In [469]:
# Index du joueur dont vous voulez obtenir les performances prédites
index_joueur = 0

# Performances prédites du joueur spécifique
performances_joueur = predicted_performance_example[index_joueur]

print("Performances prédites du joueur:", performances_joueur)


Performances prédites du joueur: [0.0313129  0.03253323]


In [470]:
# Convertir les données d'origine en DataFrame pandas
data_df = load_and_preprocess_data(file_path)

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = split_data(data_df)

# Récupérer le nom des joueurs dans les données de test
player_names = X_test['name']

# Recherche de l'index du joueur dont vous voulez obtenir les performances prédites
nom_joueur = "Olivier Giroud"
index_joueur = player_names[player_names == nom_joueur].index

if len(index_joueur) == 0:
    print("Le joueur spécifié n'a pas été trouvé dans les données de test.")
else:
    index_joueur = index_joueur[0]

    # Performances prédites du joueur spécifié
    performances_joueur = predicted_performance_example[index_joueur]

    print("Nom du joueur:", nom_joueur)
    print("Performances prédites du joueur:", performances_joueur)


ValueError: not enough values to unpack (expected 4, got 3)

In [471]:
# Recommander les joueurs en respectant les contraintes
budget = 100
team_constraints = 3
recommended_players, captain_player, vice_captain_player = recommend_players(model, data, budget, team_constraints)
print("Recommandations de joueurs:")
print(recommended_players.head())
print("\nCapitaine recommandé:")
print(captain_player)
print("\nVice-capitaine recommandé:")
print(vice_captain_player)

  available_players = data[(data['value'] <= budget) & (data.groupby('team_x')['name'].transform('count') <= team_constraints)]


TypeError: ('Expecting data to be a DMatrix object, got: ', <class 'pandas.core.frame.DataFrame'>)

In [472]:
# Exemple d'équipe actuelle
current_team = pd.DataFrame([["Player1", "Team1", 10]], columns=['name', 'team', 'value'])
transfer_player = {"player_name": "Player1", "team": "Team1", "value": 10}
new_player = {"player_name": "Player2", "team": "Team2", "value": 8}
updated_team, updated_budget = weekly_transfers(current_team, budget, team_constraints, transfer_player, new_player)
print("Équipe mise à jour après les transferts:")
print(updated_team)
print("\nBudget restant après les transferts:", updated_budget)

KeyError: 'team_x'

In [473]:
# Évaluer l'algorithme
accuracy = evaluate_algorithm(y_pred, y_test)
print("Précision de l'algorithme:", accuracy)

Précision de l'algorithme: goals_scored    0.0
assists         0.0
total_points    0.0
total_points    0.0
dtype: float64
