In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns

In [3]:
# Load the data used for training
df_for_train = pd.read_csv('../Donnees/tennis_data5ans.csv')

# Load the data used for testing
df_for_test = pd.read_csv('../Donnees/atp_matches_2024.csv')

In [10]:
# Build the training dataframe with two rows per match:
# one row holding the winner's statistics and one holding the loser's.

def create_match_rows_train(df):
    """Turn a match-level frame into a labelled per-player frame.

    Every input row yields two output rows. The first ``len(df)`` output
    rows are built from the winner-side columns and get ``result = 1``;
    the following ``len(df)`` rows are built from the loser-side columns
    and get ``result = 0``.

    Args:
        df: DataFrame containing every winner-side and loser-side source
            column named in the mapping table below. A missing column
            raises ``KeyError``.

    Returns:
        A new DataFrame with the eleven ``player_*`` columns followed by
        ``result``, indexed 0..2*len(df)-1. ``df`` itself is not modified.
    """
    # (output column, winner-side source column, loser-side source column)
    column_map = [
        ('player_rank', 'winner_rank', 'loser_rank'),
        ('player_age', 'winner_age', 'loser_age'),
        ('player_ace', 'w_ace', 'l_ace'),
        ('player_df', 'w_df', 'l_df'),
        ('player_svpt', 'w_svpt', 'l_svpt'),
        ('player_1stIn', 'w_1stIn', 'l_1stIn'),
        ('player_1stWon', 'w_1stWon', 'l_1stWon'),
        ('player_2ndWon', 'w_2ndWon', 'l_2ndWon'),
        ('player_SvGms', 'w_SvGms', 'l_SvGms'),
        ('player_bpSaved', 'w_bpSaved', 'l_bpSaved'),
        ('player_bpFaced', 'w_bpFaced', 'l_bpFaced'),
    ]

    # Winner rows, labelled 1.
    winner_rows = pd.DataFrame(
        {target: df[winner_col] for target, winner_col, _ in column_map}
    )
    winner_rows['result'] = 1

    # Loser rows, labelled 0.
    loser_rows = pd.DataFrame(
        {target: df[loser_col] for target, _, loser_col in column_map}
    )
    loser_rows['result'] = 0

    # Stack winners above losers and discard the duplicated source index.
    stacked = pd.concat([winner_rows, loser_rows])
    return stacked.reset_index(drop=True)

# Build the test dataframe with two rows per match:
# one row from the winner-side statistics and one from the loser-side
# statistics, with the result column left empty.

def create_match_rows_test(df):
    """Turn a match-level frame into an unlabelled per-player frame.

    Every input row yields two output rows. The first ``len(df)`` output
    rows ("player 1") are built from the winner-side columns and the
    following ``len(df)`` rows ("player 2") from the loser-side columns.
    The ``result`` column is set to ``None`` on every row.

    Args:
        df: DataFrame containing every winner-side and loser-side source
            column named in the mapping table below. A missing column
            raises ``KeyError``.

    Returns:
        A new DataFrame with the eleven ``player_*`` columns followed by
        an all-``None`` ``result`` column, indexed 0..2*len(df)-1.
        ``df`` itself is not modified.
    """
    # (output column, player-1 source column, player-2 source column)
    column_map = [
        ('player_rank', 'winner_rank', 'loser_rank'),
        ('player_age', 'winner_age', 'loser_age'),
        ('player_ace', 'w_ace', 'l_ace'),
        ('player_df', 'w_df', 'l_df'),
        ('player_svpt', 'w_svpt', 'l_svpt'),
        ('player_1stIn', 'w_1stIn', 'l_1stIn'),
        ('player_1stWon', 'w_1stWon', 'l_1stWon'),
        ('player_2ndWon', 'w_2ndWon', 'l_2ndWon'),
        ('player_SvGms', 'w_SvGms', 'l_SvGms'),
        ('player_bpSaved', 'w_bpSaved', 'l_bpSaved'),
        ('player_bpFaced', 'w_bpFaced', 'l_bpFaced'),
    ]

    # Player 1: the winner-side columns of each match.
    first_player_rows = pd.DataFrame(
        {target: df[first_col] for target, first_col, _ in column_map}
    )
    first_player_rows['result'] = None  # no outcome is recorded for test data

    # Player 2: the loser-side columns of each match.
    second_player_rows = pd.DataFrame(
        {target: df[second_col] for target, _, second_col in column_map}
    )
    second_player_rows['result'] = None  # no outcome is recorded for test data

    # Stack player-1 rows above player-2 rows and discard the source index.
    stacked = pd.concat([first_player_rows, second_player_rows])
    return stacked.reset_index(drop=True)

In [11]:
# NOTE: this rebinds df_for_train from the raw match table to the per-player
# table, so re-running the cell without reloading the CSV raises KeyError
# (the 'winner_rank' column no longer exists in the rebound frame).
df_for_train = create_match_rows_train(df_for_train)
# DataFrame.info() writes its report to stdout itself and returns None;
# wrapping it in print() only appended a stray "None" line to the output.
df_for_train.info()
print(df_for_train.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57356 entries, 0 to 57355
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   player_rank     56966 non-null  float64
 1   player_age      57330 non-null  float64
 2   player_ace      57356 non-null  float64
 3   player_df       57356 non-null  float64
 4   player_svpt     57356 non-null  float64
 5   player_1stIn    57356 non-null  float64
 6   player_1stWon   57356 non-null  float64
 7   player_2ndWon   57356 non-null  float64
 8   player_SvGms    57356 non-null  float64
 9   player_bpSaved  57356 non-null  float64
 10  player_bpFaced  57356 non-null  float64
 11  result          57356 non-null  int64  
dtypes: float64(11), int64(1)
memory usage: 5.3 MB
None
   player_rank  player_age  player_ace  player_df  player_svpt  player_1stIn  \
0         47.0        25.6         9.0        2.0         82.0          49.0   
1         54.0        21.2         5.0       

In [12]:
# NOTE: this rebinds df_for_test from the raw match table to the per-player
# table, so re-running the cell without reloading the CSV raises KeyError
# (the 'winner_rank' column no longer exists in the rebound frame).
df_for_test = create_match_rows_test(df_for_test)
# DataFrame.info() writes its report to stdout itself and returns None;
# wrapping it in print() only appended a stray "None" line to the output.
df_for_test.info()
print(df_for_test.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2834 entries, 0 to 2833
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   player_rank     2799 non-null   float64
 1   player_age      2830 non-null   float64
 2   player_ace      2834 non-null   float64
 3   player_df       2834 non-null   float64
 4   player_svpt     2834 non-null   float64
 5   player_1stIn    2834 non-null   float64
 6   player_1stWon   2834 non-null   float64
 7   player_2ndWon   2834 non-null   float64
 8   player_SvGms    2834 non-null   float64
 9   player_bpSaved  2834 non-null   float64
 10  player_bpFaced  2834 non-null   float64
 11  result          0 non-null      object 
dtypes: float64(11), object(1)
memory usage: 265.8+ KB
None
   player_rank  player_age  player_ace  player_df  player_svpt  player_1stIn  \
0         14.0        32.6         8.0        2.0         74.0          52.0   
1          8.0        20.6         7.0     