In [1]:
import pandas as pd
import os
from sklearn.ensemble import RandomForestRegressor

# Dossier contenant les données
data_dir = 'C:/Users/bouft/OneDrive/Bureau/FOOTBALL-main/data/processed'

# Listes des ligues et des fichiers
leagues = ['bundesliga', 'laliga', 'ligue1', 'premierleague', 'seriea']
all_data = []

# Chargement des données
for league in leagues:
    league_dir = os.path.join(data_dir, league)
    for filename in os.listdir(league_dir):
        filepath = os.path.join(league_dir, filename)
        if league == 'bundesliga':
            df = pd.read_csv(filepath, names=['Played', 'Wins', 'Draws', 'Losses', 'GoalsFor', 'GoalDifference', 'Points'])
            df['GoalsAgainst'] = 0  # Ajouter une colonne fictive pour harmonisation
        elif league == 'laliga':
            df = pd.read_csv(filepath, names=['Position', 'Team', 'Points', 'Played', 'GoalDifference', 'Wins', 'Draws', 'Losses', 'GoalsFor', 'GoalsAgainst'])
        elif league == 'ligue1':
            df = pd.read_csv(filepath, names=['Position', 'Team', 'Points', 'Played', 'Wins', 'Draws', 'Losses', 'GoalsFor', 'GoalsAgainst', 'GoalDifference', 'Sign'])
        elif league == 'premierleague':
            df = pd.read_csv(filepath, names=['Position', 'Team', 'Played', 'Wins', 'Draws', 'Losses', 'GoalsFor', 'GoalsAgainst', 'GoalDifference', 'Points'])
            df['Position'] = df['Position'].str.split().str[0]
        elif league == 'seriea':
            df = pd.read_csv(filepath, names=['Position', 'Team', 'Points', 'Played', 'Wins', 'Draws', 'Losses', 'GoalsFor', 'GoalsAgainst', 'GoalDifference'])
        
        year = filename.split('-')[-1].split('_')[0]
        
        df['Year'] = year
        df['League'] = league
        all_data.append(df)

data = pd.concat(all_data, ignore_index=True)

expected_columns = ['Position', 'Team', 'Points', 'Played', 'Wins', 'Draws', 'Losses', 'GoalsFor', 'GoalsAgainst', 'GoalDifference', 'League', 'Year']
data = data.reindex(columns=expected_columns)

data.fillna(0, inplace=True)
data = data[data['Points'] != 'PointsP']
data = data[data['Played'] != 'Played']

numeric_cols = ['Points', 'Played', 'Wins', 'Draws', 'Losses', 'GoalsFor', 'GoalsAgainst', 'GoalDifference']
data[numeric_cols] = data[numeric_cols].apply(pd.to_numeric, errors='coerce')
data[numeric_cols] = data[numeric_cols].fillna(0)

# Entraînement du modèle de régression et prédictions
train_data = data[data['Year'] != '2023']
test_data = data[data['Year'] == '2023']

features = ['Played', 'Wins', 'Draws', 'Losses', 'GoalsFor', 'GoalsAgainst', 'GoalDifference']
X_train = train_data[features]
y_train = train_data['Points']
X_test = test_data[features]

random_forest_model = RandomForestRegressor(n_estimators=100, random_state=42)
random_forest_model.fit(X_train, y_train)

predicted_points = random_forest_model.predict(X_test)
test_data['Predicted_Points'] = predicted_points

# Ajouter les prédictions au DataFrame principal
data = data.merge(test_data[['Team', 'Year', 'Predicted_Points']], on=['Team', 'Year'], how='left')

# Enregistrer les données préparées dans un fichier CSV
data.to_csv('processed_football_data.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['Predicted_Points'] = predicted_points


In [2]:
import pandas as pd
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px

# Charger les données
data = pd.read_csv('processed_football_data.csv')

# Initialiser l'application Dash
app = dash.Dash(__name__)

# Disposition de l'application
app.layout = html.Div([
    html.H1("Tableau de bord des ligues de football"),
    
    dcc.Dropdown(
        id='league-dropdown',
        options=[{'label': league, 'value': league} for league in data['League'].unique()],
        value='premierleague',
        clearable=False
    ),
    
    dcc.Dropdown(
        id='year-dropdown',
        options=[{'label': year, 'value': year} for year in data['Year'].unique()],
        value='2023',
        clearable=False
    ),
    
    dcc.Graph(id='league-comparison-graph'),
    
    dcc.Graph(id='team-performance-graph'),
    
    dcc.Graph(id='predicted-vs-actual-graph'),
    
    dcc.Graph(id='position-distribution-graph')
])

@app.callback(
    Output('league-comparison-graph', 'figure'),
    Input('league-dropdown', 'value')
)
def update_league_comparison(selected_league):
    filtered_data = data[data['League'] == selected_league]
    fig = px.line(filtered_data, x='Year', y='Points', color='League', title='Comparaison des ligues au fil des ans')
    return fig

@app.callback(
    Output('team-performance-graph', 'figure'),
    [Input('league-dropdown', 'value'), Input('year-dropdown', 'value')]
)
def update_team_performance(selected_league, selected_year):
    filtered_data = data[(data['League'] == selected_league) & (data['Year'] == selected_year)]
    fig = px.bar(filtered_data, x='Team', y='Points', color='Team', title='Performance des équipes')
    return fig

@app.callback(
    Output('predicted-vs-actual-graph', 'figure'),
    Input('year-dropdown', 'value')
)
def update_predicted_vs_actual(selected_year):
    filtered_data = data[data['Year'] == selected_year]
    fig = px.scatter(filtered_data, x='Predicted_Points', y='Points', color='League', title='Points prédits vs points réels')
    return fig

@app.callback(
    Output('position-distribution-graph', 'figure'),
    Input('league-dropdown', 'value')
)
def update_position_distribution(selected_league):
    filtered_data = data[data['League'] == selected_league]
    fig = px.box(filtered_data, x='League', y='Position', title='Distribution des positions')
    return fig

if __name__ == '__main__':
    app.run_server(debug=True)
