In [4]:
# 📦 Imports
import pandas as pd
import numpy as np
import os
import plotly.express as px
import plotly.io as pio
import imageio.v2 as imageio

# 📂 Data loading
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, so a column cannot end up with mixed types.
# NOTE(review): the saved output of this cell echoes both read_csv lines, which
# looks like a pandas DtypeWarning — confirm, and ideally pin explicit dtypes.
df = pd.read_csv("../data/raw/evaluations_6e.csv", sep=";", low_memory=False)
df_geoloc = pd.read_csv("../data/raw/annuaire.csv", sep=";", low_memory=False)



# 🧼 Nettoyage et agrégation
def preparer_donnees_agrandies(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate evaluation rows per school, year, subject and characteristic.

    Rows sharing the same grouping keys are collapsed into one:
    ``Effectif`` and the ``Groupe 1``..``Groupe 6`` columns are summed,
    ``Ecart type`` is averaged, and ``Score moyen`` is recomputed as a mean
    weighted by ``Effectif``.

    Rows with a missing ``Score moyen`` still count towards the summed
    ``Effectif`` but are excluded from the weights of the weighted mean, so
    they no longer pull the aggregated score down. A group without any usable
    score gets ``NaN``.

    Args:
        df: Raw evaluation table; must contain every column listed in
            ``col_to_keep``.

    Returns:
        A new DataFrame with one row per combination of the grouping keys.

    Raises:
        KeyError: If one of the expected columns is missing from ``df``.
    """
    col_to_keep = [
        'Année', 'Libellé région académique', 'Libellé académie',
        'Libellé département', 'UAI', 'Libellé secteur',
        'Matière', 'Caractéristique', 'Effectif', 'Score moyen', 'Ecart type',
        'Groupe 1', 'Groupe 2', 'Groupe 3', 'Groupe 4', 'Groupe 5', 'Groupe 6'
    ]
    df = df[col_to_keep].copy()

    # Numerator of the weighted mean; NaN scores give NaN and are skipped by "sum".
    df["pond_score"] = df["Score moyen"] * df["Effectif"]
    # Denominator: only the headcount of rows that actually carry a score, so
    # numerator and denominator are built from the same rows.
    df["poids_score"] = df["Effectif"].where(df["Score moyen"].notna(), 0)

    group_cols = [
        "Année", "UAI", "Matière", "Caractéristique",
        "Libellé académie", "Libellé département", "Libellé région académique", "Libellé secteur"
    ]
    df_grouped = df.groupby(group_cols, as_index=False).agg({
        "Effectif": "sum",
        "pond_score": "sum",
        "poids_score": "sum",
        # NOTE(review): an unweighted mean of standard deviations is not the
        # pooled standard deviation; kept as in the original aggregation.
        "Ecart type": "mean",
        **{f"Groupe {i}": "sum" for i in range(1, 7)}
    })
    # 0 / 0 yields NaN, which is the intended result for groups without scores.
    df_grouped["Score moyen"] = df_grouped["pond_score"] / df_grouped["poids_score"]
    return df_grouped.drop(columns=["pond_score", "poids_score"])

# Aggregate the raw evaluation rows (one row per school/year/subject/characteristic).
df_grouped = preparer_donnees_agrandies(df=df)

# 🌍 Filtrage géographique
def filtrer_france_metropolitaine(df, lat_col="latitude", lon_col="longitude"):
    """Keep only the rows whose coordinates fall inside a fixed bounding box.

    The box spans latitudes 41 to 51 and longitudes -6 to 10, bounds included.
    Rows with a missing latitude or longitude are dropped.

    Args:
        df: Table holding the coordinate columns.
        lat_col: Name of the latitude column.
        lon_col: Name of the longitude column.

    Returns:
        An independent copy of the rows located inside the box.
    """
    lat_in_box = df[lat_col].between(41, 51)
    lon_in_box = df[lon_col].between(-6, 10)
    inside = lat_in_box & lon_in_box
    return df.loc[inside].copy()

# Restrict the directory to rows located inside the bounding box used by the filter.
df_geoloc = filtrer_france_metropolitaine(
    df=df_geoloc,
    lat_col="latitude",
    lon_col="longitude",
)

# 📍 Localisation des établissements
def localiser_etablissements(df_grouped: pd.DataFrame, df_geoloc: pd.DataFrame, keep_columns=None) -> pd.DataFrame:
    """Attach directory information to each aggregated row, matched on ``UAI``.

    Only directory rows whose ``Type_etablissement`` contains "collège"
    (case-insensitive; missing values count as no match) are used. The
    directory identifier column is renamed to ``UAI`` and a single row is
    kept per ``UAI`` before a left merge, so every row of ``df_grouped`` is
    preserved and unmatched rows get missing values.

    Args:
        df_grouped: Aggregated table holding a ``UAI`` column.
        df_geoloc: Directory table with ``Type_etablissement`` and
            ``Identifiant_de_l_etablissement`` columns.
        keep_columns: Directory columns to bring over; names absent from the
            directory are ignored. Defaults to latitude, longitude and
            ``Nom_etablissement``.

    Returns:
        ``df_grouped`` extended with the requested directory columns.
    """
    if keep_columns is None:
        wanted = ["latitude", "longitude", "Nom_etablissement"]
    else:
        wanted = keep_columns

    is_college = df_geoloc["Type_etablissement"].str.lower().str.contains("collège", na=False)
    colleges = (
        df_geoloc[is_college]
        .rename(columns={"Identifiant_de_l_etablissement": "UAI"})
        .drop_duplicates(subset="UAI")
    )

    merge_cols = ["UAI"]
    merge_cols.extend(name for name in wanted if name in colleges.columns)
    return df_grouped.merge(colleges[merge_cols], on="UAI", how="left")

# Add coordinates and school names to the aggregated scores.
df_localise = localiser_etablissements(
    df_grouped=df_grouped,
    df_geoloc=df_geoloc,
    keep_columns=["latitude", "longitude", "Nom_etablissement"],
)

# 👧👦 Fusion filles/garçons
def fusion_filles_garcons(df: pd.DataFrame) -> pd.DataFrame:
    """Pair girls' and boys' mean scores and compute the gap between them.

    Rows whose ``Caractéristique`` equals "fille" or "garçon" (compared in
    lower case) are selected and inner-joined on ``UAI``, ``Année`` and
    ``Matière``; keys present for only one of the two are dropped.

    Args:
        df: Table with ``UAI``, ``Année``, ``Matière``, ``Caractéristique``
            and ``Score moyen`` columns.

    Returns:
        A DataFrame with the join keys, ``Score moyen_fille``,
        ``Score moyen_garçon`` and ``Ecart_score`` (girls minus boys).
    """
    join_keys = ["UAI", "Année", "Matière"]
    sexe = df["Caractéristique"].str.lower()

    def scores_for(label: str, target: str) -> pd.DataFrame:
        # One slim frame per sex: the join keys plus the renamed score column.
        subset = df.loc[sexe == label, join_keys + ["Score moyen"]]
        return subset.rename(columns={"Score moyen": target})

    fusion = scores_for("fille", "Score moyen_fille").merge(
        scores_for("garçon", "Score moyen_garçon"),
        on=join_keys,
        how="inner",
    )
    fusion["Ecart_score"] = fusion["Score moyen_fille"] - fusion["Score moyen_garçon"]
    return fusion

df_ecarts = fusion_filles_garcons(df_grouped)

# df_localise still holds one row per Caractéristique, so merging it directly on
# (UAI, Année, Matière) would match at least the "fille" and the "garçon" rows
# and duplicate every gap row. Merge against a school-level table instead: only
# columns that do not depend on Caractéristique, one row per merge key.
cles_fusion = ["UAI", "Année", "Matière"]
colonnes_etablissement = [
    "Libellé région académique", "Libellé académie", "Libellé département",
    "Libellé secteur", "latitude", "longitude", "Nom_etablissement",
]
df_etablissements = df_localise[
    cles_fusion + [col for col in colonnes_etablissement if col in df_localise.columns]
].drop_duplicates(subset=cles_fusion)

# validate guards the invariant that the right-hand side is unique per key.
df_ecarts_geo = df_ecarts.merge(
    df_etablissements, on=cles_fusion, how="left", validate="many_to_one"
)

  df = pd.read_csv("../data/raw/evaluations_6e.csv", sep=";")
  df_geoloc = pd.read_csv("../data/raw/annuaire.csv", sep=";")


In [13]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Filtered data: mathematics rows that have coordinates.
df_filtered = df_ecarts_geo[
    (df_ecarts_geo["Matière"] == "Mathématiques") &
    df_ecarts_geo["latitude"].notna() &
    df_ecarts_geo["longitude"].notna()
].copy()

# Data for the trend curves.
# Ties (gap == 0) and missing gaps are excluded so the percentages use the same
# strict inequalities as the two maps instead of counting ties as "boys ahead".
ecarts = df_filtered["Ecart_score"]
df_prop = df_filtered[ecarts.notna() & (ecarts != 0)].copy()
df_prop["avantage"] = df_prop["Ecart_score"].gt(0).map({True: "Filles", False: "Garçons"})
prop = (
    df_prop.groupby(["Année", "avantage"]).size()
    .unstack(fill_value=0)
    # Guarantee both columns exist even if one side never leads.
    .reindex(columns=["Filles", "Garçons"], fill_value=0)
)
prop["Total"] = prop.sum(axis=1)
prop["Filles (%)"] = prop["Filles"] / prop["Total"] * 100
prop["Garçons (%)"] = prop["Garçons"] / prop["Total"] * 100
prop = prop.reset_index()

# Subplot grid: two maps on the first row, one full-width line chart below.
fig = make_subplots(
    rows=2, cols=2,
    specs=[
        [{"type": "mapbox"}, {"type": "mapbox"}],
        [{"type": "xy", "colspan": 2}, None]
    ],
    subplot_titles=["Recul des filles (G > F)", "Avantage des filles (F > G)", "Évolution des proportions (%)"],
    horizontal_spacing=0.05,
    vertical_spacing=0.12
)


def _trace_densite(df_points, z_col, coloraxis, visible):
    """Build one density trace bound to the given colour axis.

    Only the trace of the temporary px figure is reused; its layout (colour
    scale, range, centre, zoom) is discarded, so those settings are applied
    on the final figure's layout instead.

    NOTE: recent Plotly releases deprecate density_mapbox in favour of
    density_map (MapLibre). Migrating also means changing the subplot type
    and the layout keys, so it is left as a follow-up.
    """
    trace = px.density_mapbox(
        df_points,
        lat="latitude",
        lon="longitude",
        z=z_col,
        radius=8,
    ).data[0]
    trace.coloraxis = coloraxis
    trace.visible = visible
    return trace


# One pair of map traces per year; the slider toggles their visibility.
annees = sorted(df_filtered["Année"].unique())
steps = []

for i, annee in enumerate(annees):
    df_annee = df_filtered[df_filtered["Année"] == annee]

    # G > F: magnitude of the girls' deficit.
    dfg = df_annee[df_annee["Ecart_score"] < 0].copy()
    dfg["Recul_filles"] = dfg["Ecart_score"].abs()
    fig.add_trace(_trace_densite(dfg, "Recul_filles", "coloraxis", i == 0), row=1, col=1)

    # F > G: same rendering, separate colour axis.
    dff = df_annee[df_annee["Ecart_score"] > 0].copy()
    dff["Avantage_filles"] = dff["Ecart_score"]
    fig.add_trace(_trace_densite(dff, "Avantage_filles", "coloraxis2", i == 0), row=1, col=2)

    # Traces are ordered (G>F, F>G) per year, followed by the two line traces,
    # which stay visible whatever the selected year.
    vis = [j == i for j in range(len(annees)) for _ in range(2)]
    steps.append(dict(
        method="update",
        label=str(annee),
        args=[{"visible": vis + [True, True]}]
    ))

# Line traces for the share of schools on each side.
fig.add_trace(go.Scatter(
    x=prop["Année"], y=prop["Garçons (%)"],
    mode="lines+markers", name="Garçons > Filles", line=dict(color="teal")
), row=2, col=1)

fig.add_trace(go.Scatter(
    x=prop["Année"], y=prop["Filles (%)"],
    mode="lines+markers", name="Filles > Garçons", line=dict(color="orangered")
), row=2, col=1)

# Final layout.
fig.update_layout(
    height=900,
    title="Écarts Filles / Garçons en Mathématiques – Visualisation complète",
    sliders=[{
        "active": 0,
        "steps": steps,
        "x": 0.1,
        "xanchor": "left",
        "y": -0.1,
        "len": 0.8
    }],
    # One colour axis per map, each with its own scale and the fixed 0-20 range.
    coloraxis=dict(
        colorscale="Tealgrn", cmin=0, cmax=20,
        colorbar=dict(title=dict(text="Recul_filles"), len=0.45, y=0.78, x=1.0),
    ),
    coloraxis2=dict(
        colorscale="OrRd", cmin=0, cmax=20,
        colorbar=dict(title=dict(text="Avantage_filles"), len=0.45, y=0.78, x=1.08),
    ),
    # Move the legend next to the line chart so it does not overlap the colour bars.
    legend=dict(y=0.2, yanchor="middle"),
)
# Apply tile style, centre and zoom to BOTH map subplots (mapbox and mapbox2).
fig.update_mapboxes(style="carto-positron", center={"lat": 46.5, "lon": 2.5}, zoom=5)

fig.show()



*density_mapbox* is deprecated! Use *density_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*density_mapbox* is deprecated! Use *density_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*density_mapbox* is deprecated! Use *density_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*density_mapbox* is deprecated! Use *density_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*density_mapbox* is deprecated! Use *density_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*density_mapbox* is deprecated! Use *density_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*density_mapbox* is deprecated! Use *density_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*density_mapbox* is deprecated! Use *density_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*density_mapbox* is dep

In [15]:
# Summary statistics of the aggregated mean score, one row per subject.
df_grouped["Score moyen"].groupby(df_grouped["Matière"]).describe()


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Matière,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Français,164129.0,255.116433,19.88012,76.0,243.0,255.0,267.0,355.0
Mathématiques,164118.0,250.728573,20.805287,125.0,238.0,251.0,264.0,374.0
