In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [13]:
data = pd.read_csv("clubs-data-2022.csv", sep=";")

In [14]:
data.columns

Index(['Code Commune', 'Commune', 'Code QPV', 'Nom QPV', 'Département',
       'Région', 'Statut géo', 'Code', 'Fédération', 'Clubs', 'EPA', 'Total'],
      dtype='object')

In [15]:
dom = ["Guadeloupe", "Martinique", "Guyane", "La Réunion", "Mayotte"]
data = data[~data["Région"].isin(dom)]

In [16]:
data.head()

Unnamed: 0,Code Commune,Commune,Code QPV,Nom QPV,Département,Région,Statut géo,Code,Fédération,Clubs,EPA,Total
0,1001,L'Abergement-Clémenciat,CSZ,,1,Auvergne-Rhône-Alpes,1.Champ geoc,124,FF de Tennis de Table,1,0,1
1,1001,L'Abergement-Clémenciat,CSZ,,1,Auvergne-Rhône-Alpes,1.Champ geoc,133,FF de Rugby,1,0,1
2,1001,L'Abergement-Clémenciat,CSZ,,1,Auvergne-Rhône-Alpes,1.Champ geoc,211,FF du Sport Boules,1,0,1
3,1002,L'Abergement-de-Varey,CSZ,,1,Auvergne-Rhône-Alpes,1.Champ geoc,211,FF du Sport Boules,1,0,1
4,1002,L'Abergement-de-Varey,CSZ,,1,Auvergne-Rhône-Alpes,1.Champ geoc,245,FF de la Randonnée Pédestre,1,0,1


In [19]:
import pandas as pd
import requests
import plotly.express as px

# Total clubs per region (all federations)
total_reg = (
    data.groupby("Région")["Clubs"]
    .sum()
    .reset_index(name="total_clubs")
)

# Football and rugby subsets
football = data[data["Fédération"] == "FF de Football"]
rugby = data[data["Fédération"] == "FF de Rugby"]

# Clubs per region for each sport
football_reg = (
    football.groupby("Région")["Clubs"]
    .sum()
    .reset_index(name="football_clubs")
)

rugby_reg = (
    rugby.groupby("Région")["Clubs"]
    .sum()
    .reset_index(name="rugby_clubs")
)

# Merge with total clubs
football_reg = football_reg.merge(total_reg, on="Région", how="left")
rugby_reg = rugby_reg.merge(total_reg, on="Région", how="left")

# Percentages of clubs
football_reg["football_pct"] = 100 * football_reg["football_clubs"] / football_reg["total_clubs"]
rugby_reg["rugby_pct"] = 100 * rugby_reg["rugby_clubs"] / rugby_reg["total_clubs"]

# GeoJSON
url = "https://france-geojson.gregoiredavid.fr/repo/regions.geojson"
geo = requests.get(url).json()

# Football map
football_map = px.choropleth(
    football_reg,
    geojson=geo,
    locations="Région",
    featureidkey="properties.nom",
    color="football_pct",
    color_continuous_scale="Blues",
    projection="mercator",
    title="Pourcentage de clubs de football (%)"
)
football_map.update_geos(fitbounds="locations", visible=False)
football_map.update_layout(height=650, width=650, margin=dict(l=0, r=0, t=40, b=0))
football_map.show()

# Rugby map
rugby_map = px.choropleth(
    rugby_reg,
    geojson=geo,
    locations="Région",
    featureidkey="properties.nom",
    color="rugby_pct",
    color_continuous_scale="Reds",
    projection="mercator",
    title="Pourcentage de clubs de rugby (%)"
)
rugby_map.update_geos(fitbounds="locations", visible=False)
rugby_map.update_layout(height=650, width=650, margin=dict(l=0, r=0, t=40, b=0))
rugby_map.show()


In [None]:
import pandas as pd
import numpy as np
import requests
import plotly.express as px

REGIONS = ["Hauts-de-France", "Occitanie"]
FEDERATIONS = ["FF de Football", "FF de Rugby"]

df = data.copy()
df["Code Commune"] = df["Code Commune"].astype(str).str.zfill(5)
df = df[df["Région"].isin(REGIONS) & df["Fédération"].isin(FEDERATIONS)]

agg = (
    df.groupby(["Code Commune", "Région", "Fédération"], as_index=False)["Clubs"]
    .sum()
)

# Color scales
FOOT_SCALE = ["#6BAED6", "#2171B5", "#08306B", "#000000"]
RUGBY_SCALE = ["#FB6A4A", "#CB181D", "#99000D", "#000000"]

url = "https://france-geojson.gregoiredavid.fr/repo/communes.geojson"
geo = requests.get(url).json()

def build_map(region_name, federation_name, color_scale, title):
    tmp = agg[(agg["Région"] == region_name) & (agg["Fédération"] == federation_name)]
    fig = px.choropleth_mapbox(
        tmp,
        geojson=geo,
        locations="Code Commune",
        featureidkey="properties.code",
        color="Clubs",
        color_continuous_scale=color_scale,
        mapbox_style="open-street-map",
        zoom=6,
        center={"lat": 46.5, "lon": 2.5},
        height=650,
        title=title,
    )
    fig.update_layout(margin=dict(l=0, r=0, t=40, b=0))
    return fig

fig_fb_hdf = build_map(
    "Hauts-de-France", "FF de Football", FOOT_SCALE,
    "Football – Hauts-de-France (clubs)"
)

fig_fb_occ = build_map(
    "Occitanie", "FF de Football", FOOT_SCALE,
    "Football – Occitanie (clubs)"
)

fig_rg_hdf = build_map(
    "Hauts-de-France", "FF de Rugby", RUGBY_SCALE,
    "Rugby – Hauts-de-France (clubs)"
)

fig_rg_occ = build_map(
    "Occitanie", "FF de Rugby", RUGBY_SCALE,
    "Rugby – Occitanie (clubs)"
)

fig_fb_hdf.show()
fig_fb_occ.show()
fig_rg_hdf.show()
fig_rg_occ.show()


In [21]:
#number of clubs per 100,000 inhabitants for each region
pop = pd.read_excel("POPULATION_MUNICIPALE_COMMUNES_FRANCE.xlsx")

In [22]:
pop.head()

Unnamed: 0,objectid,reg,dep,cv,codgeo,libgeo,p13_pop,p14_pop,p15_pop,p16_pop,p17_pop,p18_pop,p19_pop,p20_pop,p21_pop
0,115658,52,85,8502,85062,Châteauneuf,968.0,993.0,1013.0,1027.0,1056,1085.0,1114.0,1118.0,1134.0
1,115659,26,58,5808,58300,Urzy,1839.0,1835.0,1828.0,1802.0,1775,1749.0,1746.0,1747.0,1742.0
2,115660,43,70,7012,70137,Chassey-lès-Montbozon,218.0,217.0,216.0,215.0,217,215.0,215.0,220.0,225.0
3,115661,21,51,5123,51649,Vitry-le-François,13174.0,13144.0,12805.0,12552.0,12133,11743.0,11376.0,11458.0,11454.0
4,115662,11,78,7811,78638,Vaux-sur-Seine,4749.0,4715.0,4788.0,4857.0,4927,4929.0,5010.0,5020.0,5083.0


In [23]:
pop["codgeo"] = pop["codgeo"].str.zfill(5)
data["Code Commune"] = data["Code Commune"].astype(str).str.zfill(5)

data = data.merge(
    pop[["codgeo", "p21_pop"]],
    left_on="Code Commune",
    right_on="codgeo",
    how="left"
)

data = data.drop(columns=["codgeo"])

In [24]:

data.head()

Unnamed: 0,Code Commune,Commune,Code QPV,Nom QPV,Département,Région,Statut géo,Code,Fédération,Clubs,EPA,Total,p21_pop
0,1001,L'Abergement-Clémenciat,CSZ,,1,Auvergne-Rhône-Alpes,1.Champ geoc,124,FF de Tennis de Table,1,0,1,832.0
1,1001,L'Abergement-Clémenciat,CSZ,,1,Auvergne-Rhône-Alpes,1.Champ geoc,133,FF de Rugby,1,0,1,832.0
2,1001,L'Abergement-Clémenciat,CSZ,,1,Auvergne-Rhône-Alpes,1.Champ geoc,211,FF du Sport Boules,1,0,1,832.0
3,1002,L'Abergement-de-Varey,CSZ,,1,Auvergne-Rhône-Alpes,1.Champ geoc,211,FF du Sport Boules,1,0,1,267.0
4,1002,L'Abergement-de-Varey,CSZ,,1,Auvergne-Rhône-Alpes,1.Champ geoc,245,FF de la Randonnée Pédestre,1,0,1,267.0


In [25]:
import pandas as pd
import requests
import plotly.express as px

SPORTS = ["FF de Football", "FF de Rugby"]

df = data[data["Fédération"].isin(SPORTS)].copy()

pop_reg = (
    df[["Région", "Code Commune", "p21_pop"]]
    .drop_duplicates(subset=["Région", "Code Commune"])
    .groupby("Région", as_index=False)["p21_pop"]
    .sum()
    .rename(columns={"p21_pop": "population_2021"})
)

clubs_reg = (
    df.groupby(["Région", "Fédération"], as_index=False)["Clubs"]
    .sum()
    .rename(columns={"Clubs": "nb_clubs"})
)

clubs_reg = clubs_reg.merge(pop_reg, on="Région", how="left")
clubs_reg["clubs_pour_100k"] = 100000 * clubs_reg["nb_clubs"] / clubs_reg["population_2021"]

geo = requests.get("https://france-geojson.gregoiredavid.fr/repo/regions.geojson").json()

def carte_sport(sport, palette):
    d = clubs_reg[clubs_reg["Fédération"] == sport]

    fig = px.choropleth(
        d,
        geojson=geo,
        locations="Région",
        featureidkey="properties.nom",
        color="clubs_pour_100k",
        color_continuous_scale=palette,
        title=f"Nombre de clubs pour 100 000 habitants – {sport}",
    )

    fig.update_geos(
        projection_type="mercator",
        lataxis_range=[41, 52],   # fenêtre centrée France
        lonaxis_range=[-5, 10],   # coupe les pays voisins
        visible=False
    )

    fig.update_layout(
        height=700,
        width=700,
        margin=dict(l=0, r=0, t=40, b=0)
    )
    fig.show()

bleu = ["#b3cde3", "#6497b1", "#005b96", "#03396c", "#011f4b"]
rouge = ["#fbb4ae", "#f76868", "#dd3434", "#ae0101", "#7a0101"]

carte_sport("FF de Football", bleu)
carte_sport("FF de Rugby", rouge)


In [30]:
import pandas as pd
import numpy as np

SPORTS = ["FF de Football", "FF de Rugby"]

df = data.copy()
df = df.dropna(subset=["p21_pop"])
df["p21_pop"] = df["p21_pop"].astype(float)
df = df[df["Fédération"].isin(SPORTS)]

# France métropolitaine ≈ départements qui ne commencent pas par 97 ou 98
df["Département"] = df["Département"].astype(str)
df = df[~df["Département"].str.startswith(("97", "98"))]

df["Region_agg"] = np.where(
    df["Région"] == "Occitanie",
    "Occitanie",
    "France métropolitaine hors Occitanie"
)

bins = [0, 5000, np.inf]
labels = ["< 5 000", "≥ 5 000"]

communes_pop = (
    df[["Code Commune", "Region_agg", "p21_pop"]]
    .drop_duplicates(subset=["Code Commune"])
)
communes_pop["classe_pop"] = pd.cut(
    communes_pop["p21_pop"], bins=bins, labels=labels, right=False
)

pop_reg_classe = (
    communes_pop.groupby(["Region_agg", "classe_pop"], as_index=False)["p21_pop"]
    .sum()
    .rename(columns={"p21_pop": "population_classe"})
)

pop_reg_total = (
    communes_pop.groupby("Region_agg", as_index=False)["p21_pop"]
    .sum()
    .rename(columns={"p21_pop": "population_region"})
)

clubs_communes_sport = (
    df.groupby(["Region_agg", "Fédération", "Code Commune"], as_index=False)["Clubs"]
    .sum()
    .rename(columns={"Clubs": "nb_clubs_commune"})
)

clubs_communes_sport = clubs_communes_sport.merge(
    communes_pop[["Code Commune", "classe_pop"]],
    on="Code Commune",
    how="left",
)

clubs_reg_classe = (
    clubs_communes_sport
    .groupby(["Region_agg", "Fédération", "classe_pop"], as_index=False)["nb_clubs_commune"]
    .sum()
    .rename(columns={"nb_clubs_commune": "nb_clubs"})
)

total_clubs_reg_sport = (
    clubs_reg_classe
    .groupby(["Region_agg", "Fédération"], as_index=False)["nb_clubs"]
    .sum()
    .rename(columns={"nb_clubs": "total_clubs_region_sport"})
)

table = clubs_reg_classe.merge(
    total_clubs_reg_sport,
    on=["Region_agg", "Fédération"],
    how="left"
)

table = table.merge(
    pop_reg_classe,
    on=["Region_agg", "classe_pop"],
    how="left"
)

table = table.merge(
    pop_reg_total,
    on="Region_agg",
    how="left"
)

table["pct_clubs_region_sport"] = 100 * table["nb_clubs"] / table["total_clubs_region_sport"]
table["clubs_pour_100k"] = 100000 * table["nb_clubs"] / table["population_classe"]
table["pct_population_region"] = 100 * table["population_classe"] / table["population_region"]
table["ratio_clubs_vs_pop"] = table["pct_clubs_region_sport"] / table["pct_population_region"]

table["Sport"] = table["Fédération"].map({
    "FF de Football": "Football",
    "FF de Rugby": "Rugby"
})

table_finale = table[
    [
        "Region_agg",
        "Sport",
        "classe_pop",
        "nb_clubs",
        "pct_clubs_region_sport",
        "clubs_pour_100k",
        "pct_population_region",
        "ratio_clubs_vs_pop",
    ]
].sort_values(["Sport", "Region_agg", "classe_pop"])

table_finale = table_finale.rename(columns={"Region_agg": "Région"})

table_finale







Unnamed: 0,Région,Sport,classe_pop,nb_clubs,pct_clubs_region_sport,clubs_pour_100k,pct_population_region,ratio_clubs_vs_pop
0,France métropolitaine hors Occitanie,Football,< 5 000,7227,60.807741,66.731567,23.258288,2.614455
1,France métropolitaine hors Occitanie,Football,≥ 5 000,4658,39.192259,13.03524,76.741712,0.510703
4,Occitanie,Football,< 5 000,700,63.405797,54.940872,28.26965,2.242893
5,Occitanie,Football,≥ 5 000,404,36.594203,12.496729,71.73035,0.510163
2,France métropolitaine hors Occitanie,Rugby,< 5 000,507,36.819172,4.681459,23.258288,1.583056
3,France métropolitaine hors Occitanie,Rugby,≥ 5 000,870,63.180828,2.434663,76.741712,0.823292
6,Occitanie,Rugby,< 5 000,249,58.588235,19.543253,28.26965,2.072478
7,Occitanie,Rugby,≥ 5 000,176,41.411765,5.44412,71.73035,0.577326


In [32]:
import pandas as pd
from scipy.stats import chi2_contingency

# Contingence brute
ct = pd.DataFrame(
    {
        "Occitanie": [249, 176],
        "France_hors_Occitanie": [507, 870],
    },
    index=["< 5 000", "≥ 5 000"]
)

chi2, p, dof, expected = chi2_contingency(ct)

occ_total = ct["Occitanie"].sum()
fr_total = ct["France_hors_Occitanie"].sum()

ct["share_occ"] = ct["Occitanie"] / occ_total
ct["share_fr"] = ct["France_hors_Occitanie"] / fr_total
ct["Ratio"] = ct["share_occ"] / ct["share_fr"]

ct_display = ct[["Occitanie", "France_hors_Occitanie", "Ratio"]].copy()
ct_display["Ratio"] = ct_display["Ratio"].round(3)

print("=== Tableau avec ratio ===")
print(ct_display)
print(f"\nChi² = {chi2:.3f}, dof = {dof}, p-value = {p:.4e}")


=== Tableau avec ratio ===
         Occitanie  France_hors_Occitanie  Ratio
< 5 000        249                    507  1.591
≥ 5 000        176                    870  0.655

Chi² = 62.307, dof = 1, p-value = 2.9383e-15


In [None]:
from statsmodels.stats.proportion import proportions_ztest

tab = table_finale
rug = tab[tab["Sport"] == "Rugby"]

occ = rug[rug["Région"] == "Occitanie"]
nat = rug.groupby("classe_pop")["nb_clubs"].sum()

nb_occ_5k = int(occ[occ["classe_pop"] == "< 5 000"]["nb_clubs"])
nb_fr_5k = int(nat.loc["< 5 000"])

nb_occ_total = int(occ["nb_clubs"].sum())
nb_fr_total = int(nat.sum())

count = nb_occ_5k
nobs = nb_occ_total
prop_france = nb_fr_5k / nb_fr_total

stat, p = proportions_ztest(count, nobs, value=prop_france)
print(stat, p)


1.8791506687653554 0.06022392328974711





Calling int on a single element Series is deprecated and will raise a TypeError in the future. Use int(ser.iloc[0]) instead

