In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sqlalchemy import create_engine

In [2]:
# Connection URL for the database queried by this notebook.
# It is read from the ODIS_DATABASE_URL environment variable so that real
# credentials never have to be committed with the notebook; when the variable
# is unset, the original local URL is used, so existing setups keep working.
DATABASE_URL = os.environ.get(
    "ODIS_DATABASE_URL",
    "postgresql://odis:odis@localhost:5432/odis",
)
engine = create_engine(DATABASE_URL)

In [45]:
# Column names used both as SQL aliases below and as plot axes later on.
MERGE_KEY = "code_insee"
ASSUR_AXIS = "count_cat_nat"
SOCDEM_AXIS = "avg_demandeurs_emploi"

# Number of rows of pcc.catnat_gaspar for each cod_commune value.
df_assur = pd.read_sql(
    sql=f"SELECT cod_commune as {MERGE_KEY}, count(*) as {ASSUR_AXIS} FROM pcc.catnat_gaspar GROUP BY cod_commune",
    con=engine,
)

# Mean of the "Demandeurs_Emploi" column for each codgeo value.
df_sociodemo = pd.read_sql(
    sql=f'SELECT codgeo as {MERGE_KEY}, AVG("Demandeurs_Emploi") as {SOCDEM_AXIS} FROM gold.gold_emploi_demandeur GROUP BY codgeo',
    con=engine,
)

# Inner join on the shared key: only keys present in both frames are kept.
df_fusion = df_assur.merge(df_sociodemo, how="inner", on=MERGE_KEY)

In [52]:
from ipywidgets import Tab, Output
from IPython.display import display
from matplotlib.ticker import FuncFormatter

# Create one Output widget per tab; each one is used below as a context
# manager to collect the content of its section.
tab1, tab2, tab3 = (Output() for _ in range(3))

def show_distribution(tab, df, column, heading, title, thousands_separator=False):
    """Fill an output tab with a short summary and a histogram of one column.

    Inside ``tab`` this prints ``heading`` and the number of rows of ``df``,
    displays ``df.head()``, and draws a 30-bin histogram of ``df[column]``.

    Parameters
    ----------
    tab : context manager (the notebook passes ``Output`` widgets)
        Everything is printed and displayed inside ``with tab:``.
    df : pandas.DataFrame
        Frame holding the data to summarise.
    column : str
        Column to plot; also used as the x-axis label.
    heading : str
        First line printed in the tab.
    title : str
        Title of the histogram.
    thousands_separator : bool, default False
        When true, x-axis ticks are shown as integers with ``,`` as the
        thousands separator.
    """
    with tab:
        print(heading)
        print(len(df), "enregistrements")
        display(df.head())

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.hist(df[column], bins=30, edgecolor='black', alpha=0.7)
        ax.set_xlabel(column)
        ax.set_ylabel("Fréquence")
        ax.set_title(title)
        if thousands_separator:
            ax.xaxis.set_major_formatter(FuncFormatter(lambda x, p: f'{int(x):,}'))
        plt.show()


show_distribution(
    tab1,
    df_assur,
    ASSUR_AXIS,
    heading="Données assurance",
    title="Distribution des données assurance",
)

show_distribution(
    tab2,
    df_sociodemo,
    SOCDEM_AXIS,
    heading="Données socio-demo",
    title="Distribution des données socio-demo",
    thousands_separator=True,
)

# Third tab: scatter plot of the two merged indicators, followed by their
# correlation coefficient and a textual interpretation of its strength.
with tab3:
    print("Corrélation")
    sns.scatterplot(data=df_fusion, x=SOCDEM_AXIS, y=ASSUR_AXIS)
    # NOTE(review): a log axis cannot show non-positive x values, so rows
    # whose average is 0 would not appear — confirm this is acceptable.
    plt.xscale('log')
    plt.show()

    # Series.corr defaults to the Pearson coefficient.
    correlation = df_fusion[SOCDEM_AXIS].corr(df_fusion[ASSUR_AXIS])
    abs_correlation = abs(correlation)

    # Interpretation based on the absolute value of the coefficient.
    # The ranges are contiguous: (0.7, 1] strong, (0.3, 0.7] moderate,
    # [0, 0.3] weak. Previously a value of exactly 0.7 matched neither of the
    # first two tests and was reported as weak.
    if pd.isna(correlation):
        # Every comparison with NaN is false, so without this branch an
        # undefined coefficient would be reported as a weak correlation.
        interpretation = "Corrélation non définie"
    elif abs_correlation > 0.7:
        interpretation = "Corrélation forte"
    elif abs_correlation > 0.3:
        interpretation = "Corrélation modérée"
    else:
        interpretation = "Corrélation faible"

    print(f"Coefficient de corrélation: {correlation:.4f}")
    print(f"Interprétation: {interpretation}")


# Assemble the three outputs into one tabbed widget, label each tab in
# order, then render the widget.
tab_titles = ("Données assurance", "Données socio-demo", "Corrélation")

tabs = Tab([tab1, tab2, tab3])
for tab_index, tab_title in enumerate(tab_titles):
    tabs.set_title(tab_index, tab_title)

display(tabs)

Tab(children=(Output(), Output(), Output()), selected_index=0, titles=('Données assurance', 'Données socio-dem…