In [None]:
import pandas as pd
import plotly.express as px

file_path = '../data/clean/borneClean.csv'
df = pd.read_csv(file_path)

print(df.columns)

  df = pd.read_csv(file_path)


Index(['nom_amenageur', 'contact_amenageur', 'nom_operateur',
       'contact_operateur', 'nom_enseigne', 'id_station_itinerance',
       'id_station_local', 'nom_station', 'implantation_station',
       'adresse_station', 'coordonneesXY', 'nbre_pdc', 'id_pdc_itinerance',
       'id_pdc_local', 'puissance_nominale', 'prise_type_ef', 'prise_type_2',
       'prise_type_combo_ccs', 'prise_type_chademo', 'prise_type_autre',
       'gratuit', 'paiement_acte', 'paiement_cb', 'paiement_autre',
       'tarification', 'condition_acces', 'reservation', 'horaires',
       'accessibilite_pmr', 'restriction_gabarit', 'station_deux_roues',
       'raccordement', 'date_mise_en_service', 'date_maj', 'cable_t2_attache',
       'last_modified', 'datagouv_dataset_id', 'datagouv_resource_id',
       'datagouv_organization_or_owner', 'created_at',
       'consolidated_longitude', 'consolidated_latitude',
       'consolidated_is_lon_lat_correct',
       'consolidated_is_code_insee_verified',
       'consoli

In [4]:
print(df.head(3))

  nom_amenageur     contact_amenageur nom_operateur     contact_operateur  \
0   ChargePoint  info@chargepoint.com   ChargePoint  info@chargepoint.com   
1   ChargePoint  info@chargepoint.com   ChargePoint  info@chargepoint.com   
2   ChargePoint  info@chargepoint.com   ChargePoint  info@chargepoint.com   

                  nom_enseigne id_station_itinerance id_station_local  \
0  ACU_Poste_De_Garde_Haguenau         ATHTBE1004017    ATHTBE1004017   
1  ACU_Poste_De_Garde_Haguenau         ATHTBE1004018    ATHTBE1004018   
2  ACU_Poste_De_Garde_Haguenau         ATHTBE1004019    ATHTBE1004019   

                   nom_station          implantation_station  \
0  ACU_Poste_De_Garde_Haguenau  Parking privé à usage public   
1  ACU_Poste_De_Garde_Haguenau  Parking privé à usage public   
2  ACU_Poste_De_Garde_Haguenau  Parking privé à usage public   

                            adresse_station  ...  \
0  93 route de Bitche, 67506 Haguenau Cedex  ...   
1  93 route de Bitche, 67506 Haguenau

In [13]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import folium
from folium.plugins import MarkerCluster
import warnings
warnings.filterwarnings('ignore')


class VEChargingAnalyzer:
    def __init__(self, df):
        self.df = df.copy()
        self.prepare_data()

    def prepare_data(self):
        self.df['date_mise_en_service'] = pd.to_datetime(self.df['date_mise_en_service'], errors='coerce')
        self.df['date_maj'] = pd.to_datetime(self.df['date_maj'], errors='coerce')

        if 'coordonneesXY' in self.df.columns:
            coords = self.df['coordonneesXY'].str.strip('[]').str.split(',', expand=True)
            if coords.shape[1] >= 2:
                self.df['longitude'] = pd.to_numeric(coords[0], errors='coerce')
                self.df['latitude'] = pd.to_numeric(coords[1], errors='coerce')

        self.df['annee'] = self.df['date_mise_en_service'].dt.year.astype('Int64')
        self.df['mois'] = self.df['date_mise_en_service'].dt.month.astype('Int64')
        self.df['trimestre'] = self.df['date_mise_en_service'].dt.quarter.astype('Int64')

        current_year = datetime.now().year
        self.df = self.df[(self.df['annee'] >= 2010) & (self.df['annee'] <= current_year)]

        self.df['puissance_nominale'] = pd.to_numeric(self.df['puissance_nominale'], errors='coerce')

        if 'adresse_station' in self.df.columns:
            self.df['code_postal'] = self.df['adresse_station'].str.extract(r'(\d{5})')
            self.df['departement'] = self.df['code_postal'].str[:2]
        else:
            np.random.seed(42)
            self.df['departement'] = np.random.choice(
                ['75', '13', '69', '59', '31', '44', '33', '34', '06', '67'], size=len(self.df)
            )

        self.df['puissance_nominale'] = self.df['puissance_nominale'].fillna(22)
        self.df['categorie_puissance'] = pd.cut(
            self.df['puissance_nominale'],
            bins=[0, 7, 22, 50, 150, float('inf')],
            labels=['Lente (≤7kW)', 'Normale (7-22kW)', 'Semi-rapide (22-50kW)',
                    'Rapide (50-150kW)', 'Ultra-rapide (>150kW)']
        ).astype(str)

        self.df['nom_operateur'] = self.df['nom_operateur'].fillna('Inconnu').astype(str)
        self.df['nom_station'] = self.df['nom_station'].fillna('Station sans nom').astype(str)

        years = list(range(2015, 2026))
        ve_data = {
            'annee': years,
            'nb_ve': [8000, 15000, 25000, 42000, 68000, 185000, 295000,
                      400000, 520000, 680000, 900000]
        }
        self.ve_df = pd.DataFrame(ve_data)

    def plot_evolution_bornes(self):
        df_valid = self.df.dropna(subset=['annee'])
        if len(df_valid) == 0:
            print("Pas de données valides.")
            return

        bornes_par_an = df_valid.groupby('annee').size().reset_index(name='total_bornes')
        bornes_par_an['cumul'] = bornes_par_an['total_bornes'].cumsum()

        fig = make_subplots(rows=1, cols=2, subplot_titles=["Cumul bornes", "Nouvelles installations"])

        fig.add_trace(go.Scatter(
            x=bornes_par_an['annee'], y=bornes_par_an['cumul'],
            mode='lines+markers', name="Cumul bornes"), row=1, col=1)

        fig.add_trace(go.Bar(
            x=bornes_par_an['annee'], y=bornes_par_an['total_bornes'],
            name="Installations annuelles"), row=1, col=2)

        fig.update_layout(title_text="Évolution des bornes", height=500)
        fig.show()

    def plot_ratio_bornes_ve(self):
        df_clean = self.df.dropna(subset=['annee'])
        bornes_cumul = df_clean.groupby('annee').size().cumsum()
        bornes_reset = bornes_cumul.reset_index()
        bornes_reset.columns = ['annee', 'nb_bornes']
        bornes_reset['annee'] = bornes_reset['annee'].astype(int)

        ratio_data = pd.merge(bornes_reset, self.ve_df, on='annee', how='inner')
        ratio_data['ratio_actual'] = ratio_data['nb_ve'] / ratio_data['nb_bornes']
        ratio_data['objectif_afi'] = 10
        ratio_data['ecart'] = ratio_data['ratio_actual'] - 10

        fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                            subplot_titles=["Ratio VE/bornes", "Écart à l’objectif"])

        fig.add_trace(go.Scatter(
            x=ratio_data['annee'], y=ratio_data['ratio_actual'],
            mode='lines+markers', name="Ratio actuel"), row=1, col=1)

        fig.add_trace(go.Scatter(
            x=ratio_data['annee'], y=[10]*len(ratio_data),
            mode='lines', name="Objectif AFI", line=dict(dash='dash')), row=1, col=1)

        fig.add_trace(go.Bar(
            x=ratio_data['annee'], y=ratio_data['ecart'],
            name="Écart à l’objectif",
            marker_color=['red' if x > 0 else 'green' for x in ratio_data['ecart']]
        ), row=2, col=1)

        fig.update_layout(title_text="Ratio Bornes / Véhicules Électriques", height=600)
        fig.show()

    def plot_analyse_departementale(self):
        dept_stats = self.df.groupby('departement').agg({
            'id_station_itinerance': 'count',
            'puissance_nominale': ['mean', 'sum'],
            'nbre_pdc': 'sum'
        }).round(2)

        dept_stats.columns = ['nb_stations', 'puissance_moy', 'puissance_totale', 'nb_pdc_total']
        dept_stats = dept_stats.reset_index().sort_values('nb_stations', ascending=False)

        fig = make_subplots(rows=2, cols=2,
                            subplot_titles=['Stations par département',
                                            'Puissance moyenne',
                                            'Répartition puissance',
                                            'Total points de charge'])

        fig.add_trace(go.Bar(x=dept_stats['departement'], y=dept_stats['nb_stations']), row=1, col=1)
        fig.add_trace(go.Bar(x=dept_stats['departement'], y=dept_stats['puissance_moy']), row=1, col=2)
        fig.add_trace(go.Bar(x=dept_stats['departement'], y=dept_stats['nb_pdc_total']), row=2, col=2)

        power_cats = pd.crosstab(self.df['departement'], self.df['categorie_puissance'])
        for cat in power_cats.columns:
            fig.add_trace(go.Bar(name=cat, x=power_cats.index, y=power_cats[cat]), row=2, col=1)

        fig.update_layout(title="Analyse départementale", barmode='stack', height=700)
        fig.show()

    def plot_saisonnalite_installations(self):
        df_valid = self.df.dropna(subset=['mois', 'annee', 'trimestre'])
        mois_order = list(range(1, 13))
        mois_labels = ['Jan', 'Fév', 'Mar', 'Avr', 'Mai', 'Jun',
                       'Jul', 'Aoû', 'Sep', 'Oct', 'Nov', 'Déc']

        installs_par_mois = df_valid.groupby('mois').size().reindex(mois_order, fill_value=0)

        heatmap_data = df_valid.groupby(['annee', 'trimestre']).size().unstack(fill_value=0)

        fig = make_subplots(rows=1, cols=2, subplot_titles=["Installations par mois", "Heatmap Annuelle"])

        fig.add_trace(go.Bar(x=mois_labels, y=installs_par_mois.values), row=1, col=1)

        fig.add_trace(go.Heatmap(
            z=heatmap_data.values,
            x=[f'T{col}' for col in heatmap_data.columns],
            y=heatmap_data.index,
            colorscale='YlOrRd'
        ), row=1, col=2)

        fig.update_layout(title="Saisonnalité des installations", height=500)
        fig.show()

    def plot_operateurs_analysis(self):
        top_operateurs = self.df['nom_operateur'].value_counts().head(10)

        fig = make_subplots(rows=1, cols=2, subplot_titles=["Top opérateurs", "Évolution cumulée"])

        fig.add_trace(go.Bar(
            x=top_operateurs.values, y=top_operateurs.index,
            orientation='h'), row=1, col=1)

        top5_ops = top_operateurs.head(5).index
        for op in top5_ops:
            op_data = self.df[self.df['nom_operateur'] == op]
            install_cumul = op_data.groupby('annee').size().cumsum()
            fig.add_trace(go.Scatter(
                x=install_cumul.index, y=install_cumul.values,
                mode='lines+markers', name=op), row=1, col=2)

        fig.update_layout(title="Analyse des opérateurs", height=500)
        fig.show()

    def plot_scenarios_futurs(self):
        derniere_annee = self.df['annee'].max()
        bornes_actuelles = len(self.df)

        annees_futures = list(range(derniere_annee + 1, derniere_annee + 6))
        scenarios = {
            'Conservateur': [680000 * (1.2 ** i) for i in range(1, 6)],
            'Modéré': [680000 * (1.35 ** i) for i in range(1, 6)],
            'Ambitieux': [680000 * (1.5 ** i) for i in range(1, 6)]
        }

        fig = make_subplots(rows=1, cols=2, subplot_titles=["Projection VE", "Besoins en bornes"])

        for label, data in scenarios.items():
            fig.add_trace(go.Scatter(
                x=annees_futures, y=data, mode='lines+markers', name=f'VE - {label}'
            ), row=1, col=1)

            bornes_requises = [x / 10 for x in data]
            fig.add_trace(go.Scatter(
                x=annees_futures, y=bornes_requises, mode='lines+markers', name=f'Bornes - {label}'
            ), row=1, col=2)

        fig.add_hline(y=bornes_actuelles, line_dash='dash', row=1, col=2)

        fig.update_layout(title="Scénarios futurs", height=500)
        fig.show()

    def plot_repartition_geographique(self):
        if 'longitude' not in self.df.columns or 'latitude' not in self.df.columns:
            print("Pas de coordonnées.")
            return

        df_geo = self.df.dropna(subset=['longitude', 'latitude'])
        m = folium.Map(location=[df_geo['latitude'].mean(), df_geo['longitude'].mean()], zoom_start=6)
        marker_cluster = MarkerCluster().add_to(m)

        for _, row in df_geo.sample(min(1000, len(df_geo))).iterrows():
            folium.Marker(
                [row['latitude'], row['longitude']],
                popup=f"{row['nom_station']}<br>Puissance: {row['puissance_nominale']}kW",
                icon=folium.Icon(color='green' if row['puissance_nominale'] <= 22 else 'red')
            ).add_to(marker_cluster)

        m.save('carte_bornes_recharge.html')
        print("Carte enregistrée dans 'carte_bornes_recharge.html'")

    def generer_tous_graphiques(self):
        print("📊 Génération de l'analyse dynamique avec Plotly...")
        self.plot_evolution_bornes()
        self.plot_ratio_bornes_ve()
        self.plot_analyse_departementale()
        self.plot_saisonnalite_installations()
        self.plot_operateurs_analysis()
        self.plot_scenarios_futurs()
        self.plot_repartition_geographique()
        print("✅ Tous les graphiques interactifs ont été générés.")


# Exemple d'utilisation
# ======================
file_path = '../data/clean/borneClean.csv'
df = pd.read_csv(file_path)

analyzer = VEChargingAnalyzer(df)
analyzer.generer_tous_graphiques()


📊 Génération de l'analyse dynamique avec Plotly...


Carte enregistrée dans 'carte_bornes_recharge.html'
✅ Tous les graphiques interactifs ont été générés.


In [6]:
import matplotlib.pyplot as plt

print(plt.style.available)

['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark', 'seaborn-dark-palette', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'tableau-colorblind10']
