<span style="color:#42a5f5; font-size:2em; font-weight:bold;">Notebook de chargement des données de mobilisations et nettoyage</span>

<span style="color:#e91e63; font-size:1em; font-weight:bold;">1. Import des bibliothèques Python</span>

In [2]:
import pandas as pd
import numpy as np
import os
import glob
from pathlib import Path

In [3]:
# Pour afficher toutes les colonnes et tout vérifier!
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

<span style="color:#e91e63; font-size:1em; font-weight:bold;"> 2.Définition des chemins propres "</span>

In [None]:
# Revenir deux niveaux au-dessus du répertoire courant:
base_path = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))

# Dossier contenant les fichiers d'incidents:
incidents_path = os.path.join(base_path, "data", "raw", "mobilisations", "CSV_files")

# Affichage des chemins de manière relative pour éviter les chemins absolus spécifiques à l'utilisateur:
print("Répertoire courant :", os.path.relpath(os.getcwd(), base_path))
print("-Chemin incidents :", os.path.relpath(incidents_path, base_path))

# Fonction pour lister les fichiers dans le dossier:
def lister_fichiers(dossier):
    if os.path.exists(dossier):
        fichiers = os.listdir(dossier)
        if fichiers:
            print(f"\nFichiers dans {os.path.relpath(dossier, base_path)} :")
            for f in fichiers:
                print("   -", f)
        else:
            print(f"\nLe dossier {os.path.relpath(dossier, base_path)} est vide.")
    else:
        print(f"\nLe chemin {os.path.relpath(dossier, base_path)} n'existe pas.")

lister_fichiers(incidents_path)

Répertoire courant : c:\Users\9609241C\london-fire-response\notebooks\Ingestion
-Chemin mobilisations : c:\Users\9609241C\london-fire-response\data\raw\mobilisations\CSV_files

Fichiers dans c:\Users\9609241C\london-fire-response\data\raw\mobilisations\CSV_files :
   - Mobilisation data 2025 onwards.csv
   - Mobilisation data from 2015 - 2020.csv
   - Mobilisation data from January 2009 - 2014.csv
   - mobilisations_2021_2024.csv


<span style="color:#e91e63; font-size:1em; font-weight:bold;"> 3.Chargement des fichiers CSV "mobilisations"</span>

In [5]:
mobilisation_files = glob.glob(os.path.join(mobilisations_path, "*.csv"))

# On charge tous les fichiers des mobilisations et on les regroupe
df_all_mobilisations = pd.concat([pd.read_csv(f, dtype={'IncidentNumber': str}) for f in mobilisation_files], ignore_index=True)

# Définition initiale des types pour une lecture robuste
from IPython.display import display
# Affichage interactif du DataFrame
display(df_all_mobilisations.head(10)) 

# Aperçu rapide des données
print("Données de mobilisations chargées !")
print("-Nombre de lignes :", len(df_all_mobilisations))
print("-Nombre de colonnes :", len(df_all_mobilisations.columns))
print("-Colonnes :", df_all_mobilisations.columns.tolist())
print("-Types de données :", df_all_mobilisations.dtypes)
# Affichage avec mise en forme
#df_all_mobilisations.head(20).style.set_table_attributes("style='display:inline'").set_caption("Aperçu des mobilisations")

  df_all_mobilisations = pd.concat([pd.read_csv(f, dtype={'IncidentNumber': str}) for f in mobilisation_files], ignore_index=True)


Unnamed: 0,IncidentNumber,CalYear,BoroughName,WardName,HourOfCall,ResourceMobilisationId,Resource_Code,PerformanceReporting,DateAndTimeMobilised,DateAndTimeMobile,DateAndTimeArrived,TurnoutTimeSeconds,TravelTimeSeconds,AttendanceTimeSeconds,DateAndTimeLeft,DateAndTimeReturned,DeployedFromStation_Code,DeployedFromStation_Name,DeployedFromLocation,PumpOrder,PlusCode_Code,PlusCode_Description,DelayCodeId,DelayCode_Description
0,000004-01012025,2025,HAMMERSMITH AND FULHAM,FULHAM REACH,0,6862256,H331,2,1/1/2025 0:02,1/1/2025 0:07,1/1/2025 0:13,310.0,311.0,621,1/1/2025 0:23,,H33,Wandsworth,Home Station,2,Initial,Initial Mobilisation,12.0,Not held up
1,000004-01012025,2025,HAMMERSMITH AND FULHAM,FULHAM REACH,0,6862257,G261,1,1/1/2025 0:02,1/1/2025 0:02,1/1/2025 0:12,11.0,552.0,563,1/1/2025 0:38,,G36,Hammersmith,Other Station,1,Initial,Initial Mobilisation,12.0,Not held up
2,000005-01012025,2025,MERTON,WEST BARNES,0,6862259,H401,1,1/1/2025 0:03,1/1/2025 0:04,1/1/2025 0:06,33.0,135.0,168,1/1/2025 0:11,,H40,New Malden,Home Station,1,Initial,Initial Mobilisation,,
3,000006-01012025,2025,CROYDON,PURLEY OAKS & RIDDLESDOWN,0,6862260,H291,1,1/1/2025 0:04,1/1/2025 0:06,1/1/2025 0:09,68.0,183.0,251,1/1/2025 0:25,,H29,Purley,Home Station,1,Initial,Initial Mobilisation,,
4,000007-01012025,2025,BARNET,BURNT OAK,0,6862261,G222,1,1/1/2025 0:05,1/1/2025 0:06,1/1/2025 0:10,83.0,230.0,313,1/1/2025 0:27,,G22,Stanmore,Home Station,1,Initial,Initial Mobilisation,,
5,000010-01012025,2025,BARNET,HENDON,0,6862263,A402,1,1/1/2025 0:06,1/1/2025 0:07,1/1/2025 0:10,68.0,184.0,252,1/1/2025 0:18,,A40,Hendon,Home Station,1,Initial,Initial Mobilisation,,
6,000024-01012025,2025,ENFIELD,ENFIELD LOCK,0,6862266,A352,1,1/1/2025 0:09,1/1/2025 0:11,1/1/2025 0:14,89.0,195.0,284,1/1/2025 0:26,,A35,Enfield,Home Station,1,Initial,Initial Mobilisation,,
7,000025-01012025,2025,SOUTHWARK,CHAMPION HILL,0,6862267,E311,1,1/1/2025 0:10,1/1/2025 0:10,1/1/2025 0:19,19.0,528.0,547,1/1/2025 0:33,,E31,Forest Hill,Home Station,1,Initial,Initial Mobilisation,12.0,Not held up
8,000025-01012025,2025,SOUTHWARK,CHAMPION HILL,0,6862268,E371,2,1/1/2025 0:10,1/1/2025 0:10,1/1/2025 0:20,41.0,583.0,624,1/1/2025 0:28,,H22,Lambeth,Other Station,2,Initial,Initial Mobilisation,12.0,Not held up
9,000026-01012025,2025,HOUNSLOW,CHISWICK HOMEFIELDS,0,6862269,G381,1,1/1/2025 0:10,1/1/2025 0:10,1/1/2025 0:15,34.0,294.0,328,1/1/2025 0:17,,G37,Chiswick,Other Station,1,Initial,Initial Mobilisation,,


Données de mobilisations chargées !
-Nombre de lignes : 2578276
-Nombre de colonnes : 24
-Colonnes : ['IncidentNumber', 'CalYear', 'BoroughName', 'WardName', 'HourOfCall', 'ResourceMobilisationId', 'Resource_Code', 'PerformanceReporting', 'DateAndTimeMobilised', 'DateAndTimeMobile', 'DateAndTimeArrived', 'TurnoutTimeSeconds', 'TravelTimeSeconds', 'AttendanceTimeSeconds', 'DateAndTimeLeft', 'DateAndTimeReturned', 'DeployedFromStation_Code', 'DeployedFromStation_Name', 'DeployedFromLocation', 'PumpOrder', 'PlusCode_Code', 'PlusCode_Description', 'DelayCodeId', 'DelayCode_Description']
-Types de données : IncidentNumber               object
CalYear                       int64
BoroughName                  object
WardName                     object
HourOfCall                    int64
ResourceMobilisationId        int64
Resource_Code                object
PerformanceReporting         object
DateAndTimeMobilised         object
DateAndTimeMobile            object
DateAndTimeArrived           o

<span style="color:#e91e63; font-size:1em; font-weight:bold;">4. Nettoyage et typage </span>

In [6]:
# Nettoyage des "faux nulls"
null_strings = {"null", "NULL", "", "N/A", "n/a", "NaN"}
df_all_mobilisations = df_all_mobilisations.map(lambda x: np.nan if isinstance(x, str) and x.strip() in null_strings else x)

# Conversion des colonnes vers le bon type 
df_all_mobilisations["CalYear"] = pd.to_numeric(df_all_mobilisations["CalYear"], errors='coerce').astype('Int64')
df_all_mobilisations["HourOfCall"] = pd.to_numeric(df_all_mobilisations["HourOfCall"], errors='coerce').astype('Int64')
df_all_mobilisations["TurnoutTimeSeconds"] = pd.to_numeric(df_all_mobilisations["TurnoutTimeSeconds"], errors='coerce').astype('Int64')
df_all_mobilisations["TravelTimeSeconds"] = pd.to_numeric(df_all_mobilisations["TravelTimeSeconds"], errors='coerce').astype('Int64')
df_all_mobilisations["AttendanceTimeSeconds"] = pd.to_numeric(df_all_mobilisations["AttendanceTimeSeconds"], errors='coerce').astype('Int64')
df_all_mobilisations["PumpOrder"] = pd.to_numeric(df_all_mobilisations["PumpOrder"], errors='coerce').astype('Int64')
df_all_mobilisations["DelayCodeId"] = pd.to_numeric(df_all_mobilisations["DelayCodeId"], errors='coerce').astype('Int64')
#df_all_mobilisations["ResourceMobilisationId"] = df_all_mobilisations["ResourceMobilisationId"].astype("string")

# Conversion des colonnes date/heure en datetime 
datetime_fields = ["DateAndTimeMobilised", "DateAndTimeMobile", "DateAndTimeArrived","DateAndTimeLeft", "DateAndTimeReturned"]
for col in datetime_fields:
  df_all_mobilisations[col] = pd.to_datetime(df_all_mobilisations[col], format='%d/%m/%Y %H:%M', errors='coerce')
 
print("Les données sont maintenant nettoyées et transformées !")
print("Aperçu des 5 premières lignes du DataFrame nettoyé :"),display(df_all_mobilisations.head(40))
print("Informations sur le DataFrame (types de données et valeurs non-nulles) :")
print("Taille:",df_all_mobilisations.shape)
print(display(df_all_mobilisations.info()))
print(display(df_all_mobilisations.describe()))

Les données sont maintenant nettoyées et transformées !
Aperçu des 5 premières lignes du DataFrame nettoyé :


Unnamed: 0,IncidentNumber,CalYear,BoroughName,WardName,HourOfCall,ResourceMobilisationId,Resource_Code,PerformanceReporting,DateAndTimeMobilised,DateAndTimeMobile,DateAndTimeArrived,TurnoutTimeSeconds,TravelTimeSeconds,AttendanceTimeSeconds,DateAndTimeLeft,DateAndTimeReturned,DeployedFromStation_Code,DeployedFromStation_Name,DeployedFromLocation,PumpOrder,PlusCode_Code,PlusCode_Description,DelayCodeId,DelayCode_Description
0,000004-01012025,2025,HAMMERSMITH AND FULHAM,FULHAM REACH,0,6862256,H331,2,2025-01-01 00:02:00,2025-01-01 00:07:00,2025-01-01 00:13:00,310.0,311.0,621,2025-01-01 00:23:00,NaT,H33,Wandsworth,Home Station,2,Initial,Initial Mobilisation,12.0,Not held up
1,000004-01012025,2025,HAMMERSMITH AND FULHAM,FULHAM REACH,0,6862257,G261,1,2025-01-01 00:02:00,2025-01-01 00:02:00,2025-01-01 00:12:00,11.0,552.0,563,2025-01-01 00:38:00,NaT,G36,Hammersmith,Other Station,1,Initial,Initial Mobilisation,12.0,Not held up
2,000005-01012025,2025,MERTON,WEST BARNES,0,6862259,H401,1,2025-01-01 00:03:00,2025-01-01 00:04:00,2025-01-01 00:06:00,33.0,135.0,168,2025-01-01 00:11:00,NaT,H40,New Malden,Home Station,1,Initial,Initial Mobilisation,,
3,000006-01012025,2025,CROYDON,PURLEY OAKS & RIDDLESDOWN,0,6862260,H291,1,2025-01-01 00:04:00,2025-01-01 00:06:00,2025-01-01 00:09:00,68.0,183.0,251,2025-01-01 00:25:00,NaT,H29,Purley,Home Station,1,Initial,Initial Mobilisation,,
4,000007-01012025,2025,BARNET,BURNT OAK,0,6862261,G222,1,2025-01-01 00:05:00,2025-01-01 00:06:00,2025-01-01 00:10:00,83.0,230.0,313,2025-01-01 00:27:00,NaT,G22,Stanmore,Home Station,1,Initial,Initial Mobilisation,,
5,000010-01012025,2025,BARNET,HENDON,0,6862263,A402,1,2025-01-01 00:06:00,2025-01-01 00:07:00,2025-01-01 00:10:00,68.0,184.0,252,2025-01-01 00:18:00,NaT,A40,Hendon,Home Station,1,Initial,Initial Mobilisation,,
6,000024-01012025,2025,ENFIELD,ENFIELD LOCK,0,6862266,A352,1,2025-01-01 00:09:00,2025-01-01 00:11:00,2025-01-01 00:14:00,89.0,195.0,284,2025-01-01 00:26:00,NaT,A35,Enfield,Home Station,1,Initial,Initial Mobilisation,,
7,000025-01012025,2025,SOUTHWARK,CHAMPION HILL,0,6862267,E311,1,2025-01-01 00:10:00,2025-01-01 00:10:00,2025-01-01 00:19:00,19.0,528.0,547,2025-01-01 00:33:00,NaT,E31,Forest Hill,Home Station,1,Initial,Initial Mobilisation,12.0,Not held up
8,000025-01012025,2025,SOUTHWARK,CHAMPION HILL,0,6862268,E371,2,2025-01-01 00:10:00,2025-01-01 00:10:00,2025-01-01 00:20:00,41.0,583.0,624,2025-01-01 00:28:00,NaT,H22,Lambeth,Other Station,2,Initial,Initial Mobilisation,12.0,Not held up
9,000026-01012025,2025,HOUNSLOW,CHISWICK HOMEFIELDS,0,6862269,G381,1,2025-01-01 00:10:00,2025-01-01 00:10:00,2025-01-01 00:15:00,34.0,294.0,328,2025-01-01 00:17:00,NaT,G37,Chiswick,Other Station,1,Initial,Initial Mobilisation,,


Informations sur le DataFrame (types de données et valeurs non-nulles) :
Taille: (2578276, 24)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2578276 entries, 0 to 2578275
Data columns (total 24 columns):
 #   Column                    Dtype         
---  ------                    -----         
 0   IncidentNumber            object        
 1   CalYear                   Int64         
 2   BoroughName               object        
 3   WardName                  object        
 4   HourOfCall                Int64         
 5   ResourceMobilisationId    int64         
 6   Resource_Code             object        
 7   PerformanceReporting      object        
 8   DateAndTimeMobilised      datetime64[ns]
 9   DateAndTimeMobile         datetime64[ns]
 10  DateAndTimeArrived        datetime64[ns]
 11  TurnoutTimeSeconds        Int64         
 12  TravelTimeSeconds         Int64         
 13  AttendanceTimeSeconds     Int64         
 14  DateAndTimeLeft           datetime64[ns]
 15  DateA

None

None


Unnamed: 0,CalYear,HourOfCall,ResourceMobilisationId,DateAndTimeMobilised,DateAndTimeMobile,DateAndTimeArrived,TurnoutTimeSeconds,TravelTimeSeconds,AttendanceTimeSeconds,DateAndTimeLeft,DateAndTimeReturned,PumpOrder,DelayCodeId
count,2578276.0,2578276.0,2578276.0,1459792,1446239,1459749,2549352.0,2549253.0,2578276.0,1440311,386915,2578276.0,643489.0
mean,2016.986967,13.463349,4922623.0,2019-03-05 09:17:17.706014720,2019-03-18 07:28:50.098855936,2019-03-05 10:18:41.239418624,79.321986,275.430175,353.074508,2019-04-17 09:48:10.017128960,2012-02-07 05:03:10.632102656,1.462955,10.499483
min,2009.0,0.0,38426.0,2009-01-01 00:02:00,2009-01-01 00:06:00,2009-01-01 00:07:00,0.0,0.0,0.0,2009-01-01 00:11:00,2009-01-01 00:12:00,1.0,3.0
25%,2013.0,9.0,4045812.0,2015-03-02 19:27:00,2015-04-01 19:24:30,2015-03-02 20:04:00,54.0,174.0,252.0,2015-06-04 18:31:00,2010-05-12 08:12:00,1.0,9.0
50%,2017.0,14.0,4999794.0,2021-03-07 13:03:00,2021-03-17 01:25:00,2021-03-07 13:23:00,73.0,249.0,327.0,2021-03-31 21:53:00,2011-12-08 10:54:00,1.0,12.0
75%,2021.0,19.0,5984065.0,2023-04-13 17:34:45,2023-04-17 13:36:00,2023-04-13 17:28:00,96.0,345.0,423.0,2023-04-24 10:02:00,2013-09-06 22:17:00,2.0,12.0
max,2025.0,23.0,6972117.0,2025-12-04 23:51:00,2025-12-04 23:53:00,2025-12-04 23:55:00,1192.0,1198.0,1200.0,2025-12-04 23:57:00,2015-12-11 23:53:00,13.0,13.0
std,4.914926,6.332345,1371195.0,,,,44.335052,151.441705,154.353287,,,0.781168,2.187642


None


In [7]:
#Vérifier la période des données de mobilisations : doit etre de 2009 à 2025
print("Période des données de mobilisations chargées: De", min(df_all_mobilisations["CalYear"]), "à", max(df_all_mobilisations["CalYear"]))

Période des données de mobilisations chargées: De 2009 à 2025


In [None]:
#vérification des valeurs manquantes
print("Valeurs manquantes par colonne :", df_all_mobilisations.isnull().sum())

<span style="color:#e91e63; font-size:1em; font-weight:bold;">5. Export des résultats nettoyés</span>

In [8]:
# Définition du chemin d’export
output_path = Path("../../data/raw/Cleaned_data/InUSE/cleaned_data_mobilisations.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)

# Exporter au format CSV
df_all_mobilisations.to_csv(output_path, index=False, encoding='utf-8')

print("l'export est mnt terminé => le fichier est sauvegardé dans:", output_path)

l'export est mnt terminé => le fichier est sauvegardé dans: ..\..\data\raw\Cleaned_data\InUSE\cleaned_data_mobilisations.csv
