<span style="color:#42a5f5; font-size:2em; font-weight:bold;">Notebook de chargement des données d'incidents et pré-nettoyage</span>

<span style="color:#e91e63; font-size:1em; font-weight:bold;">1. Import des bibliothèques necessaires:
</span>

In [1]:
import pandas as pd
import numpy as np
import os
import glob
from pathlib import Path
from IPython.display import display

In [2]:
# Pour afficher toutes les colonnes et tout vérifier!
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

<span style="color:#e91e63; font-size:1em; font-weight:bold;"> 2.Définition des chemins propres:</span>

In [3]:
# Chemin vers le dossier contenant les fichiers de données:
base_path = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
incidents_path = os.path.join(base_path, "data", "raw", "incidents")
print("Répertoire courant :", os.path.relpath(os.getcwd(), base_path))
print("- Chemin incidents :", os.path.relpath(incidents_path, base_path))

# Fonction pour lister les fichiers dans le dossier:
def lister_fichiers(dossier):
    if os.path.exists(dossier):
        fichiers = os.listdir(dossier)
        if fichiers:
            print(f"\nFichiers dans {os.path.relpath(dossier, base_path)} :")
            for f in fichiers:
                print("   -", f)
        else:
            print(f"\nLe dossier {os.path.relpath(dossier, base_path)} est vide.")
    else:
        print(f"\nLe chemin {os.path.relpath(dossier, base_path)} n'existe pas.")

lister_fichiers(incidents_path)

Répertoire courant : notebooks\Ingestion
- Chemin incidents : data\raw\incidents

Fichiers dans data\raw\incidents :
   - incident Metadata.xlsx
   - incidents_2009_2017.csv
   - Incidents_2018_2025.csv


<span style="color:#e91e63; font-size:1em; font-weight:bold;"> 3.Chargement des fichiers CSV "incidents":</span>

In [4]:
# Chargement des fichiers CSV d'incidents:
incident_files = glob.glob(os.path.join(incidents_path, "*.csv"))

df_all_incidents = pd.concat([
    pd.read_csv(f, dtype={'IncidentNumber': str}, low_memory=False)
    for f in incident_files
], ignore_index=True)

# Vérification de DateOfCall:   
print("Dates non valides (NaT) :", df_all_incidents['DateOfCall'].isna().sum())
print(df_all_incidents['DateOfCall'].dropna().astype(str).str[:20].unique()[:50])

# Affichage interactif du DataFrame:
display(df_all_incidents.head(10)) 

# Aperçu rapide des données:
print("Données incidents chargées !")
print("-Nombre de lignes :", len(df_all_incidents))
print("-Nombre de colonnes :", len(df_all_incidents.columns))
print("-Colonnes :", df_all_incidents.columns.tolist())
print("-Types de données :", df_all_incidents.dtypes)
print("les statistiques sont:")
display(df_all_incidents.describe(include='all'))

Dates non valides (NaT) : 0
['01-Jan-09' '02-Jan-09' '03-Jan-09' '04-Jan-09' '05-Jan-09' '06-Jan-09'
 '07-Jan-09' '08-Jan-09' '09-Jan-09' '10-Jan-09' '11-Jan-09' '12-Jan-09'
 '13-Jan-09' '14-Jan-09' '15-Jan-09' '16-Jan-09' '17-Jan-09' '18-Jan-09'
 '19-Jan-09' '20-Jan-09' '21-Jan-09' '22-Jan-09' '23-Jan-09' '24-Jan-09'
 '25-Jan-09' '26-Jan-09' '27-Jan-09' '28-Jan-09' '29-Jan-09' '30-Jan-09'
 '31-Jan-09' '01-Feb-09' '02-Feb-09' '03-Feb-09' '04-Feb-09' '05-Feb-09'
 '06-Feb-09' '07-Feb-09' '08-Feb-09' '09-Feb-09' '10-Feb-09' '11-Feb-09'
 '12-Feb-09' '13-Feb-09' '14-Feb-09' '15-Feb-09' '16-Feb-09' '17-Feb-09'
 '18-Feb-09' '19-Feb-09']


Unnamed: 0,IncidentNumber,DateOfCall,CalYear,TimeOfCall,HourOfCall,IncidentGroup,StopCodeDescription,SpecialServiceType,PropertyCategory,PropertyType,AddressQualifier,Postcode_full,Postcode_district,UPRN,USRN,IncGeo_BoroughCode,IncGeo_BoroughName,ProperCase,IncGeo_WardCode,IncGeo_WardName,IncGeo_WardNameNew,Easting_m,Northing_m,Easting_rounded,Northing_rounded,Latitude,Longitude,FRS,IncidentStationGround,FirstPumpArriving_AttendanceTime,FirstPumpArriving_DeployedFromStation,SecondPumpArriving_AttendanceTime,SecondPumpArriving_DeployedFromStation,NumStationsWithPumpsAttending,NumPumpsAttending,PumpCount,PumpMinutesRounded,Notional Cost (£),NumCalls
0,235138081.0,01-Jan-09,2009,00:00:37,0,Special Service,Special Service,RTC,Road Vehicle,Car,In street close to gazetteer location,SW11 4LB,SW11,,,E09000032,WANDSWORTH,Wandsworth,E05014010,Battersea Park,Battersea Park,528652.0,176830.0,528650,176850,51.475812,-0.148894,London,Battersea,319.0,Battersea,342.0,Clapham,2.0,2.0,2,60,255,1.0
1,1091.0,01-Jan-09,2009,00:00:46,0,Special Service,Special Service,Assist other agencies,Outdoor,Lake/pond/reservoir,Open land/water - nearest gazetteer location,SE1 7SG,SE1,,,E09000022,LAMBETH,Lambeth,E05014118,Waterloo & South Bank,Waterloo & South Bank,530485.0,179007.0,530450,179050,51.494957,-0.121712,London,Lambeth,,,,,,,1,60,255,1.0
2,2091.0,01-Jan-09,2009,00:03:00,0,Fire,Secondary Fire,,Outdoor,Road surface/pavement,In street outside gazetteer location,N9 9EL,N9,,,E09000010,ENFIELD,Enfield,E05013682,Haselbury,Haselbury,533773.0,194492.0,533750,194450,51.633342,-0.068488,London,Edmonton,308.0,Edmonton,,,1.0,1.0,1,60,255,2.0
3,3091.0,01-Jan-09,2009,00:04:27,0,Fire,Secondary Fire,,Outdoor,Domestic garden (vegetation not equipment),On land associated with building,UB10 0DG,UB10,100021500000.0,21401491.0,E09000017,HILLINGDON,Hillingdon,E05013571,Hillingdon East,Hillingdon East,507738.0,182805.0,507750,182850,51.533882,-0.448089,London,Hillingdon,210.0,Hillingdon,,,1.0,1.0,1,60,255,2.0
4,5091.0,01-Jan-09,2009,00:05:39,0,Fire,Secondary Fire,,Outdoor,Cycle path/public footpath/bridleway,In street outside gazetteer location,N7 8HG,N7,,,E09000019,ISLINGTON,Islington,E05013708,Laycock,Laycock,531058.0,185307.0,531050,185350,51.551441,-0.11112,London,Holloway,233.0,Holloway,250.0,Holloway,1.0,2.0,2,60,255,1.0
5,6091.0,01-Jan-09,2009,00:06:03,0,False Alarm,AFA,,Dwelling,Purpose Built Flats/Maisonettes - Up to 3 stor...,Correct incident location,,NW5,0.0,20499122.0,E09000007,CAMDEN,Camden,E05013663,Kentish Town North,Kentish Town North,,,529450,185250,,,London,Kentish Town,172.0,Kentish Town,222.0,Kentish Town,1.0,2.0,2,60,255,1.0
6,8091.0,01-Jan-09,2009,00:12:31,0,Special Service,Special Service,RTC,Road Vehicle,Car,On motorway / elevated road,SE3 9PP,SE3,,,E09000011,GREENWICH,Greenwich,E05014084,Kidbrooke Village & Sutcliffe,Kidbrooke Village & Sutcliffe,541086.0,175646.0,541050,175650,51.462211,0.029557,London,Lee Green,522.0,East Greenwich,,,1.0,1.0,1,60,255,1.0
7,9091.0,01-Jan-09,2009,00:13:42,0,Fire,Secondary Fire,,Outdoor Structure,Refuse/rubbish tip,On land associated with building,TW3 2RE,TW3,100021600000.0,21500957.0,E09000018,HOUNSLOW,Hounslow,E05013623,Hounslow South,Hounslow South,514529.0,174907.0,514550,174950,51.46156,-0.352783,London,Heston,342.0,Heston,349.0,Heston,1.0,2.0,2,89,378,1.0
8,10091.0,01-Jan-09,2009,00:14:51,0,Fire,Secondary Fire,,Outdoor Structure,Small refuse/rubbish container,On land associated with building,EC1V 7PB,EC1V,5300037000.0,21604971.0,E09000019,ISLINGTON,Islington,E05013711,St. Peter's & Canalside,St. Peter's & Canalside,531902.0,182654.0,531950,182650,51.527403,-0.099948,London,Shoreditch,255.0,Clerkenwell,,,1.0,1.0,1,60,255,2.0
9,11091.0,01-Jan-09,2009,00:14:57,0,False Alarm,AFA,,Other Residential,Student Hall of Residence,Correct incident location,WC1B 3RA,WC1B,5048015.0,20401090.0,E09000007,CAMDEN,Camden,E05013653,Bloomsbury,Bloomsbury,529914.0,181705.0,529950,181750,51.519334,-0.128939,London,Soho,297.0,Euston,,,1.0,1.0,1,60,255,1.0


Données incidents chargées !
-Nombre de lignes : 1824973
-Nombre de colonnes : 39
-Colonnes : ['IncidentNumber', 'DateOfCall', 'CalYear', 'TimeOfCall', 'HourOfCall', 'IncidentGroup', 'StopCodeDescription', 'SpecialServiceType', 'PropertyCategory', 'PropertyType', 'AddressQualifier', 'Postcode_full', 'Postcode_district', 'UPRN', 'USRN', 'IncGeo_BoroughCode', 'IncGeo_BoroughName', 'ProperCase', 'IncGeo_WardCode', 'IncGeo_WardName', 'IncGeo_WardNameNew', 'Easting_m', 'Northing_m', 'Easting_rounded', 'Northing_rounded', 'Latitude', 'Longitude', 'FRS', 'IncidentStationGround', 'FirstPumpArriving_AttendanceTime', 'FirstPumpArriving_DeployedFromStation', 'SecondPumpArriving_AttendanceTime', 'SecondPumpArriving_DeployedFromStation', 'NumStationsWithPumpsAttending', 'NumPumpsAttending', 'PumpCount', 'PumpMinutesRounded', 'Notional Cost (£)', 'NumCalls']
-Types de données : IncidentNumber                             object
DateOfCall                                 object
CalYear                

Unnamed: 0,IncidentNumber,DateOfCall,CalYear,TimeOfCall,HourOfCall,IncidentGroup,StopCodeDescription,SpecialServiceType,PropertyCategory,PropertyType,AddressQualifier,Postcode_full,Postcode_district,UPRN,USRN,IncGeo_BoroughCode,IncGeo_BoroughName,ProperCase,IncGeo_WardCode,IncGeo_WardName,IncGeo_WardNameNew,Easting_m,Northing_m,Easting_rounded,Northing_rounded,Latitude,Longitude,FRS,IncidentStationGround,FirstPumpArriving_AttendanceTime,FirstPumpArriving_DeployedFromStation,SecondPumpArriving_AttendanceTime,SecondPumpArriving_DeployedFromStation,NumStationsWithPumpsAttending,NumPumpsAttending,PumpCount,PumpMinutesRounded,Notional Cost (£),NumCalls
count,1824973,1824973,1824973.0,1824973,1824973.0,1824967,1824973,598112,1824967,1824967,1824972,905076,1824973,1683640.0,1662102.0,1824973,1824973,1824973,1824333,1824333,1824333,905076.0,905076.0,1824973.0,1824973.0,905076.0,905076.0,1824973,1824972,1686620.0,1686609,654385.0,654378,1810351.0,1810351.0,1824973.0,1824973.0,1824973.0,1823124.0
unique,1824973,5934,,86394,,3,11,21,9,292,11,120319,331,,,33,33,33,899,1484,1481,,,,,,,1,103,,117,,114,,,,,,
top,054211-31032025,12-Jul-21,,18:02:06,,False Alarm,AFA,Effecting entry/exit,Dwelling,Purpose Built Flats/Maisonettes - 4 to 9 storeys,Correct incident location,SW17 0QT,CR0,,,E09000033,WESTMINSTER,Westminster,E05013808,West End,West End,,,,,,,London,Soho,,Soho,,Soho,,,,,,
freq,1,1058,,56,,894018,676133,130776,911806,232472,1024705,1821,35945,,,133650,133650,133650,33366,27583,27583,,,,,,,1824973,63802,,56146,,16886,,,,,,
mean,,,2016.695,,13.4626,,,,,,,,,21693560000.0,20320990.0,,,,,,,530630.371066,180366.797409,530658.2,180484.7,51.443038,-0.11886,,,318.3994,,395.596258,,1.370929,1.555361,1.592914,76.24512,407.816,1.327807
std,,,4.902916,,6.258045,,,,,,,,,47320870000.0,4370918.0,,,,,,,10339.286666,7435.775843,9725.051,7360.098,1.816032,0.149027,,,138.4763,,153.169852,,0.6877263,0.8453713,1.582791,562.3934,2701.663,1.487098
min,,,2009.0,,0.0,,,,,,,,,0.0,0.0,,,,,,,503582.0,155901.0,503550.0,155950.0,0.0,-0.510155,,,1.0,,1.0,,1.0,1.0,1.0,60.0,255.0,1.0
25%,,,2012.0,,9.0,,,,,,,,,0.0,20400960.0,,,,,,,525176.0,175993.0,525250.0,176150.0,51.468961,-0.197529,,,231.0,,295.0,,1.0,1.0,1.0,60.0,290.0,1.0
50%,,,2017.0,,14.0,,,,,,,,,0.0,21201330.0,,,,,,,530796.0,180983.0,530950.0,181050.0,51.512883,-0.116989,,,297.0,,369.0,,1.0,1.0,1.0,60.0,328.0,1.0
75%,,,2021.0,,19.0,,,,,,,,,10012840000.0,22101160.0,,,,,,,536834.0,184949.0,536350.0,185150.0,51.547829,-0.02863,,,379.0,,464.0,,2.0,2.0,2.0,60.0,364.0,1.0


Globalement peu de valeurs manquantes dans les données d'incidents !

<span style="color:#e91e63; font-size:1em; font-weight:bold;">4. Pré-nettoyage et typage: </span>

In [5]:
# vérification des formats d'IncidentNumber:
print("Format d'IncidentNumber (premiers 10) :", df_all_incidents['IncidentNumber'].astype(str).str[:20].unique()[:10])

# Vérification des valeurs uniques dans IncidentNumber:
print("Nombre de valeurs uniques dans IncidentNumber :", df_all_incidents['IncidentNumber'].nunique())

Format d'IncidentNumber (premiers 10) : ['235138081.00' '1091.00' '2091.00' '3091.00' '5091.00' '6091.00'
 '8091.00' '9091.00' '10091.00' '11091.00']
Nombre de valeurs uniques dans IncidentNumber : 1824973


<span style="color:#e91e63; font-size:1em; font-weight:bold;">4. Nettoyage et typage </span>

In [6]:
# Nettoyage initial de l'ID:
df_all_incidents['IncidentNumber'] = (
    df_all_incidents['IncidentNumber']
    .astype(str)
    .str.strip()
    .str.replace(r'\.0+$', '', regex=True)
)

# Conversion DateOfCall:
# Conversion robuste de DateOfCall :
df_all_incidents['DateOfCall'] = pd.to_datetime(
    df_all_incidents['DateOfCall'].astype(str).str.strip(),
    dayfirst=True,
    errors='coerce'
)

# vérification des dates converties:
valid_ratio = df_all_incidents['DateOfCall'].notna().mean()
print(f"Taux de dates valides : {round(valid_ratio * 100, 2)}%")
if valid_ratio < 0.9:
    print("Trop de dates invalides, vérifier le format source.")

# Nettoyage complémentaire:
for col in ['Latitude', 'Longitude']:
    df_all_incidents[col] = df_all_incidents[col].astype(str).str.replace(',', '.', regex=False)

for col in ['UPRN', 'USRN']:
    df_all_incidents[col] = df_all_incidents[col].astype(str).str.replace(',', '', regex=False)

colonnes_entiers = [
    'NumCalls', 'Easting_m', 'Northing_m', 'Easting_rounded', 'Northing_rounded',
    'NumStationsWithPumpsAttending', 'NumPumpsAttending',
    'FirstPumpArriving_AttendanceTime', 'SecondPumpArriving_AttendanceTime'
]
for col in colonnes_entiers:
    if col in df_all_incidents.columns:
        df_all_incidents[col] = pd.to_numeric(df_all_incidents[col], errors='coerce').astype('Int64')

df_all_incidents = df_all_incidents.rename(columns={'Notional Cost (£)': 'NotionalCost'})

# Vérifications globales:
print("Les données sont maintenant nettoyées et transformées !")
print("Taille finale :", df_all_incidents.shape)
print("Période :", df_all_incidents['CalYear'].min(), "→", df_all_incidents['CalYear'].max())
print("Aperçu des 5 premières lignes du DataFrame nettoyé :")
display(df_all_incidents.head())

print("Informations sur le DataFrame (types de données et valeurs non-nulles) :")
display(df_all_incidents.info())

display(df_all_incidents.describe(include='all'))

# vérifier les valeurs manquantes:
print("Valeurs manquantes par colonne :")
display(df_all_incidents.isnull().sum())

# vérifier les doublons:
print("Nombre de doublons dans le DataFrame :", df_all_incidents.duplicated().sum())

# vérifier les values uniques dans toutes les colonnes
for col in df_all_incidents.columns:
    if df_all_incidents[col].dtype == 'object':
        unique_values = df_all_incidents[col].unique()
        print(f"Colonne '{col}' - Valeurs uniques ({len(unique_values)}): {unique_values[:100]}...")


  df_all_incidents['DateOfCall'] = pd.to_datetime(


Taux de dates valides : 100.0%
Les données sont maintenant nettoyées et transformées !
Taille finale : (1824973, 39)
Période : 2009 → 2025
Aperçu des 5 premières lignes du DataFrame nettoyé :


Unnamed: 0,IncidentNumber,DateOfCall,CalYear,TimeOfCall,HourOfCall,IncidentGroup,StopCodeDescription,SpecialServiceType,PropertyCategory,PropertyType,AddressQualifier,Postcode_full,Postcode_district,UPRN,USRN,IncGeo_BoroughCode,IncGeo_BoroughName,ProperCase,IncGeo_WardCode,IncGeo_WardName,IncGeo_WardNameNew,Easting_m,Northing_m,Easting_rounded,Northing_rounded,Latitude,Longitude,FRS,IncidentStationGround,FirstPumpArriving_AttendanceTime,FirstPumpArriving_DeployedFromStation,SecondPumpArriving_AttendanceTime,SecondPumpArriving_DeployedFromStation,NumStationsWithPumpsAttending,NumPumpsAttending,PumpCount,PumpMinutesRounded,NotionalCost,NumCalls
0,235138081,2009-01-01,2009,00:00:37,0,Special Service,Special Service,RTC,Road Vehicle,Car,In street close to gazetteer location,SW11 4LB,SW11,,,E09000032,WANDSWORTH,Wandsworth,E05014010,Battersea Park,Battersea Park,528652,176830,528650,176850,51.47581173,-0.148894436,London,Battersea,319.0,Battersea,342.0,Clapham,2.0,2.0,2,60,255,1
1,1091,2009-01-01,2009,00:00:46,0,Special Service,Special Service,Assist other agencies,Outdoor,Lake/pond/reservoir,Open land/water - nearest gazetteer location,SE1 7SG,SE1,,,E09000022,LAMBETH,Lambeth,E05014118,Waterloo & South Bank,Waterloo & South Bank,530485,179007,530450,179050,51.49495659,-0.12171203,London,Lambeth,,,,,,,1,60,255,1
2,2091,2009-01-01,2009,00:03:00,0,Fire,Secondary Fire,,Outdoor,Road surface/pavement,In street outside gazetteer location,N9 9EL,N9,,,E09000010,ENFIELD,Enfield,E05013682,Haselbury,Haselbury,533773,194492,533750,194450,51.63334229,-0.068487965,London,Edmonton,308.0,Edmonton,,,1.0,1.0,1,60,255,2
3,3091,2009-01-01,2009,00:04:27,0,Fire,Secondary Fire,,Outdoor,Domestic garden (vegetation not equipment),On land associated with building,UB10 0DG,UB10,100021491527.0,21401491.0,E09000017,HILLINGDON,Hillingdon,E05013571,Hillingdon East,Hillingdon East,507738,182805,507750,182850,51.53388171,-0.448088609,London,Hillingdon,210.0,Hillingdon,,,1.0,1.0,1,60,255,2
4,5091,2009-01-01,2009,00:05:39,0,Fire,Secondary Fire,,Outdoor,Cycle path/public footpath/bridleway,In street outside gazetteer location,N7 8HG,N7,,,E09000019,ISLINGTON,Islington,E05013708,Laycock,Laycock,531058,185307,531050,185350,51.55144063,-0.111120228,London,Holloway,233.0,Holloway,250.0,Holloway,1.0,2.0,2,60,255,1


Informations sur le DataFrame (types de données et valeurs non-nulles) :
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1824973 entries, 0 to 1824972
Data columns (total 39 columns):
 #   Column                                  Dtype         
---  ------                                  -----         
 0   IncidentNumber                          object        
 1   DateOfCall                              datetime64[ns]
 2   CalYear                                 int64         
 3   TimeOfCall                              object        
 4   HourOfCall                              int64         
 5   IncidentGroup                           object        
 6   StopCodeDescription                     object        
 7   SpecialServiceType                      object        
 8   PropertyCategory                        object        
 9   PropertyType                            object        
 10  AddressQualifier                        object        
 11  Postcode_full                

None

Unnamed: 0,IncidentNumber,DateOfCall,CalYear,TimeOfCall,HourOfCall,IncidentGroup,StopCodeDescription,SpecialServiceType,PropertyCategory,PropertyType,AddressQualifier,Postcode_full,Postcode_district,UPRN,USRN,IncGeo_BoroughCode,IncGeo_BoroughName,ProperCase,IncGeo_WardCode,IncGeo_WardName,IncGeo_WardNameNew,Easting_m,Northing_m,Easting_rounded,Northing_rounded,Latitude,Longitude,FRS,IncidentStationGround,FirstPumpArriving_AttendanceTime,FirstPumpArriving_DeployedFromStation,SecondPumpArriving_AttendanceTime,SecondPumpArriving_DeployedFromStation,NumStationsWithPumpsAttending,NumPumpsAttending,PumpCount,PumpMinutesRounded,NotionalCost,NumCalls
count,1824973,1824973,1824973.0,1824973,1824973.0,1824967,1824973,598112,1824967,1824967,1824972,905076,1824973,1824973.0,1824973.0,1824973,1824973,1824973,1824333,1824333,1824333,905076.0,905076.0,1824973.0,1824973.0,1824973.0,1824973.0,1824973,1824972,1686620.0,1686609,654385.0,654378,1810351.0,1810351.0,1824973.0,1824973.0,1824973.0,1823124.0
unique,1824973,,,86394,,3,11,21,9,292,11,120319,331,244176.0,54098.0,33,33,33,899,1484,1481,,,,,583093.0,591673.0,1,103,,117,,114,,,,,,
top,054211-31032025,,,18:02:06,,False Alarm,AFA,Effecting entry/exit,Dwelling,Purpose Built Flats/Maisonettes - 4 to 9 storeys,Correct incident location,SW17 0QT,CR0,0.0,,E09000033,WESTMINSTER,Westminster,E05013808,West End,West End,,,,,,,London,Soho,,Soho,,Soho,,,,,,
freq,1,,,56,,894018,676133,130776,911806,232472,1024705,1821,35945,919897.0,162871.0,133650,133650,133650,33366,27583,27583,,,,,919897.0,919897.0,1824973,63802,,56146,,16886,,,,,,
mean,,2017-03-12 20:50:59.275944960,2016.695,,13.4626,,,,,,,,,,,,,,,,,530630.371066,180366.797409,530658.198916,180484.681445,,,,,318.399393,,395.596258,,1.370929,1.555361,1.592914,76.24512,407.816,1.327807
min,,2009-01-01 00:00:00,2009.0,,0.0,,,,,,,,,,,,,,,,,503582.0,155901.0,503550.0,155950.0,,,,,1.0,,1.0,,1.0,1.0,1.0,60.0,255.0,1.0
25%,,2012-09-29 00:00:00,2012.0,,9.0,,,,,,,,,,,,,,,,,525176.0,175993.0,525250.0,176150.0,,,,,231.0,,295.0,,1.0,1.0,1.0,60.0,290.0,1.0
50%,,2017-04-15 00:00:00,2017.0,,14.0,,,,,,,,,,,,,,,,,530796.0,180983.0,530950.0,181050.0,,,,,297.0,,369.0,,1.0,1.0,1.0,60.0,328.0,1.0
75%,,2021-09-02 00:00:00,2021.0,,19.0,,,,,,,,,,,,,,,,,536834.0,184949.0,536350.0,185150.0,,,,,379.0,,464.0,,2.0,2.0,2.0,60.0,364.0,1.0
max,,2025-03-31 00:00:00,2025.0,,23.0,,,,,,,,,,,,,,,,,561126.0,200906.0,611150.0,302450.0,,,,,1200.0,,1200.0,,46.0,106.0,951.0,525629.0,2277726.0,369.0


Valeurs manquantes par colonne :


IncidentNumber                                  0
DateOfCall                                      0
CalYear                                         0
TimeOfCall                                      0
HourOfCall                                      0
IncidentGroup                                   6
StopCodeDescription                             0
SpecialServiceType                        1226861
PropertyCategory                                6
PropertyType                                    6
AddressQualifier                                1
Postcode_full                              919897
Postcode_district                               0
UPRN                                            0
USRN                                            0
IncGeo_BoroughCode                              0
IncGeo_BoroughName                              0
ProperCase                                      0
IncGeo_WardCode                               640
IncGeo_WardName                               640


Nombre de doublons dans le DataFrame : 0
Colonne 'IncidentNumber' - Valeurs uniques (1824973): ['235138081' '1091' '2091' '3091' '5091' '6091' '8091' '9091' '10091'
 '11091' '12091' '14091' '15091' '21091' '20091' '17091' '18091' '22091'
 '23091' '32091' '34091' '36091' '39091' '38091' '33091' '41091' '43091'
 '46091' '48091' '44091' '49091' '53091' '54091' '55091' '58091' '60091'
 '61091' '62091' '63091' '66091' '65091' '67091' '68091' '69091' '70091'
 '88091' '91091' '89091' '93091' '96091' '99091' '101091' '103091'
 '105091' '106091' '107091' '109091' '111091' '110091' '114091' '118091'
 '123091' '125091' '131091' '132091' '135091' '136091' '140091' '139091'
 '142091' '143091' '144091' '145091' '146091' '147091' '148091' '149091'
 '150091' '153091' '155091' '157091' '156091' '158091' '160091' '162091'
 '163091' '168091' '169091' '171091' '170091' '172091' '173091' '175091'
 '174091' '176091' '178091' '179091' '180091' '181091' '185091']...
Colonne 'TimeOfCall' - Valeurs uniques (863

In [7]:
print(df_all_incidents['DateOfCall'].head(10))
print(df_all_incidents['DateOfCall'].dropna().unique()[:10])
print(type(df_all_incidents['DateOfCall'].iloc[0]))

0   2009-01-01
1   2009-01-01
2   2009-01-01
3   2009-01-01
4   2009-01-01
5   2009-01-01
6   2009-01-01
7   2009-01-01
8   2009-01-01
9   2009-01-01
Name: DateOfCall, dtype: datetime64[ns]
<DatetimeArray>
['2009-01-01 00:00:00', '2009-01-02 00:00:00', '2009-01-03 00:00:00',
 '2009-01-04 00:00:00', '2009-01-05 00:00:00', '2009-01-06 00:00:00',
 '2009-01-07 00:00:00', '2009-01-08 00:00:00', '2009-01-09 00:00:00',
 '2009-01-10 00:00:00']
Length: 10, dtype: datetime64[ns]
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [9]:
df_all_incidents.head()

Unnamed: 0,IncidentNumber,DateOfCall,CalYear,TimeOfCall,HourOfCall,IncidentGroup,StopCodeDescription,SpecialServiceType,PropertyCategory,PropertyType,AddressQualifier,Postcode_full,Postcode_district,UPRN,USRN,IncGeo_BoroughCode,IncGeo_BoroughName,ProperCase,IncGeo_WardCode,IncGeo_WardName,IncGeo_WardNameNew,Easting_m,Northing_m,Easting_rounded,Northing_rounded,Latitude,Longitude,FRS,IncidentStationGround,FirstPumpArriving_AttendanceTime,FirstPumpArriving_DeployedFromStation,SecondPumpArriving_AttendanceTime,SecondPumpArriving_DeployedFromStation,NumStationsWithPumpsAttending,NumPumpsAttending,PumpCount,PumpMinutesRounded,NotionalCost,NumCalls
0,235138081,2009-01-01,2009,00:00:37,0,Special Service,Special Service,RTC,Road Vehicle,Car,In street close to gazetteer location,SW11 4LB,SW11,,,E09000032,WANDSWORTH,Wandsworth,E05014010,Battersea Park,Battersea Park,528652,176830,528650,176850,51.47581173,-0.148894436,London,Battersea,319.0,Battersea,342.0,Clapham,2.0,2.0,2,60,255,1
1,1091,2009-01-01,2009,00:00:46,0,Special Service,Special Service,Assist other agencies,Outdoor,Lake/pond/reservoir,Open land/water - nearest gazetteer location,SE1 7SG,SE1,,,E09000022,LAMBETH,Lambeth,E05014118,Waterloo & South Bank,Waterloo & South Bank,530485,179007,530450,179050,51.49495659,-0.12171203,London,Lambeth,,,,,,,1,60,255,1
2,2091,2009-01-01,2009,00:03:00,0,Fire,Secondary Fire,,Outdoor,Road surface/pavement,In street outside gazetteer location,N9 9EL,N9,,,E09000010,ENFIELD,Enfield,E05013682,Haselbury,Haselbury,533773,194492,533750,194450,51.63334229,-0.068487965,London,Edmonton,308.0,Edmonton,,,1.0,1.0,1,60,255,2
3,3091,2009-01-01,2009,00:04:27,0,Fire,Secondary Fire,,Outdoor,Domestic garden (vegetation not equipment),On land associated with building,UB10 0DG,UB10,100021491527.0,21401491.0,E09000017,HILLINGDON,Hillingdon,E05013571,Hillingdon East,Hillingdon East,507738,182805,507750,182850,51.53388171,-0.448088609,London,Hillingdon,210.0,Hillingdon,,,1.0,1.0,1,60,255,2
4,5091,2009-01-01,2009,00:05:39,0,Fire,Secondary Fire,,Outdoor,Cycle path/public footpath/bridleway,In street outside gazetteer location,N7 8HG,N7,,,E09000019,ISLINGTON,Islington,E05013708,Laycock,Laycock,531058,185307,531050,185350,51.55144063,-0.111120228,London,Holloway,233.0,Holloway,250.0,Holloway,1.0,2.0,2,60,255,1


<span style="color:#e91e63; font-size:1em; font-weight:bold;">5. Export des résultats nettoyés</span>

In [10]:
# Définition du chemin d’export
output_path = Path("../../data/raw/Cleaned_data/InUSE/cleaned_data_incidents.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)

# Exporter au format CSV
df_all_incidents.to_csv(output_path, index=False, encoding='utf-8')

print("l'export est mnt terminé => le fichier est sauvegardé dans:", output_path)

l'export est mnt terminé => le fichier est sauvegardé dans: ..\..\data\raw\Cleaned_data\InUSE\cleaned_data_incidents.csv
