In [2]:
# Import libraries

import pandas as pd

# Load the raw dataset, discard the unlabeled 'Unnamed: 0' column when the
# CSV contains one (errors="ignore" makes the drop a no-op otherwise), then
# remove every row that has at least one missing value -- dropna() works on
# rows by default, not on columns.
file_path = './catastrophes_naturelles_data.csv'
data = (
    pd.read_csv(file_path)
    .drop(columns=['Unnamed: 0'], errors="ignore")
    .dropna()
)

# Names of the columns that are converted to numeric values below.
numerical_cols = [
    "temperature",
    "humidite",
    "force_moyenne_du_vecteur_de_vent",
    "force_du_vecteur_de_vent_max",
    "pluie_intensite_max",
    "sismicite",
    "concentration_gaz",
    "pluie_totale",
]

# Convert each listed column to numbers; errors="coerce" replaces any value
# that cannot be parsed with NaN instead of raising.
for col in numerical_cols:
    data[col] = pd.to_numeric(data[col], errors="coerce")

# The coercion above can introduce new NaN values after missing rows were
# already removed, so drop the rows whose numeric values failed to parse;
# otherwise they would end up in the cleaned output.
data = data.dropna(subset=numerical_cols)

# Normalize the catastrophe column in a single pass: lowercase the text,
# trim the surrounding square brackets, remove single quotes, then split on
# ", " so that each cell ends up holding a list of labels.
data["catastrophe"] = (
    data["catastrophe"]
    .str.lower()
    .str.strip("[]")
    .str.replace("'", "")
    .str.split(", ")
)

# Parse the date column; errors="coerce" turns unparseable entries into NaT
# instead of raising.
data["date"] = pd.to_datetime(data["date"], errors="coerce")

# Rows whose date could not be parsed would carry a missing date (and missing
# year/month/day values) into the cleaned output, so remove them before
# deriving the calendar parts.
data = data.dropna(subset=["date"])

# Split the parsed date into separate year / month / day columns.
data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month
data['day'] = data['date'].dt.day

# Clean the quartier column by trimming leading and trailing whitespace;
# assign() replaces the existing column in place of its current position.
data = data.assign(quartier=data["quartier"].str.strip())

# Correct the 'innondation' misspelling to 'inondation' inside every label of
# each catastrophe list.
data['catastrophe'] = data['catastrophe'].apply(
    lambda labels: [
        label.replace('innondation', 'inondation') for label in labels
    ]
)

# Add one boolean column per tracked catastrophe type: True when the row's
# label list contains that type.
for kind in ("seisme", "inondation"):
    data[kind] = data["catastrophe"].apply(lambda labels: kind in labels)

# Write the cleaned dataset to a CSV file, leaving out the DataFrame index.
cleaned_file_path = "cleaned_dataset.csv"
data.to_csv(path_or_buf=cleaned_file_path, index=False)

# Final expression of the cell (presumably there so the notebook renders the
# resulting DataFrame).
data

Unnamed: 0,temperature,humidite,force_moyenne_du_vecteur_de_vent,force_du_vecteur_de_vent_max,pluie_intensite_max,date,quartier,sismicite,concentration_gaz,pluie_totale,catastrophe,year,month,day,seisme,inondation
0,5.7,75.6,3.8,4.3,0.0,2170-01-01,Zone 1,0.62,231.56,182.37,[aucun],2170,1,1,False,False
1,5.7,75.6,3.8,4.3,0.0,2170-01-01,Zone 2,0.94,248.20,903.20,[seisme],2170,1,1,True,False
2,5.7,75.6,3.8,4.3,0.0,2170-01-01,Zone 3,0.95,240.55,363.06,[seisme],2170,1,1,True,False
3,5.7,75.6,3.8,4.3,0.0,2170-01-01,Zone 4,0.39,159.10,473.93,[aucun],2170,1,1,False,False
4,5.7,75.6,3.8,4.3,0.0,2170-01-01,Zone 5,0.41,143.29,475.72,[inondation],2170,1,1,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3389,42.3,28.7,3.1,7.4,0.0,2171-08-09,Zone 5,0.36,149.82,17.18,[inondation],2171,8,9,False,True
3390,27.4,46.8,3.8,6.3,0.0,2171-09-09,Zone 1,0.86,184.18,63.64,[aucun],2171,9,9,False,False
3392,27.4,46.8,3.8,6.3,0.0,2171-09-09,Zone 3,0.53,204.04,668.92,[aucun],2171,9,9,False,False
3393,27.4,46.8,3.8,6.3,0.0,2171-09-09,Zone 4,0.42,182.63,660.69,[inondation],2171,9,9,False,True
