In [88]:
import pandas as pd
import requests
import time
import os
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import seaborn as sns

from sklearn.cluster import DBSCAN
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm

#### Visu de l'historique des feux

In [89]:
feux = pd.read_csv('https://projet-incendie.s3.eu-west-3.amazonaws.com/historique_incendies_avec_coordonnees.csv', sep=';', encoding='utf-8')
pd.set_option('display.max_columns', None)

feux.head(10)

Unnamed: 0,Département,Code INSEE,Nom de la commune,latitude,longitude,Date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux
0,66,66190,Salses-le-Château,42.83338,2.91818,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,1.0,Involontaire (particulier),1
1,06,06074,Lantosque,43.973468,7.312593,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,1.0,,1
2,2A,2A139,Lecci,41.679373,9.317798,2014-01-09 16:14:00,10,,,,,,,,,,,,1
3,65,65173,Esterre,42.874901,0.006078,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,,,1
4,2B,2B058,Canari,42.845933,9.331333,2014-01-11 14:00:00,100,,,,,,,,,,,,1
5,65,65233,Jarret,43.0823,-0.014354,2014-01-12 16:41:00,200,0.0,,200.0,,,200.0,0.0,0.0,0.0,,,1
6,30,30051,Branoux-les-Taillades,44.2196,3.99124,2014-01-13 19:24:00,2500,,,,,,,,,,,,1
7,66,66196,Sorède,42.530402,2.956454,2014-01-17 16:40:00,10000,,10000.0,,,,,,,,1.0,Involontaire (travaux),1
8,2B,2B242,Poggio-Mezzana,42.39768,9.49393,2014-01-18 14:57:00,3000,0.0,3000.0,,,,,,,0.0,1.0,,1
9,974,97421,Salazie,-21.024383,55.543545,2014-01-21 14:08:00,3000,0.0,,3000.0,,,3000.0,0.0,0.0,0.0,,,1


In [90]:
#ajout du code postale via le code insee pour joindre le fichier corse pr la suite
df = pd.read_csv('correspondance-code-insee-code-postal.csv', sep=';', encoding='utf-8')
df = df.drop(columns=['Département','Région','Statut','Altitude Moyenne','Superficie','Population','geo_shape','ID Geofla','Code Commune','Code Canton','Code Arrondissement','Code Département','Code Région'], axis=1)
df.head()

Unnamed: 0,Code INSEE,Code Postal,Commune,geo_point_2d
0,66033,66300,CAMELAS,"42.639460715710925, 2.6889148905394613"
1,64254,64370,HAGETAUBIN,"43.510561909594024, -0.612895161323306"
2,47260,47270,SAINT-MAURIN,"44.21584680376108, 0.8936370457330151"
3,80118,80150,BOUFFLERS,"50.25894479050434, 2.021288658965274"
4,46147,46220,LAGARDELLE,"44.48709070580215, 1.170693334968643"


In [91]:
df_feux = pd.merge(feux, df, on=['Code INSEE'], how='left')


In [92]:
df_feux.head()

Unnamed: 0,Département,Code INSEE,Nom de la commune,latitude,longitude,Date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux,Code Postal,Commune,geo_point_2d
0,66,66190,Salses-le-Château,42.83338,2.91818,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,1.0,Involontaire (particulier),1,66600,SALSES-LE-CHATEAU,"42.837198552631975, 2.915935802683221"
1,06,06074,Lantosque,43.973468,7.312593,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,1.0,,1,6450,LANTOSQUE,"43.970645754919815, 7.30485730116232"
2,2A,2A139,Lecci,41.679373,9.317798,2014-01-09 16:14:00,10,,,,,,,,,,,,1,20137,LECCI,"41.66548307082755, 9.319067734429447"
3,65,65173,Esterre,42.874901,0.006078,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,,,1,65120,ESTERRE,"42.871592054228806, 0.010558551620595"
4,2B,2B058,Canari,42.845933,9.331333,2014-01-11 14:00:00,100,,,,,,,,,,,,1,20217,CANARI,"42.84305421033735, 9.345566436210337"


#### Rajout de la météo

In [93]:
df_meteo=pd.read_csv('corse_meteo_cleaned_insee_features.csv', sep=';', encoding='utf-8')
df_meteo.head()

Unnamed: 0,POSTE,DATE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,année,mois,jour,Code INSEE,Code Postal,nom_de_la_commune,moyenne precipitations année,moyenne precipitations mois,moyenne evapotranspiration année,moyenne evapotranspiration mois,moyenne vitesse vent année,moyenne vitesse vent mois,moyenne temperature année,moyenne temperature mois
0,20004014,2006-01-01,16.0,,9.0,,11.5,,,10.25,,,2.5,10.3,,,,,,,,,,,,,,,,,,,,,,2006,1,1,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247
1,20004014,2006-01-02,1.5,,7.0,,10.5,,,8.75,,,3.5,8.8,,,,,,,,,,,,,,,,,,1.0,,,,2006,1,2,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247
2,20004014,2006-01-03,0.3,,1.5,,13.0,,,7.25,,,11.5,7.3,,,,,,,,,,,,,,,,,,,,,,2006,1,3,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247
3,20004014,2006-01-04,0.2,,1.0,,11.5,,,6.25,,,10.5,6.3,,,,,,,,,,,,,,,,,,,,,,2006,1,4,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247
4,20004014,2006-01-05,1.0,,1.5,,12.0,,,6.75,,,10.5,6.8,,,,,,,,,,,,,,,,,,,,,,2006,1,5,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247


In [94]:
df_meteo = df_meteo.drop(columns=['DATE'])

In [95]:
df_meteo = df_meteo.rename(columns={
    "année": "year",
    "mois": "month",
    "jour": "day"
})

# Étape 2 : Créer une colonne datetime à partir de ces 3 colonnes
df_meteo["date"] = pd.to_datetime(df_meteo[["year", "month", "day"]])

df_meteo.head()

Unnamed: 0,POSTE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,year,month,day,Code INSEE,Code Postal,nom_de_la_commune,moyenne precipitations année,moyenne precipitations mois,moyenne evapotranspiration année,moyenne evapotranspiration mois,moyenne vitesse vent année,moyenne vitesse vent mois,moyenne temperature année,moyenne temperature mois,date
0,20004014,16.0,,9.0,,11.5,,,10.25,,,2.5,10.3,,,,,,,,,,,,,,,,,,,,,,2006,1,1,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-01
1,20004014,1.5,,7.0,,10.5,,,8.75,,,3.5,8.8,,,,,,,,,,,,,,,,,,1.0,,,,2006,1,2,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-02
2,20004014,0.3,,1.5,,13.0,,,7.25,,,11.5,7.3,,,,,,,,,,,,,,,,,,,,,,2006,1,3,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-03
3,20004014,0.2,,1.0,,11.5,,,6.25,,,10.5,6.3,,,,,,,,,,,,,,,,,,,,,,2006,1,4,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-04
4,20004014,1.0,,1.5,,12.0,,,6.75,,,10.5,6.8,,,,,,,,,,,,,,,,,,,,,,2006,1,5,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-05


In [96]:
# df_meteo['date'] = pd.to_datetime(df_meteo['date'])
df_meteo['date'] = pd.to_datetime(df_meteo['date']).dt.normalize()

In [97]:
df_meteo['Code INSEE'] = df_meteo['Code INSEE'].astype(str)

In [98]:
df_meteo['Code INSEE'].dtype

dtype('O')

In [99]:
df_feux['Date'] = pd.to_datetime(df_feux['Date']).dt.normalize()



In [100]:

df_meteo['Code Postal'] = df_meteo['Code Postal'].astype(str)

In [101]:
df_feux['Code INSEE'].dtype

dtype('O')

In [102]:
df_feux.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50166 entries, 0 to 50165
Data columns (total 22 columns):
 #   Column                                      Non-Null Count  Dtype         
---  ------                                      --------------  -----         
 0   Département                                 50166 non-null  object        
 1   Code INSEE                                  50166 non-null  object        
 2   Nom de la commune                           50166 non-null  object        
 3   latitude                                    50147 non-null  float64       
 4   longitude                                   50147 non-null  float64       
 5   Date                                        50166 non-null  datetime64[ns]
 6   Surface parcourue (m2)                      50166 non-null  int64         
 7   Surface forêt (m2)                          38499 non-null  float64       
 8   Surface maquis garrigues (m2)               19683 non-null  float64       
 9   Autres

### Merge Feux et météo

In [103]:
feux_corse = df_feux[df_feux['Département'].isin(['2A', '2B', 2])]


In [104]:
feux_corse = feux_corse.rename(columns={'Date': 'date'})


In [105]:
feux_corse.head()

Unnamed: 0,Département,Code INSEE,Nom de la commune,latitude,longitude,date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux,Code Postal,Commune,geo_point_2d
2,2A,2A139,Lecci,41.679373,9.317798,2014-01-09,10,,,,,,,,,,,,1,20137,LECCI,"41.66548307082755, 9.319067734429447"
4,2B,2B058,Canari,42.845933,9.331333,2014-01-11,100,,,,,,,,,,,,1,20217,CANARI,"42.84305421033735, 9.345566436210337"
8,2B,2B242,Poggio-Mezzana,42.39768,9.49393,2014-01-18,3000,0.0,3000.0,,,,,,,0.0,1.0,,1,20230,POGGIO-MEZZANA,"42.40140901998409, 9.511957356683453"
13,2B,2B036,Bigorno,42.527867,9.301862,2014-01-26,10000,0.0,10000.0,,,,,,,0.0,1.0,,1,20252,BIGORNO,"42.531034959633104, 9.302259027992637"
14,2B,2B341,Venaco,42.231867,9.172566,2014-01-26,5000,0.0,5000.0,,,,,,,0.0,1.0,,1,20231,VENACO,"42.21399468502561, 9.137540408015997"


In [106]:
mask = feux_corse['Nom de la commune'] == 'Ajaccio'
feux_ajaccio = feux_corse[mask]
feux_ajaccio.head()

Unnamed: 0,Département,Code INSEE,Nom de la commune,latitude,longitude,date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux,Code Postal,Commune,geo_point_2d
57,2A,2A004,Ajaccio,41.926399,8.737603,2014-02-25,400,,,,,,,,,,,,1,20000,AJACCIO,"41.93479266376617, 8.701322759741174"
579,2A,2A004,Ajaccio,41.926399,8.737603,2014-05-14,500,,,,,,,,,,,,1,20000,AJACCIO,"41.93479266376617, 8.701322759741174"
586,2A,2A004,Ajaccio,41.926399,8.737603,2014-05-14,1000,,,,,,,,,,,,1,20000,AJACCIO,"41.93479266376617, 8.701322759741174"
875,2A,2A004,Ajaccio,41.926399,8.737603,2014-06-22,40,,40.0,,,,,,,0.0,,,1,20000,AJACCIO,"41.93479266376617, 8.701322759741174"
945,2A,2A004,Ajaccio,41.926399,8.737603,2014-06-30,100,,100.0,,,,,,,0.0,,,1,20000,AJACCIO,"41.93479266376617, 8.701322759741174"


In [107]:
# feux_corse.shape

In [108]:
 df= pd.merge(df_meteo, feux_corse, on=['date', 'Code Postal'], how='left')

In [109]:
df.head()

Unnamed: 0,POSTE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,year,month,day,Code INSEE_x,Code Postal,nom_de_la_commune,moyenne precipitations année,moyenne precipitations mois,moyenne evapotranspiration année,moyenne evapotranspiration mois,moyenne vitesse vent année,moyenne vitesse vent mois,moyenne temperature année,moyenne temperature mois,date,Département,Code INSEE_y,Nom de la commune,latitude,longitude,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux,Commune,geo_point_2d
0,20004014,16.0,,9.0,,11.5,,,10.25,,,2.5,10.3,,,,,,,,,,,,,,,,,,,,,,2006,1,1,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-01,,,,,,,,,,,,,,,,,,,,
1,20004014,1.5,,7.0,,10.5,,,8.75,,,3.5,8.8,,,,,,,,,,,,,,,,,,1.0,,,,2006,1,2,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-02,,,,,,,,,,,,,,,,,,,,
2,20004014,0.3,,1.5,,13.0,,,7.25,,,11.5,7.3,,,,,,,,,,,,,,,,,,,,,,2006,1,3,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-03,,,,,,,,,,,,,,,,,,,,
3,20004014,0.2,,1.0,,11.5,,,6.25,,,10.5,6.3,,,,,,,,,,,,,,,,,,,,,,2006,1,4,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-04,,,,,,,,,,,,,,,,,,,,
4,20004014,1.0,,1.5,,12.0,,,6.75,,,10.5,6.8,,,,,,,,,,,,,,,,,,,,,,2006,1,5,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-05,,,,,,,,,,,,,,,,,,,,


In [110]:
mask = df['Nom de la commune'] == 'Ajaccio'
feux_ajaccio = df[mask]
feux_ajaccio.head()

Unnamed: 0,POSTE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,year,month,day,Code INSEE_x,Code Postal,nom_de_la_commune,moyenne precipitations année,moyenne precipitations mois,moyenne evapotranspiration année,moyenne evapotranspiration mois,moyenne vitesse vent année,moyenne vitesse vent mois,moyenne temperature année,moyenne temperature mois,date,Département,Code INSEE_y,Nom de la commune,latitude,longitude,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux,Commune,geo_point_2d
90,20004014,0.0,,8.0,,21.0,,,14.5,,,13.0,14.5,,,,,,,,,,,,,,,,,,,,,,2006,4,1,20004,20000,Ajaccio,1.73001,2.012621,3.325753,3.075749,3.608811,3.670861,15.274144,12.560301,2006-04-01,2A,2A004,Ajaccio,41.926399,8.737603,100.0,0.0,0.0,,,,,,,0.0,,Malveillance,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
123,20004014,0.2,,10.6,,27.0,,,18.8,,,16.4,18.8,,,,,,,,,,,,,,,,,,,,,,2006,5,4,20004,20000,Ajaccio,1.73001,2.242662,3.325753,4.06687,3.608811,3.471918,15.274144,16.137539,2006-05-04,2A,2A004,Ajaccio,41.926399,8.737603,400.0,,,,,,,,,,,,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
134,20004014,0.0,,9.5,,24.5,,,17.0,,,15.0,17.0,,,,,,,,,,,,,,,,,,,,,,2006,5,15,20004,20000,Ajaccio,1.73001,2.242662,3.325753,4.06687,3.608811,3.471918,15.274144,16.137539,2006-05-15,2A,2A004,Ajaccio,41.926399,8.737603,50.0,,,,,,,,,,,,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
150,20004014,1.0,,9.5,,21.0,,,15.25,,,11.5,15.3,,,,,,,,,,,,,,,,,,,,,,2006,5,31,20004,20000,Ajaccio,1.73001,2.242662,3.325753,4.06687,3.608811,3.471918,15.274144,16.137539,2006-05-31,2A,2A004,Ajaccio,41.926399,8.737603,100.0,,,,,,,,,,,,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
157,20004014,0.0,,11.0,,24.5,,,17.75,,,13.5,17.8,,,,,,,,,,,,,,,,,,,,,,2006,6,7,20004,20000,Ajaccio,1.73001,1.152275,3.325753,5.114559,3.608811,3.226636,15.274144,20.825412,2006-06-07,2A,2A004,Ajaccio,41.926399,8.737603,10000.0,0.0,0.0,,,,,,,0.0,2.0,Involontaire (travaux),1.0,AJACCIO,"41.93479266376617, 8.701322759741174"


In [111]:
print(df_fusion['Feux'].value_counts())
mask = df_fusion['Feux'] == 1
print(df_fusion[mask])

Feux
1.0    5751
Name: count, dtype: int64
           POSTE   RR  DRR    TN  HTN    TX  HTX  TM   TMNX  TNSOL  TN50  \
90      20004014  0.0  NaN   8.0  NaN  21.0  NaN NaN  14.50    NaN   NaN   
123     20004014  0.2  NaN  10.6  NaN  27.0  NaN NaN  18.80    NaN   NaN   
134     20004014  0.0  NaN   9.5  NaN  24.5  NaN NaN  17.00    NaN   NaN   
150     20004014  1.0  NaN   9.5  NaN  21.0  NaN NaN  15.25    NaN   NaN   
157     20004014  0.0  NaN  11.0  NaN  24.5  NaN NaN  17.75    NaN   NaN   
...          ...  ...  ...   ...  ...   ...  ...  ..    ...    ...   ...   
291212  20007400  NaN  NaN   NaN  NaN   NaN  NaN NaN    NaN    NaN   NaN   
291213  20007400  NaN  NaN   NaN  NaN   NaN  NaN NaN    NaN    NaN   NaN   
291353  20007400  NaN  NaN   NaN  NaN   NaN  NaN NaN    NaN    NaN   NaN   
291375  20007400  NaN  NaN   NaN  NaN   NaN  NaN NaN    NaN    NaN   NaN   
291424  20007400  NaN  NaN   NaN  NaN   NaN  NaN NaN    NaN    NaN   NaN   

        TAMPLI  TNTXM  FFM  FXI  DXI  HXI  F

In [112]:
df.shape

(273545, 69)

In [113]:
df.columns

Index(['POSTE', 'RR', 'DRR', 'TN', 'HTN', 'TX', 'HTX', 'TM', 'TMNX', 'TNSOL',
       'TN50', 'TAMPLI', 'TNTXM', 'FFM', 'FXI', 'DXI', 'HXI', 'FXY', 'DXY',
       'HXY', 'FXI3S', 'HXI3S', 'UN', 'HUN', 'UX', 'HUX', 'DHUMI40', 'DHUMI80',
       'TSVM', 'UM', 'ORAG', 'BRUME', 'ETPMON', 'ETPGRILLE', 'year', 'month',
       'day', 'Code INSEE_x', 'Code Postal', 'nom_de_la_commune',
       'moyenne precipitations année', 'moyenne precipitations mois',
       'moyenne evapotranspiration année', 'moyenne evapotranspiration mois',
       'moyenne vitesse vent année', 'moyenne vitesse vent mois',
       'moyenne temperature année', 'moyenne temperature mois', 'date',
       'Département', 'Code INSEE_y', 'Nom de la commune', 'latitude',
       'longitude', 'Surface parcourue (m2)', 'Surface forêt (m2)',
       'Surface maquis garrigues (m2)',
       'Autres surfaces naturelles hors forêt (m2)', 'Surfaces agricoles (m2)',
       'Autres surfaces (m2)', 'Surface autres terres boisées (m2)',
       '

In [114]:
df['Feux'].replace(np.nan, 0, inplace=True)
df.head(10)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Feux'].replace(np.nan, 0, inplace=True)


Unnamed: 0,POSTE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,year,month,day,Code INSEE_x,Code Postal,nom_de_la_commune,moyenne precipitations année,moyenne precipitations mois,moyenne evapotranspiration année,moyenne evapotranspiration mois,moyenne vitesse vent année,moyenne vitesse vent mois,moyenne temperature année,moyenne temperature mois,date,Département,Code INSEE_y,Nom de la commune,latitude,longitude,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux,Commune,geo_point_2d
0,20004014,16.0,,9.0,,11.5,,,10.25,,,2.5,10.3,,,,,,,,,,,,,,,,,,,,,,2006,1,1,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-01,,,,,,,,,,,,,,,,,,0.0,,
1,20004014,1.5,,7.0,,10.5,,,8.75,,,3.5,8.8,,,,,,,,,,,,,,,,,,1.0,,,,2006,1,2,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-02,,,,,,,,,,,,,,,,,,0.0,,
2,20004014,0.3,,1.5,,13.0,,,7.25,,,11.5,7.3,,,,,,,,,,,,,,,,,,,,,,2006,1,3,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-03,,,,,,,,,,,,,,,,,,0.0,,
3,20004014,0.2,,1.0,,11.5,,,6.25,,,10.5,6.3,,,,,,,,,,,,,,,,,,,,,,2006,1,4,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-04,,,,,,,,,,,,,,,,,,0.0,,
4,20004014,1.0,,1.5,,12.0,,,6.75,,,10.5,6.8,,,,,,,,,,,,,,,,,,,,,,2006,1,5,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-05,,,,,,,,,,,,,,,,,,0.0,,
5,20004014,0.0,,2.0,,13.5,,,7.75,,,11.5,7.8,,,,,,,,,,,,,,,,,,,,,,2006,1,6,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-06,,,,,,,,,,,,,,,,,,0.0,,
6,20004014,0.0,,4.5,,13.9,,,9.2,,,9.4,9.2,,,,,,,,,,,,,,,,,,,,,,2006,1,7,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-07,,,,,,,,,,,,,,,,,,0.0,,
7,20004014,0.2,,3.5,,13.5,,,8.5,,,10.0,8.5,,,,,,,,,,,,,,,,,,,,,,2006,1,8,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-08,,,,,,,,,,,,,,,,,,0.0,,
8,20004014,0.0,,3.0,,14.5,,,8.75,,,11.5,8.8,,,,,,,,,,,,,,,,,,,,,,2006,1,9,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-09,,,,,,,,,,,,,,,,,,0.0,,
9,20004014,0.2,,3.5,,12.5,,,8.0,,,9.0,8.0,,,,,,,,,,,,,,,,,,,,,,2006,1,10,20004,20000,Ajaccio,1.73001,3.035429,3.325753,1.324654,3.608811,3.819228,15.274144,7.674247,2006-01-10,,,,,,,,,,,,,,,,,,0.0,,


In [115]:
df.tail(100)

Unnamed: 0,POSTE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,year,month,day,Code INSEE_x,Code Postal,nom_de_la_commune,moyenne precipitations année,moyenne precipitations mois,moyenne evapotranspiration année,moyenne evapotranspiration mois,moyenne vitesse vent année,moyenne vitesse vent mois,moyenne temperature année,moyenne temperature mois,date,Département,Code INSEE_y,Nom de la commune,latitude,longitude,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux,Commune,geo_point_2d
273445,20007400,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,9,23,20007,20224,Albertacce,2.46616,1.880114,2.943078,3.941225,3.469634,3.380008,15.249994,19.868451,2024-09-23,,,,,,,,,,,,,,,,,,0.0,,
273446,20007400,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,9,24,20007,20224,Albertacce,2.46616,1.880114,2.943078,3.941225,3.469634,3.380008,15.249994,19.868451,2024-09-24,,,,,,,,,,,,,,,,,,0.0,,
273447,20007400,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,9,25,20007,20224,Albertacce,2.46616,1.880114,2.943078,3.941225,3.469634,3.380008,15.249994,19.868451,2024-09-25,,,,,,,,,,,,,,,,,,0.0,,
273448,20007400,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,9,26,20007,20224,Albertacce,2.46616,1.880114,2.943078,3.941225,3.469634,3.380008,15.249994,19.868451,2024-09-26,,,,,,,,,,,,,,,,,,0.0,,
273449,20007400,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,9,27,20007,20224,Albertacce,2.46616,1.880114,2.943078,3.941225,3.469634,3.380008,15.249994,19.868451,2024-09-27,,,,,,,,,,,,,,,,,,0.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273540,20007400,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,12,27,20007,20224,Albertacce,2.46616,3.471692,2.943078,1.320757,3.469634,3.745633,15.249994,8.841597,2024-12-27,,,,,,,,,,,,,,,,,,0.0,,
273541,20007400,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,12,28,20007,20224,Albertacce,2.46616,3.471692,2.943078,1.320757,3.469634,3.745633,15.249994,8.841597,2024-12-28,,,,,,,,,,,,,,,,,,0.0,,
273542,20007400,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,12,29,20007,20224,Albertacce,2.46616,3.471692,2.943078,1.320757,3.469634,3.745633,15.249994,8.841597,2024-12-29,,,,,,,,,,,,,,,,,,0.0,,
273543,20007400,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,12,30,20007,20224,Albertacce,2.46616,3.471692,2.943078,1.320757,3.469634,3.745633,15.249994,8.841597,2024-12-30,,,,,,,,,,,,,,,,,,0.0,,


In [116]:
mask = df['Feux'] == 1
df = df[mask]
df.head(10)


Unnamed: 0,POSTE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,year,month,day,Code INSEE_x,Code Postal,nom_de_la_commune,moyenne precipitations année,moyenne precipitations mois,moyenne evapotranspiration année,moyenne evapotranspiration mois,moyenne vitesse vent année,moyenne vitesse vent mois,moyenne temperature année,moyenne temperature mois,date,Département,Code INSEE_y,Nom de la commune,latitude,longitude,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux,Commune,geo_point_2d
90,20004014,0.0,,8.0,,21.0,,,14.5,,,13.0,14.5,,,,,,,,,,,,,,,,,,,,,,2006,4,1,20004,20000,Ajaccio,1.73001,2.012621,3.325753,3.075749,3.608811,3.670861,15.274144,12.560301,2006-04-01,2A,2A004,Ajaccio,41.926399,8.737603,100.0,0.0,0.0,,,,,,,0.0,,Malveillance,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
123,20004014,0.2,,10.6,,27.0,,,18.8,,,16.4,18.8,,,,,,,,,,,,,,,,,,,,,,2006,5,4,20004,20000,Ajaccio,1.73001,2.242662,3.325753,4.06687,3.608811,3.471918,15.274144,16.137539,2006-05-04,2A,2A004,Ajaccio,41.926399,8.737603,400.0,,,,,,,,,,,,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
134,20004014,0.0,,9.5,,24.5,,,17.0,,,15.0,17.0,,,,,,,,,,,,,,,,,,,,,,2006,5,15,20004,20000,Ajaccio,1.73001,2.242662,3.325753,4.06687,3.608811,3.471918,15.274144,16.137539,2006-05-15,2A,2A004,Ajaccio,41.926399,8.737603,50.0,,,,,,,,,,,,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
150,20004014,1.0,,9.5,,21.0,,,15.25,,,11.5,15.3,,,,,,,,,,,,,,,,,,,,,,2006,5,31,20004,20000,Ajaccio,1.73001,2.242662,3.325753,4.06687,3.608811,3.471918,15.274144,16.137539,2006-05-31,2A,2A004,Ajaccio,41.926399,8.737603,100.0,,,,,,,,,,,,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
157,20004014,0.0,,11.0,,24.5,,,17.75,,,13.5,17.8,,,,,,,,,,,,,,,,,,,,,,2006,6,7,20004,20000,Ajaccio,1.73001,1.152275,3.325753,5.114559,3.608811,3.226636,15.274144,20.825412,2006-06-07,2A,2A004,Ajaccio,41.926399,8.737603,10000.0,0.0,0.0,,,,,,,0.0,2.0,Involontaire (travaux),1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
159,20004014,0.1,,10.3,,26.0,,,18.15,,,15.7,18.2,,,,,,,,,,,,,,,,,,,,,,2006,6,9,20004,20000,Ajaccio,1.73001,1.152275,3.325753,5.114559,3.608811,3.226636,15.274144,20.825412,2006-06-09,2A,2A004,Ajaccio,41.926399,8.737603,1000.0,0.0,0.0,,,,,,,0.0,2.0,,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
160,20004014,0.0,,11.0,,27.0,,,19.0,,,16.0,19.0,,,,,,,,,,,,,,,,,,,,,,2006,6,10,20004,20000,Ajaccio,1.73001,1.152275,3.325753,5.114559,3.608811,3.226636,15.274144,20.825412,2006-06-10,2A,2A004,Ajaccio,41.926399,8.737603,30.0,0.0,0.0,,,,,,,0.0,4.0,Accidentelle,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
162,20004014,0.0,,11.5,,26.5,,,19.0,,,15.0,19.0,,,,,,,,,,,,,,,,,,,,,,2006,6,12,20004,20000,Ajaccio,1.73001,1.152275,3.325753,5.114559,3.608811,3.226636,15.274144,20.825412,2006-06-12,2A,2A004,Ajaccio,41.926399,8.737603,200.0,0.0,0.0,,,,,,,0.0,1.0,Involontaire (particulier),1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
165,20004014,0.0,,14.5,,32.5,,,23.5,,,18.0,23.5,,,,,,,,,,,,,,,,,,,,,,2006,6,15,20004,20000,Ajaccio,1.73001,1.152275,3.325753,5.114559,3.608811,3.226636,15.274144,20.825412,2006-06-15,2A,2A004,Ajaccio,41.926399,8.737603,4000.0,0.0,0.0,,,,,,,0.0,1.0,Accidentelle,1.0,AJACCIO,"41.93479266376617, 8.701322759741174"
175,20004014,0.0,,20.0,,38.5,,,29.25,,,18.5,29.3,,,,,,,,,,,,,,,,,,,,,,2006,6,25,20004,20000,Ajaccio,1.73001,1.152275,3.325753,5.114559,3.608811,3.226636,15.274144,20.825412,2006-06-25,2A,2A004,Ajaccio,41.926399,8.737603,150.0,0.0,0.0,,,,,,,0.0,4.0,Involontaire (particulier),1.0,AJACCIO,"41.93479266376617, 8.701322759741174"


In [117]:
df.to_csv("Dataset_modele.csv", index=False, sep=';')
