In [1]:
import pandas as pd
import requests
import time
import os
import numpy as np
import seaborn as sns

from tqdm import tqdm

### Premier dataset

In [2]:
url = 'https://projet-incendie.s3.eu-west-3.amazonaws.com/Incendies_2014_2024.csv'

df = pd.read_csv(url, sep=';', encoding='utf-8', skiprows=3)
pd.set_option('display.max_columns', None)

print(df.head(10))
print(df.shape)

   Année  Numéro Département Code INSEE      Nom de la commune  \
0   2014    6330          66      66190      Salses-le-Château   
1   2014    2369          06      06074              Lantosque   
2   2014    2546          2A      2A139                  Lecci   
3   2014       6          65      65173                Esterre   
4   2014    8243          2B      2B058                 Canari   
5   2014       7          65      65233                 Jarret   
6   2014    1997          30      30051  Branoux-les-Taillades   
7   2014    6439          66      66196                 Sorède   
8   2014    6002          2B      2B242         Poggio-Mezzana   
9   2014      56         974      97421                Salazie   

  Date de première alerte  Surface parcourue (m2)  Surface forêt (m2)  \
0     2014-01-02 14:56:00                   10000                 NaN   
1     2014-01-07 13:50:00                   48400                 NaN   
2     2014-01-09 16:14:00                      10     

In [3]:
print("Display of dataset: ")
display(df.head(10))
pd.options.display.max_columns = None 
print()

print("Basics statistics: ")
df_desc = df.describe(include="all")
display(df_desc)
print()

print("Total and percentage of missing values: ")
missing_total = df.isnull().sum().sum()
display(missing_total)
missing_percent = 100 * df.isnull().sum() / df.shape[0]
display(missing_percent[missing_percent != 0])

Display of dataset: 


Unnamed: 0,Année,Numéro,Département,Code INSEE,Nom de la commune,Date de première alerte,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Précision des surfaces,Type de peuplement,Nature,Décès ou bâtiments touchés,Nombre de décès,Nombre de bâtiments totalement détruits,Nombre de bâtiments partiellement détruits,Précision de la donnée
0,2014,6330,66,66190,Salses-le-Château,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,,1.0,Involontaire (particulier),,,,,
1,2014,2369,06,06074,Lantosque,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,,1.0,,,,,,
2,2014,2546,2A,2A139,Lecci,2014-01-09 16:14:00,10,,,,,,,,,,,,,,,,,
3,2014,6,65,65173,Esterre,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,Estimées,,,,,,,
4,2014,8243,2B,2B058,Canari,2014-01-11 14:00:00,100,,,,,,,,,,,,,,,,,
5,2014,7,65,65233,Jarret,2014-01-12 16:41:00,200,0.0,,200.0,,,200.0,0.0,0.0,0.0,Estimées,,,,,,,
6,2014,1997,30,30051,Branoux-les-Taillades,2014-01-13 19:24:00,2500,,,,,,,,,,,,,,,,,
7,2014,6439,66,66196,Sorède,2014-01-17 16:40:00,10000,,10000.0,,,,,,,,,1.0,Involontaire (travaux),,,,,
8,2014,6002,2B,2B242,Poggio-Mezzana,2014-01-18 14:57:00,3000,0.0,3000.0,,,,,,,0.0,,1.0,,,,,,
9,2014,56,974,97421,Salazie,2014-01-21 14:08:00,3000,0.0,,3000.0,,,3000.0,0.0,0.0,0.0,Estimées,,,,,,,



Basics statistics: 


Unnamed: 0,Année,Numéro,Département,Code INSEE,Nom de la commune,Date de première alerte,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Précision des surfaces,Type de peuplement,Nature,Décès ou bâtiments touchés,Nombre de décès,Nombre de bâtiments totalement détruits,Nombre de bâtiments partiellement détruits,Précision de la donnée
count,29732.0,29732.0,29732.0,29732.0,29705,29732,29732.0,21005.0,8809.0,14536.0,4064.0,4048.0,10551.0,7620.0,7422.0,11671.0,12091,11135.0,14111,4731,866.0,867.0,866.0,813
unique,,,96.0,7740.0,7586,29053,,,,,,,,,,,2,,5,2,,,,1
top,,,13.0,97415.0,Saint-Paul,2020-07-30 00:51:00,,,,,,,,,,,Estimées,,Malveillance,Non,,,,Données partielles ou non valides
freq,,,2688.0,363.0,366,7,,,,,,,,,,,9653,,4593,4690,,,,813
mean,2018.986715,8390.076382,,,,,54258.08,44811.69,42775.21,9906.364,1529.158465,909.7792,8953.332,5263.809,3538.426,11401.25,,1.998383,,,0.003464,0.110727,0.062356,
std,2.991246,6514.790521,,,,,1115496.0,1225440.0,443653.8,123406.0,22583.735051,34304.65,91378.42,122525.4,46871.92,163359.3,,1.437769,,,0.058789,2.353654,1.032777,
min,2014.0,1.0,,,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,,,0.0,0.0,0.0,
25%,2016.0,2683.0,,,,,100.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,,,0.0,0.0,0.0,
50%,2019.0,7244.0,,,,,1000.0,150.0,500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,,,0.0,0.0,0.0,
75%,2022.0,12175.25,,,,,10000.0,3000.0,7900.0,300.0,0.0,0.0,1000.0,0.0,0.0,0.0,,3.0,,,0.0,0.0,0.0,



Total and percentage of missing values: 


370265

Nom de la commune                              0.090811
Surface forêt (m2)                            29.352213
Surface maquis garrigues (m2)                 70.371990
Autres surfaces naturelles hors forêt (m2)    51.109915
Surfaces agricoles (m2)                       86.331226
Autres surfaces (m2)                          86.385040
Surface autres terres boisées (m2)            64.512983
Surfaces non boisées naturelles (m2)          74.371048
Surfaces non boisées artificialisées (m2)     75.036997
Surfaces non boisées (m2)                     60.745998
Précision des surfaces                        59.333378
Type de peuplement                            62.548769
Nature                                        52.539352
Décès ou bâtiments touchés                    84.087851
Nombre de décès                               97.087313
Nombre de bâtiments totalement détruits       97.083950
Nombre de bâtiments partiellement détruits    97.087313
Précision de la donnée                        97

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29732 entries, 0 to 29731
Data columns (total 24 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   Année                                       29732 non-null  int64  
 1   Numéro                                      29732 non-null  int64  
 2   Département                                 29732 non-null  object 
 3   Code INSEE                                  29732 non-null  object 
 4   Nom de la commune                           29705 non-null  object 
 5   Date de première alerte                     29732 non-null  object 
 6   Surface parcourue (m2)                      29732 non-null  int64  
 7   Surface forêt (m2)                          21005 non-null  float64
 8   Surface maquis garrigues (m2)               8809 non-null   float64
 9   Autres surfaces naturelles hors forêt (m2)  14536 non-null  float64
 10  Surfaces a

In [5]:
communes = df["Nom de la commune"]
communes_uniques = communes.unique()
liste_communes = communes.unique().tolist()

print(liste_communes[0:10])

['Salses-le-Château', 'Lantosque', 'Lecci', 'Esterre', 'Canari', 'Jarret', 'Branoux-les-Taillades', 'Sorède', 'Poggio-Mezzana', 'Salazie']


#### Test API Nominatim et requête

In [6]:
# r = requests.get('https://nominatim.openstreetmap.org/search')
# r

In [7]:
# url = "https://nominatim.openstreetmap.org/search"
# headers = {
#     "User-Agent": "MyGeocoder/1.0 (myemail@example.com)"
# }
# output_file = "coordonnees_villes.csv"
# delay = 1  # en secondes, respect de la limite Nominatim
# places = [p for p in liste_communes if pd.notna(p)]

# # Charger les données déjà géocodées si le fichier existe
# if os.path.exists(output_file):
#     existing_df = pd.read_csv(output_file)
#     done_places = set(existing_df["ville"])
#     data_list = existing_df.to_dict("records")
# else:
#     done_places = set()
#     data_list = []

# # Boucle principale avec reprise
# for place in tqdm(places, desc="Géocodage des communes"):
#     place_str = str(place).strip()
#     if place_str in done_places:
#         continue  # éviter les doublons déjà traités

#     params = {
#         "q": place_str + ", France",
#         "format": "json",
#         "limit": 1
#     }

#     try:
#         response = requests.get(url, params=params, headers=headers)
#         if response.status_code == 200:
#             data = response.json()
#             if data:
#                 lat = data[0].get("lat")
#                 lon = data[0].get("lon")
#             else:
#                 lat, lon = None, None
#         else:
#             lat, lon = None, None

#     except Exception as e:
#         print(f"Erreur pour {place_str} : {e}")
#         lat, lon = None, None

#     data_list.append({"ville": place_str, "latitude": lat, "longitude": lon})
#     done_places.add(place_str)

#     # Sauvegarde après chaque ville
#     pd.DataFrame(data_list).to_csv(output_file, index=False)

#     # Pause pour respecter la limite Nominatim
#     time.sleep(delay)

# print("\n✅ Géocodage terminé. Données enregistrées dans :", output_file)

#### Second DF

In [8]:
df2 = pd.read_csv('https://projet-incendie.s3.eu-west-3.amazonaws.com/Incendies_2006_2013.csv', sep=';', encoding='utf-8', skiprows=6)
pd.set_option('display.max_columns', None)
df2.head(10)

Unnamed: 0,Année,Numéro,Département,Code INSEE,Nom de la commune,Date de première alerte,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Précision des surfaces,Type de peuplement,Nature,Décès ou bâtiments touchés,Nombre de décès,Nombre de bâtiments totalement détruits,Nombre de bâtiments partiellement détruits,Précision de la donnée
0,2006,8806,66,66179,Saint-Laurent-de-Cerdans,2006-01-01 12:50:00,10000,10000.0,0.0,,,,,,,0.0,,1.0,Naturelle,,,,,
1,2006,8917,66,66190,Salses-le-Château,2006-01-01 12:56:00,20000,0.0,20000.0,,,,,,,0.0,,1.0,Involontaire (particulier),,,,,
2,2006,4695,83,83028,Callas,2006-01-01 15:40:00,30,30.0,0.0,,,,,,,0.0,,5.0,Involontaire (particulier),,,,,
3,2006,8028,34,34237,Roujan,2006-01-02 18:19:00,37500,10000.0,27500.0,,,,,,,0.0,,1.0,Malveillance,,,,,
4,2006,5806,83,83050,Draguignan,2006-01-02 20:57:00,200,200.0,0.0,,,,,,,0.0,,5.0,Involontaire (travaux),,,,,
5,2006,7696,48,48152,Saint-Frézal-de-Ventalon,2006-01-03 15:31:00,250,50.0,200.0,,,,,,,0.0,,5.0,Accidentelle,,,,,
6,2006,5362,83,83049,Cuers,2006-01-04 13:15:00,400,400.0,0.0,,,,,,,0.0,,5.0,Involontaire (particulier),,,,,
7,2006,5473,6,6101,Rigaud,2006-01-04 15:54:00,2500,0.0,2500.0,,,,,,,0.0,,1.0,,,,,,
8,2006,8362,6,6101,Rigaud,2006-01-05 15:24:00,500,0.0,500.0,,,,,,,0.0,,1.0,,,,,,
9,2006,6943,13,13098,Saint-Mitre-les-Remparts,2006-01-05 17:06:00,10,0.0,0.0,,,,,,,0.0,,,,,,,,


In [9]:
communes2 = df2["Nom de la commune"]
communes_uniques2 = communes2.unique()
liste_communes2 = communes2.unique().tolist()

print(liste_communes2[0:10])

['Saint-Laurent-de-Cerdans', 'Salses-le-Château', 'Callas', 'Roujan', 'Draguignan', 'Saint-Frézal-de-Ventalon', 'Cuers', 'Rigaud', 'Saint-Mitre-les-Remparts', 'Salignac-Eyvigues']


#### Fusion des listes de villes sans doublon

In [10]:
liste_complete = list(set(liste_communes + liste_communes2))
print(liste_complete[0:10])

['Biviers', 'Roz-sur-Couesnon', 'Lanuéjols', 'Vouzeron', 'Cars', 'Thenon', 'Saint-Julien-de-Crempse', 'Mazeley', 'Perpignan', 'Malemort-sur-Corrèze']


#### Requête des coordonnées GPS en ne rajoutant que les nouvelles villes

In [11]:
# url = "https://nominatim.openstreetmap.org/search"
# headers = {
#     "User-Agent": "MyGeocoder/1.0 (myemail@example.com)"
# }
# output_file = "coordonnees_villes.csv"
# delay = 1  # en secondes, respect de la limite Nominatim
# places = [p for p in liste_complete if pd.notna(p)]

# # Charger les données déjà géocodées si le fichier existe
# if os.path.exists(output_file):
#     existing_df = pd.read_csv(output_file)
#     done_places = set(existing_df["ville"])
#     data_list = existing_df.to_dict("records")
# else:
#     done_places = set()
#     data_list = []

# # Boucle principale avec reprise
# for place in tqdm(places, desc="Géocodage des communes"):
#     place_str = str(place).strip()
#     if place_str in done_places:
#         continue  # éviter les doublons déjà traités

#     params = {
#         "q": place_str + ", France",
#         "format": "json",
#         "limit": 1
#     }

#     try:
#         response = requests.get(url, params=params, headers=headers)
#         if response.status_code == 200:
#             data = response.json()
#             if data:
#                 lat = data[0].get("lat")
#                 lon = data[0].get("lon")
#             else:
#                 lat, lon = None, None
#         else:
#             lat, lon = None, None

#     except Exception as e:
#         print(f"Erreur pour {place_str} : {e}")
#         lat, lon = None, None

#     data_list.append({"ville": place_str, "latitude": lat, "longitude": lon})
#     done_places.add(place_str)

#     # Sauvegarde après chaque ville
#     pd.DataFrame(data_list).to_csv(output_file, index=False)

#     # Pause pour respecter la limite Nominatim
#     time.sleep(delay)

# print("\n✅ Géocodage terminé. Données enregistrées dans :", output_file)

In [12]:
url_ville = 'https://projet-incendie.s3.eu-west-3.amazonaws.com/coordonnees_villes.csv'
coords=pd.read_csv(url_ville, sep=',', encoding='utf-8')
coords.head(10)

Unnamed: 0,ville,latitude,longitude
0,Salses-le-Château,42.83338,2.91818
1,Lantosque,43.973468,7.312593
2,Lecci,41.679373,9.317798
3,Esterre,42.874901,0.006078
4,Canari,42.845933,9.331333
5,Jarret,43.0823,-0.014354
6,Branoux-les-Taillades,44.2196,3.99124
7,Sorède,42.530402,2.956454
8,Poggio-Mezzana,42.39768,9.49393
9,Salazie,-21.024383,55.543545


In [13]:
coords.rename(columns={"ville": "Nom de la commune"}, inplace=True)
coords.head(5)

Unnamed: 0,Nom de la commune,latitude,longitude
0,Salses-le-Château,42.83338,2.91818
1,Lantosque,43.973468,7.312593
2,Lecci,41.679373,9.317798
3,Esterre,42.874901,0.006078
4,Canari,42.845933,9.331333


In [14]:
coords=coords.dropna(subset=['latitude', 'longitude'])
coords.isna().sum()

Nom de la commune    0
latitude             0
longitude            0
dtype: int64

#### Fusion des csv des incendies

In [15]:
# df_concat = pd.concat([df, df2], ignore_index=True)
# df_concat.to_csv("Incendies_2006_2024.csv", index=False, sep=';')

df_concat=pd.read_csv('https://projet-incendie.s3.eu-west-3.amazonaws.com/Incendies_2006_2024.csv', sep=';', encoding='utf-8')
df_concat.head(10)

  df_concat=pd.read_csv('https://projet-incendie.s3.eu-west-3.amazonaws.com/Incendies_2006_2024.csv', sep=';', encoding='utf-8')


Unnamed: 0,Année,Numéro,Département,Code INSEE,Nom de la commune,Date de première alerte,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Précision des surfaces,Type de peuplement,Nature,Décès ou bâtiments touchés,Nombre de décès,Nombre de bâtiments totalement détruits,Nombre de bâtiments partiellement détruits,Précision de la donnée
0,2014,6330,66,66190,Salses-le-Château,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,,1.0,Involontaire (particulier),,,,,
1,2014,2369,06,06074,Lantosque,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,,1.0,,,,,,
2,2014,2546,2A,2A139,Lecci,2014-01-09 16:14:00,10,,,,,,,,,,,,,,,,,
3,2014,6,65,65173,Esterre,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,Estimées,,,,,,,
4,2014,8243,2B,2B058,Canari,2014-01-11 14:00:00,100,,,,,,,,,,,,,,,,,
5,2014,7,65,65233,Jarret,2014-01-12 16:41:00,200,0.0,,200.0,,,200.0,0.0,0.0,0.0,Estimées,,,,,,,
6,2014,1997,30,30051,Branoux-les-Taillades,2014-01-13 19:24:00,2500,,,,,,,,,,,,,,,,,
7,2014,6439,66,66196,Sorède,2014-01-17 16:40:00,10000,,10000.0,,,,,,,,,1.0,Involontaire (travaux),,,,,
8,2014,6002,2B,2B242,Poggio-Mezzana,2014-01-18 14:57:00,3000,0.0,3000.0,,,,,,,0.0,,1.0,,,,,,
9,2014,56,974,97421,Salazie,2014-01-21 14:08:00,3000,0.0,,3000.0,,,3000.0,0.0,0.0,0.0,Estimées,,,,,,,


In [16]:
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50193 entries, 0 to 50192
Data columns (total 24 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   Année                                       50193 non-null  int64  
 1   Numéro                                      50193 non-null  int64  
 2   Département                                 50193 non-null  object 
 3   Code INSEE                                  50193 non-null  object 
 4   Nom de la commune                           50166 non-null  object 
 5   Date de première alerte                     50193 non-null  object 
 6   Surface parcourue (m2)                      50193 non-null  int64  
 7   Surface forêt (m2)                          38506 non-null  float64
 8   Surface maquis garrigues (m2)               19703 non-null  float64
 9   Autres surfaces naturelles hors forêt (m2)  20939 non-null  float64
 10  Surfaces a

In [17]:
df_concat["Date de première alerte"] = pd.to_datetime(df_concat["Date de première alerte"])
print(df_concat.dtypes)

Année                                                  int64
Numéro                                                 int64
Département                                           object
Code INSEE                                            object
Nom de la commune                                     object
Date de première alerte                       datetime64[ns]
Surface parcourue (m2)                                 int64
Surface forêt (m2)                                   float64
Surface maquis garrigues (m2)                        float64
Autres surfaces naturelles hors forêt (m2)           float64
Surfaces agricoles (m2)                              float64
Autres surfaces (m2)                                 float64
Surface autres terres boisées (m2)                   float64
Surfaces non boisées naturelles (m2)                 float64
Surfaces non boisées artificialisées (m2)            float64
Surfaces non boisées (m2)                            float64
Précision des surfaces  

In [18]:
df_concat.rename(columns={"Date de première alerte": "Date"}, inplace=True)
df_concat.head()

Unnamed: 0,Année,Numéro,Département,Code INSEE,Nom de la commune,Date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Précision des surfaces,Type de peuplement,Nature,Décès ou bâtiments touchés,Nombre de décès,Nombre de bâtiments totalement détruits,Nombre de bâtiments partiellement détruits,Précision de la donnée
0,2014,6330,66,66190,Salses-le-Château,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,,1.0,Involontaire (particulier),,,,,
1,2014,2369,06,06074,Lantosque,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,,1.0,,,,,,
2,2014,2546,2A,2A139,Lecci,2014-01-09 16:14:00,10,,,,,,,,,,,,,,,,,
3,2014,6,65,65173,Esterre,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,Estimées,,,,,,,
4,2014,8243,2B,2B058,Canari,2014-01-11 14:00:00,100,,,,,,,,,,,,,,,,,


In [19]:
df_concat.isna().sum()/len(df_concat)*100

Année                                          0.000000
Numéro                                         0.000000
Département                                    0.000000
Code INSEE                                     0.000000
Nom de la commune                              0.053792
Date                                           0.000000
Surface parcourue (m2)                         0.000000
Surface forêt (m2)                            23.284123
Surface maquis garrigues (m2)                 60.745522
Autres surfaces naturelles hors forêt (m2)    58.283028
Surfaces agricoles (m2)                       91.903253
Autres surfaces (m2)                          91.935130
Surface autres terres boisées (m2)            66.222382
Surfaces non boisées naturelles (m2)          79.190325
Surfaces non boisées artificialisées (m2)     79.722272
Surfaces non boisées (m2)                     43.211205
Précision des surfaces                        73.912697
Type de peuplement                            59

#### Drop des colonnes inutiles

In [20]:
df_concat.drop(df_concat.columns[[0, 1, 16, 19, 20, 21, 22, 23]], axis=1, inplace=True)

display(df_concat.head(10))

Unnamed: 0,Département,Code INSEE,Nom de la commune,Date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature
0,66,66190,Salses-le-Château,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,1.0,Involontaire (particulier)
1,06,06074,Lantosque,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,1.0,
2,2A,2A139,Lecci,2014-01-09 16:14:00,10,,,,,,,,,,,
3,65,65173,Esterre,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,,
4,2B,2B058,Canari,2014-01-11 14:00:00,100,,,,,,,,,,,
5,65,65233,Jarret,2014-01-12 16:41:00,200,0.0,,200.0,,,200.0,0.0,0.0,0.0,,
6,30,30051,Branoux-les-Taillades,2014-01-13 19:24:00,2500,,,,,,,,,,,
7,66,66196,Sorède,2014-01-17 16:40:00,10000,,10000.0,,,,,,,,1.0,Involontaire (travaux)
8,2B,2B242,Poggio-Mezzana,2014-01-18 14:57:00,3000,0.0,3000.0,,,,,,,0.0,1.0,
9,974,97421,Salazie,2014-01-21 14:08:00,3000,0.0,,3000.0,,,3000.0,0.0,0.0,0.0,,


##### Traitement des Nan de Communes

In [21]:
nb_null = df_concat['Nom de la commune'].isnull().sum()
print(nb_null)

27


In [22]:
df_concat = df_concat.dropna(subset=['Nom de la commune'])
print(df_concat['Nom de la commune'].isnull().sum())

0


In [23]:
df_concat.shape

(50166, 16)

### Rajout des coordonnées GPS

In [24]:
df_merge = df_concat.merge(coords[["Nom de la commune", "latitude", "longitude"]], 
                    on="Nom de la commune", 
                    how="left")
df_merge.head(5)

Unnamed: 0,Département,Code INSEE,Nom de la commune,Date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,latitude,longitude
0,66,66190,Salses-le-Château,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,1.0,Involontaire (particulier),42.83338,2.91818
1,06,06074,Lantosque,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,1.0,,43.973468,7.312593
2,2A,2A139,Lecci,2014-01-09 16:14:00,10,,,,,,,,,,,,41.679373,9.317798
3,65,65173,Esterre,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,,,42.874901,0.006078
4,2B,2B058,Canari,2014-01-11 14:00:00,100,,,,,,,,,,,,42.845933,9.331333


In [25]:
df_merge.shape

(50166, 18)

In [26]:
# Replacement des coordonnées GPS à coté de la commune

colonnes = df_merge.columns.tolist()
colonnes.remove("latitude")
colonnes.remove("longitude")

colonnes.insert(3, "latitude")
colonnes.insert(4, "longitude")

df_merge = df_merge[colonnes]

df_merge.head(5)

Unnamed: 0,Département,Code INSEE,Nom de la commune,latitude,longitude,Date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature
0,66,66190,Salses-le-Château,42.83338,2.91818,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,1.0,Involontaire (particulier)
1,06,06074,Lantosque,43.973468,7.312593,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,1.0,
2,2A,2A139,Lecci,41.679373,9.317798,2014-01-09 16:14:00,10,,,,,,,,,,,
3,65,65173,Esterre,42.874901,0.006078,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,,
4,2B,2B058,Canari,42.845933,9.331333,2014-01-11 14:00:00,100,,,,,,,,,,,


#### Export 'historique incendies avec coordonnées

In [27]:
# df_merge.to_csv("historique_incendies_avec_coordonnees.csv", index=False, sep=';')

#### Visu des causes d'incendie

In [28]:
df_merge=pd.read_csv('https://projet-incendie.s3.eu-west-3.amazonaws.com/historique_incendies_avec_coordonnees.csv', sep=';', encoding='utf-8')

In [29]:
df_merge.shape

(50166, 19)

In [30]:
df_merge.head()

Unnamed: 0,Département,Code INSEE,Nom de la commune,latitude,longitude,Date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux
0,66,66190,Salses-le-Château,42.83338,2.91818,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,1.0,Involontaire (particulier),1
1,06,06074,Lantosque,43.973468,7.312593,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,1.0,,1
2,2A,2A139,Lecci,41.679373,9.317798,2014-01-09 16:14:00,10,,,,,,,,,,,,1
3,65,65173,Esterre,42.874901,0.006078,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,,,1
4,2B,2B058,Canari,42.845933,9.331333,2014-01-11 14:00:00,100,,,,,,,,,,,,1


In [31]:
# Rajout d'une colonne pour indiquer les feux
df_merge['Feux']=1
df_merge.head()

Unnamed: 0,Département,Code INSEE,Nom de la commune,latitude,longitude,Date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux
0,66,66190,Salses-le-Château,42.83338,2.91818,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,1.0,Involontaire (particulier),1
1,06,06074,Lantosque,43.973468,7.312593,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,1.0,,1
2,2A,2A139,Lecci,41.679373,9.317798,2014-01-09 16:14:00,10,,,,,,,,,,,,1
3,65,65173,Esterre,42.874901,0.006078,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,,,1
4,2B,2B058,Canari,42.845933,9.331333,2014-01-11 14:00:00,100,,,,,,,,,,,,1


In [32]:
df_merge['Feux'].value_counts()

Feux
1    50166
Name: count, dtype: int64

In [33]:
# df_merge.to_csv('historique_incendies_avec_coordonnees.csv', index=False, sep=';')

In [34]:
feux = pd.read_csv('https://projet-incendie.s3.eu-west-3.amazonaws.com/historique_incendies_avec_coordonnees.csv', sep=';', encoding='utf-8')
feux.head(10)

Unnamed: 0,Département,Code INSEE,Nom de la commune,latitude,longitude,Date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux
0,66,66190,Salses-le-Château,42.83338,2.91818,2014-01-02 14:56:00,10000,,10000.0,,,,,,,,1.0,Involontaire (particulier),1
1,06,06074,Lantosque,43.973468,7.312593,2014-01-07 13:50:00,48400,,48400.0,,,,,,,,1.0,,1
2,2A,2A139,Lecci,41.679373,9.317798,2014-01-09 16:14:00,10,,,,,,,,,,,,1
3,65,65173,Esterre,42.874901,0.006078,2014-01-10 20:04:00,10000,10000.0,,0.0,,,0.0,0.0,0.0,0.0,,,1
4,2B,2B058,Canari,42.845933,9.331333,2014-01-11 14:00:00,100,,,,,,,,,,,,1
5,65,65233,Jarret,43.0823,-0.014354,2014-01-12 16:41:00,200,0.0,,200.0,,,200.0,0.0,0.0,0.0,,,1
6,30,30051,Branoux-les-Taillades,44.2196,3.99124,2014-01-13 19:24:00,2500,,,,,,,,,,,,1
7,66,66196,Sorède,42.530402,2.956454,2014-01-17 16:40:00,10000,,10000.0,,,,,,,,1.0,Involontaire (travaux),1
8,2B,2B242,Poggio-Mezzana,42.39768,9.49393,2014-01-18 14:57:00,3000,0.0,3000.0,,,,,,,0.0,1.0,,1
9,974,97421,Salazie,-21.024383,55.543545,2014-01-21 14:08:00,3000,0.0,,3000.0,,,3000.0,0.0,0.0,0.0,,,1


In [35]:
feux.columns

Index(['Département', 'Code INSEE', 'Nom de la commune', 'latitude',
       'longitude', 'Date', 'Surface parcourue (m2)', 'Surface forêt (m2)',
       'Surface maquis garrigues (m2)',
       'Autres surfaces naturelles hors forêt (m2)', 'Surfaces agricoles (m2)',
       'Autres surfaces (m2)', 'Surface autres terres boisées (m2)',
       'Surfaces non boisées naturelles (m2)',
       'Surfaces non boisées artificialisées (m2)',
       'Surfaces non boisées (m2)', 'Type de peuplement', 'Nature', 'Feux'],
      dtype='object')

In [36]:
feux['Feux'].value_counts()

Feux
1    50166
Name: count, dtype: int64

In [37]:
feux.shape

(50166, 19)

#### Rajout de la météo

In [38]:
df_meteo=pd.read_csv('https://projet-incendie.s3.eu-west-3.amazonaws.com/corse_meteo_insee.csv', sep=';', encoding='utf-8')
df_meteo.head(10)

Unnamed: 0,POSTE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,année,mois,jour,Département,Code Postal,Code INSEE,Commune,geo_point_2d
0,20148001,1.2,,6.1,1750.0,13.2,1012.0,9.5,9.65,1.8,3.2,7.1,9.7,2.6,14.9,260.0,2217.0,9.0,270.0,2222.0,,,54.0,2219.0,91.0,1.0,0.0,704.0,9.4,78.0,0.0,0.0,1.1,1.1,2006,1,1,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942"
1,20148001,1.4,424.0,4.0,656.0,14.0,1247.0,8.2,9.0,0.2,1.7,10.0,9.0,1.9,13.0,30.0,1842.0,8.0,320.0,1831.0,,,55.0,1243.0,95.0,2217.0,0.0,646.0,8.6,79.0,1.0,0.0,1.0,1.0,2006,1,2,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942"
2,20148001,0.2,87.0,4.0,2143.0,11.6,1338.0,8.6,7.8,1.8,2.4,7.6,7.8,4.9,14.0,330.0,1911.0,10.0,20.0,2034.0,,,55.0,1911.0,91.0,528.0,0.0,539.0,7.9,71.0,0.0,0.0,1.4,1.4,2006,1,3,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942"
3,20148001,0.0,0.0,7.4,1825.0,10.2,1131.0,8.8,8.8,5.0,5.8,2.8,8.8,4.6,12.0,20.0,625.0,8.0,30.0,7.0,,,61.0,628.0,75.0,1901.0,0.0,0.0,7.8,69.0,0.0,0.0,1.8,1.8,2006,1,4,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942"
4,20148001,0.0,0.0,5.1,1757.0,10.4,1159.0,7.1,7.75,1.7,3.1,5.3,7.8,3.7,9.0,30.0,338.0,6.0,30.0,343.0,,,58.0,1406.0,89.0,2120.0,0.0,358.0,7.4,74.0,0.0,0.0,1.4,1.5,2006,1,5,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942"
5,20148001,0.2,0.0,2.7,2259.0,11.7,1011.0,6.0,7.2,0.1,0.8,9.0,7.2,1.8,6.0,50.0,1104.0,3.0,220.0,23.0,,,65.0,1026.0,95.0,2221.0,0.0,1040.0,7.9,84.0,0.0,0.0,0.9,1.0,2006,1,6,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942"
6,20148001,0.0,0.0,2.0,414.0,9.2,252.0,5.8,5.6,-0.2,0.6,7.2,5.6,1.2,4.0,220.0,443.0,3.0,220.0,11.0,,,88.0,1244.0,97.0,1903.0,0.0,1440.0,8.7,93.0,0.0,1.0,0.1,0.2,2006,1,7,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942"
7,20148001,0.0,0.0,5.8,1739.0,9.0,1246.0,7.8,7.4,2.4,3.5,3.2,7.4,1.8,6.2,30.0,2313.0,4.0,80.0,140.0,,,71.0,2349.0,92.0,1744.0,0.0,1321.0,9.0,85.0,0.0,1.0,0.5,0.6,2006,1,8,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942"
8,20148001,0.0,0.0,5.1,1901.0,9.5,957.0,7.1,7.3,1.9,3.8,4.4,7.3,3.3,8.0,350.0,1053.0,6.0,350.0,1058.0,,,65.0,1007.0,81.0,45.0,0.0,47.0,7.4,74.0,0.0,0.0,1.4,1.4,2006,1,9,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942"
9,20148001,0.0,0.0,1.2,517.0,10.3,1207.0,5.0,5.75,-1.9,-0.8,9.1,5.8,2.8,8.0,360.0,1209.0,6.0,10.0,1102.0,,,59.0,1314.0,92.0,526.0,0.0,553.0,6.7,76.0,0.0,0.0,1.1,1.1,2006,1,10,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942"


In [39]:
df_meteo.isna().sum()/len(df_meteo)*100

POSTE            0.000000
RR               1.342173
DRR             95.245322
TN              16.166431
HTN             24.907395
TX              16.217162
HTX             24.894275
TM              24.653742
TMNX            16.229407
TNSOL           94.451563
TN50            94.440193
TAMPLI          16.229407
TNTXM           16.229407
FFM             38.032179
FXI             38.101715
DXI             38.191368
HXI             38.227667
FXY             43.344893
DXY             43.345768
HXY             43.375069
FXI3S           80.687399
HXI3S           80.687399
UN              39.681360
HUN             39.879471
UX              39.696229
HUX             39.899588
DHUMI40         40.078020
DHUMI80         40.144057
TSVM            39.704101
UM              39.629317
ORAG            92.704420
BRUME           93.178926
ETPMON          95.496350
ETPGRILLE       23.147569
année            0.000000
mois             0.000000
jour             0.000000
Département      0.000000
Code Postal 

In [40]:
df_meteo = df_meteo.rename(columns={
    "année": "year",
    "mois": "month",
    "jour": "day"
})

# Étape 2 : Créer une colonne datetime à partir de ces 3 colonnes
df_meteo["date"] = pd.to_datetime(df_meteo[["year", "month", "day"]])

df_meteo.head()

Unnamed: 0,POSTE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,year,month,day,Département,Code Postal,Code INSEE,Commune,geo_point_2d,date
0,20148001,1.2,,6.1,1750.0,13.2,1012.0,9.5,9.65,1.8,3.2,7.1,9.7,2.6,14.9,260.0,2217.0,9.0,270.0,2222.0,,,54.0,2219.0,91.0,1.0,0.0,704.0,9.4,78.0,0.0,0.0,1.1,1.1,2006,1,1,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942",2006-01-01
1,20148001,1.4,424.0,4.0,656.0,14.0,1247.0,8.2,9.0,0.2,1.7,10.0,9.0,1.9,13.0,30.0,1842.0,8.0,320.0,1831.0,,,55.0,1243.0,95.0,2217.0,0.0,646.0,8.6,79.0,1.0,0.0,1.0,1.0,2006,1,2,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942",2006-01-02
2,20148001,0.2,87.0,4.0,2143.0,11.6,1338.0,8.6,7.8,1.8,2.4,7.6,7.8,4.9,14.0,330.0,1911.0,10.0,20.0,2034.0,,,55.0,1911.0,91.0,528.0,0.0,539.0,7.9,71.0,0.0,0.0,1.4,1.4,2006,1,3,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942",2006-01-03
3,20148001,0.0,0.0,7.4,1825.0,10.2,1131.0,8.8,8.8,5.0,5.8,2.8,8.8,4.6,12.0,20.0,625.0,8.0,30.0,7.0,,,61.0,628.0,75.0,1901.0,0.0,0.0,7.8,69.0,0.0,0.0,1.8,1.8,2006,1,4,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942",2006-01-04
4,20148001,0.0,0.0,5.1,1757.0,10.4,1159.0,7.1,7.75,1.7,3.1,5.3,7.8,3.7,9.0,30.0,338.0,6.0,30.0,343.0,,,58.0,1406.0,89.0,2120.0,0.0,358.0,7.4,74.0,0.0,0.0,1.4,1.5,2006,1,5,20,20148,2A099,COZZANO,"41.933536178154235, 9.186589019928942",2006-01-05


In [41]:
df_meteo['date'] = pd.to_datetime(df_meteo['date'])

In [42]:
df_meteo.shape

(228659, 43)

In [43]:
df_meteo.columns

Index(['POSTE', 'RR', 'DRR', 'TN', 'HTN', 'TX', 'HTX', 'TM', 'TMNX', 'TNSOL',
       'TN50', 'TAMPLI', 'TNTXM', 'FFM', 'FXI', 'DXI', 'HXI', 'FXY', 'DXY',
       'HXY', 'FXI3S', 'HXI3S', 'UN', 'HUN', 'UX', 'HUX', 'DHUMI40', 'DHUMI80',
       'TSVM', 'UM', 'ORAG', 'BRUME', 'ETPMON', 'ETPGRILLE', 'year', 'month',
       'day', 'Département', 'Code Postal', 'Code INSEE', 'Commune',
       'geo_point_2d', 'date'],
      dtype='object')

In [44]:
df_meteo.isna().sum()

POSTE                0
RR                3069
DRR             217787
TN               36966
HTN              56953
TX               37082
HTX              56923
TM               56373
TMNX             37110
TNSOL           215972
TN50            215946
TAMPLI           37110
TNTXM            37110
FFM              86964
FXI              87123
DXI              87328
HXI              87411
FXY              99112
DXY              99114
HXY              99181
FXI3S           184499
HXI3S           184499
UN               90735
HUN              91188
UX               90769
HUX              91234
DHUMI40          91642
DHUMI80          91793
TSVM             90787
UM               90616
ORAG            211977
BRUME           213062
ETPMON          218361
ETPGRILLE        52929
year                 0
month                0
day                  0
Département          0
Code Postal          0
Code INSEE           0
Commune              0
geo_point_2d         0
date                 0
dtype: int6

In [45]:
feux.columns

Index(['Département', 'Code INSEE', 'Nom de la commune', 'latitude',
       'longitude', 'Date', 'Surface parcourue (m2)', 'Surface forêt (m2)',
       'Surface maquis garrigues (m2)',
       'Autres surfaces naturelles hors forêt (m2)', 'Surfaces agricoles (m2)',
       'Autres surfaces (m2)', 'Surface autres terres boisées (m2)',
       'Surfaces non boisées naturelles (m2)',
       'Surfaces non boisées artificialisées (m2)',
       'Surfaces non boisées (m2)', 'Type de peuplement', 'Nature', 'Feux'],
      dtype='object')

In [46]:
print(df_meteo['date'].dtype)

datetime64[ns]


In [47]:
print(feux['Date'].dtype)

object


In [48]:
feux['Date'] = pd.to_datetime(feux['Date'])


In [49]:
print(feux['Date'].dtype)

datetime64[ns]


In [50]:
feux['Département'] = feux['Département'].astype(str).str.zfill(2)
feux.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50166 entries, 0 to 50165
Data columns (total 19 columns):
 #   Column                                      Non-Null Count  Dtype         
---  ------                                      --------------  -----         
 0   Département                                 50166 non-null  object        
 1   Code INSEE                                  50166 non-null  object        
 2   Nom de la commune                           50166 non-null  object        
 3   latitude                                    50147 non-null  float64       
 4   longitude                                   50147 non-null  float64       
 5   Date                                        50166 non-null  datetime64[ns]
 6   Surface parcourue (m2)                      50166 non-null  int64         
 7   Surface forêt (m2)                          38499 non-null  float64       
 8   Surface maquis garrigues (m2)               19683 non-null  float64       
 9   Autres

In [51]:
feux['Feux'].value_counts()

Feux
1    50166
Name: count, dtype: int64

In [52]:
df_meteo.columns

Index(['POSTE', 'RR', 'DRR', 'TN', 'HTN', 'TX', 'HTX', 'TM', 'TMNX', 'TNSOL',
       'TN50', 'TAMPLI', 'TNTXM', 'FFM', 'FXI', 'DXI', 'HXI', 'FXY', 'DXY',
       'HXY', 'FXI3S', 'HXI3S', 'UN', 'HUN', 'UX', 'HUX', 'DHUMI40', 'DHUMI80',
       'TSVM', 'UM', 'ORAG', 'BRUME', 'ETPMON', 'ETPGRILLE', 'year', 'month',
       'day', 'Département', 'Code Postal', 'Code INSEE', 'Commune',
       'geo_point_2d', 'date'],
      dtype='object')

In [53]:
df_meteo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 228659 entries, 0 to 228658
Data columns (total 43 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   POSTE         228659 non-null  int64         
 1   RR            225590 non-null  float64       
 2   DRR           10872 non-null   float64       
 3   TN            191693 non-null  float64       
 4   HTN           171706 non-null  float64       
 5   TX            191577 non-null  float64       
 6   HTX           171736 non-null  float64       
 7   TM            172286 non-null  float64       
 8   TMNX          191549 non-null  float64       
 9   TNSOL         12687 non-null   float64       
 10  TN50          12713 non-null   float64       
 11  TAMPLI        191549 non-null  float64       
 12  TNTXM         191549 non-null  float64       
 13  FFM           141695 non-null  float64       
 14  FXI           141536 non-null  float64       
 15  DXI           141

### Merge Feux et météo

In [54]:
feux_corse = feux[feux['Département'].isin(['2A', '2B', 2])]


In [55]:
feux_corse = feux_corse.rename(columns={'Date': 'date'})


In [56]:
feux_corse.head()

Unnamed: 0,Département,Code INSEE,Nom de la commune,latitude,longitude,date,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux
2,2A,2A139,Lecci,41.679373,9.317798,2014-01-09 16:14:00,10,,,,,,,,,,,,1
4,2B,2B058,Canari,42.845933,9.331333,2014-01-11 14:00:00,100,,,,,,,,,,,,1
8,2B,2B242,Poggio-Mezzana,42.39768,9.49393,2014-01-18 14:57:00,3000,0.0,3000.0,,,,,,,0.0,1.0,,1
13,2B,2B036,Bigorno,42.527867,9.301862,2014-01-26 13:29:00,10000,0.0,10000.0,,,,,,,0.0,1.0,,1
14,2B,2B341,Venaco,42.231867,9.172566,2014-01-26 13:58:00,5000,0.0,5000.0,,,,,,,0.0,1.0,,1


In [57]:
feux_corse.shape

(9509, 19)

In [58]:
df = pd.merge(df_meteo, feux_corse, on=['date', 'Code INSEE'], how='outer')

In [59]:
df.head()

Unnamed: 0,POSTE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,year,month,day,Département_x,Code Postal,Code INSEE,Commune,geo_point_2d,date,Département_y,Nom de la commune,latitude,longitude,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux
0,20142001.0,40.0,,3.0,,8.0,,,5.5,,,5.0,5.5,,,,,,,,,,,,,,,,,,,,,,2006.0,1.0,1.0,20.0,20142.0,2A056,CAMPO,"41.890691356038275, 9.000984854426516",2006-01-01,,,,,,,,,,,,,,,,,
1,20135002.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2006.0,1.0,1.0,20.0,20135.0,2A092,CONCA,"41.753081312395295, 9.330311865111108",2006-01-01,,,,,,,,,,,,,,,,,
2,20148001.0,1.2,,6.1,1750.0,13.2,1012.0,9.5,9.65,1.8,3.2,7.1,9.7,2.6,14.9,260.0,2217.0,9.0,270.0,2222.0,,,54.0,2219.0,91.0,1.0,0.0,704.0,9.4,78.0,0.0,0.0,1.1,1.1,2006.0,1.0,1.0,20.0,20148.0,2A099,COZZANO,"41.933536178154235, 9.186589019928942",2006-01-01,,,,,,,,,,,,,,,,,
3,20114002.0,5.4,505.0,10.1,1711.0,12.5,847.0,11.0,11.3,8.8,9.0,2.4,11.3,7.3,19.0,270.0,1613.0,12.0,280.0,2054.0,,,61.0,2028.0,93.0,103.0,0.0,791.0,10.6,80.0,0.0,0.0,1.6,0.5,2006.0,1.0,1.0,20.0,20114.0,2A114,FIGARI,"41.5163043788812, 9.120594158285483",2006-01-01,,,,,,,,,,,,,,,,,
4,20142001.0,40.0,,3.0,,8.0,,,5.5,,,5.0,5.5,,,,,,,,,,,,,,,,,,,,,,2006.0,1.0,1.0,20.0,20142.0,2A253,QUASQUARA,"41.90875371016429, 9.008361179069034",2006-01-01,,,,,,,,,,,,,,,,,


In [60]:
df['Feux'].value_counts()

Feux
1.0    9509
Name: count, dtype: int64

In [61]:
df.shape

(238167, 60)

In [62]:
df.columns

Index(['POSTE', 'RR', 'DRR', 'TN', 'HTN', 'TX', 'HTX', 'TM', 'TMNX', 'TNSOL',
       'TN50', 'TAMPLI', 'TNTXM', 'FFM', 'FXI', 'DXI', 'HXI', 'FXY', 'DXY',
       'HXY', 'FXI3S', 'HXI3S', 'UN', 'HUN', 'UX', 'HUX', 'DHUMI40', 'DHUMI80',
       'TSVM', 'UM', 'ORAG', 'BRUME', 'ETPMON', 'ETPGRILLE', 'year', 'month',
       'day', 'Département_x', 'Code Postal', 'Code INSEE', 'Commune',
       'geo_point_2d', 'date', 'Département_y', 'Nom de la commune',
       'latitude', 'longitude', 'Surface parcourue (m2)', 'Surface forêt (m2)',
       'Surface maquis garrigues (m2)',
       'Autres surfaces naturelles hors forêt (m2)', 'Surfaces agricoles (m2)',
       'Autres surfaces (m2)', 'Surface autres terres boisées (m2)',
       'Surfaces non boisées naturelles (m2)',
       'Surfaces non boisées artificialisées (m2)',
       'Surfaces non boisées (m2)', 'Type de peuplement', 'Nature', 'Feux'],
      dtype='object')

In [63]:
df['Feux'].replace(np.nan, 0, inplace=True)
df.head(10)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Feux'].replace(np.nan, 0, inplace=True)


Unnamed: 0,POSTE,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,TAMPLI,TNTXM,FFM,FXI,DXI,HXI,FXY,DXY,HXY,FXI3S,HXI3S,UN,HUN,UX,HUX,DHUMI40,DHUMI80,TSVM,UM,ORAG,BRUME,ETPMON,ETPGRILLE,year,month,day,Département_x,Code Postal,Code INSEE,Commune,geo_point_2d,date,Département_y,Nom de la commune,latitude,longitude,Surface parcourue (m2),Surface forêt (m2),Surface maquis garrigues (m2),Autres surfaces naturelles hors forêt (m2),Surfaces agricoles (m2),Autres surfaces (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Type de peuplement,Nature,Feux
0,20142001.0,40.0,,3.0,,8.0,,,5.5,,,5.0,5.5,,,,,,,,,,,,,,,,,,,,,,2006.0,1.0,1.0,20.0,20142.0,2A056,CAMPO,"41.890691356038275, 9.000984854426516",2006-01-01,,,,,,,,,,,,,,,,,0.0
1,20135002.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2006.0,1.0,1.0,20.0,20135.0,2A092,CONCA,"41.753081312395295, 9.330311865111108",2006-01-01,,,,,,,,,,,,,,,,,0.0
2,20148001.0,1.2,,6.1,1750.0,13.2,1012.0,9.5,9.65,1.8,3.2,7.1,9.7,2.6,14.9,260.0,2217.0,9.0,270.0,2222.0,,,54.0,2219.0,91.0,1.0,0.0,704.0,9.4,78.0,0.0,0.0,1.1,1.1,2006.0,1.0,1.0,20.0,20148.0,2A099,COZZANO,"41.933536178154235, 9.186589019928942",2006-01-01,,,,,,,,,,,,,,,,,0.0
3,20114002.0,5.4,505.0,10.1,1711.0,12.5,847.0,11.0,11.3,8.8,9.0,2.4,11.3,7.3,19.0,270.0,1613.0,12.0,280.0,2054.0,,,61.0,2028.0,93.0,103.0,0.0,791.0,10.6,80.0,0.0,0.0,1.6,0.5,2006.0,1.0,1.0,20.0,20114.0,2A114,FIGARI,"41.5163043788812, 9.120594158285483",2006-01-01,,,,,,,,,,,,,,,,,0.0
4,20142001.0,40.0,,3.0,,8.0,,,5.5,,,5.0,5.5,,,,,,,,,,,,,,,,,,,,,,2006.0,1.0,1.0,20.0,20142.0,2A253,QUASQUARA,"41.90875371016429, 9.008361179069034",2006-01-01,,,,,,,,,,,,,,,,,0.0
5,20272001.0,10.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2006.0,1.0,1.0,20.0,20272.0,2B015,AMPRIANI,"42.253657150601654, 9.35731976413852",2006-01-01,,,,,,,,,,,,,,,,,0.0
6,20272004.0,10.0,,9.1,1300.0,11.5,615.0,10.3,10.3,,,2.4,10.3,3.0,13.0,300.0,2245.0,7.0,310.0,0.0,,,69.0,2030.0,94.0,1100.0,0.0,1043.0,10.7,85.0,,,,0.8,2006.0,1.0,1.0,20.0,20272.0,2B015,AMPRIANI,"42.253657150601654, 9.35731976413852",2006-01-01,,,,,,,,,,,,,,,,,0.0
7,20232002.0,25.0,,6.1,1715.0,8.7,601.0,7.5,7.4,,,2.6,7.4,4.5,19.0,300.0,2345.0,10.0,300.0,2230.0,,,80.0,2030.0,94.0,630.0,0.0,1440.0,9.2,89.0,,,,0.4,2006.0,1.0,1.0,20.0,20232.0,2B185,OLETTA,"42.641626187682455, 9.33321314144448",2006-01-01,,,,,,,,,,,,,,,,,0.0
8,20232002.0,25.0,,6.1,1715.0,8.7,601.0,7.5,7.4,,,2.6,7.4,4.5,19.0,300.0,2345.0,10.0,300.0,2230.0,,,80.0,2030.0,94.0,630.0,0.0,1440.0,9.2,89.0,,,,0.4,2006.0,1.0,1.0,20.0,20232.0,2B188,OLMETA-DI-TUDA,"42.61224090494986, 9.364359108275961",2006-01-01,,,,,,,,,,,,,,,,,0.0
9,20272001.0,10.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2006.0,1.0,1.0,20.0,20272.0,2B213,PIANELLO,"42.29743428222778, 9.356484925803798",2006-01-01,,,,,,,,,,,,,,,,,0.0


In [64]:
df.to_csv("Dataset_modele.csv", index=False, sep=';')
