# Feux de forêt

Projet Python de 2A à l'ENSAE portant sur l'étude des feux de forêt en France.

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import urllib
import os
from datetime import datetime, timedelta

# I. Récupération et traitement des données

On récupère les données sur le site de Météo-France. Les données disponibles complètes sur l'année sont de 1997 à 2019.
https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32

Voici notre fonction pour télécharger les données :

In [5]:
def download_data_meteo():
    
    for k in range(1997,2020):
        for i in range(1,13):

            if i <= 9:
                month = '0' + str(i)
            else:
                month = str(i)

            file_date = str(k) + month
            file_url = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.' + file_date + '.csv.gz'
            output_file_name = file_date + '.csv.gz'

            if os.path.exists('bdd_meteo') == 0:
                os.mkdir('bdd_meteo') 

            urllib.request.urlretrieve (file_url, 'bdd_meteo/' + output_file_name)
    
    return 'Téléchargement terminé !'    

Test d'un fichier en ouverture :

In [6]:
pd.read_csv('bdd_meteo/200202.csv.gz', sep = ';')

Unnamed: 0,numer_sta,date,pmer,tend,cod_tend,dd,ff,t,td,u,...,nnuage2,ctype2,hnuage2,nnuage3,ctype3,hnuage3,nnuage4,ctype4,hnuage4,Unnamed: 59
0,7005,20020201000000,101540,20,3,210,7.200000,281.850000,280.650000,92,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
1,7015,20020201000000,101520,-20,5,200,8.700000,281.450000,279.450000,87,...,7,3,3000,mq,mq,mq,mq,mq,mq,
2,7020,20020201000000,101500,180,2,220,9.800000,283.650000,283.050000,96,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
3,7027,20020201000000,101770,380,1,240,5.100000,282.450000,280.550000,88,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
4,7037,20020201000000,101690,10,3,250,8.200000,282.450000,281.850000,96,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12891,78922,20020228210000,101350,-70,5,80,4.100000,298.750000,290.650000,61,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
12892,81401,20020228210000,100980,mq,mq,50,5.100000,300.750000,296.350000,77,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
12893,81405,20020228210000,100970,mq,mq,70,4.100000,299.750000,297.050000,85,...,4,8,660,5,3,3000,mq,mq,mq,
12894,81415,20020228210000,100960,mq,mq,mq,mq,298.150000,296.950000,93,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,


Nous voyons que les fichiers bruts ont énormément de colonnes avec des noms peu clairs. Il y a également beaucoup de NaN (ici 'mq') que nous devrons traiter.

Voici la fonction pour traiter un fichier :

In [7]:
def cleaned_csv_meteo(year, month):
    
    if month <= 9:
        file = 'bdd_meteo/' + str(year) + '0' + str(month) + '.csv.gz'
    
    else:
        file = 'bdd_meteo/' + str(year) + str(month) + '.csv.gz'
        
    #ouvrir le fichier   
    df_file = pd.read_csv(file, sep=';')
    
    #sélectionner et renommer les colonnes
    df_file = df_file.rename(columns={'numer_sta':'Station', 'date':'Date', 't':'Température (°C)', 'rr3':'Précipitations (3 heures)', 'u':'Humidité (%)', 'ff':'Vitesse du vent (m/s)'})
    df_file = df_file[['Station', 'Date', 'Température (°C)', 'Précipitations (3 heures)', 'Humidité (%)', 'Vitesse du vent (m/s)']]
    
    #modifier le format de la date et l'heure
    df_file['Date'] = df_file['Date'].apply(lambda x: datetime(year = int(str(x)[0:4]), month = int(str(x)[4:6]), day = int(str(x)[6:8]), hour = int(str(x)[8:10])))
    
    #convertir les Kelvin en degrés Celsius
    l = list(df_file['Température (°C)'])
    for i in range(len(l)):
        if l[i] != 'mq':
            df_file.loc[i, 'Température (°C)'] = float(l[i]) - 273.15
    
    #trier par station et date ainsi que reset l'index
    df_file = df_file.sort_values(['Station', 'Date']).reset_index(drop = True)
    
    return df_file

Voici un exemple du résultat :

In [8]:
cleaned_csv_meteo(2002,2)

Unnamed: 0,Station,Date,Température (°C),Précipitations (3 heures),Humidité (%),Vitesse du vent (m/s)
0,7005,2002-02-01 00:00:00,8.7,0.800000,92,7.200000
1,7005,2002-02-01 03:00:00,9,0.000000,90,6.200000
2,7005,2002-02-01 06:00:00,9.1,mq,86,6.200000
3,7005,2002-02-01 09:00:00,11.1,0.200000,88,8.200000
4,7005,2002-02-01 12:00:00,13.3,mq,78,10.800000
...,...,...,...,...,...,...
12891,89642,2002-02-28 09:00:00,-7.8,mq,52,8.700000
12892,89642,2002-02-28 12:00:00,-8.4,mq,50,13.900000
12893,89642,2002-02-28 15:00:00,-7.4,mq,56,9.800000
12894,89642,2002-02-28 18:00:00,-7.7,mq,75,15.900000


Il ne reste plus qu'à regrouper tous les fichiers mensuels en un seul dataframe :

In [9]:
def concat_meteo(year_begin, year_end):
    
    df_meteo = pd.DataFrame()

    for k in range(year_begin, year_end + 1):
        for i in range(1,13):       
            df_meteo = pd.concat([df_meteo, cleaned_csv_meteo(k,i)], ignore_index = True)

    return df_meteo

In [222]:
#lignes de commande pour enregistrer le fichier 'data_meteo.csv'
#df_meteo = concat_meteo(1997,2019)
#df_meteo.to_csv('data_meteo.csv', index = False)

In [10]:
df_meteo = pd.read_csv('data_meteo_2ans.csv')
df_meteo

Unnamed: 0,Station,Date,Température (°C),Précipitations (3 heures),Humidité (%),Vitesse du vent (m/s)
0,7005,2017-01-01 00:00:00,-3.8999999999999773,0.000000,96,0.000000
1,7005,2017-01-01 03:00:00,-5.099999999999966,0.000000,94,0.000000
2,7005,2017-01-01 06:00:00,-4.099999999999966,0.000000,96,0.000000
3,7005,2017-01-01 09:00:00,-2.1999999999999886,0.000000,97,0.000000
4,7005,2017-01-01 12:00:00,-0.8999999999999773,0.000000,98,0.000000
...,...,...,...,...,...,...
331919,89642,2018-12-31 09:00:00,-1.3999999999999773,mq,65,20.100000
331920,89642,2018-12-31 12:00:00,-2.2999999999999545,mq,71,21.600000
331921,89642,2018-12-31 15:00:00,-1.5,mq,64,20.100000
331922,89642,2018-12-31 18:00:00,-2.3999999999999773,mq,72,20.100000


**On va désormais relier les stations météos à une position sur la carte de France**

Les fichiers 'postesSynop.txt' et 'postesSynop.json' ont été trouvés sur le site de Météo-France. Ils comportent les caractéristiques des stations météo.

In [33]:
stations = pd.read_csv('postesSynop.txt', sep = ';')
stations

Unnamed: 0,ID,Nom,Latitude,Longitude,Altitude
0,7005,ABBEVILLE,50.136000,1.834000,69
1,7015,LILLE-LESQUIN,50.570000,3.097500,47
2,7020,PTE DE LA HAGUE,49.725167,-1.939833,6
3,7027,CAEN-CARPIQUET,49.180000,-0.456167,67
4,7037,ROUEN-BOOS,49.383000,1.181667,151
...,...,...,...,...,...
57,81401,SAINT LAURENT,5.485500,-54.031667,5
58,81405,CAYENNE-MATOURY,4.822333,-52.365333,4
59,81408,SAINT GEORGES,3.890667,-51.804667,6
60,81415,MARIPASOULA,3.640167,-54.028333,106


In [12]:
stations_geo = gpd.read_file('postesSynop.json')
stations_geo

NameError: name 'gpd' is not defined

On utilise également la base de données des villes françaises. On traite ces données pour obtenir seulement les informations dont on a besoin. On traite aussi le texte en enlevant les tirets et slash.

In [52]:
villes = pd.read_csv('villes_france.csv',sep=',', low_memory=False)
villes = villes.iloc[:, [1,3,8,19,20]]
villes.columns = ['Département', 'Nom', 'Code postal', 'Longitude', 'Latitude']
villes['Nom'] = villes['Nom'].str.replace('-', ' ')
villes['Nom'] = villes['Nom'].str.replace('\'', ' ')

villes

Unnamed: 0,Département,Nom,Code postal,Longitude,Latitude
0,01,CORMORANCHE SUR SAONE,01290,4.83333,46.23330
1,01,PLAGNE,01130,5.73333,46.18330
2,01,TOSSIAT,01250,5.31667,46.13330
3,01,POUILLAT,01250,5.43333,46.33330
4,01,TORCIEU,01230,5.40000,45.91670
...,...,...,...,...,...
36694,976,SADA,97640,45.10470,-12.84860
36695,976,TSINGONI,97680,45.10700,-12.78970
36696,971,SAINT BARTHELEMY,97133,-62.83330,17.91670
36697,971,SAINT MARTIN,97150,18.09130,-63.08290


On utilise la base de données BDIFF pour les feux de forêt

In [53]:
df_feu = pd.read_csv('BDIFF_2017_2018.csv', sep = ';', skiprows = [0,1])
df_feu

Unnamed: 0,Année,Numéro,Département,Code INSEE,Commune,Date de première alerte,Surface brûlée (m2),Surface forêt (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Précision des surfaces,Statut
0,2017,5,12,12187,Prades-d'Aubrac,2017-01-07 12:20:00,10000,0,10000,0.0,0.0,0,Estimées,Validé
1,2017,6,12,12298,Villecomtal,2017-01-07 17:01:00,8000,0,8000,0.0,0.0,0,Estimées,Validé
2,2017,7,12,12277,Taussac,2017-01-07 17:05:00,10000,0,10000,0.0,0.0,0,Estimées,Validé
3,2017,8,12,12214,Saint-Chély-d'Aubrac,2017-01-07 17:38:00,20000,0,20000,0.0,0.0,0,Estimées,Validé
4,2017,9,12,12119,Laguiole,2017-01-07 18:02:00,600000,0,600000,0.0,0.0,0,Estimées,Validé
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5214,2018,3221,77,77186,Fontainebleau,2018-09-13 10:50:00,1,1,0,0.0,0.0,0,Estimées,Validé
5215,2018,3222,77,77186,Fontainebleau,2018-10-21 11:00:00,2,2,0,0.0,0.0,0,Estimées,Validé
5216,2018,3223,77,77186,Fontainebleau,2018-10-16 08:10:00,30000,30000,0,0.0,0.0,0,Estimées,Validé
5217,2018,3226,65,65045,Aucun,2018-03-14 14:37:00,100000,0,100000,0.0,0.0,0,Estimées,Validé


**Fusion feu/liste des villes**

Il n'y a pas de code INSEE pour la bdd des villes et pas de code postal pour la bdd des feux. Le nom de commune ne suffit pas car il y a des homonymes. On crée donc une colomne avec le nom de commune + le numéro de département pour la fusion. 

In [54]:
villes['Clé'] = villes['Département'] + villes['Nom']
villes = villes.drop(columns = ['Département', 'Nom'])
df_feu['Clé'] = df_feu['Département'] + df_feu['Commune'].apply(lambda row: str.upper(str(row)))

In [56]:
#jointure feu/ville

df_feu_villes = pd.merge(df_feu, villes, on = 'Clé', how ='inner')
df_feu_villes = df_feu_villes.drop(columns = 'Clé')
df_feu_villes

Unnamed: 0,Année,Numéro,Département,Code INSEE,Commune,Date de première alerte,Surface brûlée (m2),Surface forêt (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Précision des surfaces,Statut,Code postal,Longitude,Latitude
0,2017,6,12,12298,Villecomtal,2017-01-07 17:01:00,8000,0,8000,0.0,0.0,0,Estimées,Validé,12580,2.566670,44.5333
1,2017,7,12,12277,Taussac,2017-01-07 17:05:00,10000,0,10000,0.0,0.0,0,Estimées,Validé,12600,2.650000,44.8333
2,2017,9,12,12119,Laguiole,2017-01-07 18:02:00,600000,0,600000,0.0,0.0,0,Estimées,Validé,12210,2.850000,44.6833
3,2018,1203,12,12119,Laguiole,2018-08-11 22:36:00,300,300,0,0.0,0.0,0,Estimées,Validé,12210,2.850000,44.6833
4,2017,13,12,12272,Sonnac,2017-01-21 15:39:00,5000,0,5000,0.0,0.0,0,Estimées,Validé,12700,2.100000,44.5500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2100,2018,3116,83,83098,Le Pradet,2018-01-04 02:15:00,1000,1000,0,,,0,,Validé,83220,6.016670,43.1000
2101,2018,3124,24,24155,Douville,2018-10-02 10:50:00,200,200,0,,,0,,Validé,24140,0.583333,45.0000
2102,2018,3154,24,24564,Vanxains,2018-10-05 18:21:00,400,400,0,,,0,,Validé,24600,0.283333,45.2167
2103,2018,3168,24,24550,Thenon,2018-09-30 19:05:00,100,100,0,,,0,,Validé,24210,1.066670,45.1333


On voit qu'on perd des lignes car certains feux ont des NaN dans la colonne 'Commune'

In [57]:
stations

Unnamed: 0,ID,Nom,Latitude,Longitude,Altitude
0,7005,ABBEVILLE,50.136000,1.834000,69
1,7015,LILLE-LESQUIN,50.570000,3.097500,47
2,7020,PTE DE LA HAGUE,49.725167,-1.939833,6
3,7027,CAEN-CARPIQUET,49.180000,-0.456167,67
4,7037,ROUEN-BOOS,49.383000,1.181667,151
...,...,...,...,...,...
57,81401,SAINT LAURENT,5.485500,-54.031667,5
58,81405,CAYENNE-MATOURY,4.822333,-52.365333,4
59,81408,SAINT GEORGES,3.890667,-51.804667,6
60,81415,MARIPASOULA,3.640167,-54.028333,106


Calcul de la distance entre chaque commune de df_feu_villes et chaque station pour trouver la plus proche :

In [58]:
import geopy.distance

In [59]:
def distance(lat1, long1, lat2, long2):
    
    coords_1 = (lat1, long1)
    coords_2 = (lat2, long2)
    
    return(geopy.distance.distance(coords_1, coords_2).km)

In [60]:
def station_la_plus_proche(codeinsee):
    
    sub = df_feu_villes[df_feu_villes['Code INSEE'] == codeinsee]
    L_dist = []
    lat = sub['Latitude'].iloc[0]
    long = sub['Longitude'].iloc[0]
    
    if long is not None and lat is not None:        
        for k in range(stations.shape[0]):
            L_dist.append(distance(lat, long, stations.loc[k, 'Latitude'], stations.loc[k, 'Longitude']))
            
        return stations.iloc[L_dist.index(min(L_dist)), 1]
    
    else: 
        return None 

In [61]:
df_feu_villes['Station la plus proche'] = df_feu_villes['Code INSEE'].apply(lambda row: station_la_plus_proche(row))
df_feu_villes

Unnamed: 0,Année,Numéro,Département,Code INSEE,Commune,Date de première alerte,Surface brûlée (m2),Surface forêt (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),Surfaces non boisées artificialisées (m2),Surfaces non boisées (m2),Précision des surfaces,Statut,Code postal,Longitude,Latitude,Station la plus proche
0,2017,6,12,12298,Villecomtal,2017-01-07 17:01:00,8000,0,8000,0.0,0.0,0,Estimées,Validé,12580,2.566670,44.5333,MILLAU
1,2017,7,12,12277,Taussac,2017-01-07 17:05:00,10000,0,10000,0.0,0.0,0,Estimées,Validé,12600,2.650000,44.8333,MILLAU
2,2017,9,12,12119,Laguiole,2017-01-07 18:02:00,600000,0,600000,0.0,0.0,0,Estimées,Validé,12210,2.850000,44.6833,MILLAU
3,2018,1203,12,12119,Laguiole,2018-08-11 22:36:00,300,300,0,0.0,0.0,0,Estimées,Validé,12210,2.850000,44.6833,MILLAU
4,2017,13,12,12272,Sonnac,2017-01-21 15:39:00,5000,0,5000,0.0,0.0,0,Estimées,Validé,12700,2.100000,44.5500,GOURDON
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2100,2018,3116,83,83098,Le Pradet,2018-01-04 02:15:00,1000,1000,0,,,0,,Validé,83220,6.016670,43.1000,CAP CEPET
2101,2018,3124,24,24155,Douville,2018-10-02 10:50:00,200,200,0,,,0,,Validé,24140,0.583333,45.0000,GOURDON
2102,2018,3154,24,24564,Vanxains,2018-10-05 18:21:00,400,400,0,,,0,,Validé,24600,0.283333,45.2167,BORDEAUX-MERIGNAC
2103,2018,3168,24,24550,Thenon,2018-09-30 19:05:00,100,100,0,,,0,,Validé,24210,1.066670,45.1333,GOURDON


In [62]:
df_fdf = pd.merge(df_feu_villes, stations, how = 'inner', left_on = 'Station la plus proche', right_on = 'Nom')
df_fdf = df_fdf.drop(columns = 'Nom')
df_fdf=df_fdf.rename(columns={"Longitude_x": "Longitude commune", 
                                  "Latitude_x": "Latitude commune", "ID": "id_station", "Latitude_y": "Latitude station", 
                                  "Longitude_y": "Longitude station", "Altitude": "Altitude station"})
df_fdf

Unnamed: 0,Année,Numéro,Département,Code INSEE,Commune,Date de première alerte,Surface brûlée (m2),Surface forêt (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),...,Précision des surfaces,Statut,Code postal,Longitude commune,Latitude commune,Station la plus proche,id_station,Latitude station,Longitude station,Altitude station
0,2017,6,12,12298,Villecomtal,2017-01-07 17:01:00,8000,0,8000,0.0,...,Estimées,Validé,12580,2.566670,44.5333,MILLAU,7558,44.118500,3.019500,712
1,2017,7,12,12277,Taussac,2017-01-07 17:05:00,10000,0,10000,0.0,...,Estimées,Validé,12600,2.650000,44.8333,MILLAU,7558,44.118500,3.019500,712
2,2017,9,12,12119,Laguiole,2017-01-07 18:02:00,600000,0,600000,0.0,...,Estimées,Validé,12210,2.850000,44.6833,MILLAU,7558,44.118500,3.019500,712
3,2018,1203,12,12119,Laguiole,2018-08-11 22:36:00,300,300,0,0.0,...,Estimées,Validé,12210,2.850000,44.6833,MILLAU,7558,44.118500,3.019500,712
4,2017,77,12,12189,Pradinas,2017-02-23 16:45:00,100,0,100,0.0,...,Estimées,Validé,12240,2.266670,44.2333,MILLAU,7558,44.118500,3.019500,712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2100,2018,2856,83,83075,Les Mayons,2018-08-14 14:26:00,10,10,0,,...,,Validé,83340,6.366670,43.3167,CAP CEPET,7661,43.079333,5.940833,115
2101,2018,2857,83,83075,Les Mayons,2018-08-10 19:07:00,300,300,0,,...,,Validé,83340,6.366670,43.3167,CAP CEPET,7661,43.079333,5.940833,115
2102,2018,2864,83,83121,Salernes,2018-08-05 17:00:00,2,2,0,,...,,Validé,83690,6.233330,43.5500,CAP CEPET,7661,43.079333,5.940833,115
2103,2018,3116,83,83098,Le Pradet,2018-01-04 02:15:00,1000,1000,0,,...,,Validé,83220,6.016670,43.1000,CAP CEPET,7661,43.079333,5.940833,115


**Obtention du fond de carte des communes:**

In [204]:
import requests
import tempfile
import zipfile

temporary_location = tempfile.gettempdir()

def download_unzip(url, dirname = tempfile.gettempdir(), destname = "borders"):
    myfile = requests.get(url)
    open(dirname + '/' + destname + '.zip', 'wb').write(myfile.content)
    with zipfile.ZipFile(dirname + '/' + destname + '.zip', 'r') as zip_ref:
        zip_ref.extractall(dirname + '/' + destname)

In [205]:
download_unzip("https://www.data.gouv.fr/fr/datasets/r/07b7c9a2-d1e2-4da6-9f20-01a7b72d4b12")
communes_borders = gpd.read_file(temporary_location + "/borders/communes-20190101.json")
communes_borders.head()

NameError: name 'gpd' is not defined

In [None]:
communes_borders['dep'] = communes_borders['insee'].str[:2]

In [None]:
france = communes_borders[communes_borders['dep'] != '97']
fr = france.plot(figsize = (10, 10), alpha = 0.5, edgecolor = 'k')
fr

In [None]:
aveyron = communes_borders[communes_borders.insee.str.startswith("12")]
av = aveyron.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
av

**Fusion des deux bases :**

Nous avons désormais deux bases de données. La première df_meteo indique les rélévés météorologiques prises dans chacune des stations toutes les trois heures. La deuxième df_fdf indique notamment pour chaque feu de forêt la date de première alerte et la station météorologique la plus proche de l'incendie. 


In [63]:
df_meteo

Unnamed: 0,Station,Date,Température (°C),Précipitations (3 heures),Humidité (%),Vitesse du vent (m/s)
0,7005,2017-01-01 00:00:00,-3.8999999999999773,0.000000,96,0.000000
1,7005,2017-01-01 03:00:00,-5.099999999999966,0.000000,94,0.000000
2,7005,2017-01-01 06:00:00,-4.099999999999966,0.000000,96,0.000000
3,7005,2017-01-01 09:00:00,-2.1999999999999886,0.000000,97,0.000000
4,7005,2017-01-01 12:00:00,-0.8999999999999773,0.000000,98,0.000000
...,...,...,...,...,...,...
331919,89642,2018-12-31 09:00:00,-1.3999999999999773,mq,65,20.100000
331920,89642,2018-12-31 12:00:00,-2.2999999999999545,mq,71,21.600000
331921,89642,2018-12-31 15:00:00,-1.5,mq,64,20.100000
331922,89642,2018-12-31 18:00:00,-2.3999999999999773,mq,72,20.100000


In [64]:
df_fdf

Unnamed: 0,Année,Numéro,Département,Code INSEE,Commune,Date de première alerte,Surface brûlée (m2),Surface forêt (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),...,Précision des surfaces,Statut,Code postal,Longitude commune,Latitude commune,Station la plus proche,id_station,Latitude station,Longitude station,Altitude station
0,2017,6,12,12298,Villecomtal,2017-01-07 17:01:00,8000,0,8000,0.0,...,Estimées,Validé,12580,2.566670,44.5333,MILLAU,7558,44.118500,3.019500,712
1,2017,7,12,12277,Taussac,2017-01-07 17:05:00,10000,0,10000,0.0,...,Estimées,Validé,12600,2.650000,44.8333,MILLAU,7558,44.118500,3.019500,712
2,2017,9,12,12119,Laguiole,2017-01-07 18:02:00,600000,0,600000,0.0,...,Estimées,Validé,12210,2.850000,44.6833,MILLAU,7558,44.118500,3.019500,712
3,2018,1203,12,12119,Laguiole,2018-08-11 22:36:00,300,300,0,0.0,...,Estimées,Validé,12210,2.850000,44.6833,MILLAU,7558,44.118500,3.019500,712
4,2017,77,12,12189,Pradinas,2017-02-23 16:45:00,100,0,100,0.0,...,Estimées,Validé,12240,2.266670,44.2333,MILLAU,7558,44.118500,3.019500,712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2100,2018,2856,83,83075,Les Mayons,2018-08-14 14:26:00,10,10,0,,...,,Validé,83340,6.366670,43.3167,CAP CEPET,7661,43.079333,5.940833,115
2101,2018,2857,83,83075,Les Mayons,2018-08-10 19:07:00,300,300,0,,...,,Validé,83340,6.366670,43.3167,CAP CEPET,7661,43.079333,5.940833,115
2102,2018,2864,83,83121,Salernes,2018-08-05 17:00:00,2,2,0,,...,,Validé,83690,6.233330,43.5500,CAP CEPET,7661,43.079333,5.940833,115
2103,2018,3116,83,83098,Le Pradet,2018-01-04 02:15:00,1000,1000,0,,...,,Validé,83220,6.016670,43.1000,CAP CEPET,7661,43.079333,5.940833,115


Pour chaque station, les données météorologiques sont relevées toutes les trois heures (de minuit jusqu'à 21h). 

In [65]:
def plus_proche_relevé(date) :
    "retourne la date de relevé météorologique la plus proche de la date de première alerte"
    
    date = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
    
    relevé_ant = datetime(date.year, date.month, date.day, date.hour//3 *3)
    relevé_post = relevé_ant + timedelta(hours=3)

    if date - relevé_ant < relevé_post - date : 
        return relevé_ant
    return relevé_post 

In [66]:
df_fdf['Date de relevé'] = df_fdf['Date de première alerte'].apply(lambda row: plus_proche_relevé(row))
df_fdf

Unnamed: 0,Année,Numéro,Département,Code INSEE,Commune,Date de première alerte,Surface brûlée (m2),Surface forêt (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),...,Statut,Code postal,Longitude commune,Latitude commune,Station la plus proche,id_station,Latitude station,Longitude station,Altitude station,Date de relevé
0,2017,6,12,12298,Villecomtal,2017-01-07 17:01:00,8000,0,8000,0.0,...,Validé,12580,2.566670,44.5333,MILLAU,7558,44.118500,3.019500,712,2017-01-07 18:00:00
1,2017,7,12,12277,Taussac,2017-01-07 17:05:00,10000,0,10000,0.0,...,Validé,12600,2.650000,44.8333,MILLAU,7558,44.118500,3.019500,712,2017-01-07 18:00:00
2,2017,9,12,12119,Laguiole,2017-01-07 18:02:00,600000,0,600000,0.0,...,Validé,12210,2.850000,44.6833,MILLAU,7558,44.118500,3.019500,712,2017-01-07 18:00:00
3,2018,1203,12,12119,Laguiole,2018-08-11 22:36:00,300,300,0,0.0,...,Validé,12210,2.850000,44.6833,MILLAU,7558,44.118500,3.019500,712,2018-08-12 00:00:00
4,2017,77,12,12189,Pradinas,2017-02-23 16:45:00,100,0,100,0.0,...,Validé,12240,2.266670,44.2333,MILLAU,7558,44.118500,3.019500,712,2017-02-23 18:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2100,2018,2856,83,83075,Les Mayons,2018-08-14 14:26:00,10,10,0,,...,Validé,83340,6.366670,43.3167,CAP CEPET,7661,43.079333,5.940833,115,2018-08-14 15:00:00
2101,2018,2857,83,83075,Les Mayons,2018-08-10 19:07:00,300,300,0,,...,Validé,83340,6.366670,43.3167,CAP CEPET,7661,43.079333,5.940833,115,2018-08-10 18:00:00
2102,2018,2864,83,83121,Salernes,2018-08-05 17:00:00,2,2,0,,...,Validé,83690,6.233330,43.5500,CAP CEPET,7661,43.079333,5.940833,115,2018-08-05 18:00:00
2103,2018,3116,83,83098,Le Pradet,2018-01-04 02:15:00,1000,1000,0,,...,Validé,83220,6.016670,43.1000,CAP CEPET,7661,43.079333,5.940833,115,2018-01-04 03:00:00


In [67]:
df_fdf[['Date de relevé',"id_station"]].dtypes

Date de relevé    datetime64[ns]
id_station                 int64
dtype: object

Création de la clé pour fusionner les deux dataframes. Les variables pour la station et la date sont de deux types différents (int et datetime), pour les regrouper sous une seule clé, il faut les convertir en un même type (ici en str).

In [68]:
df_fdf["key"] = df_fdf["id_station"].apply(lambda row: str(row)) + " " + df_fdf["Date de relevé"].apply(lambda row: str(row))
df_meteo["key"] = df_meteo["Station"].apply(lambda row: str(row)) + " " + df_meteo["Date"].apply(lambda row: str(row))

In [69]:
df = pd.merge(df_fdf, df_meteo, on = 'key', how ='inner')
df = df.drop(columns = ['key','Date','Station'])

In [81]:
df.dtypes

Année                                                 int64
Numéro                                                int64
Département                                          object
Code INSEE                                           object
Commune                                              object
Date de première alerte                              object
Surface brûlée (m2)                                   int64
Surface forêt (m2)                                    int64
Surface autres terres boisées (m2)                    int64
Surfaces non boisées naturelles (m2)                float64
Surfaces non boisées artificialisées (m2)           float64
Surfaces non boisées (m2)                             int64
Précision des surfaces                               object
Statut                                               object
Code postal                                          object
Longitude commune                                   float64
Latitude commune                        

In [83]:
df

Unnamed: 0,Année,Numéro,Département,Code INSEE,Commune,Date de première alerte,Surface brûlée (m2),Surface forêt (m2),Surface autres terres boisées (m2),Surfaces non boisées naturelles (m2),...,Station la plus proche,id_station,Latitude station,Longitude station,Altitude station,Date de relevé,Température (°C),Précipitations (3 heures),Humidité (%),Vitesse du vent (m/s)
0,2017,6,12,12298,Villecomtal,2017-01-07 17:01:00,8000,0,8000,0.0,...,MILLAU,7558,44.118500,3.019500,712,2017-01-07 18:00:00,4.0,0.000000,20,1.700000
1,2017,7,12,12277,Taussac,2017-01-07 17:05:00,10000,0,10000,0.0,...,MILLAU,7558,44.118500,3.019500,712,2017-01-07 18:00:00,4.0,0.000000,20,1.700000
2,2017,9,12,12119,Laguiole,2017-01-07 18:02:00,600000,0,600000,0.0,...,MILLAU,7558,44.118500,3.019500,712,2017-01-07 18:00:00,4.0,0.000000,20,1.700000
3,2018,1203,12,12119,Laguiole,2018-08-11 22:36:00,300,300,0,0.0,...,MILLAU,7558,44.118500,3.019500,712,2018-08-12 00:00:00,18.600000000000023,0.000000,40,0.700000
4,2017,77,12,12189,Pradinas,2017-02-23 16:45:00,100,0,100,0.0,...,MILLAU,7558,44.118500,3.019500,712,2017-02-23 18:00:00,10.800000000000011,0.000000,65,5.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2089,2018,2856,83,83075,Les Mayons,2018-08-14 14:26:00,10,10,0,,...,CAP CEPET,7661,43.079333,5.940833,115,2018-08-14 15:00:00,mq,mq,mq,8.600000
2090,2018,2857,83,83075,Les Mayons,2018-08-10 19:07:00,300,300,0,,...,CAP CEPET,7661,43.079333,5.940833,115,2018-08-10 18:00:00,27.5,0.000000,44,8.900000
2091,2018,2864,83,83121,Salernes,2018-08-05 17:00:00,2,2,0,,...,CAP CEPET,7661,43.079333,5.940833,115,2018-08-05 18:00:00,28.30000000000001,0.000000,62,1.500000
2092,2018,3116,83,83098,Le Pradet,2018-01-04 02:15:00,1000,1000,0,,...,CAP CEPET,7661,43.079333,5.940833,115,2018-01-04 03:00:00,12.700000000000045,0.000000,69,15.400000
