# Feux de forêt

Projet Python de 2A à l'ENSAE portant sur l'étude des feux de forêt en France.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import urllib
import os
from datetime import datetime



# I. Récupération et traitement des données

On récupère les données sur le site de Météo-France. Les années pour lesquelles les données sont disponibles en intégralité vont de 1997 à 2019.
https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32

Voici notre fonction pour télécharger les données :

In [2]:
def download_data_meteo(start_year=1997, end_year=2019, output_dir='bdd_meteo'):
    """Download the monthly Météo-France SYNOP archives as gzipped CSV files.

    One archive per month is fetched and stored as
    ``<output_dir>/<YYYYMM>.csv.gz``.

    Parameters
    ----------
    start_year : int, optional
        First year to download (inclusive). Defaults to 1997.
    end_year : int, optional
        Last year to download (inclusive). Defaults to 2019.
    output_dir : str, optional
        Directory receiving the files; it is created when missing.
        Defaults to 'bdd_meteo'.

    Returns
    -------
    str
        The confirmation message 'Téléchargement terminé !'.
    """
    # `import urllib` alone does not guarantee that the `request` submodule
    # is loaded, so it is imported explicitly where it is used.
    import urllib.request

    base_url = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.'

    # Create the target directory once, before the first download.
    os.makedirs(output_dir, exist_ok=True)

    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            # Archives are named after the zero-padded YYYYMM period.
            file_date = str(year) + str(month).zfill(2)
            file_url = base_url + file_date + '.csv.gz'
            output_file_name = file_date + '.csv.gz'

            urllib.request.urlretrieve(file_url, os.path.join(output_dir, output_file_name))

    return 'Téléchargement terminé !'

Test d'ouverture d'un fichier :

In [3]:
# Preview one raw monthly file (February 2002); fields are semicolon-separated.
pd.read_csv('bdd_meteo/200202.csv.gz', delimiter=';')

Unnamed: 0,numer_sta,date,pmer,tend,cod_tend,dd,ff,t,td,u,...,nnuage2,ctype2,hnuage2,nnuage3,ctype3,hnuage3,nnuage4,ctype4,hnuage4,Unnamed: 59
0,7005,20020201000000,101540,20,3,210,7.200000,281.850000,280.650000,92,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
1,7015,20020201000000,101520,-20,5,200,8.700000,281.450000,279.450000,87,...,7,3,3000,mq,mq,mq,mq,mq,mq,
2,7020,20020201000000,101500,180,2,220,9.800000,283.650000,283.050000,96,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
3,7027,20020201000000,101770,380,1,240,5.100000,282.450000,280.550000,88,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
4,7037,20020201000000,101690,10,3,250,8.200000,282.450000,281.850000,96,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12891,78922,20020228210000,101350,-70,5,80,4.100000,298.750000,290.650000,61,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
12892,81401,20020228210000,100980,mq,mq,50,5.100000,300.750000,296.350000,77,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,
12893,81405,20020228210000,100970,mq,mq,70,4.100000,299.750000,297.050000,85,...,4,8,660,5,3,3000,mq,mq,mq,
12894,81415,20020228210000,100960,mq,mq,mq,mq,298.150000,296.950000,93,...,mq,mq,mq,mq,mq,mq,mq,mq,mq,


Nous voyons que les fichiers bruts ont énormément de colonnes avec des noms peu clairs. Il y a également beaucoup de valeurs manquantes (codées 'mq' dans ces fichiers) que nous devrons traiter.

Voici la fonction pour traiter un fichier :

In [4]:
def cleaned_csv(year, month):
    """Load one monthly SYNOP file and return it as a tidy weather DataFrame.

    Reads ``bdd_meteo/<YYYYMM>.csv.gz`` (semicolon-separated), keeps the
    station, timestamp, temperature, 3-hour precipitation, humidity and wind
    speed columns under their French display names, parses the timestamp,
    converts the temperature from Kelvin to degrees Celsius and sorts the
    rows by station and date.

    Values that cannot be read as numbers in the temperature column (the
    files use the marker 'mq') are left unchanged; the other measurement
    columns are returned as read from the file.

    Parameters
    ----------
    year : int
        Year of the file to load.
    month : int
        Month of the file to load (1-12).

    Returns
    -------
    pandas.DataFrame
        Columns 'Station', 'Date', 'Température (°C)',
        'Précipitations (3 heures)', 'Humidité (%)' and
        'Vitesse du vent (m/s)', sorted by station then date, with a fresh
        0..n-1 index.
    """
    # Monthly files are named after the zero-padded YYYYMM period.
    file = 'bdd_meteo/' + str(year) + str(month).zfill(2) + '.csv.gz'

    # Open the file.
    df_file = pd.read_csv(file, sep=';')

    # Select and rename the columns of interest.
    df_file = df_file.rename(columns={'numer_sta':'Station', 'date':'Date', 't':'Température (°C)', 'rr3':'Précipitations (3 heures)', 'u':'Humidité (%)', 'ff':'Vitesse du vent (m/s)'})
    df_file = df_file[['Station', 'Date', 'Température (°C)', 'Précipitations (3 heures)', 'Humidité (%)', 'Vitesse du vent (m/s)']]

    # Parse the timestamp in one vectorised call. Only the first ten digits
    # (YYYYMMDDHH) are used; minutes and seconds are dropped.
    df_file['Date'] = pd.to_datetime(df_file['Date'].astype(str).str[:10], format='%Y%m%d%H')

    # Convert Kelvin to degrees Celsius in a single vectorised pass instead of
    # one scalar `.loc` write per row. Entries that are not numeric (such as
    # 'mq') become NaN in `kelvin` and therefore keep their original value.
    raw_temperature = df_file['Température (°C)']
    kelvin = pd.to_numeric(raw_temperature, errors='coerce')
    df_file['Température (°C)'] = raw_temperature.where(kelvin.isna(), kelvin - 273.15)

    # Sort by station and date, then reset the index.
    df_file = df_file.sort_values(['Station', 'Date']).reset_index(drop=True)

    return df_file

Voici un exemple du résultat :

In [5]:
# Example: cleaned data for November 2010.
cleaned_csv(year=2010, month=11)

Unnamed: 0,Station,Date,Température (°C),Précipitations (3 heures),Humidité (%),Vitesse du vent (m/s)
0,7005,2010-11-01 00:00:00,6.7,0.000000,94,2.600000
1,7005,2010-11-01 03:00:00,6.2,0.200000,95,2.600000
2,7005,2010-11-01 06:00:00,8.7,0.000000,95,1.500000
3,7005,2010-11-01 09:00:00,10.5,0.000000,95,3.100000
4,7005,2010-11-01 12:00:00,11.5,0.000000,92,5.100000
...,...,...,...,...,...,...
13938,89642,2010-11-30 09:00:00,-2,mq,33,1.500000
13939,89642,2010-11-30 12:00:00,-5.6,mq,29,3.600000
13940,89642,2010-11-30 15:00:00,-8.3,mq,32,1.000000
13941,89642,2010-11-30 18:00:00,-10.1,mq,42,2.600000


Il ne reste plus qu'à regrouper tous les fichiers mensuels en un seul dataframe :

In [7]:
# Clean every monthly file of 2017 and 2018 first, then concatenate once.
# Calling pd.concat inside the loop would re-copy all previously accumulated
# rows at every iteration.
monthly_frames = [
    cleaned_csv(year, month)
    for year in range(2017, 2019)
    for month in range(1, 13)
]
df_meteo = pd.concat(monthly_frames, ignore_index=True)

df_meteo

Unnamed: 0,Station,Date,Température (°C),Précipitations (3 heures),Humidité (%),Vitesse du vent (m/s)
0,7005,2017-01-01 00:00:00,-3.9,0.000000,96,0.000000
1,7005,2017-01-01 03:00:00,-5.1,0.000000,94,0.000000
2,7005,2017-01-01 06:00:00,-4.1,0.000000,96,0.000000
3,7005,2017-01-01 09:00:00,-2.2,0.000000,97,0.000000
4,7005,2017-01-01 12:00:00,-0.9,0.000000,98,0.000000
...,...,...,...,...,...,...
331919,89642,2018-12-31 09:00:00,-1.4,mq,65,20.100000
331920,89642,2018-12-31 12:00:00,-2.3,mq,71,21.600000
331921,89642,2018-12-31 15:00:00,-1.5,mq,64,20.100000
331922,89642,2018-12-31 18:00:00,-2.4,mq,72,20.100000


In [8]:
# Show the first 20 rows of the combined weather data.
df_meteo.head(n=20)

Unnamed: 0,Station,Date,Température (°C),Précipitations (3 heures),Humidité (%),Vitesse du vent (m/s)
0,7005,2017-01-01 00:00:00,-3.9,0.0,96,0.0
1,7005,2017-01-01 03:00:00,-5.1,0.0,94,0.0
2,7005,2017-01-01 06:00:00,-4.1,0.0,96,0.0
3,7005,2017-01-01 09:00:00,-2.2,0.0,97,0.0
4,7005,2017-01-01 12:00:00,-0.9,0.0,98,0.0
5,7005,2017-01-01 15:00:00,0.1,0.0,98,0.0
6,7005,2017-01-01 18:00:00,0.2,0.0,98,0.0
7,7005,2017-01-01 21:00:00,0.6,0.0,98,0.0
8,7005,2017-01-02 00:00:00,0.7,0.0,98,0.0
9,7005,2017-01-02 03:00:00,1.0,-0.1,98,0.7


In [11]:
# Save the two-year dataset to disk, without writing the index as a column.
df_meteo.to_csv(path_or_buf='data_meteo_2ans.csv', index=False)