# Individual Project

## Data Analysis and Visualization of Large Wildfire Spread in Portugal

In [219]:
import pandas as pd
import numpy as np
import re
from geopy.geocoders import Nominatim

In [220]:
firespread = pd.read_excel("PT-FireSprd_L3_FireBehavior.xlsx")
firespread.head()

Unnamed: 0,fid,fname,burn_perio,year,sdate,edate,qc,inidoy,enddoy,duration,area,growth_rat,ros,spdir,max_ros,FRE,FRE_perc,FRE_flux
0,29,Abrantes_09082017,1,2017,2017-08-09 18:00,2017-08-09 23:00,3,221.75,221.958333,5.0,500.0,100.00016,1199.623292,155.744885,1199.623292,-1.0,60.0,-1.0
1,29,Abrantes_09082017,2,2017,2017-08-09 23:00,2017-08-10 03:00,3,221.958333,222.125,4.0,195.75,48.937402,388.377944,181.15208,393.534571,-1.0,12.5,-1.0
2,29,Abrantes_09082017,3,2017,na,2017-08-10 12:00,3,-1.0,222.5,-1.0,191.75,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0
3,29,Abrantes_09082017,4,2017,2017-08-10 12:00,2017-08-10 14:30,3,222.5,222.604167,2.5,1184.25,473.698484,1088.042456,238.370571,1088.042456,11.37195,100.0,3.841064
4,29,Abrantes_09082017,5,2017,2017-08-10 14:30,2017-08-10 22:00,3,222.604167,222.916667,7.5,1552.5,207.0,580.932693,170.189772,585.006347,40.08456,100.0,3.442582


In [221]:
firespread.columns

Index(['fid', 'fname', 'burn_perio', 'year', 'sdate', 'edate', 'qc', 'inidoy',
       'enddoy', 'duration', 'area', 'growth_rat', 'ros', 'spdir', 'max_ros',
       'FRE', 'FRE_perc', 'FRE_flux'],
      dtype='object')

### Column Details:

- fid: Fire Id
- fname: Fire Name
- burn_perio: Burning Period
- year
- sdate: start date
- edate: end date
- qc: confidence flag
- inidoy: start day-of-year
- enddoy: end day-of-year
- duration (hours)
- area (ha)
- growth_rat: growth rate (ha/h)
- ros: rate of spread (m/h)
- spdir: spread direction
- max_ros: maximum rate of spread (m/h)
- FRE: Fire Radiative Energy
- FRE_perc: percentage of FRE observations
- FRE_flux: Fire Radiative Energy flux (TJ/ha.h)



### Data cleaning

In [222]:
# Checking df info like total n of entries, columns, null values and data types

firespread.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 270 entries, 0 to 269
Data columns (total 18 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   fid         270 non-null    int64  
 1   fname       270 non-null    object 
 2   burn_perio  270 non-null    int64  
 3   year        270 non-null    int64  
 4   sdate       270 non-null    object 
 5   edate       270 non-null    object 
 6   qc          270 non-null    int64  
 7   inidoy      270 non-null    float64
 8   enddoy      270 non-null    float64
 9   duration    270 non-null    float64
 10  area        270 non-null    float64
 11  growth_rat  270 non-null    float64
 12  ros         270 non-null    float64
 13  spdir       270 non-null    float64
 14  max_ros     270 non-null    float64
 15  FRE         270 non-null    float64
 16  FRE_perc    270 non-null    float64
 17  FRE_flux    270 non-null    float64
dtypes: float64(11), int64(4), object(3)
memory usage: 38.1+ KB


In [223]:
# Dropping unnecessary columns from the df

firespread = firespread.drop(['burn_perio', 'qc'], axis=1)
firespread.head()

Unnamed: 0,fid,fname,year,sdate,edate,inidoy,enddoy,duration,area,growth_rat,ros,spdir,max_ros,FRE,FRE_perc,FRE_flux
0,29,Abrantes_09082017,2017,2017-08-09 18:00,2017-08-09 23:00,221.75,221.958333,5.0,500.0,100.00016,1199.623292,155.744885,1199.623292,-1.0,60.0,-1.0
1,29,Abrantes_09082017,2017,2017-08-09 23:00,2017-08-10 03:00,221.958333,222.125,4.0,195.75,48.937402,388.377944,181.15208,393.534571,-1.0,12.5,-1.0
2,29,Abrantes_09082017,2017,na,2017-08-10 12:00,-1.0,222.5,-1.0,191.75,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0
3,29,Abrantes_09082017,2017,2017-08-10 12:00,2017-08-10 14:30,222.5,222.604167,2.5,1184.25,473.698484,1088.042456,238.370571,1088.042456,11.37195,100.0,3.841064
4,29,Abrantes_09082017,2017,2017-08-10 14:30,2017-08-10 22:00,222.604167,222.916667,7.5,1552.5,207.0,580.932693,170.189772,585.006347,40.08456,100.0,3.442582


In [224]:
# Using replace() to transform na strings into NaN values and then use dropna() to eliminate 
# them from the dataset 

firespread = firespread.replace('na', np.nan)
firespread = firespread.dropna()

In [225]:
firespread.head()

Unnamed: 0,fid,fname,year,sdate,edate,inidoy,enddoy,duration,area,growth_rat,ros,spdir,max_ros,FRE,FRE_perc,FRE_flux
0,29,Abrantes_09082017,2017,2017-08-09 18:00,2017-08-09 23:00,221.75,221.958333,5.0,500.0,100.00016,1199.623292,155.744885,1199.623292,-1.0,60.0,-1.0
1,29,Abrantes_09082017,2017,2017-08-09 23:00,2017-08-10 03:00,221.958333,222.125,4.0,195.75,48.937402,388.377944,181.15208,393.534571,-1.0,12.5,-1.0
3,29,Abrantes_09082017,2017,2017-08-10 12:00,2017-08-10 14:30,222.5,222.604167,2.5,1184.25,473.698484,1088.042456,238.370571,1088.042456,11.37195,100.0,3.841064
4,29,Abrantes_09082017,2017,2017-08-10 14:30,2017-08-10 22:00,222.604167,222.916667,7.5,1552.5,207.0,580.932693,170.189772,585.006347,40.08456,100.0,3.442582
5,48,Agueda_05092019,2019,2019-09-05 09:30,2019-09-05 18:00,248.395833,248.75,8.5,123.75,14.55881,346.705124,249.142735,924.814568,8.46612,89.612188,8.048599


In [226]:
# Using astype("string") to change fname type from object to string and then use str.replace() 
# to clean the name of the location so it's more perceptible 
firespread['fname']= firespread['fname'].astype("string")
firespread['fname'] = firespread['fname'].str.replace('\d+', '')
firespread['fname'] = firespread['fname'].str.replace('_', '')

  firespread['fname'] = firespread['fname'].str.replace('\d+', '')


In [227]:
#firespread['fname'] = [re.sub(r"(\w)([A-Z])", r"\1 \2", ele) for ele in firespread['fname']]

In [228]:
#firespread['fname'] = [re.sub(r"de([A-Z])", r"de\1", ele) for ele in firespread['fname']]

In [229]:
name_list = [
    'Abrantes',
    'Agueda',
    'Alcobaca',
    'Alijo',
    'Aljezur',
    'Alvaiazere',
    'Anadia',
    'Arcos',
    'de',
    'Valdevez',
    'Arganil',
    'Arouca',
    'Avis',
    'Baiao',
    'Boticas',
    'Cabeceiras',
    'Basto',
    'Caminha',
    'Castelo',
    'Branco',
    'Castro',
    'Daire',
    'Castro',
    'Marim',
    'Chaves',
    'Cinfaes',
    'Figueira',
    'da',
    'Foz',
    'Freixo',
    'Espada',
    'Cinta',
    'Fundao',
    'Gois',
    'Gouveia',
    'Guarda',
    'Idanha',
    'Nova',
    'Lousa',
    'Miranda',
    'Corvo',
    'Mirandela',
    'Mogadouro',
    'Moncao',
    'Monchique',
    'Mortagua',
    'Nisa',
    'Odemira',
    'Oleiros',
    'Oliveira',
    'Frades',
    'Ourique',
    'Pampilhos',
    'Serra',
    'Paredes',
    'Coura',
    'Pedrogao',
    'Grande',
    'Penedono',
    'Pombal',
    'Ponte',
    'Lima',
    'Porto',
    'Mos',
    'Proenca',
    'Nova',
    'Resende',
    'Ribeira',
    'Pena',
    'Sabugal',
    'Sao',
    'Joao',
    'Pesqueira',
    'Sernancelhe',
    'Serta',
    'Sever',
    'do',
    'Vouga',
    'Silves',
    'Tomar',
    'Torre',
    'Moncorvo',
    'Valenca',
    'Valpacos',
    'Vieira',
    'Minho',
    'Vila',
    'Nova',
    'Cerveira',
    'Vila',
    'Rei']

In [230]:
def separate_words(name):
    keywords = ["de", "da", "do", "dos", "das"]
    separated_name = ""
    for i, char in enumerate(name):
        if char.isupper() and i > 0 and name[i-1].islower():
            separated_name += " "
        separated_name += char
        if i < len(name)-1 and name[i+1:].lower() in keywords:
            separated_name += " "
    return separated_name

In [231]:
firespread['fname_separate'] = firespread['fname'].apply(separate_words)

In [232]:
firespread

Unnamed: 0,fid,fname,year,sdate,edate,inidoy,enddoy,duration,area,growth_rat,ros,spdir,max_ros,FRE,FRE_perc,FRE_flux,fname_separate
0,29,Abrantes,2017,2017-08-09 18:00,2017-08-09 23:00,221.750000,221.958333,5.0,500.00,100.000160,1199.623292,155.744885,1199.623292,-1.000000,60.000000,-1.000000,Abrantes
1,29,Abrantes,2017,2017-08-09 23:00,2017-08-10 03:00,221.958333,222.125000,4.0,195.75,48.937402,388.377944,181.152080,393.534571,-1.000000,12.500000,-1.000000,Abrantes
3,29,Abrantes,2017,2017-08-10 12:00,2017-08-10 14:30,222.500000,222.604167,2.5,1184.25,473.698484,1088.042456,238.370571,1088.042456,11.371950,100.000000,3.841064,Abrantes
4,29,Abrantes,2017,2017-08-10 14:30,2017-08-10 22:00,222.604167,222.916667,7.5,1552.50,207.000000,580.932693,170.189772,585.006347,40.084560,100.000000,3.442582,Abrantes
5,48,Agueda,2019,2019-09-05 09:30,2019-09-05 18:00,248.395833,248.750000,8.5,123.75,14.558810,346.705124,249.142735,924.814568,8.466120,89.612188,8.048599,Ague da
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,59,ViladeRei,2019,2019-07-20 14:30,2019-07-20 23:00,201.604167,201.958333,8.5,3141.75,369.618343,2258.715621,125.157120,5354.374991,86.076792,100.000000,3.223261,Vilade Rei
266,59,ViladeRei,2019,2019-07-20 18:00,2019-07-21 03:00,201.750000,202.125000,9.0,3015.25,335.027778,1040.574990,282.638762,5094.328506,77.008247,100.000000,2.837732,Vilade Rei
267,59,ViladeRei,2019,2019-07-21 14:30,2019-07-21 23:30,202.604167,202.979167,9.0,1984.00,220.444444,1265.237483,128.338786,3668.894012,72.506970,100.000000,4.060650,Vilade Rei
268,59,ViladeRei,2019,2019-07-22 14:00,2019-07-22 18:00,203.583333,203.750000,4.0,878.00,219.499561,1251.174723,112.528592,1584.593086,26.322301,100.000000,7.494960,Vilade Rei


In [110]:
#initializing Nominatim API
geolocator = Nominatim(user_agent="MyApp")

#function to geocode city name
def geocode_city(city):
    location=geolocator.geocode(city)
    if location is None:
        return None, None
    else:
        return location.latitude, location.longitude


In [113]:
firespread['latitude'], firespread['longitude'] = zip(*firespread['fname'].apply(geocode_city))

In [100]:
fs= firespread.groupby(['fid','fname']).agg({'sdate': 'min', 'edate': 'max', 'inidoy':'min', 'enddoy':'max', 'duration': 'sum', 'area':'sum', 'growth_rat':'mean', 'ros':'mean', 'spdir':'mean', 'FRE':'mean', 'FRE_perc':'mean', 'FRE_flux':'mean', 'max_ros':'max' })

In [101]:
fs.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,sdate,edate,inidoy,enddoy,duration,area,growth_rat,ros,spdir,FRE,FRE_perc,FRE_flux,max_ros
fid,fname,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,Gouveia,2015-08-10 14:30,2015-08-12 02:30,222.604167,224.104167,36.0,2204.75,60.145908,277.440072,69.142305,44.90421,100.0,7.586726,450.666613
2,Oleiros,2015-08-03 14:30,2015-08-04 03:00,215.604167,216.125,12.5,808.0,64.640041,409.342649,140.941171,-1.0,68.0,-1.0,409.342649
3,VilaNovadeCerveira,2015-08-08 12:00,2015-08-09 19:00,220.5,221.791667,17.5,2609.75,164.075609,547.867212,200.139984,13.74709,81.563108,1.805121,916.358599
4,Agueda,2016-08-08 04:00,2016-08-11 03:30,221.166667,224.145833,36.5,4873.75,128.932811,408.876597,213.962692,54.20412,98.039216,2.672113,600.453535
5,Anadia,2016-08-10 02:30,2016-08-11 03:30,223.104167,224.145833,25.0,2816.25,108.859269,334.541844,284.659449,46.13013,95.238095,2.686425,513.06099


In [102]:
fs['sdate'] = pd.to_datetime(fs['sdate'], format = "%Y-%m-%d")

In [103]:
fs['edate'] = pd.to_datetime(fs['edate'], format = "%Y-%m-%d")

In [104]:
fs.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 80 entries, (1, 'Gouveia') to (80, 'Mogadouro')
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   sdate       80 non-null     datetime64[ns]
 1   edate       80 non-null     datetime64[ns]
 2   inidoy      80 non-null     float64       
 3   enddoy      80 non-null     float64       
 4   duration    80 non-null     float64       
 5   area        80 non-null     float64       
 6   growth_rat  80 non-null     float64       
 7   ros         80 non-null     float64       
 8   spdir       80 non-null     float64       
 9   FRE         80 non-null     float64       
 10  FRE_perc    80 non-null     float64       
 11  FRE_flux    80 non-null     float64       
 12  max_ros     80 non-null     float64       
dtypes: datetime64[ns](2), float64(11)
memory usage: 9.5+ KB


In [105]:
fs['month']=fs['sdate'].dt.month
fs['day']=fs['sdate'].dt.day
fs.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,sdate,edate,inidoy,enddoy,duration,area,growth_rat,ros,spdir,FRE,FRE_perc,FRE_flux,max_ros,month,day
fid,fname,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,Gouveia,2015-08-10 14:30:00,2015-08-12 02:30:00,222.604167,224.104167,36.0,2204.75,60.145908,277.440072,69.142305,44.90421,100.0,7.586726,450.666613,8,10
2,Oleiros,2015-08-03 14:30:00,2015-08-04 03:00:00,215.604167,216.125,12.5,808.0,64.640041,409.342649,140.941171,-1.0,68.0,-1.0,409.342649,8,3
3,VilaNovadeCerveira,2015-08-08 12:00:00,2015-08-09 19:00:00,220.5,221.791667,17.5,2609.75,164.075609,547.867212,200.139984,13.74709,81.563108,1.805121,916.358599,8,8
4,Agueda,2016-08-08 04:00:00,2016-08-11 03:30:00,221.166667,224.145833,36.5,4873.75,128.932811,408.876597,213.962692,54.20412,98.039216,2.672113,600.453535,8,8
5,Anadia,2016-08-10 02:30:00,2016-08-11 03:30:00,223.104167,224.145833,25.0,2816.25,108.859269,334.541844,284.659449,46.13013,95.238095,2.686425,513.06099,8,10


In [106]:
fs['month'].value_counts()

8     33
9     15
7     15
10    12
6      5
Name: month, dtype: int64

In [107]:
fs.describe()

Unnamed: 0,inidoy,enddoy,duration,area,growth_rat,ros,spdir,FRE,FRE_perc,FRE_flux,max_ros,month,day
count,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0
mean,230.983594,232.342708,24.5125,5531.506285,225.540506,782.691529,148.659009,20.533069,71.648553,3.788799,1978.567848,8.175,12.7125
std,31.406457,31.254736,21.149554,8920.920656,403.831265,569.852974,68.683586,28.465247,33.178894,5.230344,1788.141167,1.09977,7.082147
min,161.5625,161.770833,1.5,209.25,12.004656,138.6253,6.034646,-1.0,0.0,-1.0,200.012043,6.0,1.0
25%,213.348958,213.776042,9.875,670.62515,54.465301,392.049216,96.549034,0.984988,51.969538,-0.181224,797.478636,7.75,7.0
50%,223.072917,225.354167,17.75,1553.5,108.694347,664.27063,154.633109,11.397697,90.638062,2.519281,1320.104079,8.0,10.5
75%,250.53125,251.505208,32.625,6007.50005,186.270672,997.925588,196.978839,26.15571,98.183751,5.975877,2442.762391,9.0,17.0
max,288.791667,290.166667,114.0,45249.25,3115.672163,3262.405676,284.659449,147.588138,100.0,32.187067,8956.19853,10.0,30.0
