# Extra data

The following data was obtained and analyzed for comparison purposes only. 

In [2]:
# Import the necessary libraries

import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import re
import requests
import json
import lxml.html as lh
import requests

### 1. Wildfires data for Brazil as a whole per month in the period 1998-2017.

In [3]:
# Import the raw monthly wildfire file for Brazil

fires_month = pd.read_csv(
    filepath_or_buffer="../Data/extra_data/1_rf_incendiosflorestais_focoscalor_brasil_1998-2017.csv"
)

In [4]:
# The column "Ano;Mês;Número;Período" holds semicolon-joined strings:
# split each value into separate positional columns (0, 1, 2, ...)

fires_month = (
    fires_month.loc[:, "Ano;Mês;Número;Período"]
    .str.split(pat=";", expand=True)
)

In [5]:
# Give the positional columns produced by the split (0-3) descriptive names.
# rename() returns a new frame, so no in-place drop is needed, and labels that
# are absent are ignored by default, which keeps this cell safe to re-run.

fires_month = fires_month.rename(
    columns={0: "year", 1: "month", 2: "number", 3: "period"}
)

In [6]:
# Drop the rows that are not monthly records, starting with those whose
# year field contains "Máximo"

fires_month = fires_month.loc[~fires_month["year"].str.contains("Máximo")]

In [7]:
# Drop the rows whose year field contains "Média"
fires_month = fires_month.loc[~fires_month["year"].str.contains("Média")]

In [8]:
# Drop the rows whose year field contains "Mínimo"
fires_month = fires_month.loc[~fires_month["year"].str.contains("Mínimo")]

In [9]:
# Drop the rows whose month field contains "Total"
fires_month = fires_month.loc[~fires_month["month"].str.contains("Total")]

In [10]:
# Cast the year column from string to int64

fires_month = fires_month.astype({"year": "int64"})

In [11]:
# Map each Portuguese month name to its calendar number (1-12)

months = dict(
    zip(
        [
            "Janeiro",
            "Fevereiro",
            "Março",
            "Abril",
            "Maio",
            "Junho",
            "Julho",
            "Agosto",
            "Setembro",
            "Outubro",
            "Novembro",
            "Dezembro",
        ],
        range(1, 13),
    )
)

In [12]:
# Replace the month names with their month ids.
# Series.map yields NaN for any name missing from the dictionary, so the
# explicit int64 cast fails loudly instead of leaving a stray string behind.
# The cast also avoids relying on DataFrame.replace silently downcasting the
# object column to integers, which newer pandas releases deprecate.

fires_month = fires_month.assign(
    month=fires_month["month"].map(months).astype("int64")
)

In [13]:
# Cast the number column from string to int64

fires_month = fires_month.astype({"number": "int64"})

In [14]:
# Cast the period column from string to datetime64[ns]

fires_month = fires_month.astype({"period": "datetime64[ns]"})

In [15]:
# Confirm the column dtypes after the conversions above
fires_month.dtypes

year               int64
month              int64
number             int64
period    datetime64[ns]
dtype: object

In [16]:
# Count the monthly entries available for each year

fires_month["year"].value_counts()

2007    12
2016    12
1999    12
2000    12
2001    12
2002    12
2003    12
2004    12
2005    12
2006    12
1998    12
2008    12
2009    12
2010    12
2011    12
2012    12
2013    12
2014    12
2015    12
2017    11
Name: year, dtype: int64

In [17]:
# Every year has 12 monthly entries except 2017, which has 11: December 2017 is missing

In [19]:
# Write the cleaned monthly frame to CSV, without the index column

fires_month.to_csv(
    path_or_buf="../Data/extra_data/1_fires_month.csv",
    index=False,
)

### 2. Wildfires data for Brazil's Legal Amazon (BLA) per month in the period 1999-2019.

In [20]:
# Import the wildfire file for Brazil's Legal Amazon

fires_bla = pd.read_csv(
    filepath_or_buffer="../Data/extra_data/2_inpe_brazilian_amazon_fires_1999_2019.csv"
)

In [21]:
# Count the missing values in each column

fires_bla.isna().sum()

year         0
month        0
state        0
latitude     0
longitude    0
firespots    0
dtype: int64

In [22]:
# Convert the state names to title case

fires_bla = fires_bla.assign(state=fires_bla["state"].str.title())

In [23]:
# Count the entries available for each state

fires_bla["state"].value_counts()

Mato Grosso    252
Amazonas       250
Para           250
Rondonia       246
Roraima        243
Maranhao       241
Tocantins      221
Acre           204
Amapa          197
Name: state, dtype: int64

In [24]:
# We can see that only 9 states are present in this data frame. 
# These states belong to Brazil's Legal Amazon (BLA), which is the largest socio-geographic division in Brazil.
# We can also see that the number of entries for each state varies.

In [25]:
# Write the BLA frame to CSV, without the index column
# NOTE(review): this file goes to clean_data/ while the other outputs in this
# notebook go to extra_data/ — confirm the destination is intended

fires_bla.to_csv(
    path_or_buf="../Data/clean_data/2_fires_bla.csv",
    index=False,
)

### 3. Severity of climatic phenomena El Nino and La Nina in the period 1999-2019.

In [28]:
# Import the El Nino / La Nina file

ninos = pd.read_csv(
    filepath_or_buffer="../Data/extra_data/3_el_nino_la_nina_1999_2019.csv"
)

In [29]:
# Map each severity label to a numeric level, from 1 (Weak) to 4 (Very Strong)

severity_dict = dict(
    zip(
        ["Weak", "Moderate", "Strong", "Very Strong"],
        range(1, 5),
    )
)

In [30]:
# Add a severity_level column by looking up each severity label in severity_dict

ninos = ninos.assign(severity_level=ninos["severity"].map(severity_dict))

In [31]:
# Display the table, now including the derived severity_level column
ninos

Unnamed: 0,start year,end year,phenomenon,severity,severity_level
0,2004,2005,El Nino,Weak,1
1,2006,2007,El Nino,Weak,1
2,2014,2015,El Nino,Weak,1
3,2018,2019,El Nino,Weak,1
4,2002,2003,El Nino,Moderate,2
5,2009,2010,El Nino,Moderate,2
6,2015,2016,El Nino,Very Strong,4
7,2000,2001,La Nina,Weak,1
8,2005,2006,La Nina,Weak,1
9,2008,2009,La Nina,Weak,1


In [32]:
# Write the El Nino / La Nina frame to CSV, without the index column

ninos.to_csv(
    path_or_buf="../Data/extra_data/3_ninos.csv",
    index=False,
)