<a href="https://colab.research.google.com/github/SawsanYusuf/Brazil-Wildfire-Visualizations/blob/main/Brazil_fires_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<font size="+3"><strong>Fires Analysis: Brazil Wildfires</strong></font>

# **1. Data Preparation**

## **1.1. Import Libraries**

In [None]:
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
%matplotlib inline

In [None]:
# Read the fire records CSV, decoding it as ISO-8859-1, then preview the first rows
csv_path = "/content/amazon.csv"
df = pd.read_csv(csv_path, encoding='ISO-8859-1')
df.head()

Unnamed: 0,year,state,month,number,date
0,1998,Acre,Janeiro,0.0,1998-01-01
1,1999,Acre,Janeiro,0.0,1999-01-01
2,2000,Acre,Janeiro,0.0,2000-01-01
3,2001,Acre,Janeiro,0.0,2001-01-01
4,2002,Acre,Janeiro,0.0,2002-01-01


## **1.2. Data Wrangling**

In [None]:
# Remove the `date` column. Rebinding (instead of inplace=True) together with
# errors='ignore' keeps this cell safe to re-run once the column is already gone.
df = df.drop(columns='date', errors='ignore')

In [None]:
# Store the year as a string label rather than a number
df = df.astype({'year': str})

In [None]:
# Portuguese -> English month names
months = {'Janeiro': 'January', 'Fevereiro': 'February', 'Março': 'March', 'Abril': 'April',
          'Maio': 'May', 'Junho': 'June', 'Julho': 'July', 'Agosto': 'August',
          'Setembro': 'September', 'Outubro': 'October', 'Novembro': 'November', 'Dezembro': 'December'}

# Translate the month labels. `replace` leaves values that are not keys untouched,
# so re-running this cell (when the labels are already English) does not wipe the
# column to NaN, and an unexpected label stays visible instead of silently becoming NaN.
df['month'] = df['month'].replace(months)

# Inspect the distinct month labels after translation
df.month.unique()

array(['January', 'February', 'March', 'April', 'May', 'June', 'July',
       'August', 'September', 'October', 'November', 'December'],
      dtype=object)

In [None]:
# Print the column names, non-null counts, dtypes and memory usage of df
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6454 entries, 0 to 6453
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    6454 non-null   object 
 1   state   6454 non-null   object 
 2   month   6454 non-null   object 
 3   number  6454 non-null   float64
dtypes: float64(1), object(3)
memory usage: 201.8+ KB


# **2. Data Exploring**

## 1. How many fires were reported in 20 years?

In [None]:
# Sum of the `number` column over every row of the dataset
df["number"].sum()

698924.073

## 2. Occurrence of forest fires over the years

In [None]:
# Get data
# Fire totals per year (groupby returns the year labels in sorted order)
year_fires = df.groupby('year')['number'].sum()

# Line chart of the yearly totals
fig = px.line(
    data_frame=year_fires,
    x=year_fires.index,
    y=year_fires.values,
)

# Title and axis labels; as the last expression this also renders the figure
fig.update_layout(
    title="Total Fires in Brazil : General Trend",
    xaxis_title="Year",
    yaxis_title="Total Number of Fires",
)

## 3. What year had the highest number of fires?

In [None]:
# Creat plot
# Bar chart of the yearly totals, one colour per year label
fig = px.bar(
    data_frame=year_fires,
    x=year_fires.index,
    y=year_fires.values,
    color=year_fires.index,
)

# Title and axis labels; as the last expression this also renders the figure
fig.update_layout(
    title="Fires in Brazil : 1998 - 2017",
    xaxis_title="Year",
    yaxis_title="Total Number of Fires",
)

## 4. Report of forest fire across months over the period 1998–2017

In [None]:
# Get data
# Calendar order for the x-axis: groupby sorts its keys alphabetically
# (April, August, December, ...), which scrambles the seasonal pattern.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']

# Fire totals per month, re-ordered January -> December
months_fires = df.groupby(by='month')['number'].sum().reindex(month_order)

# Bar chart of the monthly totals
fig = px.bar(months_fires,
             x=months_fires.index,
             y=months_fires.values
)
# Title and axis labels; as the last expression this also renders the figure
fig.update_layout(xaxis_title="Month", yaxis_title="Total Number of Fires", title="Total Number of Fire by Month")

## 5. What state had the highest number of fires?

In [None]:
# Get data
# Fire totals per state
states_fires = df.groupby('state')['number'].sum()

# Bar chart of the per-state totals, one colour per state
fig = px.bar(
    data_frame=states_fires,
    x=states_fires.index,
    y=states_fires.values,
    color=states_fires.index,
)

# Title and axis labels; as the last expression this also renders the figure
fig.update_layout(
    title="Total Number of Fire by State",
    xaxis_title="State",
    yaxis_title="Total Number of Fires",
)

## 6. Occurrence of forest fires reported in the top 10 states for the period 1998–2017:

In [None]:
# Import data
# The ten states with the largest fire totals, largest first.
# `data` is a one-column frame ('number') indexed by state name.
data = df.groupby('state')['number'].sum().nlargest(10).to_frame()

# Bar chart of those ten states, one colour per state
fig = px.bar(x=data.index,
             y=data.number,
             color=data.index)

# Title names the top-10 subset so this figure is not confused with the
# all-states chart, which uses "Total Number of Fire by State".
fig.update_layout(xaxis_title="State", yaxis_title="Total Number of Fires",
                  title="Occurrence of forest fires for top 10 states")
fig.show()

In [None]:
# Turn the state index into a regular 'state' column. The guard makes the cell
# idempotent: an unguarded second reset_index would add a stray 'index' column
# and a third would raise because that column already exists.
if "state" not in data.columns:
    data = data.reset_index()
data

Unnamed: 0,state,number
0,Mato Grosso,96246.028
1,Paraiba,52435.918
2,Sao Paulo,51121.198
3,Rio,45160.865
4,Bahia,44746.226
5,Piau,37803.747
6,Goias,37695.52
7,Minas Gerais,37475.258
8,Tocantins,33707.885
9,Amazonas,30650.129


## 7. Contribution of forest fire across years for top 10 states:

In [None]:
# Fire totals per state
df_state = df.loc[:, ['state', 'number']].groupby('state').sum().reset_index()

# Sort DESCENDING so the first rows are the states with the most fires.
# (An ascending sort would select the ten states with the FEWEST fires,
# contradicting the "top 10 states" headings and plot titles.)
sorted_df = df_state.sort_values('number', ascending=False)

# Names of the ten states with the highest totals; head() also copes with
# fewer than ten states instead of raising IndexError.
state_10 = sorted_df['state'].head(10).tolist()

In [None]:
# Get data
# Fire totals per (year, state) pair, keeping groups in order of first appearance.
# Only 'number' is aggregated: summing the whole frame would also "sum" the
# string-valued month column, which is meaningless here.
df_year_state = df.groupby(['year', 'state'], sort=False)['number'].sum().reset_index()

# Keep only the rows whose state is listed in state_10
df_year_state_low = df_year_state[df_year_state['state'].isin(state_10)]

In [None]:
# Grouped bars: one cluster per year, one bar per state within the cluster
fig = px.bar(data_frame=df_year_state_low, x="year", y="number",
             color="state", barmode="group")

# Title and axis labels
fig.update_layout(
    title="Total number of forest fire across each year for top 10 states",
    xaxis_title="Year",
    yaxis_title="Total Number of Fires",
)
fig.show()

## 8. Total number of forest fire across each month for top 10 states:

In [None]:
# Get data
# Fire totals per (month, state) pair, keeping groups in order of first appearance.
# Only 'number' is aggregated: summing the whole frame would also "sum" the
# non-numeric columns, which is meaningless here.
df_month_state = df.groupby(['month', 'state'], sort=False)['number'].sum().reset_index()

# Keep only the rows whose state is listed in state_10
df_month_state_low = df_month_state[df_month_state['state'].isin(state_10)]

In [None]:
# Grouped bars: one cluster per month, one bar per state within the cluster
fig = px.bar(data_frame=df_month_state_low, x="month", y="number",
             color="state", barmode="group")

# Title and axis labels
fig.update_layout(
    title="Total number of forest fire across each month for top 10 states",
    xaxis_title="Months",
    yaxis_title="Total Number of Fires",
)
fig.show()

## 9. Percentage distribution of forest fires across each state and month:

In [None]:
import plotly.graph_objects as go

In [None]:
# Donut chart: each month's share of the mean `number` per month
a = df.groupby('month')['number'].mean().to_frame()
month_pie = go.Pie(labels=a.index, values=a.number, hole=0.3)
month_layout = go.Layout(title=' % distribution of average forest fire across each month')
fig = go.Figure(data=[month_pie], layout=month_layout)
fig.show()

# Donut chart: each state's share of the mean `number` per state
a = df.groupby('state')['number'].mean().to_frame()
state_pie = go.Pie(labels=a.index, values=a.number, hole=0.3)
state_layout = go.Layout(title=' %  average distribution of forest fire across each state ')
fig = go.Figure(data=[state_pie], layout=state_layout)
fig.show()

## 10. Mapping locations of Brazil States:

In [None]:
# Approximate coordinates for the top-10 states, listed in the same order as the
# top-10 table shown above (Mato Grosso ... Amazonas); the lists are matched to
# the states purely by position.
# The Paraiba, Piau and Tocantins entries previously pointed at same-named
# municipalities in Rio de Janeiro / Minas Gerais (around 21-22 S, 43 W); they
# now use approximate locations of the states themselves.
lat = [
    -16.350000,  # Mato Grosso
    -7.240000,   # Paraiba (state)
    -23.533773,  # Sao Paulo
    -22.908333,  # Rio
    -11.409874,  # Bahia
    -7.718300,   # Piau (Piaui state)
    -16.328547,  # Goias
    -19.841644,  # Minas Gerais
    -10.175300,  # Tocantins (state)
    -3.416843,   # Amazonas
]
long = [
    -56.666668,  # Mato Grosso
    -36.780000,  # Paraiba (state)
    -46.625290,  # Sao Paulo
    -43.196388,  # Rio
    -41.280857,  # Bahia
    -42.728900,  # Piau (Piaui state)
    -48.953403,  # Goias
    -43.986511,  # Minas Gerais
    -48.298200,  # Tocantins (state)
    -65.856064,  # Amazonas
]

In [None]:
# Attach the coordinate lists to the state / number columns of `data`;
# the lists are matched to the rows by position.
df_geo = data[["state", "number"]].assign(Lat=lat, Long=long)
df_geo

Unnamed: 0,state,number,Lat,Long
0,Mato Grosso,96246.028,-16.35,-56.666668
1,Paraiba,52435.918,-22.15847,-43.29321
2,Sao Paulo,51121.198,-23.533773,-46.62529
3,Rio,45160.865,-22.908333,-43.196388
4,Bahia,44746.226,-11.409874,-41.280857
5,Piau,37803.747,-21.5089,-43.3228
6,Goias,37695.52,-16.328547,-48.953403
7,Minas Gerais,37475.258,-19.841644,-43.986511
8,Tocantins,33707.885,-21.175,-43.01778
9,Amazonas,30650.129,-3.416843,-65.856064


In [None]:
# Bubble map of South America: one marker per row of df_geo, sized by `number`
# and coloured by state
fig = px.scatter_geo(
    data_frame=df_geo,
    lat='Lat',
    lon='Long',
    size='number',
    color='state',
    scope='south america',
    projection='hammer',
)
fig.update_layout(title_text='1998-2017 Top-10 States in Brazil with reported fires')
fig.show()