In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import pprint
import csv
import random
import seaborn as sns
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from sklearn.linear_model import LinearRegression

In [3]:
#open the precipitation file
# The Rondonia state (RO) is missing from the Rain data, the merged data will have only 8 of the 9 states from Legal Amazon.
rain = "../output_data/precip_state_year.csv"
rain_df = pd.read_csv(rain)
rain_df = rain_df[["state", "year", "precipitation" ]]
rain_df

Unnamed: 0,state,year,precipitation
0,AC,1998,2296.1
1,AC,1999,2182.2
2,AC,2000,1703.5
3,AC,2001,1926.9
4,AC,2002,1909.9
...,...,...,...
155,TO,2013,1849.3
156,TO,2014,1843.9
157,TO,2015,1393.7
158,TO,2016,1270.5


In [20]:
yeargroup_df = rain_new.groupby(["year"])
precipitation = yeargroup_df.sum()["precipitation"]
p_df = pd.DataFrame(precipitation)

p_df = p_df.reset_index()
p_df

Unnamed: 0,year,precipitation
0,1998,16444.3
1,1999,18499.7
2,2000,18510.7
3,2001,16345.5
4,2002,15949.3
5,2003,15834.6
6,2004,17218.5
7,2005,16782.1
8,2006,19357.0
9,2007,17513.3


In [22]:
fire = "data_joao/forest_fire_1998-2017_new.csv"
fire_csv = pd.read_csv(fire)
fire_new = fire_csv.loc[fire_csv['State'].isin(["AC", "AM", "AP", "MA", "MT", "PA", "RR", "TO"])]
fire_new

Unnamed: 0,Ano,State,Month,Number,Date
0,1998,AC,Jan,0,1/1/1998
1,1999,AC,Jan,0,1/1/1999
2,2000,AC,Jan,0,1/1/2000
3,2001,AC,Jan,0,1/1/2001
4,2002,AC,Jan,0,1/1/2002
...,...,...,...,...,...
6449,2012,TO,Dec,128,1/1/2012
6450,2013,TO,Dec,85,1/1/2013
6451,2014,TO,Dec,223,1/1/2014
6452,2015,TO,Dec,373,1/1/2015


In [23]:
yeargroup_df2 = fire_new.groupby(["Ano"])
fire = yeargroup_df2.sum()["Number"]
fire_sum_df = pd.DataFrame(fire)

fire_sum_df = fire_sum_df.reset_index()
fire_sum_df = fire_sum_df.rename(columns={"Ano":"year", "Number":"Fires"})
fire_sum_df

Unnamed: 0,year,Fires
0,1998,82676
1,1999,81896
2,2000,61698
3,2001,87789
4,2002,133049
5,2003,115997
6,2004,169782
7,2005,132937
8,2006,80497
9,2007,114561


In [24]:
# fire_df = fire_df.rename(columns={"Ano":"year", "Number":"Fires"})
# fire_df

In [25]:
merge_table = pd.merge(p_df, fire_sum_df, on="year")
merge_table

Unnamed: 0,year,precipitation,Fires
0,1998,16444.3,82676
1,1999,18499.7,81896
2,2000,18510.7,61698
3,2001,16345.5,87789
4,2002,15949.3,133049
5,2003,15834.6,115997
6,2004,17218.5,169782
7,2005,16782.1,132937
8,2006,19357.0,80497
9,2007,17513.3,114561


In [26]:
plt.plot(merge_table.year, merge_table.precipitation)
plt.plot(merge_table.year, merge_table.Fires)

tick_locations = [value for value in merge_table.year]
plt.xticks(tick_locations, ['1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017'], rotation=90)

plt.title('Fire Count vs Precipitation Measurement (mm)')
plt.xlabel('Year')
plt.ylabel('Fire and Precipitation')

plt.legend()
plt.show()

<IPython.core.display.Javascript object>

No handles with labels found to put in legend.


In [50]:
# Create 2 bar graph in the same fig for Annual Deforestation and rain in Legal Amazon.
x_axis_merge_table = list(merge_table.year)
tick_locations = [value for value in x_axis_merge_table]

fig, ax1 = plt.subplots(figsize=(20, 10))

# Deforestation
for index, row in merge_table.iterrows():
    def_graph = ax1.bar(merge_table["year"][index], merge_table["Fires"][index],  color='r', alpha=.8,\
                        align="edge",width=-0.45)
ax1.set_title("Annual Fire and Rain in Legal Amazon", fontweight = "bold", fontsize = 15)
ax1.set_xlabel("Year",fontsize = 18)
ax1.set_ylabel("Number of fires",fontsize = 18, color ='r')
ax1.tick_params(axis='y',labelcolor='tab:red', labelsize = 18)

# Rain
ax2 = ax1.twinx()
for index, row in merge_table.iterrows():
    rain_graph = ax2.bar(merge_table["year"][index], merge_table["precipitation"][index],  color='b', alpha=.8,\
                         align="edge", width=0.45)
ax2.set_ylabel("mm fo rain",fontsize = 18, color ='b')
ax2.tick_params(axis='y',labelcolor='tab:blue', labelsize = 18)

#other configurations
plt.xticks(tick_locations, x_axis_merge_table)
plt.tight_layout()
ax1.grid()

#save figure
plt.savefig("output_data/fire_rain_year.png", dpi=fig.dpi)
plt.show()

<IPython.core.display.Javascript object>

In [27]:
state_fire = fire_new.groupby(["State"])
fire_average = state_fire.mean()["Number"]
fire_average_df = pd.DataFrame(fire_average)

fire_average_df = fire_average_df.reset_index()
fire_average_df = fire_average_df.rename(columns={"State":"state", "Number":"Number of Fires"})
fire_average_df

Unnamed: 0,state,Number of Fires
0,AC,285.962343
1,AM,491.493724
2,AP,105.087866
3,MA,1570.205021
4,MT,2738.870293
5,PA,2417.74477
6,RR,123.238494
7,TO,888.83682


In [28]:
# fire_average_df = fire_average_df.rename(columns={"State":"state", "Number":"Number of Fires"})
# fire_average_df

In [12]:
state_rain = rain_df.groupby(["state", "year"])
rain_sum = state_rain.sum()["precipitation"]

rain_sum = pd.DataFrame(rain_sum)

rain_sum = rain_sum.reset_index()


rain_sum

Unnamed: 0,state,year,precipitation
0,AC,1998,2296.1
1,AC,1999,2182.2
2,AC,2000,1703.5
3,AC,2001,1926.9
4,AC,2002,1909.9
...,...,...,...
490,TO,2013,1849.3
491,TO,2014,1843.9
492,TO,2015,1393.7
493,TO,2016,1270.5


In [29]:
rain_avg = rain_new.groupby(["state"])
rain_average = rain_avg.mean()["precipitation"]
rain_average_df = pd.DataFrame(rain_average)

rain_average_df = rain_average_df.reset_index()
rain_average_df

Unnamed: 0,state,precipitation
0,AC,166.022176
1,AM,193.74477
2,AP,207.713445
3,MA,174.100837
4,MT,120.525877
5,PA,275.70251
6,RR,150.466946
7,TO,150.956485


In [30]:
merge_average = pd.merge(fire_average_df, rain_average_df, on="state")
merge_average

Unnamed: 0,state,Number of Fires,precipitation
0,AC,285.962343,166.022176
1,AM,491.493724,193.74477
2,AP,105.087866,207.713445
3,MA,1570.205021,174.100837
4,MT,2738.870293,120.525877
5,PA,2417.74477,275.70251
6,RR,123.238494,150.466946
7,TO,888.83682,150.956485


In [31]:
# Create 2 bar graph in the same fig for Annual Deforestation and rain in Legal Amazon.
fig, ax1 = plt.subplots(figsize=(8, 5))

# Deforestation
for index, row in merge_average.iterrows():
    def_graph = ax1.bar(merge_average["state"][index], merge_average["Number of Fires"][index],  color='r', alpha=.8,\
                        align="edge",width=-0.45)
ax1.set_title("Fire vs Rain : Average 1998-2017", fontweight = "bold", fontsize = 15)
ax1.set_xlabel("States",fontsize = 18)
ax1.set_ylabel("Number of Fires",fontsize = 18, color ='r')
ax1.tick_params(axis='y',labelcolor='tab:red')

# Rain
ax2 = ax1.twinx()
for index, row in merge_average.iterrows():
    rain_graph = ax2.bar(merge_average["state"][index], merge_average["precipitation"][index],  color='b', alpha=.8,\
                         align="edge", width=0.45)
ax2.set_ylabel("Precipitation (mm)",fontsize = 18, color ='b')
ax2.tick_params(axis='y',labelcolor='tab:blue')

#other configurations
#plt.xticks(tick_locations, rotation=90)
plt.tight_layout()
#ax.grid()
#save figure
plt.savefig("Fire vs Rain - Annual Average.png", dpi=fig.dpi)
plt.show()

<IPython.core.display.Javascript object>

In [16]:
rain_amazon = rain_sum[rain_sum['state'].str.contains("AC")|\
               rain_sum['state'].str.contains("AM")|\
               rain_sum['state'].str.contains("AP")|\
               rain_sum['state'].str.contains("MA")|\
               rain_sum['state'].str.contains("MT")|\
               rain_sum['state'].str.contains("PA")|\
               rain_sum['state'].str.contains("RO")|\
               rain_sum['state'].str.contains("RR")|\
               rain_sum['state'].str.contains("TO")]
rain_amazon

Unnamed: 0,state,year,precipitation
0,AC,1998,2296.1
1,AC,1999,2182.2
2,AC,2000,1703.5
3,AC,2001,1926.9
4,AC,2002,1909.9
...,...,...,...
490,TO,2013,1849.3
491,TO,2014,1843.9
492,TO,2015,1393.7
493,TO,2016,1270.5


In [17]:
fire_sum = fire_csv.groupby(["State", "Ano"])
fire_sum = fire_sum.sum()["Number"]
fire_sum_df = pd.DataFrame(fire_sum)

fire_sum_df = fire_sum_df.reset_index()

new_fire_sum = fire_sum_df.rename(columns={"State":"state", "Number":"Number of Fires", "Ano":"year"})
new_fire_sum

Unnamed: 0,State,Ano,Number
0,AC,1998,730
1,AC,1999,333
2,AC,2000,434
3,AC,2001,828
4,AC,2002,3491
...,...,...,...
535,TO,2013,8127
536,TO,2014,7748
537,TO,2015,9123
538,TO,2016,14854


In [18]:
# new_fire_sum = fire_sum_df.rename(columns={"State":"state", "Number":"Number of Fires", "Ano":"year"})
# new_fire_sum

Unnamed: 0,state,year,Number of Fires
0,AC,1998,730
1,AC,1999,333
2,AC,2000,434
3,AC,2001,828
4,AC,2002,3491
...,...,...,...
535,TO,2013,8127
536,TO,2014,7748
537,TO,2015,9123
538,TO,2016,14854


In [19]:
fire_amazon = new_fire_sum[new_fire_sum['state'].str.contains("AC")|\
               new_fire_sum['state'].str.contains("AM")|\
               new_fire_sum['state'].str.contains("AP")|\
               new_fire_sum['state'].str.contains("MA")|\
               new_fire_sum['state'].str.contains("MT")|\
               new_fire_sum['state'].str.contains("PA")|\
               new_fire_sum['state'].str.contains("RO")|\
               new_fire_sum['state'].str.contains("RR")|\
               new_fire_sum['state'].str.contains("TO")]
fire_amazon

Unnamed: 0,state,year,Number of Fires
0,AC,1998,730
1,AC,1999,333
2,AC,2000,434
3,AC,2001,828
4,AC,2002,3491
...,...,...,...
535,TO,2013,8127
536,TO,2014,7748
537,TO,2015,9123
538,TO,2016,14854


In [20]:
merge_amazon_df = pd.merge(fire_amazon, rain_amazon, on=["state","year"])
merge_amazon_df

Unnamed: 0,state,year,Number of Fires,precipitation
0,AC,1998,730,2296.1
1,AC,1999,333,2182.2
2,AC,2000,434,1703.5
3,AC,2001,828,1926.9
4,AC,2002,3491,1909.9
...,...,...,...,...
155,TO,2013,8127,1849.3
156,TO,2014,7748,1843.9
157,TO,2015,9123,1393.7
158,TO,2016,14854,1270.5


In [21]:
merge2 = merge_amazon_df.groupby(["year"])
merge2_df = merge2.sum()
merge_amazon = pd.DataFrame(merge2_df)

merge_amazon = merge_amazon.reset_index()

merge_amazon

Unnamed: 0,year,Number of Fires,precipitation
0,1998,82676,16444.3
1,1999,81896,18499.7
2,2000,61698,18510.7
3,2001,87789,16345.5
4,2002,133049,15949.3
5,2003,115997,15834.6
6,2004,169782,17218.5
7,2005,132937,16782.1
8,2006,80497,19357.0
9,2007,114561,17513.3


In [22]:
x_axis = list(merge_amazon.year)

In [23]:
# Create 2 bar graph in the same fig for Annual Deforestation and rain in Legal Amazon.
fig, ax1 = plt.subplots(figsize=(8, 5))
plt.xticks(tick_locations, x_axis, rotation=90)
# Deforestation
for index, row in merge_amazon.iterrows():
    def_graph = ax1.bar(merge_amazon["year"][index], merge_amazon["Number of Fires"][index],  color='r', alpha=.8,\
                        align="edge",width=-0.45)
ax1.set_title("Fire vs Rain : Legal Amazon 1998-2017", fontweight = "bold", fontsize = 15)
ax1.set_xlabel("Years",fontsize = 18)
ax1.set_ylabel("Number of Fires",fontsize = 18, color ='r')
ax1.tick_params(axis='y',labelcolor='tab:red')

# Rain
ax2 = ax1.twinx()
for index, row in merge_amazon.iterrows():
    rain_graph = ax2.bar(merge_amazon["year"][index], merge_amazon["precipitation"][index],  color='b', alpha=.8,\
                         align="edge", width=0.45)
ax2.set_ylabel("Precipitation (mm)",fontsize = 18, color ='b')
ax2.tick_params(axis='y',labelcolor='tab:blue')

# x_axis = ['1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']
# # print(tick_locations)
# plt.tight_layout()
#ax.grid()
#save figure
plt.savefig("Fire vs Rain Legal Amazon.png", dpi=fig.dpi)
plt.show()

<IPython.core.display.Javascript object>

In [24]:
#linear regression
X = merge_amazon['Number of Fires'].values.reshape(-1, 1)
Y = merge_amazon['precipitation'].values.reshape(-1, 1)
linear_regressor = LinearRegression()
# perform linear regression
linear_regressor.fit(X, Y)
# make predictions
Y_pred = linear_regressor.predict(X)
# correlation matrices
#correlation coefficient

plt.scatter(X, Y)
plt.plot(X, Y_pred, color='red')
plt.show()
print(linear_regressor.coef_)
# Correlation Coefficient
# and R-square
merge_amazon.corr(method ='pearson')

model1=sm.OLS(X,Y)
result=model1.fit()
print(result.summary())

[[-0.02095198]]
                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.882
Model:                            OLS   Adj. R-squared (uncentered):              0.876
Method:                 Least Squares   F-statistic:                              142.4
Date:                Thu, 12 Dec 2019   Prob (F-statistic):                    2.85e-10
Time:                        18:20:25   Log-Likelihood:                         -238.79
No. Observations:                  20   AIC:                                      479.6
Df Residuals:                      19   BIC:                                      480.6
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------