# Good ol' imports

In [1697]:
import pandas as pd
import pandas_profiling
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns
import autopep8

In [1698]:
from math import pi

from bokeh.core.properties import value
from bokeh.io import curdoc, output_notebook, reset_output, show
from bokeh.layouts import column
from bokeh.models import HoverTool, LabelSet
from bokeh.palettes import Category20c, Spectral4, Spectral11
from bokeh.plotting import ColumnDataSource, figure
from bokeh.transform import cumsum, dodge

# Clear any previous Bokeh output configuration, then render into the notebook.
reset_output()
output_notebook()

# Theme applied to the current Bokeh document.
curdoc().theme = 'dark_minimal'

# Toolbar shared by the charts below, as the comma-separated string
# that `figure(tools=...)` accepts.
_tools_to_show = ','.join(
    ['box_zoom', 'pan', 'save', 'hover', 'reset', 'tap', 'wheel_zoom']
)

In [1699]:
# Pixel dimensions passed to every Bokeh figure in this notebook.
WIDTH, HEIGHT = 900, 700

# Setting themes

In [1700]:
# Apply the style sheet first and the palette second: a style sheet may define
# its own color cycle, and whichever call runs last wins. With the palette set
# first, 'rainbow' would be replaced by the style sheet's colors.
plt.style.use("dark_background")
sns.set_palette('rainbow')

# Larger fonts, no grid and a bigger default canvas for matplotlib figures.
params = {
    'legend.fontsize': 'x-large',
    'figure.figsize': (20, 15),
    'axes.grid': False,
    'axes.labelsize': 'x-large',
    'axes.titlesize': 'x-large',
    'xtick.labelsize': 'x-large',
    'ytick.labelsize': 'x-large'
}

pylab.rcParams.update(params)

# Read that data

In [1701]:
# Load the dataset; the `date` column is parsed as datetimes and becomes the index.
df = pd.read_csv(
    'forest-fires.csv',
    index_col='date',
    parse_dates=['date'],
    encoding='latin1',
)

In [1702]:
# Peek at the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0_level_0,year,state,month,number
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1998-01-01,1998,Acre,Janeiro,0.0
1999-01-01,1999,Acre,Janeiro,0.0
2000-01-01,2000,Acre,Janeiro,0.0
2001-01-01,2001,Acre,Janeiro,0.0
2002-01-01,2002,Acre,Janeiro,0.0


In [1703]:
# this will generate a profile report of the data. Takes around 30 seconds to finish execution.

# report = df.profile_report()
# report

# Dump me a little math?

In [1704]:
# Summary statistics for the numeric columns.
df.describe()

Unnamed: 0,year,number
count,6454.0,6454.0
mean,2007.461729,108.293163
std,5.746654,190.812242
min,1998.0,0.0
25%,2002.0,3.0
50%,2007.0,24.0
75%,2012.0,113.0
max,2017.0,998.0


# Info, please.

In [1705]:
# Column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6454 entries, 1998-01-01 to 2016-01-01
Data columns (total 4 columns):
year      6454 non-null int64
state     6454 non-null object
month     6454 non-null object
number    6454 non-null float64
dtypes: float64(1), int64(1), object(2)
memory usage: 252.1+ KB


# Line Charts and Pie Charts

## Total Fires per year

In [1706]:
# Total fires per year. The year is cast to str so that Bokeh treats the
# x axis as categorical (one bar per year label).
data = df.groupby('year')['number'].sum().reset_index(name='count')

data['year'] = data['year'].astype(str)

source = ColumnDataSource(data)

p = figure(x_range=data['year'].tolist(), plot_height=HEIGHT, plot_width=WIDTH,
           title="Total fires by Year", tools=_tools_to_show)

p.vbar(x='year', top='count', width=0.9, source=source)

p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = pi/4
# Bars encode their value by height, so the axis starts at zero. A truncated
# baseline exaggerates year-to-year differences and can shrink the smallest
# bar to nothing. This also matches the per-month bar chart.
p.y_range.start = 0

hover = p.select(dict(type=HoverTool))
hover.tooltips = [("Year", "@year"), ("Fires", "@count{int}")]

show(p)

In [1707]:
TOOLTIPS = [
    ("Year", "@year"),
    ("Fires", "@count"),
    ("Percentage", "@percentage%")
]

# One wedge per row, colored from Category20c, whose largest palette has
# 20 colors; the lookup below fails if the frame has more rows than that.
# Work on a copy so the aggregated `data` frame is not modified by this cell.
dummy_data = data.copy()

dummy_data['count'] = dummy_data['count'].astype(int)

# Share of the grand total per row, computed once (vectorized) and reused
# for both the tooltip percentage and the wedge angle.
share = dummy_data['count'] / dummy_data['count'].sum()
dummy_data['percentage'] = (share * 100).astype(dtype='float16')
dummy_data['angle'] = share * 2 * pi
dummy_data['color'] = Category20c[len(dummy_data)]

# Label text lives in its own column so `count` stays numeric for the tooltip.
# The left padding presumably pushes the rotated text outward from the pie
# center, where every label is anchored.
dummy_data['label'] = dummy_data['count'].astype(str).str.pad(45, side="left")

source = ColumnDataSource(dummy_data)

p = figure(plot_height=HEIGHT, plot_width=WIDTH, title="Total fires by Year",
           tools=_tools_to_show, tooltips=TOOLTIPS, x_range=(-0.5, 1.0))

p.wedge(x=0, y=1, radius=0.3,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend='year', source=source)

labels = LabelSet(x=0, y=1, text='label', level='glyph',
                  angle=cumsum('angle', include_zero=True), source=source, render_mode='canvas')

p.add_layout(labels)

p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None

show(p)

## Total Fires per month

In [1708]:
# Total fires per month name, ordered from the fewest to the most fires.
monthly_totals = df.groupby('month')['number'].sum()
data = monthly_totals.reset_index(name='count').sort_values(by='count')

source = ColumnDataSource(data)

p = figure(
    title="Total fires by month",
    x_range=data['month'],
    plot_width=WIDTH,
    plot_height=HEIGHT,
    tools=_tools_to_show,
)

p.vbar(x='month', top='count', width=0.9, source=source)

# Cosmetics: zero baseline, slanted month labels, no vertical grid lines.
p.y_range.start = 0
p.xaxis.major_label_orientation = pi / 4
p.xgrid.grid_line_color = None

# Configure the hover tool that was created from the `tools` string.
hover = p.select(type=HoverTool)
hover.tooltips = [("Month", "@month"), ("Fires", "@count{int}")]

show(p)

In [1709]:
TOOLTIPS = [
    ("Month", "@month"),
    ("Fires", "@count"),
    ("Percentage", "@percentage%")
]

# One wedge per row, colored from Category20c, whose largest palette has
# 20 colors; the lookup below fails if the frame has more rows than that.
# Work on a copy so the aggregated `data` frame is not modified by this cell.
dummy_data = data.copy()

dummy_data['count'] = dummy_data['count'].astype(int)

# Share of the grand total per row, computed once (vectorized) and reused
# for both the tooltip percentage and the wedge angle.
share = dummy_data['count'] / dummy_data['count'].sum()
dummy_data['percentage'] = (share * 100).astype(dtype='float16')
dummy_data['angle'] = share * 2 * pi
dummy_data['color'] = Category20c[len(dummy_data)]

# Label text lives in its own column so `count` stays numeric for the tooltip.
# The left padding presumably pushes the rotated text outward from the pie
# center, where every label is anchored.
dummy_data['label'] = dummy_data['count'].astype(str).str.pad(45, side="left")

source = ColumnDataSource(dummy_data)

p = figure(plot_height=HEIGHT, plot_width=WIDTH, title="Fires per month",
           tools=_tools_to_show, tooltips=TOOLTIPS, x_range=(-0.5, 1.0))

p.wedge(x=0, y=1, radius=0.3,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend='month', source=source)

labels = LabelSet(x=0, y=1, text='label', level='glyph',
                  angle=cumsum('angle', include_zero=True), source=source, render_mode='canvas')

p.add_layout(labels)

p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None

show(p)

## Total Fires per state

In [1710]:
# Total fires per state, ordered from the fewest to the most fires.
data = df.groupby('state')['number'].sum().reset_index(
    name='count').sort_values('count')

data['state'] = data['state'].astype(str)
data['count'] = data['count'].astype(int)

source = ColumnDataSource(data)

p = figure(x_range=data['state'].tolist(), plot_height=HEIGHT, plot_width=WIDTH,
           title="Total fires by state", tools=_tools_to_show)

p.vbar(x='state', top='count', width=0.9, source=source)

p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = pi/4
# Bars encode their value by height, so the axis starts at zero. A hard-coded
# non-zero baseline hides any state whose total falls below it and
# exaggerates the differences between the rest. This also matches the
# per-month bar chart.
p.y_range.start = 0

hover = p.select(dict(type=HoverTool))
hover.tooltips = [
    ("State", "@state"),
    ("Fires", "@count{int}")
]

show(p)

In [1711]:
TOOLTIPS = [
    ("State", "@state"),
    ("Fires", "@count"),
    ("Percentage", "@percentage%")
]

# One wedge per row, colored from Category20c. `data` is sorted by ascending
# count, so keep only as many of the largest states as the biggest palette
# has colors. `.copy()` makes this an independent frame: the column
# assignments below neither touch `data` nor trigger SettingWithCopyWarning.
dummy_data = data.tail(max(Category20c)).copy()

# Share of the displayed total per row, computed once (vectorized) and reused
# for both the tooltip percentage and the wedge angle.
share = dummy_data['count'] / dummy_data['count'].sum()
dummy_data['percentage'] = (share * 100).astype(dtype='float16')
dummy_data['angle'] = share * 2 * pi
dummy_data['color'] = Category20c[len(dummy_data)]

# Label text lives in its own column so `count` stays numeric for the tooltip.
# The left padding presumably pushes the rotated text outward from the pie
# center, where every label is anchored.
dummy_data['label'] = dummy_data['count'].astype(str).str.pad(45, side="left")

source = ColumnDataSource(dummy_data)

p = figure(plot_height=HEIGHT, plot_width=WIDTH, title="Total fires by state",
           tools=_tools_to_show, tooltips=TOOLTIPS, x_range=(-0.5, 1.0))

p.wedge(x=0, y=1, radius=0.3,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend='state', source=source)

labels = LabelSet(x=0, y=1, text='label', level='glyph',
                  angle=cumsum('angle', include_zero=True), source=source, render_mode='canvas')

p.add_layout(labels)

p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None

show(p)

# Time Series

## Total fires per year

In [1712]:
# Total fires per year, drawn as a line over a categorical (string) year axis.
yearly_totals = df.groupby('year')['number'].sum()
data = yearly_totals.reset_index(name='count')
data['year'] = data['year'].astype(str)

source = ColumnDataSource(data)

p = figure(
    title="Total fires by Year",
    x_range=data['year'],
    plot_width=WIDTH,
    plot_height=HEIGHT,
    tools=_tools_to_show,
)

p.line(x='year', y='count', line_width=3, source=source)

# Cosmetics: y axis lower bound, slanted year labels, no vertical grid lines.
p.y_range.start = 20000
p.xaxis.major_label_orientation = pi / 4
p.xgrid.grid_line_color = None

# Configure the hover tool that was created from the `tools` string.
hover = p.select(type=HoverTool)
hover.tooltips = [("Year", "@year{int}"), ("Fires", "@count{int}")]

show(p)

## Total Fires per month

In [1713]:
# Portuguese month names in calendar order, mapped to their 1-based month number.
month_to_num = {
    name: number
    for number, name in enumerate(
        [
            "Janeiro", "Fevereiro", "Março", "Abril", "Maio", "Junho",
            "Julho", "Agosto", "Setembro", "Outubro", "Novembro", "Dezembro",
        ],
        start=1,
    )
}

In [1714]:
# Numeric month column used for calendar-ordered grouping further down.
# Plain dict indexing is used so an unexpected month name raises KeyError.
df['month_num'] = df['month'].map(month_to_num.__getitem__)

In [1715]:
# Total fires per month. Grouping on (month_num, month) keeps the rows in
# calendar order while carrying the month name along for the tooltip.
monthly_totals = df.groupby(['month_num', 'month'])['number'].sum()
data = monthly_totals.reset_index(name='count')
data['month_num'] = data['month_num'].astype(str)

source = ColumnDataSource(data)

p = figure(
    title="Total fires by month",
    x_range=data['month_num'],
    plot_width=WIDTH,
    plot_height=HEIGHT,
    tools=_tools_to_show,
)

p.line(x='month_num', y='count', line_width=3, source=source)

# Cosmetics: y axis lower bound, slanted labels, no vertical grid lines.
p.y_range.start = 25000
p.xaxis.major_label_orientation = pi / 4
p.xgrid.grid_line_color = None

# Configure the hover tool that was created from the `tools` string.
hover = p.select(type=HoverTool)
hover.tooltips = [("Month", "@month"), ("Fires", "@count{int}")]

show(p)

## Firest States

### Per Year

In [1716]:
# Total fires per state, largest first.
state_totals = df.groupby(['state'])['number'].sum()
data = state_totals.reset_index(name='count').sort_values(
    by='count', ascending=False)

In [1717]:
# The ten states with the most fires, kept in ranking order. `data` has one
# row per state, so no de-duplication is needed. Going through a set would
# scramble the ranking and make the order, and with it the color assigned to
# each state below, differ from one kernel session to the next.
firest_states = data['state'].head(10).tolist()

In [1718]:
# One frame of yearly totals per selected state, in the order of `firest_states`.
data_lst = [
    df[df.state == state_name]
    .groupby('year')['number']
    .sum()
    .reset_index(name='count')
    for state_name in firest_states
]

In [1719]:
# One line per selected state; clicking a legend entry hides that line.
p = figure(plot_width=WIDTH, plot_height=HEIGHT)
p.title.text = 'Top 10 firest states: yearly count'

legend_names = [str(state_name) for state_name in firest_states]

# zip() stops at the shortest input, so each frame is paired with its state
# name and one palette color.
plots = [
    p.line(yearly['year'], yearly['count'],
           line_width=3, color=line_color, alpha=0.8, legend=legend_name)
    for yearly, legend_name, line_color in zip(data_lst, legend_names, Spectral11)
]

p.legend.click_policy = "hide"
p.legend.location = "top_left"

show(p)

### Per Month

In [1720]:
# One frame of per-month totals per selected state, in the order of `firest_states`.
data_lst = [
    df[df.state == state_name]
    .groupby('month_num')['number']
    .sum()
    .reset_index(name='count')
    for state_name in firest_states
]

In [1721]:
# One line per selected state; clicking a legend entry hides that line.
p = figure(plot_width=WIDTH, plot_height=HEIGHT)
p.title.text = 'Top 10 firest states: monthly count'

legend_names = [str(state_name) for state_name in firest_states]

# zip() stops at the shortest input, so each frame is paired with its state
# name and one palette color.
plots = [
    p.line(monthly['month_num'], monthly['count'],
           line_width=3, color=line_color, alpha=0.8, legend=legend_name)
    for monthly, legend_name, line_color in zip(data_lst, legend_names, Spectral11)
]

p.legend.click_policy = "hide"
p.legend.location = "top_left"

show(p)

## Firest Years

In [1722]:
# Total fires per year, largest first.
year_totals = df.groupby(['year'])['number'].sum()
data = year_totals.reset_index(name='count').sort_values(
    by='count', ascending=False)

In [1723]:
# The ten years with the most fires, kept in ranking order. `data` has one
# row per year, so no de-duplication is needed; going through a set would
# discard the ranking that the sort above established.
firest_years = data['year'].head(10).tolist()

In [1724]:
# One frame of per-month totals per selected year, in the order of `firest_years`.
# Note that `data` is a list of frames from here on.
data = [
    df[df.year == int(selected_year)]
    .groupby('month_num')['number']
    .sum()
    .reset_index(name='count')
    for selected_year in firest_years
]

In [1725]:
# One line per selected year; clicking a legend entry hides that line.
p = figure(plot_width=WIDTH, plot_height=HEIGHT)
p.title.text = 'Top 10 firest years: monthly count'

legend_names = [str(yr) for yr in firest_years]

# zip() stops at the shortest input, so each frame is paired with its year
# label and one palette color.
plots = [
    p.line(monthly['month_num'], monthly['count'],
           line_width=3, color=line_color, alpha=0.8, legend=legend_name)
    for monthly, legend_name, line_color in zip(data, legend_names, Spectral11)
]

p.legend.click_policy = "hide"
p.legend.location = "top_left"


show(p)