In [49]:
import pandas as pd
from plotly import express as px

In [2]:
df = pd.read_csv('amazon.csv', encoding='iso-8859-1')

In [4]:
df['date'] = pd.to_datetime(df['date'])

## 3. Check datatypes

In [5]:
df.dtypes

year               int64
state             object
month             object
number           float64
date      datetime64[ns]
dtype: object

## 4

In [6]:
df.head()

Unnamed: 0,year,state,month,number,date
0,1998,Acre,Janeiro,0.0,1998-01-01
1,1999,Acre,Janeiro,0.0,1999-01-01
2,2000,Acre,Janeiro,0.0,2000-01-01
3,2001,Acre,Janeiro,0.0,2001-01-01
4,2002,Acre,Janeiro,0.0,2002-01-01


## 5

In [7]:
df.tail()

Unnamed: 0,year,state,month,number,date
6449,2012,Tocantins,Dezembro,128.0,2012-01-01
6450,2013,Tocantins,Dezembro,85.0,2013-01-01
6451,2014,Tocantins,Dezembro,223.0,2014-01-01
6452,2015,Tocantins,Dezembro,373.0,2015-01-01
6453,2016,Tocantins,Dezembro,119.0,2016-01-01


## 6

In [10]:
print(F"Rows: {df.shape[0]}")
print(F"Columns: {df.shape[1]}")

Rows: 6454
Columns: 5


## 7

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6454 entries, 0 to 6453
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   year    6454 non-null   int64         
 1   state   6454 non-null   object        
 2   month   6454 non-null   object        
 3   number  6454 non-null   float64       
 4   date    6454 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(1), object(2)
memory usage: 252.2+ KB


## 8. Drop duplicates and show how many there were

In [26]:
len([x for x in df.duplicated(keep='first') if x])

32

In [27]:
df = df[~df.duplicated(keep='first')]

## 9. Check for nulls

In [31]:
df.isnull().any()

year      False
state     False
month     False
number    False
date      False
dtype: bool

## 10. Get statistics

In [33]:
df.describe(include='all')

  df.describe(include='all')


Unnamed: 0,year,state,month,number,date
count,6422.0,6422,6422,6422.0,6422
unique,,23,12,,20
top,,Rio,Agosto,,2015-01-01 00:00:00
freq,,697,540,,324
first,,,,,1998-01-01 00:00:00
last,,,,,2017-01-01 00:00:00
mean,2007.490969,,,108.815178,
std,5.731806,,,191.142482,
min,1998.0,,,0.0,
25%,2003.0,,,3.0,


## 11. Translate months to English

In [34]:
set(df['month'])

{'Abril',
 'Agosto',
 'Dezembro',
 'Fevereiro',
 'Janeiro',
 'Julho',
 'Junho',
 'Maio',
 'Marï¿½o',
 'Novembro',
 'Outubro',
 'Setembro'}

In [42]:
month_map = {'Abril': 'April', 'Agosto': 'August', 'Dezembro': 'December', 'Fevereiro': 'February',
                        'Janeiro': 'January', 'Julho': 'July', 'Junho': 'June', 'Maio': 'May', 
                        'Novembro': 'November', 'Outubro': 'October', 'Setembro': 'September', 'Marï¿½o': 'March'}
df['month'] = df['month'].map(lambda x: month_map[x])

## 12. Show how many fires were registered in total

In [44]:
len(df)

6422

## 13. Fires by month

In [52]:
fires_by_month = df.groupby('month').count().reset_index()[['month', 'year']].rename(columns={'year': 'fires'}).sort_values('fires', ascending=False)
px.bar(fires_by_month, x='month', y='fires')

## 14/17. Fires by year

In [55]:
fires_by_year = df.groupby('year').count().reset_index()[['year', 'month']].rename(columns={'month': 'fires'}).sort_values('fires', ascending=False)
px.bar(fires_by_year, x='year', y='fires')

## 15/20. Fires by state

In [56]:
fires_by_year = df.groupby('state').count().reset_index()[['state', 'month']].rename(columns={'month': 'fires'}).sort_values('fires', ascending=False)
px.bar(fires_by_year, x='state', y='fires')

## 16. Fires in the Amazonas state

In [57]:
len(df[df['state'] == 'Amazonas'])

239

## 19. Total number of fires in 2015 by month

In [59]:
fires_2015 = df[df['year'] == 2015].groupby('month').count().reset_index()[['month', 'state']].rename(columns={'state': 'fires'}).sort_values('fires', ascending=False)
px.bar(fires_2015, x='month', y='fires')

## 21. State names in december

In [63]:
df[df['month'] == 'December'][['state']].value_counts()

state           
Rio                 57
Paraiba             38
Mato Grosso         38
Acre                19
Sergipe             19
Sao Paulo           19
Santa Catarina      19
Roraima             19
Rondonia            19
Piau                19
Pernambuco          19
Parï¿½              19
Minas Gerais        19
Alagoas             19
Maranhao            19
Goias               19
Espirito Santo      19
Distrito Federal    19
Ceara               19
Bahia               19
Amazonas            19
Amapa               19
Tocantins           19
dtype: int64