Esta análise usa as bases de dados do [DATASUS TABNET](https://datasus.saude.gov.br/informacoes-de-saude-tabnet/) sobre [casos de Tuberculose](http://tabnet.datasus.gov.br/cgi/tabcgi.exe?sinannet/cnv/tubercbr.def) e [casos de Hanseníase](http://tabnet.datasus.gov.br/cgi/tabcgi.exe?sinannet/cnv/hanswbr.def).

In [30]:
import pandas as pd
from datetime import date
import plotly.express as px
import numpy as np

In [12]:
# Confirmed tuberculosis notifications for Brazil: a CSV export stored on GitHub.
url_tuberculose = 'https://raw.githubusercontent.com/LucasGabrielB/Alura-Bootcamp-Data-Science-Aplicada/main/Modulo-04/datasets/TUBERCULOSE%20-%20CASOS%20CONFIRMADOS%20NOTIFICADOS%20NO%20SISTEMA%20DE%20INFORMA%C3%87%C3%83O%20DE%20AGRAVOS%20DE%20NOTIFICA%C3%87%C3%83O%20-%20BRASIL.csv'

# The first 3 and the last 17 lines of the file are skipped (presumably the
# export's header/footer notes -- confirm against the raw file). `skipfooter`
# is not supported by the C parser, hence engine='python'.
df_tuberculose = pd.read_csv(
    url_tuberculose,
    sep=';',
    encoding='ISO-8859-1',
    skiprows=3,
    skipfooter=17,
    engine='python',
)

df_tuberculose.head()

Unnamed: 0,Ano Diagnóstico,Jan,Fev,Mar,Abr,Mai,Jun,Jul,Ago,Set,Out,Nov,Dez,Total
0,2001,8088,6542,8095,7305,7656,6805,6987,8065,6697,7524,6874,6627,87265
1,2002,8013,7346,7961,8771,7784,6713,7746,8330,7654,8200,7532,6809,92859
2,2003,8115,7985,7474,7975,8024,7082,7949,7585,8136,8521,7624,7303,93773
3,2004,7574,6743,8574,8089,7763,7314,7825,8232,7901,8016,7753,7196,92980
4,2005,7432,6748,8441,7958,8019,7781,7282,8391,7690,7186,7587,7541,92056


In [44]:
# Leprosy (hanseníase) follow-up data for Brazil: a CSV export stored on GitHub.
url_hanseniase = 'https://raw.githubusercontent.com/LucasGabrielB/Alura-Bootcamp-Data-Science-Aplicada/main/Modulo-04/datasets/ACOMPANHAMENTO%20DOS%20DADOS%20DE%20HANSEN%C3%8DASE%20-%20BRASIL.csv'

# The first 3 and the last 11 lines of the file are skipped, and '-' cells are
# parsed as missing values. `skipfooter` requires the pure-Python parser.
hanseniase_raw = pd.read_csv(
    url_hanseniase,
    sep=';',
    encoding='ISO-8859-1',
    skiprows=3,
    skipfooter=11,
    engine='python',
    na_values='-',
)

# Keep only the 2001-2019 period. Rows are selected by diagnosis year instead of
# by position, so the selection stays correct even if the export gains or loses
# leading rows. Year labels that are not numeric become NaN and are excluded.
first_year, last_year = 2001, 2019
diagnosis_year = pd.to_numeric(hanseniase_raw['Ano Diagnóstico'], errors='coerce')
in_period = diagnosis_year.between(first_year, last_year)

# Every remaining cell is expected to hold a whole number; astype raises if a
# missing value survived the year filter.
df_hanseniase = hanseniase_raw.loc[in_period].astype('int64')

df_hanseniase.head()

Unnamed: 0,Ano Diagnóstico,Jan,Fev,Mar,Abr,Mai,Jun,Jul,Ago,Set,Out,Nov,Dez,Total
27,2001,4107,4103,4787,4135,4724,3940,4116,4890,4035,4728,4197,3250,51012
28,2002,3982,4065,4566,5027,4663,4488,4971,5281,4805,5151,4679,3545,55223
29,2003,4127,5918,5555,4965,5013,4476,4655,4844,5249,5257,4737,3874,58670
30,2004,4168,4169,5612,4557,5440,5299,4975,5640,4825,4233,4520,3925,57363
31,2005,4078,4241,4998,4628,4706,4559,4154,5457,4853,5196,5008,4209,56087


In [34]:
# Portuguese month abbreviations (the month column headers of the tables above)
# mapped to their calendar month number: 'Jan' -> 1, ..., 'Dez' -> 12.
convert_months = dict(
    zip(
        ('Jan', 'Fev', 'Mar', 'Abr', 'Mai', 'Jun',
         'Jul', 'Ago', 'Set', 'Out', 'Nov', 'Dez'),
        range(1, 13),
    )
)

In [13]:
# Reshape the wide year x month table into a long monthly time series:
# one row per (year, month) with its case count.
tuberculose_long = df_tuberculose.drop(columns='Total').melt(
    id_vars=['Ano Diagnóstico'],
    var_name='Mês',
    value_name='Casos',
)

# Fail loudly on an unexpected month label instead of silently producing NaT.
unknown_months = set(tuberculose_long['Mês']) - set(convert_months)
if unknown_months:
    raise KeyError(f'Unknown month columns: {sorted(unknown_months)}')

# Build first-of-month timestamps in a single vectorized call. This yields a
# real datetime64 column (usable with .dt, resample, ...) rather than an
# object column of datetime.date values built row by row.
tuberculose_long['Data'] = pd.to_datetime(
    pd.DataFrame({
        'year': tuberculose_long['Ano Diagnóstico'],
        'month': tuberculose_long['Mês'].map(convert_months),
        'day': 1,
    })
)

# Only the count and its date are kept, in chronological order.
df_tuberculose_ts = (
    tuberculose_long
    .drop(columns=['Ano Diagnóstico', 'Mês'])
    .sort_values('Data')
)

df_tuberculose_ts

Unnamed: 0,Casos,Data
0,8088,2001-01-01
19,6542,2001-02-01
38,8095,2001-03-01
57,7305,2001-04-01
76,7656,2001-05-01
...,...,...
151,8239,2019-08-01
170,8143,2019-09-01
189,8592,2019-10-01
208,7615,2019-11-01


In [45]:
# Reshape the wide year x month table into a long monthly time series:
# one row per (year, month) with its case count.
hanseniase_long = df_hanseniase.drop(columns='Total').melt(
    id_vars=['Ano Diagnóstico'],
    var_name='Mês',
    value_name='Casos',
)

# Fail loudly on an unexpected month label instead of silently producing NaT.
unknown_months = set(hanseniase_long['Mês']) - set(convert_months)
if unknown_months:
    raise KeyError(f'Unknown month columns: {sorted(unknown_months)}')

# Build first-of-month timestamps in a single vectorized call. This yields a
# real datetime64 column (usable with .dt, resample, ...) rather than an
# object column of datetime.date values built row by row.
hanseniase_long['Data'] = pd.to_datetime(
    pd.DataFrame({
        'year': hanseniase_long['Ano Diagnóstico'],
        'month': hanseniase_long['Mês'].map(convert_months),
        'day': 1,
    })
)

# Only the count and its date are kept, in chronological order.
df_hanseniase_ts = (
    hanseniase_long
    .drop(columns=['Ano Diagnóstico', 'Mês'])
    .sort_values('Data')
)

df_hanseniase_ts

Unnamed: 0,Casos,Data
0,4107,2001-01-01
19,4103,2001-02-01
38,4787,2001-03-01
57,4135,2001-04-01
76,4724,2001-05-01
...,...,...
151,3107,2019-08-01
170,3060,2019-09-01
189,3137,2019-10-01
208,2715,2019-11-01


In [14]:
# Interactive line chart of the tuberculosis case counts over time.
fig = px.line(
    df_tuberculose_ts,
    x='Data',
    y='Casos',
    title='Casos de Tuberculose ao longo dos anos no Brasil',
    hover_data=['Casos'],
    color_discrete_sequence=['#ff2e2e'],
)

# Show a marker on every data point and drop the hover template set by px.
fig.update_traces(hovertemplate=None, mode='markers+lines')

# One layout pass: hover mode, title placement (anchored at its centre/top),
# y-axis label and the font used across the figure.
fig.update_layout(
    hovermode='x',
    title=dict(x=0.5, y=0.9, xanchor='center', yanchor='top'),
    yaxis_title='Número de casos',
    font={'family': 'Courier New, monospace', 'size': 16},
)

fig.show()

In [46]:
# Interactive line chart of the leprosy (hanseníase) case counts over time.
fig = px.line(
    df_hanseniase_ts,
    x='Data',
    y='Casos',
    title='Casos de Hanseníase ao longo dos anos no Brasil',
    hover_data=['Casos'],
    color_discrete_sequence=['#ff2e2e'],
)

# Show a marker on every data point and drop the hover template set by px.
fig.update_traces(hovertemplate=None, mode='markers+lines')

# One layout pass: hover mode, title placement (anchored at its centre/top),
# y-axis label and the font used across the figure.
fig.update_layout(
    hovermode='x',
    title=dict(x=0.5, y=0.9, xanchor='center', yanchor='top'),
    yaxis_title='Número de casos',
    font={'family': 'Courier New, monospace', 'size': 16},
)

fig.show()