This notebook scrapes Ceará's weekly COVID-19 cases and deaths from the Conass Tableau dashboard and saves them as a .csv file.

1. Get data from Conass

In [37]:
import pandas as pd
from tableauscraper import TableauScraper as TS

# Shared Tableau Public view that this notebook scrapes.
url = "https://public.tableau.com/shared/6KJMF6FW2?:display_count=y&:origin=viz_share_link&:embed=y"

# Point a TableauScraper instance at the view
# (presumably performs the network request here — needs connectivity; confirm).
ts = TS()
ts.loads(url)
# Dashboard object; the data-cleaning cell reads its `.worksheets` list.
dashboard = ts.getDashboard()

2. Data cleaning and visualization

In [87]:
# Column flavour to keep from each worksheet: "value" or "alias".
# Every column name used below is built from this setting, so switching it
# keeps the filter, the merge key and the rename mapping consistent.
column_type = "value"

# Worksheets 2 and 4 hold the weekly new-cases and deaths tables respectively
# (positional indices — NOTE(review): they break if the dashboard layout changes).
# Only columns whose name contains `column_type` are kept.
df_cases = dashboard.worksheets[2].data.filter(like=column_type, axis=1)
df_deaths = dashboard.worksheets[4].data.filter(like=column_type, axis=1)

# Join both tables on the epidemiological week. Columns present in both frames
# get the "_y" suffix on the deaths side so they can be discarded afterwards.
# NOTE(review): pd.merge defaults to an inner join, so a week missing from
# either worksheet is dropped silently. The displayed output jumps from week 17
# to week 19 — confirm whether week 18 is absent at the source.
week_col = f"nu_semana_epi-{column_type}"
df = pd.merge(df_cases, df_deaths, on=week_col, suffixes=("", "_y"))

# Removes duplicated columns (the "_y" copies coming from the deaths table)
to_drop = [name for name in df.columns if name.endswith("_y")]
df = df.drop(columns=to_drop)

# Renames columns for better human comprehension.
# Keys follow `column_type` instead of hard-coding "-value"; otherwise choosing
# "alias" would rename nothing and the reordering step below would raise
# ValueError on the missing 'data_inicio' column.
df = df.rename(columns={
    f"MIN(dt_coleta)-{column_type}": "data_inicio",
    week_col: "semana_epidemiologica",
    f"SUM(qt_caso_novo)-{column_type}": "novos_casos",
    # "qt_obito_nono" is kept exactly as spelled in the original mapping;
    # the displayed output shows an "obitos" column, so the rename matched.
    f"SUM(qt_obito_nono)-{column_type}": "obitos",
})

# Reorders columns: swaps the positions of the start date and the week number
# (in the displayed output the week number ends up first).
cols = list(df.columns)
start_pos, week_pos = cols.index("data_inicio"), cols.index("semana_epidemiologica")
cols[start_pos], cols[week_pos] = cols[week_pos], cols[start_pos]
df = df[cols]

# Displays dataframe
display(df)

Unnamed: 0,semana_epidemiologica,data_inicio,novos_casos,obitos
0,13,2020-03-27,322,4
1,14,2020-03-29,408,18
2,15,2020-04-05,918,52
3,16,2020-04-12,1380,106
4,17,2020-04-19,2614,145
5,19,2020-05-03,7485,426
6,20,2020-05-10,7619,552
7,21,2020-05-17,10577,694
8,22,2020-05-24,12566,648
9,23,2020-05-31,15354,1009


3. Finally, generates and saves the .csv file

In [89]:
# Persist the cleaned table as CSV. Passing index=False keeps the DataFrame's
# row index out of the file, so the first CSV column is the first data column.
# NOTE(review): the path is relative to the kernel's working directory, and the
# "notchbooks" directory name may be a typo for "notebooks" — confirm before
# relying on it.
csv_path = 'notchbooks/conass-covid-extractor/ceara-covid19-conass-info.csv'
df.to_csv(csv_path, index=False)