In [1]:
import numpy as np
import pandas as pd
import altair as alt
from altair_saver import save

The function below is an auxiliary function used to create the dataset 'climate_br_2000-2023_quarter.csv' when we have the climate time series separated by state (one parquet file per state).

In [2]:

def get_climate_by_year(filename, state):
    """
    Aggregate the climate time series of one state into quarterly means.

    Parameters
    ----------
    filename: str
        path of the parquet file with the climate time series. The columns
        'temp_med', 'precip_med', 'pressao_med', 'umid_med' and 'temp_max'
        are read from it; the index must be datetime-like, since the frame
        is resampled.
    state: str
        two letter code of the state.

    Returns
    -------
    df
        a dataframe indexed by quarter-end date with the quarterly mean of
        each climate column, plus the columns 'trimestre' (the year followed
        by '01'..'04', as a string) and 'state'.

    """

    climate_columns = ['temp_med', 'precip_med', 'pressao_med', 'umid_med', 'temp_max']
    df = pd.read_parquet(filename, columns=climate_columns)

    # Use the offset object rather than the 'Q' string alias: the alias is
    # deprecated in recent pandas releases, while the offset object means the
    # same thing (quarters ending in Mar/Jun/Sep/Dec) on every version.
    df = df.resample(pd.offsets.QuarterEnd(startingMonth=12)).mean()

    # One label per row, taken from the timestamp's own quarter number, so the
    # list always has the same length as the index.
    df['trimestre'] = [f'{stamp.year}0{stamp.quarter}' for stamp in df.index]
    df['state'] = state

    return df


The snippet of code below can be used to generate the dataset of climate variables by quarter for all the states in Brazil:


In [5]:
%%time

# Directory holding one '<UF>_climate.parquet' file per state. It is a
# machine-specific absolute path: change this single line to point at your
# own copy of the data.
climate_dir = '/Users/eduardoaraujo/Documents/Github/paper-dengue-sc/data/climate'

# Two-letter state code -> state name.
estados = {'RJ': 'Rio de Janeiro', 'ES': 'Espírito Santo', 'PR': 'Paraná', 'CE': 'Ceará',
               'MA': 'Maranhão', 'MG': 'Minas Gerais', 'SC': 'Santa Catarina', 'PE': 'Pernambuco', 
               'PB': 'Paraíba', 'RN': 'Rio Grande do Norte', 'PI': 'Piauí', 'AL': 'Alagoas',
               'SE': 'Sergipe', 'SP': 'São Paulo', 'RS': 'Rio Grande do Sul','PA': 'Pará',
               'AP': 'Amapá', 'RR': 'Roraima', 'RO': 'Rondônia', 'AM': 'Amazonas', 'AC': 'Acre',
               'MT': 'Mato Grosso', 'MS': 'Mato Grosso do Sul', 'GO': 'Goiás', 'TO': 'Tocantins',
               'DF': 'Distrito Federal', 'BA': 'Bahia'
               }

# Region name -> state codes belonging to it.
regions = {'Sul':['SC', 'PR', 'RS'],
          'Sudeste':['SP', 'MG', 'RJ', 'ES'], 
          'Centro-Oeste':['DF', 'MT', 'MS', 'GO'],
          'Nordeste':['BA', 'CE', 'PE', 'PB', 'PI', 'RN', 'MA', 'AL', 'SE'],
          'Norte': ['RO', 'AC', 'AM', 'RR', 'PA', 'AP', 'TO'] }

# Inverted lookup (state code -> region) so the region column can be filled
# with a single map instead of one masked assignment per region.
state_to_region = {uf: region_name
                   for region_name, ufs in regions.items()
                   for uf in ufs}

# Build every per-state frame first and concatenate once; concatenating inside
# the loop re-copies the accumulated frame on every iteration.
df_end = pd.concat(
    [get_climate_by_year(f'{climate_dir}/{state}_climate.parquet', state)
     for state in estados]
)

df_end['region'] = df_end['state'].map(state_to_region)

# .copy() so the column added below is written to an independent frame and
# not to a slice of the concatenated one.
df_end = df_end.loc[df_end.index.year >= 2000].copy()

# 'YYYYQQ' -> 'YYYY-QQ', the label shown on the heatmap x axis.
trimestre_str = df_end['trimestre'].astype(str)
df_end['trimestre_tick'] = trimestre_str.str[:4] + '-' + trimestre_str.str[-2:]

df_end.head()

CPU times: user 5.48 s, sys: 1.43 s, total: 6.9 s
Wall time: 6.44 s


Unnamed: 0_level_0,temp_med,precip_med,pressao_med,umid_med,temp_max,trimestre,state,region,trimestre_tick
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2000-03-31,24.027366,0.257375,0.999779,83.244459,27.685427,200001,RJ,Sudeste,2000-01
2000-06-30,19.801918,0.034828,1.005289,79.920818,24.47841,200002,RJ,Sudeste,2000-02
2000-09-30,19.069029,0.106511,1.00593,80.495938,23.136103,200003,RJ,Sudeste,2000-03
2000-12-31,23.348939,0.181697,1.000779,81.163891,27.205556,200004,RJ,Sudeste,2000-04
2001-03-31,25.136111,0.195201,1.000531,81.34163,29.272494,200101,RJ,Sudeste,2001-01


In [6]:
# Preview the first rows of the quarterly climate table before saving it.
df_end.head()

Unnamed: 0_level_0,temp_med,precip_med,pressao_med,umid_med,temp_max,trimestre,state,region,trimestre_tick
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2000-03-31,24.027366,0.257375,0.999779,83.244459,27.685427,200001,RJ,Sudeste,2000-01
2000-06-30,19.801918,0.034828,1.005289,79.920818,24.47841,200002,RJ,Sudeste,2000-02
2000-09-30,19.069029,0.106511,1.00593,80.495938,23.136103,200003,RJ,Sudeste,2000-03
2000-12-31,23.348939,0.181697,1.000779,81.163891,27.205556,200004,RJ,Sudeste,2000-04
2001-03-31,25.136111,0.195201,1.000531,81.34163,29.272494,200101,RJ,Sudeste,2001-01


In [7]:
# Persist the quarterly table. The index is written as the first CSV column;
# plot_heatmap_single reads this file back and expects that column to be
# named 'date'.
df_end.to_csv('./data/climate_br_2000-2023_quarter.csv')

In [14]:
def plot_heatmap_single(column):
    
    """
    Function to plot a heatmap of a climate variable by state and quarter.

    The data is read from './data/climate_br_2000-2023_quarter.csv' and only
    rows dated before 2023 are kept. States on the y axis are sorted by
    region, and a radio selector bound to 'region' keeps the selected region
    coloured while the remaining cells are drawn in light gray.
    
    Parameters
    ----------
    column: str
        name of the quantitative column to plot (e.g. 'temp_med'). It is also
        used as the chart title and as the tooltip label.

    Returns
    -------
    fig
        an altair chart

    """
    
    # load the data
    df_end = pd.read_csv('./data/climate_br_2000-2023_quarter.csv').set_index('date')
    df_end.index = pd.to_datetime(df_end.index)
    # .copy() so the helper column added below does not write into a slice
    df_end = df_end.loc[df_end.index.year < 2023].copy()

    # the column order is used to agg the states in the yaxis based on the region
    region_order = {'Sudeste': 1, 'Sul': 2, 'Nordeste': 3, 'Norte': 4, 'Centro-Oeste': 5}
    # cast to float so the values match a NaN-initialised column (1.0, 2.0, ...)
    df_end['order'] = df_end['region'].map(region_order).astype(float)

    # selection field to filter by region; the leading None option clears the
    # filter and is the only option with a custom label ('All')
    region_options = [None, *df_end['region'].unique()]
    regiao_radio = alt.binding_radio(options=region_options, labels=['All'], name="Selecione")
    
    selection = alt.selection_point(fields=['region'], bind=regiao_radio)

    title = column
    color_scheme = 'orangered'

    # create the figure
    fig = alt.Chart(data=df_end, width=800, height=400, title=title).mark_rect(filled=True).encode(
        x = alt.X('trimestre_tick', type='nominal', title='Quarter', sort = alt.SortField('trimestre', order='ascending')),
        y = alt.Y('state', title='State', sort = alt.SortField('order', order='ascending')), 
        tooltip = [ alt.Tooltip(field = 'state', title = "State", type = "nominal"),
                    alt.Tooltip(field = "trimestre_tick", title = 'Quarter', type = "nominal"),
                    alt.Tooltip(field = column, title = column, type = "quantitative")],
        color=alt.condition(selection, 
                            alt.Color(f'{column}:Q', scale=alt.Scale(scheme=color_scheme),
                            legend=alt.Legend(direction='vertical', orient='left', legendY=30, title = None)),
                            alt.value('lightgray'))
    ).add_params(
        selection
    )
    
    return fig



In [16]:
# Build the heatmap for the 'temp_med' column of the quarterly table.
fig = plot_heatmap_single("temp_med")

# Bare last expression so the notebook renders the interactive chart.
fig

It's possible to save this map as an HTML file:

In [9]:
# Export the chart as a standalone HTML page.
# NOTE(review): presumably the 'files_html' directory must already exist — confirm.
# NOTE(review): this cell's execution count (9) is lower than that of the cell
# that defines `fig` (16); re-run the notebook top to bottom before trusting it.
fig.save('files_html/heatmap_climate.html')