# Database migration rules

In [1]:
import fludashboard as flud
import pandas as pd
import os
import glob

In [2]:
# pandas configuration
pd.set_option('display.max_columns', 99)

In [3]:
def get_filename_from_path(file_path: str):
    """
    """
    return file_path.split(os.path.sep)[-1].split('.')[0]

In [4]:
path_data_files = os.path.join(flud.__path__[0], 'data', '*.csv')

In [5]:
dfs = {}
pks = {}

In [6]:
print('Data files:')
for file_path in glob.glob(path_data_files):
    filename = get_filename_from_path(file_path)
    
    print(filename)
    
    dfs[filename] = pd.read_csv(file_path)

Data files:
mem-report
clean_data_epiweek-weekly-incidence_w_situation


  interactivity=interactivity, compiler=compiler, result=result)


historical_estimated_values
current_estimated_values
mem-typical


# Setting IDs

## Datasets

In [7]:
dataset_id = {
    'srag': 1,
    'sragflu': 2,
    'obitoflu': 3,
}

## Scale

In [8]:
scale_id = {
    'incidência': 1,
    'casos': 2
}

## current_estimated_values

In [9]:
dataset = 'current_estimated_values'

In [10]:
dfs[dataset].head()

Unnamed: 0,UF,epiyear,epiweek,SRAG,Tipo,Situation,mean,50%,2.5%,97.5%,L0,L1,L2,L3,Run date,dado,escala
0,11,2009,1,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
1,11,2009,2,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
2,11,2009,3,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
3,11,2009,4,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
4,11,2009,5,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência


In [11]:
migration_rules = {
    'UF': 'state_country',
    'SRAG': 'value',
    'Tipo': 'type',
    'Situation': 'situation',
    '50%': 'median',
    '2.5%': 'ci_lower',
    '97.5%': 'ci_upper',
    'L0': 'low',
    'L1': 'epidemic',
    'L2': 'high',
    'L3': 'very_high',
    'Run date': 'run_date',
    'dado': 'dataset',
    'escala': 'scale'
}

dfs[dataset].rename(
    columns=migration_rules, inplace=True
)
dfs[dataset].dataset = dfs[dataset].dataset.map(dataset_id)
dfs[dataset].scale = dfs[dataset].scale.map(scale_id)
dfs[dataset].head()

Unnamed: 0,state_country,epiyear,epiweek,value,type,situation,mean,median,ci_lower,ci_upper,low,epidemic,high,very_high,run_date,dataset,scale
0,11,2009,1,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,1,1
1,11,2009,2,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,1,1
2,11,2009,3,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,1,1
3,11,2009,4,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,1,1
4,11,2009,5,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,1,1


In [12]:
# primary_keys
pks[dataset] = ['dataset', 'scale', 'state_country', 'epiyear', 'epiweek']

dfs[dataset].set_index(pks[dataset], inplace=True)
dfs[dataset].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,value,type,situation,mean,median,ci_lower,ci_upper,low,epidemic,high,very_high,run_date
dataset,scale,state_country,epiyear,epiweek,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,1,11,2009,1,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11
1,1,11,2009,2,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11
1,1,11,2009,3,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11
1,1,11,2009,4,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11
1,1,11,2009,5,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11


## historical_estimated_values

In [13]:
dataset = 'historical_estimated_values'

In [14]:
dfs[dataset].head()

Unnamed: 0,UF,epiyear,epiweek,SRAG,Tipo,Situation,mean,50%,2.5%,97.5%,L0,L1,L2,L3,Run date,base_epiyearweek,base_epiyear,base_epiweek,dado,escala
0,11,2017,7,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.055377,0.976,0.024,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
1,11,2017,8,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.972,0.028,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
2,11,2017,9,0.0,Estado,estimated,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
3,11,2017,10,0.0,Estado,estimated,0.0,0.0,0.0,0.055377,0.996,0.004,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
4,11,2017,11,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.964,0.036,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência


In [15]:
migration_rules = {
    'UF': 'state_country',
    'SRAG': 'value',
    'Tipo': 'type',
    'Situation': 'situation',
    '50%': 'median',
    '2.5%': 'ci_lower',
    '97.5%': 'ci_upper',
    'L0': 'low',
    'L1': 'epidemic',
    'L2': 'high',
    'L3': 'very_high',
    'Run date': 'run_date',
    'dado': 'dataset',  # or origin
    'escala': 'scale'
}

dfs[dataset].rename(
    columns=migration_rules, inplace=True
)
dfs[dataset].dataset = dfs[dataset].dataset.map(dataset_id)
dfs[dataset].scale = dfs[dataset].scale.map(scale_id)
dfs[dataset].head()

Unnamed: 0,state_country,epiyear,epiweek,value,type,situation,mean,median,ci_lower,ci_upper,low,epidemic,high,very_high,run_date,base_epiyearweek,base_epiyear,base_epiweek,dataset,scale
0,11,2017,7,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.055377,0.976,0.024,0.0,0.0,2017-10-11,2017W23,2017,23,1,1
1,11,2017,8,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.972,0.028,0.0,0.0,2017-10-11,2017W23,2017,23,1,1
2,11,2017,9,0.0,Estado,estimated,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,2017W23,2017,23,1,1
3,11,2017,10,0.0,Estado,estimated,0.0,0.0,0.0,0.055377,0.996,0.004,0.0,0.0,2017-10-11,2017W23,2017,23,1,1
4,11,2017,11,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.964,0.036,0.0,0.0,2017-10-11,2017W23,2017,23,1,1


In [16]:
# primary_keys
pks[dataset] = [
    'dataset', 'scale', 'state_country', 
    'base_epiyear', 'base_epiweek', 
    'epiyear', 'epiweek'
]

dfs[dataset].set_index(pks[dataset], inplace=True)
dfs[dataset].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,value,type,situation,mean,median,ci_lower,ci_upper,low,epidemic,high,very_high,run_date,base_epiyearweek
dataset,scale,state_country,base_epiyear,base_epiweek,epiyear,epiweek,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,1,11,2017,23,2017,7,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.055377,0.976,0.024,0.0,0.0,2017-10-11,2017W23
1,1,11,2017,23,2017,8,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.972,0.028,0.0,0.0,2017-10-11,2017W23
1,1,11,2017,23,2017,9,0.0,Estado,estimated,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,2017W23
1,1,11,2017,23,2017,10,0.0,Estado,estimated,0.0,0.0,0.0,0.055377,0.996,0.004,0.0,0.0,2017-10-11,2017W23
1,1,11,2017,23,2017,11,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.964,0.036,0.0,0.0,2017-10-11,2017W23


## clean_data_epiweek-weekly-incidence_w_situation

In [17]:
dataset = 'clean_data_epiweek-weekly-incidence_w_situation'

In [18]:
dfs[dataset].head()

Unnamed: 0,0-4 anos,10-19 anos,2-4 anos,20-29 anos,30-39 anos,40-49 anos,5-9 anos,50-59 anos,60+ anos,< 2 anos,DELAYED,FLU_A,FLU_B,INCONCLUSIVE,Idade desconhecida,NEGATIVE,NOTTESTED,OTHERS,POSITIVE_CASES,SRAG,Situation,TESTING_IGNORED,Tipo,UF,Unidade da Federação,VSR,dado,epiweek,epiyear,epiyearweek,escala,sexo
0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,1,2009,2009W01,incidência,F
1,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,1,2009,2009W01,incidência,M
2,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,1,2009,2009W01,incidência,Total
3,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,2,2009,2009W02,incidência,F
4,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,2,2009,2009W02,incidência,M


In [19]:
dfs[dataset].keys()

Index(['0-4 anos', '10-19 anos', '2-4 anos', '20-29 anos', '30-39 anos',
       '40-49 anos', '5-9 anos', '50-59 anos', '60+ anos', '< 2 anos',
       'DELAYED', 'FLU_A', 'FLU_B', 'INCONCLUSIVE', 'Idade desconhecida',
       'NEGATIVE', 'NOTTESTED', 'OTHERS', 'POSITIVE_CASES', 'SRAG',
       'Situation', 'TESTING_IGNORED', 'Tipo', 'UF', 'Unidade da Federação',
       'VSR', 'dado', 'epiweek', 'epiyear', 'epiyearweek', 'escala', 'sexo'],
      dtype='object')

In [20]:
migration_rules = {
    '0-4 anos': '0_4_years',
    '10-19 anos': '10_19_years',
    '2-4 anos': '2_4_years',
    '20-29 anos': '20_29_years',
    '30-39 anos': '30_39_years',
    '40-49 anos': '40_49_years',
    '5-9 anos': '5_9_years',
    '50-59 anos': '50_59_years',
    '60+ anos': '60_years_or_more',
    '< 2 anos': 'lt_2_years',
    'DELAYED': 'delayed',
    'FLU_A': 'flu_a',
    'FLU_B': 'flu_b',
    'INCONCLUSIVE': 'inconclusive',
    'Idade desconhecida': 'unknown_age',
    'NEGATIVE': 'negative',
    'NOTTESTED': 'not_tested',
    'OTHERS': 'others',
    'POSITIVE_CASES': 'positive_cases',
    'SRAG': 'value',
    'Situation': 'situation',
    'Tipo': 'type',
    'UF': 'state_country',
    'Unidade da Federação': 'state_country_name', # Not needed in the table
    'VSR': 'vsr',
    'dado': 'dataset',
    'escala': 'scale',
    'sexo': 'gender'
}

dfs[dataset].rename(
    columns=migration_rules, inplace=True
)
dfs[dataset].dataset = dfs[dataset].dataset.map(dataset_id)
dfs[dataset].scale = dfs[dataset].scale.map(scale_id)
dfs[dataset].head()

Unnamed: 0,0_4_years,10_19_years,2_4_years,20_29_years,30_39_years,40_49_years,5_9_years,50_59_years,60_years_or_more,lt_2_years,delayed,flu_a,flu_b,inconclusive,unknown_age,negative,not_tested,others,positive_cases,value,situation,TESTING_IGNORED,type,state_country,state_country_name,vsr,dataset,epiweek,epiyear,epiyearweek,scale,gender
0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,1,1,2009,2009W01,1,F
1,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,1,1,2009,2009W01,1,M
2,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,1,1,2009,2009W01,1,Total
3,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,1,2,2009,2009W02,1,F
4,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,1,2,2009,2009W02,1,M


In [21]:
# primary_keys
pks[dataset] = ['dataset', 'scale', 'state_country', 'epiyear', 'epiweek']

dfs[dataset].set_index(pks[dataset], inplace=True)
dfs[dataset].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0_4_years,10_19_years,2_4_years,20_29_years,30_39_years,40_49_years,5_9_years,50_59_years,60_years_or_more,lt_2_years,delayed,flu_a,flu_b,inconclusive,unknown_age,negative,not_tested,others,positive_cases,value,situation,TESTING_IGNORED,type,state_country_name,vsr,epiyearweek,gender
dataset,scale,state_country,epiyear,epiweek,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
1,1,11,2009,1,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,Rondônia,0.0,2009W01,F
1,1,11,2009,1,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,Rondônia,0.0,2009W01,M
1,1,11,2009,1,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,Rondônia,0.0,2009W01,Total
1,1,11,2009,2,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,Rondônia,0.0,2009W02,F
1,1,11,2009,2,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,Rondônia,0.0,2009W02,M


## mem-report

In [22]:
dataset = 'mem-report'

In [23]:
dfs[dataset].head()

Unnamed: 0,UF,População,Média geométrica do pico de infecção das temporadas regulares,região de baixa atividade típica,limiar pré-epidêmico,intensidade alta,intensidade muito alta,SE típica do início do surto,"SE típica do início do surto - IC inferior (2,5%)","SE típica do início do surto - IC superior (97,5%)",duração típica do surto,"duração típica do surto - IC inferior (2,5%)","duração típica do surto - IC superior (97,5%)",temporadas utilizadas para os corredores endêmicos,ano,Unidade da Federação,Tipo,dado,escala
0,11,1805788,0.803458,0,0.103162,0.863224,1.16816,12.0,9.0,22.0,8.0,4.0,15.0,"2010, 2013, 2014, 2016",2017,Rondônia,Estado,srag,incidência
1,12,829619,0.953503,0,0.685939,1.783885,3.053363,24.0,5.0,49.0,6.0,3.0,16.0,"2010, 2011, 2012, 2013, 2014, 2015, 2016",2017,Acre,Estado,srag,incidência
2,13,4063614,0.119378,0,0.042171,0.165851,0.212193,12.0,4.0,25.0,12.0,6.0,13.0,"2010, 2012, 2013, 2014, 2015, 2016",2017,Amazonas,Estado,srag,incidência
3,14,522636,0.473615,0,0.414595,0.593964,0.801757,26.0,13.0,50.0,6.0,3.0,10.0,"2012, 2013, 2014, 2015, 2016",2017,Roraima,Estado,srag,incidência
4,15,8366628,0.25623,0,0.083865,0.466878,0.859727,12.0,10.0,18.0,10.0,5.0,13.0,"2012, 2014, 2015, 2016",2017,Pará,Estado,srag,incidência


In [24]:
dfs[dataset].keys()

Index(['UF', 'População',
       'Média geométrica do pico de infecção das temporadas regulares',
       'região de baixa atividade típica', 'limiar pré-epidêmico',
       'intensidade alta', 'intensidade muito alta',
       'SE típica do início do surto',
       'SE típica do início do surto - IC inferior (2,5%)',
       'SE típica do início do surto - IC superior (97,5%)',
       'duração típica do surto',
       'duração típica do surto - IC inferior (2,5%)',
       'duração típica do surto - IC superior (97,5%)',
       'temporadas utilizadas para os corredores endêmicos', 'ano',
       'Unidade da Federação', 'Tipo', 'dado', 'escala'],
      dtype='object')

In [25]:
migration_rules = {
    'UF': 'state_country',
    'População': 'population',
    'Média geométrica do pico de infecção das temporadas regulares': 'geom_average_peak',
    'região de baixa atividade típica': 'low_activiy_region',
    'limiar pré-epidêmico': 'pre_epidemic_threshold',
    'intensidade alta': 'high_threshold',
    'intensidade muito alta': 'very_high_threshold',
    'SE típica do início do surto': 'epi_start',
    'SE típica do início do surto - IC inferior (2,5%)': 'epi_start_ci_lower',
    'SE típica do início do surto - IC superior (97,5%)': 'epi_start_ci_upper',
    'duração típica do surto': 'epi_duration',
    'duração típica do surto - IC inferior (2,5%)': 'epi_duration_ci_lower',
    'duração típica do surto - IC superior (97,5%)': 'epi_duration_ci_upper',
    'temporadas utilizadas para os corredores endêmicos': 'regular_seasons',
    'ano': 'year',
    'Unidade da Federação': 'state_country_name', # Not needed in the table
    'Tipo': 'type',
    'dado': 'dataset',
    'escala': 'scale'
}

dfs[dataset].rename(
    columns=migration_rules, inplace=True
)
dfs[dataset].dataset = dfs[dataset].dataset.map(dataset_id)
dfs[dataset].scale = dfs[dataset].scale.map(scale_id)
dfs[dataset].head()

Unnamed: 0,state_country,population,geom_average_peak,low_activiy_region,pre_epidemic_threshold,high_threshold,very_high_threshold,epi_start,epi_start_ci_lower,epi_start_ci_upper,epi_duration,epi_duration_ci_lower,epi_duration_ci_upper,regular_seasons,year,state_country_name,type,dataset,scale
0,11,1805788,0.803458,0,0.103162,0.863224,1.16816,12.0,9.0,22.0,8.0,4.0,15.0,"2010, 2013, 2014, 2016",2017,Rondônia,Estado,1,1
1,12,829619,0.953503,0,0.685939,1.783885,3.053363,24.0,5.0,49.0,6.0,3.0,16.0,"2010, 2011, 2012, 2013, 2014, 2015, 2016",2017,Acre,Estado,1,1
2,13,4063614,0.119378,0,0.042171,0.165851,0.212193,12.0,4.0,25.0,12.0,6.0,13.0,"2010, 2012, 2013, 2014, 2015, 2016",2017,Amazonas,Estado,1,1
3,14,522636,0.473615,0,0.414595,0.593964,0.801757,26.0,13.0,50.0,6.0,3.0,10.0,"2012, 2013, 2014, 2015, 2016",2017,Roraima,Estado,1,1
4,15,8366628,0.25623,0,0.083865,0.466878,0.859727,12.0,10.0,18.0,10.0,5.0,13.0,"2012, 2014, 2015, 2016",2017,Pará,Estado,1,1


In [26]:
# primary_keys
pks[dataset] = ['dataset', 'scale', 'state_country', 'year']

dfs[dataset].set_index(pks[dataset], inplace=True)
dfs[dataset].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,population,geom_average_peak,low_activiy_region,pre_epidemic_threshold,high_threshold,very_high_threshold,epi_start,epi_start_ci_lower,epi_start_ci_upper,epi_duration,epi_duration_ci_lower,epi_duration_ci_upper,regular_seasons,state_country_name,type
dataset,scale,state_country,year,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,1,11,2017,1805788,0.803458,0,0.103162,0.863224,1.16816,12.0,9.0,22.0,8.0,4.0,15.0,"2010, 2013, 2014, 2016",Rondônia,Estado
1,1,12,2017,829619,0.953503,0,0.685939,1.783885,3.053363,24.0,5.0,49.0,6.0,3.0,16.0,"2010, 2011, 2012, 2013, 2014, 2015, 2016",Acre,Estado
1,1,13,2017,4063614,0.119378,0,0.042171,0.165851,0.212193,12.0,4.0,25.0,12.0,6.0,13.0,"2010, 2012, 2013, 2014, 2015, 2016",Amazonas,Estado
1,1,14,2017,522636,0.473615,0,0.414595,0.593964,0.801757,26.0,13.0,50.0,6.0,3.0,10.0,"2012, 2013, 2014, 2015, 2016",Roraima,Estado
1,1,15,2017,8366628,0.25623,0,0.083865,0.466878,0.859727,12.0,10.0,18.0,10.0,5.0,13.0,"2012, 2014, 2015, 2016",Pará,Estado


## mem-typical

In [27]:
dataset = 'mem-typical'

In [28]:
dfs[dataset].head()

Unnamed: 0,UF,População,epiweek,corredor baixo,corredor mediano,corredor alto,ano,Unidade da Federação,Tipo,dado,escala
0,11,1805788,1,0.0,0.013703,0.041109,2017,Rondônia,Estado,srag,incidência
1,11,1805788,2,0.0,0.0,0.0,2017,Rondônia,Estado,srag,incidência
2,11,1805788,3,0.0,0.013703,0.041109,2017,Rondônia,Estado,srag,incidência
3,11,1805788,4,0.0,0.02879,0.087644,2017,Rondônia,Estado,srag,incidência
4,11,1805788,5,0.0,0.0,0.0,2017,Rondônia,Estado,srag,incidência


In [29]:
dfs[dataset].keys()

Index(['UF', 'População', 'epiweek', 'corredor baixo', 'corredor mediano',
       'corredor alto', 'ano', 'Unidade da Federação', 'Tipo', 'dado',
       'escala'],
      dtype='object')

In [30]:
migration_rules = {
    'UF': 'state_country',
    'População': 'population',
    'corredor baixo': 'low',
    'corredor mediano': 'median',
    'corredor alto': 'high',
    'ano': 'year',
    'Unidade da Federação': 'state_country_name', # Not needed in the table
    'Tipo': 'type',
    'dado': 'dataset',
    'escala': 'scale'
}

dfs[dataset].rename(
    columns=migration_rules, inplace=True
)
dfs[dataset].dataset = dfs[dataset].dataset.map(dataset_id)
dfs[dataset].scale = dfs[dataset].scale.map(scale_id)
dfs[dataset].head()

Unnamed: 0,state_country,population,epiweek,low,median,high,year,state_country_name,type,dataset,scale
0,11,1805788,1,0.0,0.013703,0.041109,2017,Rondônia,Estado,1,1
1,11,1805788,2,0.0,0.0,0.0,2017,Rondônia,Estado,1,1
2,11,1805788,3,0.0,0.013703,0.041109,2017,Rondônia,Estado,1,1
3,11,1805788,4,0.0,0.02879,0.087644,2017,Rondônia,Estado,1,1
4,11,1805788,5,0.0,0.0,0.0,2017,Rondônia,Estado,1,1


In [31]:
# primary_keys
pks[dataset] = ['dataset', 'scale', 'state_country', 'year', 'epiweek']

dfs[dataset].set_index(pks[dataset], inplace=True)
dfs[dataset].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,population,low,median,high,state_country_name,type
dataset,scale,state_country,year,epiweek,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,1,11,2017,1,1805788,0.0,0.013703,0.041109,Rondônia,Estado
1,1,11,2017,2,1805788,0.0,0.0,0.0,Rondônia,Estado
1,1,11,2017,3,1805788,0.0,0.013703,0.041109,Rondônia,Estado
1,1,11,2017,4,1805788,0.0,0.02879,0.087644,Rondônia,Estado
1,1,11,2017,5,1805788,0.0,0.0,0.0,Rondônia,Estado


@TODO convert regions id to numerical value