# Database migration rules

In [None]:
import fludashboard as flud
import pandas as pd
import os
import glob

In [59]:
# pandas configuration
pd.set_option('display.max_columns', 99)

In [19]:
def get_filename_from_path(file_path: str):
    """
    """
    return file_path.split(os.path.sep)[-1].split('.')[0]

In [14]:
path_data_files = os.path.join(flud.__path__[0], 'data', '*.csv')

In [None]:
dfs = {}
pks = {}

In [42]:
print('Data files:')
for file_path in glob.glob(path_data_files):
    filename = get_filename_from_path(file_path)
    
    print(filename)
    
    dfs[filename] = pd.read_csv(file_path)

Data files:
mem-report
current_estimated_values
clean_data_epiweek-weekly-incidence_w_situation


  interactivity=interactivity, compiler=compiler, result=result)


historical_estimated_values
mem-typical


## current_estimated_values

In [43]:
dataset = 'current_estimated_values'

In [34]:
dfs[dataset].head()

Unnamed: 0,UF,epiyear,epiweek,SRAG,Tipo,Situation,mean,50%,2.5%,97.5%,L0,L1,L2,L3,Run date,dado,escala
0,11,2009,1,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
1,11,2009,2,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
2,11,2009,3,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
3,11,2009,4,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
4,11,2009,5,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência


In [44]:
migration_rules = {
    'UF': 'state_country',
    'SRAG': 'value',
    'Tipo': 'type',
    'Situation': 'situation',
    '50%': 'average',
    '2.5%': 'ci_inferior',
    '97.5%': 'ci_superior',
    'L0': 'l0',  # maybe should be better another name more descriptive
    'L1': 'l1',  # maybe should be better another name more descriptive
    'L2': 'l2',  # maybe should be better another name more descriptive
    'L3': 'l3',  # maybe should be better another name more descriptive
    'Run date': 'run_date',
    'dado': 'dataset',  # or origin
    'escala': 'scale'
}

dfs[dataset].rename(
    columns=migration_rules, inplace=True
)
dfs[dataset].head()

Unnamed: 0,state_country,epiyear,epiweek,value,type,situation,mean,average,ci_inferior,ci_superior,l0,l1,l2,l3,run_date,dataset,scale
0,11,2009,1,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
1,11,2009,2,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
2,11,2009,3,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
3,11,2009,4,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência
4,11,2009,5,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,srag,incidência


In [45]:
# primary_keys
pks[dataset] = ['dataset', 'scale', 'state_country', 'epiyear', 'epiweek']

dfs[dataset].set_index(pks[dataset], inplace=True)
dfs[dataset].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,value,type,situation,mean,average,ci_inferior,ci_superior,l0,l1,l2,l3,run_date
dataset,scale,state_country,epiyear,epiweek,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
srag,incidência,11,2009,1,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11
srag,incidência,11,2009,2,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11
srag,incidência,11,2009,3,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11
srag,incidência,11,2009,4,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11
srag,incidência,11,2009,5,0.0,Estado,stable,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11


## historical_estimated_values

In [46]:
dataset = 'historical_estimated_values'

In [39]:
dfs[dataset].head()

Unnamed: 0,UF,epiyear,epiweek,SRAG,Tipo,Situation,mean,50%,2.5%,97.5%,L0,L1,L2,L3,Run date,base_epiyearweek,base_epiyear,base_epiweek,dado,escala
0,11,2017,7,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.055377,0.976,0.024,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
1,11,2017,8,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.972,0.028,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
2,11,2017,9,0.0,Estado,estimated,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
3,11,2017,10,0.0,Estado,estimated,0.0,0.0,0.0,0.055377,0.996,0.004,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
4,11,2017,11,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.964,0.036,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência


In [47]:
migration_rules = {
    'UF': 'state_country',
    'SRAG': 'value',
    'Tipo': 'type',
    'Situation': 'situation',
    '50%': 'average',
    '2.5%': 'ci_inferior',
    '97.5%': 'ci_superior',
    'L0': 'l0',  # maybe should be better another name more descriptive
    'L1': 'l1',  # maybe should be better another name more descriptive
    'L2': 'l2',  # maybe should be better another name more descriptive
    'L3': 'l3',  # maybe should be better another name more descriptive
    'Run date': 'run_date',
    'dado': 'dataset',  # or origin
    'escala': 'scale'
}

dfs[dataset].rename(
    columns=migration_rules, inplace=True
)
dfs[dataset].head()

Unnamed: 0,state_country,epiyear,epiweek,value,type,situation,mean,average,ci_inferior,ci_superior,l0,l1,l2,l3,run_date,base_epiyearweek,base_epiyear,base_epiweek,dataset,scale
0,11,2017,7,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.055377,0.976,0.024,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
1,11,2017,8,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.972,0.028,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
2,11,2017,9,0.0,Estado,estimated,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
3,11,2017,10,0.0,Estado,estimated,0.0,0.0,0.0,0.055377,0.996,0.004,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência
4,11,2017,11,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.964,0.036,0.0,0.0,2017-10-11,2017W23,2017,23,srag,incidência


In [50]:
# primary_keys
pks[dataset] = [
    'dataset', 'scale', 'state_country', 
    'base_epiyear', 'base_epiweek', 
    'epiyear', 'epiweek'
]

dfs[dataset].set_index(pks[dataset], inplace=True)
dfs[dataset].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,value,type,situation,mean,average,ci_inferior,ci_superior,l0,l1,l2,l3,run_date,base_epiyearweek
dataset,scale,state_country,base_epiyear,base_epiweek,epiyear,epiweek,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
srag,incidência,11,2017,23,2017,7,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.055377,0.976,0.024,0.0,0.0,2017-10-11,2017W23
srag,incidência,11,2017,23,2017,8,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.972,0.028,0.0,0.0,2017-10-11,2017W23
srag,incidência,11,2017,23,2017,9,0.0,Estado,estimated,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2017-10-11,2017W23
srag,incidência,11,2017,23,2017,10,0.0,Estado,estimated,0.0,0.0,0.0,0.055377,0.996,0.004,0.0,0.0,2017-10-11,2017W23
srag,incidência,11,2017,23,2017,11,0.055377,Estado,estimated,0.055377,0.055377,0.055377,0.110755,0.964,0.036,0.0,0.0,2017-10-11,2017W23


## clean_data_epiweek-weekly-incidence_w_situation

In [51]:
dataset = 'clean_data_epiweek-weekly-incidence_w_situation'

In [60]:
dfs[dataset].head()

Unnamed: 0,0-4 anos,10-19 anos,2-4 anos,20-29 anos,30-39 anos,40-49 anos,5-9 anos,50-59 anos,60+ anos,< 2 anos,DELAYED,FLU_A,FLU_B,INCONCLUSIVE,Idade desconhecida,NEGATIVE,NOTTESTED,OTHERS,POSITIVE_CASES,SRAG,Situation,TESTING_IGNORED,Tipo,UF,Unidade da Federação,VSR,dado,epiweek,epiyear,epiyearweek,escala,sexo
0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,1,2009,2009W01,incidência,F
1,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,1,2009,2009W01,incidência,M
2,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,1,2009,2009W01,incidência,Total
3,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,2,2009,2009W02,incidência,F
4,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,2,2009,2009W02,incidência,M


In [54]:
dfs[dataset].keys()

Index(['0-4 anos', '10-19 anos', '2-4 anos', '20-29 anos', '30-39 anos',
       '40-49 anos', '5-9 anos', '50-59 anos', '60+ anos', '< 2 anos',
       'DELAYED', 'FLU_A', 'FLU_B', 'INCONCLUSIVE', 'Idade desconhecida',
       'NEGATIVE', 'NOTTESTED', 'OTHERS', 'POSITIVE_CASES', 'SRAG',
       'Situation', 'TESTING_IGNORED', 'Tipo', 'UF', 'Unidade da Federação',
       'VSR', 'dado', 'epiweek', 'epiyear', 'epiyearweek', 'escala', 'sexo'],
      dtype='object')

In [61]:
# @TODO: these rules is under construction :)
migration_rules = {
    '0-4 anos': '0_4_years',
    '10-19 anos': '10_19_years',
    '2-4 anos': '2_4_years',
    '20-29 anos': '20_29_years',
    '30-39 anos': '30_39_years',
    '40-49 anos': '40_49_years',
    '5-9 anos': '5_9_years',
    '50-59 anos': '50_59_years',
    '60+ anos': '60_years_or_more',
    '< 2 anos': 'lt_2_years',
    'DELAYED': 'delayed',
    'FLU_A': 'flu_a',
    'FLU_B': 'flu_b',
    'INCONCLUSIVE': 'inconclusive',
    'Idade desconhecida': 'unknown_age',
    'NEGATIVE': 'negative',
    'NOTTESTED': 'not_tested',
    'OTHERS': 'others',
    'POSITIVE_CASES': 'positive_cases',
    'SRAG': 'value',
    'Situation': 'situation',
    'Tipo': 'type',
    'UF': 'state_contry',
    'Unidade da Federação': 'state_country_name',
    'VSR': 'vsr',
    'dado': 'dataset',  # or origin
    'escala': 'scale',
    'sexo': 'gender'
}

dfs[dataset].rename(
    columns=migration_rules, inplace=True
)
dfs[dataset].head()

Unnamed: 0,0_4_years,10_19_years,2_4_years,20_29_years,30_39_years,40_49_years,5_9_years,50_59_years,60_years_or_more,lt_2_years,delayed,flu_a,flu_b,inconclusive,unknown_age,negative,not_tested,others,positive_cases,value,situation,TESTING_IGNORED,type,state_contry,state_country_name,vsr,dataset,epiweek,epiyear,epiyearweek,scale,gender
0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,1,2009,2009W01,incidência,F
1,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,1,2009,2009W01,incidência,M
2,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,1,2009,2009W01,incidência,Total
3,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,2,2009,2009W02,incidência,F
4,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,stable,0.0,Estado,11,Rondônia,0.0,srag,2,2009,2009W02,incidência,M
