Below you will find a simple exploration of data pertaining to Neglected Tropical Diseases that can be found on Datasus.


\
\
Each data file for each of the conditions consists of several disease notifications containing information such as the date, location, personal information, etc. The code below performs two simple actions:
- Firstly, it organizes the information in the columns that point to a Federative Unit (UF). Every file from 2006 or earlier stores that information as the acronym of the state, whereas every file from 2007 onward refers to it by a numeric code, so a dictionary is applied to translate the code into the acronym.

- Secondly, it counts the number of occurrences of each condition per month, grouping them by Year of Notification and Federative Unit.

### Setup

In [None]:
import os
import pandas as pd
import numpy as np
from collections import defaultdict

In [None]:
# Relative path of the directory holding the Datasus CSV files.
path = '../../data/Datasus'

# Numeric Federative Unit (UF) code -> state acronym.
# Looking up a code that is not listed yields 'Invalid'.
uf_dict = defaultdict(
    lambda: 'Invalid',
    {
        11: 'RO', 12: 'AC', 13: 'AM', 14: 'RR', 15: 'PA', 16: 'AP', 17: 'TO',
        21: 'MA', 22: 'PI', 23: 'CE', 24: 'RN', 25: 'PB', 26: 'PE', 27: 'AL',
        28: 'SE', 29: 'BA',
        31: 'MG', 32: 'ES', 33: 'RJ', 35: 'SP',
        41: 'PR', 42: 'SC', 43: 'RS',
        50: 'MS', 51: 'MT', 52: 'GO', 53: 'DF',
    },
)

In [None]:
# os.listdir returns entries in arbitrary order; sorting makes positional
# lookups such as files_dict['DENGBR'][9] select the same file on every run.
files_list = sorted(os.listdir(path))

# Dataset prefix -> names of the files whose name contains that prefix.
files_dict = defaultdict(list)

# Prefixes of the notification datasets handled in this notebook.
dataset_prefixes = (
    'ANIMBR', 'CHAGBR', 'CHIKBR', 'DENGBR', 'ESQUBR',
    'HANSBR', 'LEIVBR', 'LTANBR', 'RAIVBR',
)

for file in files_list:
    # A file is stored under the first prefix found in its name;
    # files matching no prefix are ignored.
    for prefix in dataset_prefixes:
        if prefix in file:
            files_dict[prefix].append(file)
            break


In [None]:
def extract_data(data_list:list, columns:list[str]=['DT_NOTIFIC', 'SG_UF_NOT'], path:str=path, low_memory:bool=False) -> pd.DataFrame:
    """Count notifications per month for each (year, state) pair in the given files.

    Args:
        data_list: Names of the CSV files to read from ``path``.
        columns: Column names to load. ``columns[0]`` is the notification
            date, which is split on '-' to obtain year and month (presumably
            'YYYY-MM-DD' -- confirm against the data). ``columns[1]`` is the
            Federative Unit, either as an acronym or as a numeric code.
            The default list is only read, never mutated.
        path: Directory containing the files.
        low_memory: Forwarded to ``pandas.read_csv``.

    Returns:
        A DataFrame indexed by '<year>-<state>' with the columns 'Year',
        'State', 'Jan' ... 'Dec' and 'Total'. Numeric codes missing from
        ``uf_dict`` are reported under the state 'Invalid'.
    """
    date_col, uf_col = columns[0], columns[1]
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # Accumulator per key: [year, state, 12 monthly counts, total].
    # The first two slots are placeholders overwritten when the key is first used.
    primary_dict = defaultdict(lambda: ['Year', 'State', np.zeros(12, dtype=int), 0])

    for file in data_list:
        # Parse only the requested columns instead of the whole file.
        df = pd.read_csv(f'{path}/{file}', encoding='ISO-8859-1', low_memory=low_memory, usecols=columns)

        for row in df.to_dict(orient='records'):
            state = row[uf_col]
            if isinstance(state, int|float):
                # Numeric code -> acronym. .get() avoids inserting unknown
                # (or NaN) codes into the shared uf_dict on every lookup.
                state = uf_dict.get(state, 'Invalid')

            date = row[date_col].split('-')
            entry = primary_dict[date[0] + '-' + state]
            entry[0] = int(date[0])
            entry[1] = state
            entry[2][int(date[1]) - 1] += 1
            entry[3] += 1

    main_df = pd.DataFrame.from_dict(primary_dict, orient='index', columns=['Year', 'State', 'Months', 'Total'])

    # Expand each row's array of 12 counts into one column per month.
    aux_df = pd.DataFrame(main_df['Months'].tolist(), columns=month_names)
    aux_df.index = main_df.index

    # drop() returns a new frame, so its result has to be used.
    main_df = pd.concat([main_df.drop(columns=['Months']), aux_df], axis=1)

    return main_df[['Year', 'State', *month_names, 'Total']]

### Animais Peçonhentos

In [None]:
# Load ANIMBR07.csv and display its column names.
# NOTE(review): only the header is used by this cell; nrows=0 would avoid
# reading every row -- confirm df is not inspected further before changing.
df = pd.read_csv(f'{path}/ANIMBR07.csv', encoding='ISO-8859-1')

df.columns

In [None]:
# Monthly notification counts per year and state over all ANIMBR files.
df = extract_data(data_list=files_dict['ANIMBR'])
df

### Doença de Chagas

In [None]:
# Load CHAGBR19.csv and display its column names.
# NOTE(review): only the header is used by this cell; nrows=0 would avoid
# reading every row -- confirm df is not inspected further before changing.
df = pd.read_csv(f'{path}/CHAGBR19.csv', encoding='ISO-8859-1')

df.columns

In [None]:
# Monthly notification counts per year and state over all CHAGBR files.
df = extract_data(data_list=files_dict['CHAGBR'])
df

### Chikungunya

In [None]:
# Load CHIKBR15.csv and display its column names.
# NOTE(review): only the header is used by this cell; nrows=0 would avoid
# reading every row -- confirm df is not inspected further before changing.
df = pd.read_csv(f'{path}/CHIKBR15.csv', encoding='ISO-8859-1')

df.columns

In [None]:
# Monthly notification counts per year and state over all CHIKBR files.
df = extract_data(data_list=files_dict['CHIKBR'])
df

### Dengue

In [None]:
# Load DENGBR00.csv and display its column names as a plain list.
# NOTE(review): only the header is used by this cell; nrows=0 would avoid
# reading every row -- confirm df is not inspected further before changing.
df = pd.read_csv(f'{path}/DENGBR00.csv', encoding='ISO-8859-1')

list(df.columns)

In [None]:
# Load one Dengue file, chosen by its position in files_dict['DENGBR'], and display it.
# The key inside the replacement field uses double quotes: reusing the outer
# quote character inside an f-string is a SyntaxError before Python 3.12.
df = pd.read_csv(f'{path}/{files_dict["DENGBR"][9]}', encoding='ISO-8859-1')
df

In [None]:
# Display the names of the Dengue files that were found.
files_dict['DENGBR']

In [None]:
# Aggregate only the Dengue file at position 7; the slice keeps a list
# (of at most one name) for extract_data to iterate over.
# low_memory is left at extract_data's default, which is already False.
df = extract_data(files_dict['DENGBR'][7:8])
df

### Esquistossomose

In [None]:
# Load ESQUBR07.csv and display its column names.
# NOTE(review): only the header is used by this cell; nrows=0 would avoid
# reading every row -- confirm df is not inspected further before changing.
df = pd.read_csv(f'{path}/ESQUBR07.csv', encoding='ISO-8859-1')

df.columns

In [None]:
# Monthly notification counts per year and state over all ESQUBR files.
df = extract_data(data_list=files_dict['ESQUBR'])
df

### Hanseníase

In [None]:
# Load HANSBR01.csv and display its column names.
# NOTE(review): only the header is used by this cell; nrows=0 would avoid
# reading every row -- confirm df is not inspected further before changing.
df = pd.read_csv(f'{path}/HANSBR01.csv', encoding='ISO-8859-1')

df.columns

In [None]:
# Monthly notification counts per year and state over all HANSBR files.
df = extract_data(data_list=files_dict['HANSBR'])
df

### Leishmaniose Visceral

In [None]:
# Load LEIVBR01.csv and display its column names.
# NOTE(review): only the header is used by this cell; nrows=0 would avoid
# reading every row -- confirm df is not inspected further before changing.
df = pd.read_csv(f'{path}/LEIVBR01.csv', encoding='ISO-8859-1')

df.columns

In [None]:
# Monthly notification counts per year and state over all LEIVBR files.
df = extract_data(data_list=files_dict['LEIVBR'])
df

### Leishmaniose Tegumentar Americana

In [None]:
# Load LTANBR01.csv and display its column names.
# NOTE(review): only the header is used by this cell; nrows=0 would avoid
# reading every row -- confirm df is not inspected further before changing.
df = pd.read_csv(f'{path}/LTANBR01.csv', encoding='ISO-8859-1')

df.columns

In [None]:
# Monthly notification counts per year and state over all LTANBR files.
df = extract_data(data_list=files_dict['LTANBR'])
df

### Raiva

In [None]:
# Load RAIVBR07.csv and display its column names.
# NOTE(review): only the header is used by this cell; nrows=0 would avoid
# reading every row -- confirm df is not inspected further before changing.
df = pd.read_csv(f'{path}/RAIVBR07.csv', encoding='ISO-8859-1')

df.columns

In [None]:
# Monthly notification counts per year and state over all RAIVBR files.
df = extract_data(data_list=files_dict['RAIVBR'])
df