In [181]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme()  # Apply seaborn's default theme to the matplotlib figures drawn below.

<h3 style="color:green">This code automates the creation of reports that are essentially the same every month. Parts of the code are in PT-BR because the dataset itself is in Portuguese. Here we are working with data from SUS (Brazil's public healthcare system) on births in the state of Rondônia (RO) in 2019.</h3>

In [180]:
import os
import pandas as pd
import matplotlib.pyplot as plt

def read_months(meses: list, sinasc_list: list = None) -> list:
    """Load the monthly SINASC CSV files into a list of DataFrames.

    Parameters
    ----------
    meses : list
        Month tags; each one is inserted into the file name
        ``./input/SINASC_RO_2019_<tag>.csv``.
    sinasc_list : list, optional
        Existing list to append the loaded DataFrames to. A new list is
        created when omitted.

    Returns
    -------
    list
        The list holding one DataFrame per file that could be read, in the
        order of ``meses``. A missing file is reported on stdout and skipped.
    """
    frames = [] if sinasc_list is None else sinasc_list

    for month_tag in meses:
        csv_path = f'./input/SINASC_RO_2019_{month_tag}.csv'
        try:
            month_frame = pd.read_csv(csv_path)
        except FileNotFoundError:
            # Best effort: a missing month must not abort the other months.
            print(f'File {csv_path} not found.')
        else:
            frames.append(month_frame)

    return frames

# Month tags that read_months inserts into the monthly CSV file names.
months = ['MAR', 'ABR', 'MAI', 'JUN', 'DEZ']

# One DataFrame per month whose CSV exists; read_months skips missing files.
df_list = read_months(months)

def plot_pivot_table(df, value, index, func, ylabel, xlabel, opcao='nada'):
    """Plot ``value`` aggregated by ``index`` on a new 15x5 pyplot figure.

    Parameters
    ----------
    df : DataFrame
        Source data.
    value : str
        Column to aggregate; also used as the legend title.
    index : str or list
        Column(s) to group by.
    func : str or callable
        Aggregation passed to ``pd.pivot_table`` as ``aggfunc``.
    ylabel, xlabel : str
        Axis labels; the title is built as ``"<ylabel> vs <xlabel>"``.
    opcao : str, optional
        ``'sort'`` orders the aggregated table by ``value``; ``'unstack'``
        unstacks it before plotting; any other value leaves it untouched.

    Returns
    -------
    None
        The figure is left as the current pyplot figure so the caller can
        save or show it.
    """
    table = pd.pivot_table(df, index=index, values=value, aggfunc=func)

    # Optional reshaping step; unrecognised options fall through unchanged.
    if opcao == 'unstack':
        table = table.unstack()
    elif opcao == 'sort':
        table = table.sort_values(by=value)

    table.plot(figsize=[15, 5])

    # Decorate the current axes.
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(f'{ylabel} vs {xlabel}')
    plt.grid(True)
    plt.legend(title=value)

# Close every pyplot figure that is still open before the report figures are generated.
plt.close('all')

def list_dtnasc_months(dtnasc_list: list = None, dfs: list = None) -> list:
    """Collect one ``YYYY-MM``-style label per DataFrame.

    The label is the first seven characters of the largest ``DTNASC`` value
    in each frame, so ``DTNASC`` must hold strings.
    # NOTE(review): the ``YYYY-MM`` reading assumes ISO-formatted date
    # strings (e.g. ``2019-03-31``) - confirm against the input files.

    Parameters
    ----------
    dtnasc_list : list, optional
        Existing list to append the labels to. A new list is created when
        omitted.
    dfs : list, optional
        DataFrames to derive the labels from. When omitted, the module-level
        ``df_list`` is used, which keeps the old zero-argument call working.

    Returns
    -------
    list
        ``dtnasc_list`` with one label appended per DataFrame, in order.
    """
    if dtnasc_list is None:
        dtnasc_list = []
    if dfs is None:
        # Backward-compatible fallback: the frames loaded at module level.
        dfs = df_list

    dtnasc_list.extend(df.DTNASC.max()[:7] for df in dfs)
    return dtnasc_list

def savefile(df_list, dtnasc_list: list = None):
    """Save one line chart per monthly DataFrame under ``./output/figs/<label>/``.

    For every (label, DataFrame) pair this prints the label and the frame's
    shape, creates the label's folder if needed, plots the mean of
    ``IDADEMAE`` per ``DTNASC`` through ``plot_pivot_table`` and writes the
    figure as a PNG inside that folder.

    Parameters
    ----------
    df_list : list
        DataFrames to chart; each needs ``DTNASC`` and ``IDADEMAE`` columns.
    dtnasc_list : list, optional
        Folder label for each DataFrame, in the same order. When omitted, the
        labels are derived from ``df_list`` itself: the first seven characters
        of each frame's largest ``DTNASC`` value.

    Returns
    -------
    None
    """
    if dtnasc_list is None:
        # Resolve the labels at call time from the frames actually passed in.
        # A default computed in the signature is evaluated only once, when the
        # function is defined, and would not follow later changes to the data.
        dtnasc_list = [df.DTNASC.max()[:7] for df in df_list]

    for dtnasc, df in zip(dtnasc_list, df_list):
        print(f'Date: {dtnasc}, Shape: {df.shape}')
        fig_dir = './output/figs/' + dtnasc
        os.makedirs(fig_dir, exist_ok=True)
        # NOTE(review): the plotted value is the mean of IDADEMAE, while the
        # y-label and file name say "quantidade de nascimento(s)" - confirm
        # the intended wording.
        plot_pivot_table(df, 'IDADEMAE', 'DTNASC', 'mean', 'quantidade de nascimento', 'data de nascimento')
        plt.savefig(fig_dir + '/media quantidade de nascimentos por data.png')
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close()

# Reload the monthly CSVs and write one figure per month under ./output/figs/.
# `months` is the module-level list of month tags; `meses` exists only as a
# parameter name inside read_months, so passing it here raises NameError on a
# fresh kernel.
df_list = read_months(months)
savefile(df_list)

Date: 2019-03, Shape: (6844, 11)
Date: 2019-04, Shape: (9217, 11)
Date: 2019-05, Shape: (11582, 11)
Date: 2019-06, Shape: (13758, 11)
Date: 2019-12, Shape: (27028, 11)
