In [1]:
# Import packages

import pandas as pd
import pandas.testing as tm
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import statsmodels.api as sm
import statsmodels.discrete.discrete_model as dm

from patsy import dmatrices
import statsmodels.graphics.tsaplots as tsa


from scipy.fft import fft, ifft, fftfreq

import statsmodels.formula.api as smf
from statsmodels.tsa.stattools import acf

import itertools
from itertools import combinations, chain

from scipy.stats import pearsonr

import re

from datetime import datetime

import pymannkendall as mk

import math

from scipy.stats import friedmanchisquare

from sklearn.metrics import r2_score

from pmdarima.preprocessing import FourierFeaturizer
from pmdarima.datasets import load_wineind
from sklearn.linear_model import LinearRegression


import warnings
warnings.filterwarnings("ignore")


# Our functions

import function_single_serie

# Read data

In [2]:
# Path of the AESOP parquet file loaded below; kept in a variable so the
# location is visible in one place.
aesop_parquet_path = '/opt/storage/refined/aesop/visualization/aesop_2025_05_09_mun_new.parquet'

# Raw input frame; every later step in this notebook derives from `df`.
df = pd.read_parquet(aesop_parquet_path)




# Process data for Negbi Model

In [3]:
# Step 1 - derive the per-city inputs from the raw frame.
# NOTE(review): the helper's return type is not visible here; the name suggests
# a list holding one DataFrame per city - confirm in function_single_serie.
lst_dfs_cities = function_single_serie.lst_dfs_cities(
    df,
    city_code_col="co_ibge",
    epiweek_date_col="year_week",
    serie_col="num_otc_ivas",
)

# Step 2 - run the trend step on the `num_otc_ivas_4` series.
# The result is used below as a single frame that has, at least, the columns
# `co_ibge`, `num_otc_ivas_4` and `trend_line_negbi_num_otc_ivas_4`.
final_kendall_negbi = function_single_serie.final_kendall_negbi(
    lst_dfs_cities,
    serie="num_otc_ivas_4",
)

# Step 3 - add `dtrend_otc_negbi`: the series minus its trend-line column.
final_kendall_negbi = final_kendall_negbi.assign(
    dtrend_otc_negbi=lambda frame: (
        frame["num_otc_ivas_4"] - frame["trend_line_negbi_num_otc_ivas_4"]
    )
)

# Split the trend results into one DataFrame per city code (`co_ibge`).
#
# A single groupby pass replaces the previous "filter the whole frame once per
# city" loop, which scanned every row for every city. `sort=False` yields the
# groups in order of first appearance, i.e. the same order `unique()` gave.
# Rows with a missing `co_ibge` are skipped (groupby's default), so no empty
# frame is produced for them and passed on to the later per-city steps.
lst = []

for city, set_muni in final_kendall_negbi.groupby("co_ibge", sort=False):
    lst.append(set_muni)

# Later steps work on `lst_dfs_cities`; it is the same list object as `lst`.
lst_dfs_cities = lst

# Test each city's detrended series for seasonality with the Friedman test and
# store the test's `p_value` and `test_stat` on that city's frame.
#
# NOTE(review): `freq` is set to half the series length. How friedman_test
# interprets `freq` is not visible here - confirm this is the intended period,
# and that very short series (length < 2 gives freq == 0) are handled.
for i, city_frame in enumerate(lst_dfs_cities):
    data = city_frame["dtrend_otc_negbi"].to_numpy()
    p = len(data) // 2

    res_test = function_single_serie.friedman_test(data, freq=p)

    # Replace the list entry with a copy carrying the two new columns.
    lst_dfs_cities[i] = city_frame.assign(
        p_value_otc_negbi_friedman=res_test["p_value"],
        test_stat_otc_negbi_friedman=res_test["test_stat"],
    )

# Format data: stack the per-city frames back into one long table.
# Rows are appended city after city and each row keeps its original index
# label (the defaults, written out explicitly).
final = pd.concat(lst_dfs_cities, axis=0, ignore_index=False)

# Columns kept in the final table, in output order. The list is used to index
# `final` directly, so a name that is absent from the frame raises KeyError.
# NOTE(review): `test_stat_otc_negbi_friedman` (added in the Friedman step) and
# `trend_line_negbi_num_otc_ivas_4` are not listed, so the selection drops
# them - confirm that is intended.
lst_var = ['co_ibge', 'epiyear', 'epiweek', 'atend_totais', 'atend_ivas',
       'atend_arbov', 'perc_missing', 'perc_completude', 'diff_2w', 'diff_3w',
       'sum_miss', 'completude', 'tempestividade', 'dqi', 'epidemi_cal_start',
       'epidemi_cal_end', 'sinal_ears_ivas', 'lim_esp_ears_ivas',
       'lim_esp_mmaing_ivas', 'sinal_mmaing_ivas', 'num_otc_ivas',
       'lim_esp_otc_ivas', 'sinal_otc_ivas', 'prob_gro_trend_ivas',
       'sinal_ears_arbov', 'lim_esp_ears_arbov', 'prob_gro_trend_arbov',
       'lim_esp_evi_ivas', 'sinal_evi_ivas', 'exc_evi_ivas',
       'lim_esp_evi_arbov', 'sinal_evi_arbov', 'exc_evi_arbov', 'co_uf',
       'nm_uf', 'co_ibge7', 'nm_municipio', 'sigla_uf', 'pop', 'year_week',
       'year_week_ts', 'ra_atend_ivas', 'ra_atend_ivas_ma',
       'gr_ra_atend_ivas_ma', 'ra_atend_arbov', 'ra_atend_arbov_ma',
       'gr_ra_atend_arbov_ma', 'sinal_ens_ivas', 'sinal_ens_arbov',
       'lim_esp_ens_ivas', 'lim_esp_ens_arbov', 'sinal_ears_consec_ivas',
       'sinal_ears_consec_ivas_012', 'sinal_mmaing_consec_ivas',
       'sinal_mmaing_consec_ivas_012', 'sinal_otc_consec_ivas',
       'sinal_otc_consec_ivas_012', 'sinal_ears_consec_arbov',
       'sinal_ears_consec_arbov_012', 'sinal_evi_consec_ivas',
       'sinal_evi_consec_ivas_012', 'sinal_evi_consec_arbov',
       'sinal_evi_consec_arbov_012', 'sinal_ens_consec_ivas',
       'sinal_ens_consec_ivas_012', 'sinal_ens_consec_arbov',
       'sinal_ens_consec_arbov_012', 'vot_ivas', 'vot_arbov',
       'lim_esp_ambos_ivas', 'lim_esp_ambos_arbov', 'exc_ears_ivas',
       'exc_mmaing_ivas', 'exc_otc_ivas', 'exc_ens_ivas', 'exc_ambos_ivas',
       'exc_ears_arbov', 'exc_ens_arbov', 'exc_ambos_arbov', 'ratio_otc_ivas',
       'sinal_evi_ivas_y', 'sinal_ears_ivas_y', 'sinal_mmaing_ivas_y',
       'sinal_otc_ivas_y', 'sinal_evi_arbov_y', 'sinal_ears_arbov_y',
       'sinal_ens_arbov_y', 'sinal_ens_ivas_y', 'vot_arbov_y', 'vot_ivas_y',
       'lim_esp_ml_ivas', 'sinal_ml_ivas', 'sinal_ml_consec_ivas',
       'sinal_ml_consec_ivas_012', 'exc_ml_ivas', 'sinal_ml_ivas_y', 
        'num_otc_ivas_4','num_otc_ivas_lag_1', 'num_otc_ivas_lag_2',
        'num_otc_ivas_lag_3','num_otc_ivas_lag_4','time_trend',
        'p_values_negbi_num_otc_ivas_4', 'dtrend_otc_negbi',
       'p_value_otc_negbi_friedman']

# New names for the four lag columns of the `num_otc_ivas` series.
lag_column_names = {
    "num_otc_ivas_lag_1": "otc_4_lag_1",
    "num_otc_ivas_lag_2": "otc_4_lag_2",
    "num_otc_ivas_lag_3": "otc_4_lag_3",
    "num_otc_ivas_lag_4": "otc_4_lag_4",
}

# Keep only the columns listed in `lst_var` (in that order), then apply the
# shorter lag names.
final = final[lst_var].rename(columns=lag_column_names)

In [5]:
# Display the column names of the formatted table as a plain Python list.
final.columns.tolist()

['co_ibge',
 'epiyear',
 'epiweek',
 'atend_totais',
 'atend_ivas',
 'atend_arbov',
 'perc_missing',
 'perc_completude',
 'diff_2w',
 'diff_3w',
 'sum_miss',
 'completude',
 'tempestividade',
 'dqi',
 'epidemi_cal_start',
 'epidemi_cal_end',
 'sinal_ears_ivas',
 'lim_esp_ears_ivas',
 'lim_esp_mmaing_ivas',
 'sinal_mmaing_ivas',
 'num_otc_ivas',
 'lim_esp_otc_ivas',
 'sinal_otc_ivas',
 'prob_gro_trend_ivas',
 'sinal_ears_arbov',
 'lim_esp_ears_arbov',
 'prob_gro_trend_arbov',
 'lim_esp_evi_ivas',
 'sinal_evi_ivas',
 'exc_evi_ivas',
 'lim_esp_evi_arbov',
 'sinal_evi_arbov',
 'exc_evi_arbov',
 'co_uf',
 'nm_uf',
 'co_ibge7',
 'nm_municipio',
 'sigla_uf',
 'pop',
 'year_week',
 'year_week_ts',
 'ra_atend_ivas',
 'ra_atend_ivas_ma',
 'gr_ra_atend_ivas_ma',
 'ra_atend_arbov',
 'ra_atend_arbov_ma',
 'gr_ra_atend_arbov_ma',
 'sinal_ens_ivas',
 'sinal_ens_arbov',
 'lim_esp_ens_ivas',
 'lim_esp_ens_arbov',
 'sinal_ears_consec_ivas',
 'sinal_ears_consec_ivas_012',
 'sinal_mmaing_consec_ivas',
 's