In [1]:
import pandas as pd 
import os
import matplotlib.pyplot as plt 

## Lendo o dataset

In [2]:
# Reference year; it selects which gold-layer extract is loaded below.
YEAR = 2025

# The project root is taken to be the parent of the current working directory
# (presumably the notebook lives one level below the root -- confirm).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
datalake_dir = os.path.join(project_root, "datalake", "gold")

# Create the gold directory when it is missing (no-op if it already exists).
os.makedirs(datalake_dir, exist_ok=True)

# Load the extract for YEAR; no date parsing is requested, so DT_NOTIFIC
# stays as text.
read_path = os.path.join(datalake_dir, f"srag_gold_notebook_{YEAR}.csv")
df = pd.read_csv(read_path, sep=",")

df.head()

Unnamed: 0,DT_NOTIFIC,AVE_SUINO,FEBRE,DISPNEIA,DESC_RESP,FATOR_RISC,VACINA,ANTIVIRAL,UTI,EVOLUCAO,SURTO_SG
0,2024-12-29,"Não, nenhum",,Sim,,Sim,Não,Não,Não,Cura,Não
1,2024-12-29,"Não, nenhum",Não,Sim,Sim,,Não,Não,,Cura,Não
2,2024-12-29,"Não, nenhum",Não,Sim,Sim,Sim,Não,Sim,Sim,Óbito,Não
3,2024-12-30,"Não, nenhum",Não,Não,Sim,Sim,Não,Não,Sim,Cura,
4,2024-12-30,Ignorado,Sim,Não,Não,Sim,Sim,Não,Não,Cura,Ignorado


## Definindo uma data de início e final para teste

In [3]:
# Analysis windows as "YYYY-MM-DD" strings: the current period and the
# previous period it is compared against.
current_start_date, current_end_date = '2025-01-01', '2025-12-31'
previous_start_date, previous_end_date = '2024-01-01', '2024-12-31'

# Window used by the per-indicator cells; here it is the current period.
start_date, end_date = current_start_date, current_end_date

## Variação percentual de casos notificados em relação ao período anterior

In [4]:
# Count notifications in each window. DT_NOTIFIC is compared against the
# "YYYY-MM-DD" window strings with both bounds inclusive.
current_mask = df['DT_NOTIFIC'].between(current_start_date, current_end_date)
previous_mask = df['DT_NOTIFIC'].between(previous_start_date, previous_end_date)

current_event_cases = int(current_mask.sum())
previous_event_cases = int(previous_mask.sum())

# The relative change is undefined when the previous window has no rows;
# report NaN instead of raising ZeroDivisionError.
# NOTE(review): the saved run shows only 184 previous-period rows against
# 171108 current ones, and the first rows of the data are dated 2024-12-29.
# Presumably this extract barely covers the previous window, so the resulting
# percentage is not a true period-over-period figure -- confirm.
if previous_event_cases > 0:
    perc_increase = ((current_event_cases - previous_event_cases) / previous_event_cases) * 100
else:
    perc_increase = float("nan")

df_increase = pd.DataFrame({
    "Start Date": [current_start_date],
    "End Date": [current_end_date],
    "Current Cases": [current_event_cases],
    "Previous Cases": [previous_event_cases],
    "Percentage Increase": [perc_increase],
})
df_increase

Unnamed: 0,Start Date,End Date,Current Cases,Previous Cases,Percentage Increase
0,2025-01-01,2025-12-31,171108,184,92893.478261


## Proporção de pacientes que apresentaram contato com Aves ou Suínos

In [5]:
# Restrict to notifications inside the analysis window (bounds inclusive).
interval_df = df[df['DT_NOTIFIC'].between(start_date, end_date)]

# Only rows with one of these three answers enter the denominator; any other
# value (e.g. "Ignorado" or missing) is excluded.
ave_suino_evolution = interval_df[
    interval_df['AVE_SUINO'].isin(["Sim, aves e/ou suínos", "Sim, outros", "Não, nenhum"])
]
ave_suino_total = ave_suino_evolution.shape[0]

# Rows whose answer is one of the two "Sim, ..." options.
ave_suino_case = int(
    ave_suino_evolution['AVE_SUINO'].isin(["Sim, aves e/ou suínos", "Sim, outros"]).sum()
)

# With no informative rows the percentage is undefined: report NaN instead of
# raising ZeroDivisionError.
if ave_suino_total > 0:
    percentage_ave_suino = (ave_suino_case / ave_suino_total) * 100
else:
    percentage_ave_suino = float("nan")

df_ave_suino = pd.DataFrame({
    "Start Date": [start_date],
    "End Date": [end_date],
    "Total Cases with Info": [ave_suino_total],
    "Cases with Contact": [ave_suino_case],
    "Percentage with Contact": [percentage_ave_suino],
})
df_ave_suino

Unnamed: 0,Start Date,End Date,Total Cases with Info,Cases with Contact,Percentage with Contact
0,2025-01-01,2025-12-31,110186,3173,2.879676


## Porcentagem de pacientes com Febre

In [6]:
# Restrict to notifications inside the analysis window (bounds inclusive).
interval_df = df[df['DT_NOTIFIC'].between(start_date, end_date)]

# Only "Sim"/"Não" answers enter the denominator; other values are excluded.
febre_evolution = interval_df[interval_df['FEBRE'].isin(["Sim", "Não"])]
febre_total = febre_evolution.shape[0]

febre_case = int((febre_evolution['FEBRE'] == "Sim").sum())

# With no informative rows the percentage is undefined: report NaN instead of
# raising ZeroDivisionError.
if febre_total > 0:
    percentage_febre = (febre_case / febre_total) * 100
else:
    percentage_febre = float("nan")

df_febre = pd.DataFrame({
    "Start Date": [start_date],
    "End Date": [end_date],
    "Total Cases with Info": [febre_total],
    "Cases with Fever": [febre_case],
    "Percentage with Fever": [percentage_febre],
})
df_febre

Unnamed: 0,Start Date,End Date,Total Cases with Info,Cases with Fever,Percentage with Fever
0,2025-01-01,2025-12-31,148539,95955,64.599196


## Porcentagem de pacientes com dispneia

In [7]:
# Restrict to notifications inside the analysis window (bounds inclusive).
interval_df = df[df['DT_NOTIFIC'].between(start_date, end_date)]

# Only "Sim"/"Não" answers enter the denominator; other values are excluded.
dispneia_evolution = interval_df[interval_df['DISPNEIA'].isin(["Sim", "Não"])]
dispneia_total = dispneia_evolution.shape[0]

dispneia_case = int((dispneia_evolution['DISPNEIA'] == "Sim").sum())

# With no informative rows the percentage is undefined: report NaN instead of
# raising ZeroDivisionError.
if dispneia_total > 0:
    percentage_dispneia = (dispneia_case / dispneia_total) * 100
else:
    percentage_dispneia = float("nan")

df_dispneia = pd.DataFrame({
    "Start Date": [start_date],
    "End Date": [end_date],
    "Total Cases with Info": [dispneia_total],
    "Cases with Dispneia": [dispneia_case],
    "Percentage with Dispneia": [percentage_dispneia],
})
df_dispneia

Unnamed: 0,Start Date,End Date,Total Cases with Info,Cases with Dispneia,Percentage with Dispneia
0,2025-01-01,2025-12-31,150011,108800,72.528015


## Porcentagem de pessoas com fatores de risco

In [8]:
# Restrict to notifications inside the analysis window (bounds inclusive).
interval_df = df[df['DT_NOTIFIC'].between(start_date, end_date)]

# Only "Sim"/"Não" answers enter the denominator; other values are excluded.
# NOTE(review): the saved run reports 77659 of 77659 (100%), i.e. no "Não"
# rows were found. Presumably the negative answer is encoded differently (or
# left missing) for this column upstream -- confirm before trusting the rate.
fator_risc_evolution = interval_df[interval_df['FATOR_RISC'].isin(["Sim", "Não"])]
fator_risc_total = fator_risc_evolution.shape[0]

fator_risc_case = int((fator_risc_evolution['FATOR_RISC'] == "Sim").sum())

# With no informative rows the percentage is undefined: report NaN instead of
# raising ZeroDivisionError.
if fator_risc_total > 0:
    percentage_fator_risc = (fator_risc_case / fator_risc_total) * 100
else:
    percentage_fator_risc = float("nan")

df_fator_risc = pd.DataFrame({
    "Start Date": [start_date],
    "End Date": [end_date],
    "Total Cases with Info": [fator_risc_total],
    "Cases with Risk Factor": [fator_risc_case],
    "Percentage with Risk Factor": [percentage_fator_risc],
})
df_fator_risc

Unnamed: 0,Start Date,End Date,Total Cases with Info,Cases with Risk Factor,Percentage with Risk Factor
0,2025-01-01,2025-12-31,77659,77659,100.0


## Porcentagem de pessoas vacinadas

In [9]:
# Restrict to notifications inside the analysis window (bounds inclusive).
interval_df = df[df['DT_NOTIFIC'].between(start_date, end_date)]

# Only "Sim"/"Não" answers enter the denominator; other values are excluded.
vacina_evolution = interval_df[interval_df['VACINA'].isin(["Sim", "Não"])]
vacina_total = vacina_evolution.shape[0]

vacina_case = int((vacina_evolution['VACINA'] == "Sim").sum())

# With no informative rows the percentage is undefined: report NaN instead of
# raising ZeroDivisionError.
if vacina_total > 0:
    percentage_vacina = (vacina_case / vacina_total) * 100
else:
    percentage_vacina = float("nan")

df_vacina = pd.DataFrame({
    "Start Date": [start_date],
    "End Date": [end_date],
    "Total Cases with Info": [vacina_total],
    "Cases with Vaccination": [vacina_case],
    "Percentage with Vaccination": [percentage_vacina],
})
df_vacina

Unnamed: 0,Start Date,End Date,Total Cases with Info,Cases with Vaccination,Percentage with Vaccination
0,2025-01-01,2025-12-31,149223,48688,32.627678


## Porcentagem de pacientes que estão consumindo antiviral

In [10]:
# Restrict to notifications inside the analysis window (bounds inclusive).
interval_df = df[df['DT_NOTIFIC'].between(start_date, end_date)]

# Only "Sim"/"Não" answers enter the denominator; other values are excluded.
antiviral_evolution = interval_df[interval_df['ANTIVIRAL'].isin(["Sim", "Não"])]
antiviral_total = antiviral_evolution.shape[0]

antiviral_case = int((antiviral_evolution['ANTIVIRAL'] == "Sim").sum())

# With no informative rows the percentage is undefined: report NaN instead of
# raising ZeroDivisionError.
if antiviral_total > 0:
    percentage_antiviral = (antiviral_case / antiviral_total) * 100
else:
    percentage_antiviral = float("nan")

df_antiviral = pd.DataFrame({
    "Start Date": [start_date],
    "End Date": [end_date],
    "Total Cases with Info": [antiviral_total],
    "Cases with Antiviral Treatment": [antiviral_case],
    "Percentage with Antiviral Treatment": [percentage_antiviral],
})
df_antiviral

Unnamed: 0,Start Date,End Date,Total Cases with Info,Cases with Antiviral Treatment,Percentage with Antiviral Treatment
0,2025-01-01,2025-12-31,118700,24881,20.961247


## Porcentagem de pacientes na UTI

In [11]:
# Restrict to notifications inside the analysis window (bounds inclusive).
interval_df = df[df['DT_NOTIFIC'].between(start_date, end_date)]

# Only "Sim"/"Não" answers enter the denominator; other values are excluded.
uti_evolution = interval_df[interval_df['UTI'].isin(["Sim", "Não"])]
uti_total = uti_evolution.shape[0]

uti_case = int((uti_evolution['UTI'] == "Sim").sum())

# With no informative rows the percentage is undefined: report NaN instead of
# raising ZeroDivisionError.
if uti_total > 0:
    percentage_uti = (uti_case / uti_total) * 100
else:
    percentage_uti = float("nan")

df_uti = pd.DataFrame({
    "Start Date": [start_date],
    "End Date": [end_date],
    "Total Cases with Info": [uti_total],
    "Cases with UTI": [uti_case],
    "Percentage with UTI": [percentage_uti],
})
df_uti

Unnamed: 0,Start Date,End Date,Total Cases with Info,Cases with UTI,Percentage with UTI
0,2025-01-01,2025-12-31,143923,52471,36.457689


## Porcentagem de óbitos entre os casos com evolução conhecida (Cura ou Óbito)

In [12]:
# Restrict to notifications inside the analysis window (bounds inclusive).
interval_df = df[df['DT_NOTIFIC'].between(start_date, end_date)]

# Only rows whose outcome is "Cura" or "Óbito" enter the denominator.
evolucao = interval_df[interval_df['EVOLUCAO'].isin(["Cura", "Óbito"])]
evolucao_total = evolucao.shape[0]

# Number of rows whose outcome is "Óbito".
evolucao_case = int((evolucao['EVOLUCAO'] == "Óbito").sum())

# With no informative rows the percentage is undefined: report NaN instead of
# raising ZeroDivisionError.
if evolucao_total > 0:
    percentage_evolucao = (evolucao_case / evolucao_total) * 100
else:
    percentage_evolucao = float("nan")

# NOTE: the "Cases with Evolução" column holds the count of "Óbito" rows; the
# label is kept unchanged so existing outputs/consumers still match.
df_evolucao = pd.DataFrame({
    "Start Date": [start_date],
    "End Date": [end_date],
    "Total Cases with Info": [evolucao_total],
    "Cases with Evolução": [evolucao_case],
    "Percentage of Deaths": [percentage_evolucao],
})
df_evolucao

Unnamed: 0,Start Date,End Date,Total Cases with Info,Cases with Evolução,Percentage of Deaths
0,2025-01-01,2025-12-31,134398,15527,11.552999


## Porcentagem de surto de síndrome gripal

In [13]:
# Restrict to notifications inside the analysis window (bounds inclusive).
interval_df = df[df['DT_NOTIFIC'].between(start_date, end_date)]

# Only "Sim"/"Não" answers enter the denominator; other values (e.g.
# "Ignorado" or missing) are excluded.
surto_sg_evolution = interval_df[interval_df['SURTO_SG'].isin(["Sim", "Não"])]
surto_sg_total = surto_sg_evolution.shape[0]

surto_sg_case = int((surto_sg_evolution['SURTO_SG'] == "Sim").sum())

# With no informative rows the percentage is undefined: report NaN instead of
# raising ZeroDivisionError.
if surto_sg_total > 0:
    percentage_surto_sg = (surto_sg_case / surto_sg_total) * 100
else:
    percentage_surto_sg = float("nan")

df_surto_sg = pd.DataFrame({
    "Start Date": [start_date],
    "End Date": [end_date],
    "Total Cases with Info": [surto_sg_total],
    "Cases with SURTO_SG": [surto_sg_case],
    "Percentage with SURTO_SG": [percentage_surto_sg],
})
df_surto_sg

Unnamed: 0,Start Date,End Date,Total Cases with Info,Cases with SURTO_SG,Percentage with SURTO_SG
0,2025-01-01,2025-12-31,101694,8758,8.612111
