# ETL Silver -> Gold Layer
## Data Warehouse Dengue 2025 - Star Schema
### Padrao: Nomenclatura corporativa 3 letras UPPERCASE

In [25]:
import pandas as pd
import numpy as np
import psycopg2
from psycopg2.extras import execute_values
from datetime import datetime, date
import warnings
warnings.filterwarnings('ignore')

DB_CONFIG = {
    'host': 'localhost',
    'port': 5432,
    'database': 'gis',
    'user': 'postgres',
    'password': 'postgres'
}

BATCH_SIZE = 50_000
print("Setup concluido")

Setup concluido


## 1. Conexao e Verificacao Silver Layer

In [26]:
conn = psycopg2.connect(**DB_CONFIG)
cursor = conn.cursor()

cursor.execute("""
    SELECT COUNT(*) as total,
           MIN(data_notificacao) as data_min,
           MAX(data_notificacao) as data_max,
           COUNT(DISTINCT uf_sigla) as qtd_ufs
    FROM silver.dengue_silver
""")
result = cursor.fetchone()
total_silver = result[0]

print("Silver Layer - Resumo:")
print(f"   Total: {total_silver:,}")
print(f"   Periodo: {result[1]} a {result[2]}")
print(f"   UFs: {result[3]}")

Silver Layer - Resumo:
   Total: 1,661,634
   Periodo: 2024-12-29 a 2026-01-05
   UFs: 27


## 2. Carregar Dados Silver

In [27]:
query = """
SELECT 
    id_notificacao, uf_sigla, data_notificacao, data_sintomas,
    idade_anos, faixa_etaria, sexo_desc, raca_desc,
    qtd_sintomas, qtd_alarmes, classificacao_desc, evolucao_desc,
    fl_confirmado, fl_grave, fl_obito, fl_hospitalizado
FROM silver.dengue_silver
"""

print("Carregando dados...")
df_silver = pd.read_sql(query, conn)
print(f"Carregados {len(df_silver):,} registros")

Carregando dados...
Carregados 1,661,634 registros


## 3. Construcao das Dimensoes

In [28]:
# DIM_TMP - Dimensao Tempo
DIAS_SEMANA = {0: 'Segunda', 1: 'Terca', 2: 'Quarta', 3: 'Quinta', 4: 'Sexta', 5: 'Sabado', 6: 'Domingo'}

datas_notif = pd.to_datetime(df_silver['data_notificacao'].dropna().unique())
datas_sint = pd.to_datetime(df_silver['data_sintomas'].dropna().unique())
datas_unicas = pd.Series(list(set(datas_notif) | set(datas_sint))).dropna().unique()

dim_tempo_data = []
for i, d in enumerate(sorted(pd.to_datetime(datas_unicas))):
    dim_tempo_data.append({
        'TMP_SRK': i + 1,
        'DAT_COM': d.date(),
        'NUM_ANO': d.year,
        'NUM_MES': d.month,
        'NUM_DIA': d.day,
        'NUM_TRI': (d.month - 1) // 3 + 1,
        'NUM_SEM_EPI': d.isocalendar()[1],
        'NUM_DIA_SEM': d.dayofweek + 1,
        'NOM_DIA': DIAS_SEMANA[d.dayofweek],
        'IND_FDS': 1 if d.dayofweek >= 5 else 0,
        'DES_MES_ANO': f"{d.year}-{d.month:02d}",
        'DES_ANO_TRI': f"{d.year}-Q{(d.month - 1) // 3 + 1}"
    })

df_dim_tempo = pd.DataFrame(dim_tempo_data)
print(f"DIM_TMP: {len(df_dim_tempo)} registros")

DIM_TMP: 373 registros


In [29]:
# DIM_LOC - Dimensao Localizacao
UFS_INFO = {
    'AC': ('Acre', 'Norte', 12), 'AL': ('Alagoas', 'Nordeste', 27), 'AP': ('Amapa', 'Norte', 16),
    'AM': ('Amazonas', 'Norte', 13), 'BA': ('Bahia', 'Nordeste', 29), 'CE': ('Ceara', 'Nordeste', 23),
    'DF': ('Distrito Federal', 'Centro-Oeste', 53), 'ES': ('Espirito Santo', 'Sudeste', 32),
    'GO': ('Goias', 'Centro-Oeste', 52), 'MA': ('Maranhao', 'Nordeste', 21), 'MT': ('Mato Grosso', 'Centro-Oeste', 51),
    'MS': ('Mato Grosso do Sul', 'Centro-Oeste', 50), 'MG': ('Minas Gerais', 'Sudeste', 31),
    'PA': ('Para', 'Norte', 15), 'PB': ('Paraiba', 'Nordeste', 25), 'PR': ('Parana', 'Sul', 41),
    'PE': ('Pernambuco', 'Nordeste', 26), 'PI': ('Piaui', 'Nordeste', 22), 'RJ': ('Rio de Janeiro', 'Sudeste', 33),
    'RN': ('Rio Grande do Norte', 'Nordeste', 24), 'RS': ('Rio Grande do Sul', 'Sul', 43),
    'RO': ('Rondonia', 'Norte', 11), 'RR': ('Roraima', 'Norte', 14), 'SC': ('Santa Catarina', 'Sul', 42),
    'SP': ('Sao Paulo', 'Sudeste', 35), 'SE': ('Sergipe', 'Nordeste', 28), 'TO': ('Tocantins', 'Norte', 17)
}

ufs_unicas = df_silver['uf_sigla'].dropna().unique()
dim_loc_data = []
for i, uf in enumerate(sorted(ufs_unicas)):
    if uf in UFS_INFO:
        info = UFS_INFO[uf]
        dim_loc_data.append({
            'LOC_SRK': i + 1, 'SIG_UNF': uf, 'NOM_UNF': info[0],
            'NOM_REG': info[1], 'COD_IBG': info[2], 'NOM_CAP': 'N/A'
        })

df_dim_loc = pd.DataFrame(dim_loc_data)
print(f"DIM_LOC: {len(df_dim_loc)} registros")

DIM_LOC: 27 registros


In [30]:
# DIM_PAC - Dimensao Paciente
def faixa_etaria_det(idade):
    if pd.isna(idade): return 'UNKNOWN'
    if idade < 1: return 'Lactente'
    elif idade < 12: return 'Crianca'
    elif idade < 18: return 'Adolescente'
    elif idade < 60: return 'Adulto'
    else: return 'Idoso'

df_silver['DES_FAI_ETA'] = df_silver['faixa_etaria'].fillna('Nao informado')
df_silver['DES_SEX'] = df_silver['sexo_desc'].fillna('UNKNOWN')
df_silver['DES_RAC'] = df_silver['raca_desc'].fillna('UNKNOWN')
df_silver['DES_FAI_ETA_DET'] = df_silver['idade_anos'].apply(faixa_etaria_det)
df_silver['COD_DEM'] = df_silver['DES_FAI_ETA'] + '|' + df_silver['DES_SEX'] + '|' + df_silver['DES_RAC']

df_dim_pac_temp = df_silver[['COD_DEM', 'DES_FAI_ETA', 'DES_SEX', 'DES_RAC', 'DES_FAI_ETA_DET']].drop_duplicates(subset=['COD_DEM']).reset_index(drop=True)
df_dim_pac_temp['PAC_SRK'] = df_dim_pac_temp.index + 1
df_dim_pac = df_dim_pac_temp[['PAC_SRK', 'COD_DEM', 'DES_FAI_ETA', 'DES_SEX', 'DES_RAC', 'DES_FAI_ETA_DET']]
print(f"DIM_PAC: {len(df_dim_pac)} registros")

DIM_PAC: 123 registros


In [31]:
# DIM_CLS - Dimensao Classificacao
CLASSIF_INFO = {
    'Dengue': ('10', 'Confirmado', 'Leve', 'A90', 1),
    'Dengue com Sinais de Alarme': ('11', 'Confirmado', 'Moderado', 'A91', 1),
    'Dengue Grave': ('12', 'Confirmado', 'Grave', 'A91', 1),
    'Inconclusivo': ('8', 'Indeterminado', 'N/A', None, 0),
    'Em investigacao': ('0', 'Em Investigacao', 'N/A', None, 0)
}

classifs = df_silver['classificacao_desc'].dropna().unique()
dim_cls_data = []
for i, c in enumerate(sorted(classifs)):
    info = CLASSIF_INFO.get(c, ('99', 'Outros', 'N/A', None, 0))
    dim_cls_data.append({'CLS_SRK': i+1, 'COD_CLS': info[0], 'DES_CLS': c, 'DES_GRP': info[1], 'DES_GRA': info[2], 'COD_CID': info[3], 'IND_CON': info[4]})

df_dim_cls = pd.DataFrame(dim_cls_data)
print(f"DIM_CLS: {len(df_dim_cls)} registros")

DIM_CLS: 5 registros


In [32]:
# DIM_EVL - Dimensao Evolucao
EVOL_INFO = {
    'Cura': ('1', 'Favoravel', 0, 'Baixa'),
    'Obito pelo agravo': ('2', 'Obito', 1, 'Critica'),
    'Obito por outras causas': ('3', 'Obito', 1, 'Critica'),
    'Obito em investigacao': ('4', 'Obito', 1, 'Critica'),
    'Ignorado': ('9', 'Indeterminado', 0, 'Indeterminada'),
    'Em investigacao': ('0', 'Em Investigacao', 0, 'Indeterminada')
}

evols = df_silver['evolucao_desc'].dropna().unique()
dim_evl_data = []
for i, e in enumerate(sorted(evols)):
    info = EVOL_INFO.get(e, ('99', 'Outros', 0, 'Indeterminada'))
    dim_evl_data.append({'EVL_SRK': i+1, 'COD_EVL': info[0], 'DES_EVL': e, 'TIP_EVL': info[1], 'IND_OBI': info[2], 'DES_GRA_DES': info[3]})

df_dim_evl = pd.DataFrame(dim_evl_data)
print(f"DIM_EVL: {len(df_dim_evl)} registros")

DIM_EVL: 6 registros


In [33]:
# DIM_SNT - Dimensao Sintomas
def fx_sint(q): return 'Nenhum' if pd.isna(q) or q==0 else 'Poucos (1-2)' if q<=2 else 'Moderado (3-5)' if q<=5 else 'Muitos (6+)'
def fx_alr(q): return 'Nenhum' if pd.isna(q) or q==0 else 'Poucos (1-2)' if q<=2 else 'Multiplos (3+)'
def perf_cli(s,a): return 'Assintomatico' if s==0 and a==0 else 'Dengue Classica' if a==0 else 'Dengue com Alarme' if a<=2 else 'Dengue Grave'

df_silver['DES_FAI_SNT'] = df_silver['qtd_sintomas'].apply(fx_sint)
df_silver['DES_FAI_ALR'] = df_silver['qtd_alarmes'].apply(fx_alr)
df_silver['DES_PER_CLI'] = df_silver.apply(lambda x: perf_cli(x['qtd_sintomas'], x['qtd_alarmes']), axis=1)
df_silver['IND_SNT'] = (df_silver['qtd_sintomas'] > 0).astype(int)
df_silver['IND_ALR'] = (df_silver['qtd_alarmes'] > 0).astype(int)
df_silver['COD_SNT'] = df_silver['DES_FAI_SNT'] + '|' + df_silver['DES_FAI_ALR']

df_dim_snt_temp = df_silver[['COD_SNT', 'DES_FAI_SNT', 'DES_FAI_ALR', 'DES_PER_CLI', 'IND_SNT', 'IND_ALR']].drop_duplicates().reset_index(drop=True)
df_dim_snt_temp['SNT_SRK'] = df_dim_snt_temp.index + 1
df_dim_snt = df_dim_snt_temp[['SNT_SRK', 'COD_SNT', 'DES_FAI_SNT', 'DES_FAI_ALR', 'DES_PER_CLI', 'IND_SNT', 'IND_ALR']]
print(f"DIM_SNT: {len(df_dim_snt)} registros")

DIM_SNT: 12 registros


## 4. Criar Schema Gold e Tabelas

In [34]:
# PASSO 1: Criar schema se nao existir
conn.rollback()

cursor.execute("SELECT schema_name FROM information_schema.schemata WHERE schema_name = 'dw'")
if cursor.fetchone() is None:
    print("Criando schema dw...")
    cursor.execute("CREATE SCHEMA dw")
    conn.commit()
    print("Schema dw criado!")
else:
    print("Schema dw ja existe")

Schema dw ja existe


In [35]:
# PASSO 2: Dropar tabelas existentes e recriar
print("Recriando tabelas...")

# Drop todas as tabelas (fato primeiro por causa das FKs)
for t in ['FAT_DEN', 'DIM_TMP', 'DIM_LOC', 'DIM_PAC', 'DIM_CLS', 'DIM_EVL', 'DIM_SNT']:
    cursor.execute(f"DROP TABLE IF EXISTS dw.{t} CASCADE")
conn.commit()

# Criar DIM_TMP
cursor.execute("""
    CREATE TABLE dw.DIM_TMP (
        TMP_SRK BIGINT PRIMARY KEY,
        DAT_COM DATE NOT NULL UNIQUE,
        NUM_ANO INTEGER NOT NULL,
        NUM_MES INTEGER NOT NULL,
        NUM_DIA INTEGER NOT NULL,
        NUM_TRI INTEGER NOT NULL,
        NUM_SEM_EPI INTEGER NOT NULL,
        NUM_DIA_SEM INTEGER NOT NULL,
        NOM_DIA VARCHAR(20) NOT NULL,
        IND_FDS INTEGER NOT NULL,
        DES_MES_ANO VARCHAR(10) NOT NULL,
        DES_ANO_TRI VARCHAR(10) NOT NULL
    )
""")
print("   DIM_TMP criada")

# Criar DIM_LOC
cursor.execute("""
    CREATE TABLE dw.DIM_LOC (
        LOC_SRK BIGINT PRIMARY KEY,
        SIG_UNF CHAR(2) NOT NULL UNIQUE,
        NOM_UNF VARCHAR(50) NOT NULL,
        NOM_REG VARCHAR(20) NOT NULL,
        COD_IBG INTEGER,
        NOM_CAP VARCHAR(50)
    )
""")
print("   DIM_LOC criada")

# Criar DIM_PAC
cursor.execute("""
    CREATE TABLE dw.DIM_PAC (
        PAC_SRK BIGINT PRIMARY KEY,
        COD_DEM VARCHAR(100) NOT NULL UNIQUE,
        DES_FAI_ETA VARCHAR(30) NOT NULL,
        DES_SEX VARCHAR(20) NOT NULL,
        DES_RAC VARCHAR(30) NOT NULL,
        DES_FAI_ETA_DET VARCHAR(50) NOT NULL
    )
""")
print("   DIM_PAC criada")

# Criar DIM_CLS
cursor.execute("""
    CREATE TABLE dw.DIM_CLS (
        CLS_SRK BIGINT PRIMARY KEY,
        COD_CLS VARCHAR(10) NOT NULL,
        DES_CLS VARCHAR(50) NOT NULL UNIQUE,
        DES_GRP VARCHAR(30) NOT NULL,
        DES_GRA VARCHAR(20) NOT NULL,
        COD_CID VARCHAR(10),
        IND_CON INTEGER NOT NULL
    )
""")
print("   DIM_CLS criada")

# Criar DIM_EVL
cursor.execute("""
    CREATE TABLE dw.DIM_EVL (
        EVL_SRK BIGINT PRIMARY KEY,
        COD_EVL VARCHAR(10) NOT NULL,
        DES_EVL VARCHAR(50) NOT NULL UNIQUE,
        TIP_EVL VARCHAR(30) NOT NULL,
        IND_OBI INTEGER NOT NULL,
        DES_GRA_DES VARCHAR(30) NOT NULL
    )
""")
print("   DIM_EVL criada")

# Criar DIM_SNT
cursor.execute("""
    CREATE TABLE dw.DIM_SNT (
        SNT_SRK BIGINT PRIMARY KEY,
        COD_SNT VARCHAR(50) NOT NULL UNIQUE,
        DES_FAI_SNT VARCHAR(20) NOT NULL,
        DES_FAI_ALR VARCHAR(20) NOT NULL,
        DES_PER_CLI VARCHAR(30) NOT NULL,
        IND_SNT INTEGER NOT NULL,
        IND_ALR INTEGER NOT NULL
    )
""")
print("   DIM_SNT criada")

# Criar FAT_DEN
cursor.execute("""
    CREATE TABLE dw.FAT_DEN (
        FAT_SRK BIGINT PRIMARY KEY,
        NUM_NOT BIGINT NOT NULL,
        TMP_SRK BIGINT NOT NULL REFERENCES dw.DIM_TMP(TMP_SRK),
        LOC_SRK BIGINT NOT NULL REFERENCES dw.DIM_LOC(LOC_SRK),
        PAC_SRK BIGINT NOT NULL REFERENCES dw.DIM_PAC(PAC_SRK),
        CLS_SRK BIGINT NOT NULL REFERENCES dw.DIM_CLS(CLS_SRK),
        EVL_SRK BIGINT NOT NULL REFERENCES dw.DIM_EVL(EVL_SRK),
        SNT_SRK BIGINT NOT NULL REFERENCES dw.DIM_SNT(SNT_SRK),
        VAL_CON INTEGER NOT NULL,
        VAL_GRA INTEGER NOT NULL,
        VAL_OBI INTEGER NOT NULL,
        VAL_HOS INTEGER NOT NULL,
        QTD_SNT INTEGER NOT NULL,
        QTD_ALR INTEGER NOT NULL,
        VAL_IDA NUMERIC(5,2),
        DAT_NOT DATE NOT NULL,
        DAT_SNT DATE
    )
""")
print("   FAT_DEN criada")

conn.commit()
print("\nTabelas criadas com sucesso!")

Recriando tabelas...
   DIM_TMP criada
   DIM_LOC criada
   DIM_PAC criada
   DIM_CLS criada
   DIM_EVL criada
   DIM_SNT criada
   FAT_DEN criada

Tabelas criadas com sucesso!


## 5. Carga das Dimensoes

In [36]:
def load_dim(df, table, cols):
    sql = f"INSERT INTO {table} ({','.join(cols)}) VALUES ({','.join(['%s']*len(cols))})"
    data = [tuple(row[c] if pd.notna(row[c]) else None for c in cols) for _, row in df.iterrows()]
    cursor.executemany(sql, data)
    conn.commit()
    return len(data)

print("Carregando dimensoes...")

n = load_dim(df_dim_tempo, 'dw.DIM_TMP', ['TMP_SRK','DAT_COM','NUM_ANO','NUM_MES','NUM_DIA','NUM_TRI','NUM_SEM_EPI','NUM_DIA_SEM','NOM_DIA','IND_FDS','DES_MES_ANO','DES_ANO_TRI'])
print(f"   DIM_TMP: {n}")

n = load_dim(df_dim_loc, 'dw.DIM_LOC', ['LOC_SRK','SIG_UNF','NOM_UNF','NOM_REG','COD_IBG','NOM_CAP'])
print(f"   DIM_LOC: {n}")

n = load_dim(df_dim_pac, 'dw.DIM_PAC', ['PAC_SRK','COD_DEM','DES_FAI_ETA','DES_SEX','DES_RAC','DES_FAI_ETA_DET'])
print(f"   DIM_PAC: {n}")

n = load_dim(df_dim_cls, 'dw.DIM_CLS', ['CLS_SRK','COD_CLS','DES_CLS','DES_GRP','DES_GRA','COD_CID','IND_CON'])
print(f"   DIM_CLS: {n}")

n = load_dim(df_dim_evl, 'dw.DIM_EVL', ['EVL_SRK','COD_EVL','DES_EVL','TIP_EVL','IND_OBI','DES_GRA_DES'])
print(f"   DIM_EVL: {n}")

n = load_dim(df_dim_snt, 'dw.DIM_SNT', ['SNT_SRK','COD_SNT','DES_FAI_SNT','DES_FAI_ALR','DES_PER_CLI','IND_SNT','IND_ALR'])
print(f"   DIM_SNT: {n}")

print("\nDimensoes carregadas!")

Carregando dimensoes...
   DIM_TMP: 373
   DIM_LOC: 27
   DIM_PAC: 123
   DIM_CLS: 5
   DIM_EVL: 6
   DIM_SNT: 12

Dimensoes carregadas!


## 6. Criar Lookups e Carregar Fato

In [37]:
print("Criando lookups...")

cursor.execute("SELECT TMP_SRK, DAT_COM FROM dw.DIM_TMP")
lk_tmp = {row[1]: row[0] for row in cursor.fetchall()}

cursor.execute("SELECT LOC_SRK, SIG_UNF FROM dw.DIM_LOC")
lk_loc = {row[1]: row[0] for row in cursor.fetchall()}

cursor.execute("SELECT PAC_SRK, COD_DEM FROM dw.DIM_PAC")
lk_pac = {row[1]: row[0] for row in cursor.fetchall()}

cursor.execute("SELECT CLS_SRK, DES_CLS FROM dw.DIM_CLS")
lk_cls = {row[1]: row[0] for row in cursor.fetchall()}

cursor.execute("SELECT EVL_SRK, DES_EVL FROM dw.DIM_EVL")
lk_evl = {row[1]: row[0] for row in cursor.fetchall()}

cursor.execute("SELECT SNT_SRK, COD_SNT FROM dw.DIM_SNT")
lk_snt = {row[1]: row[0] for row in cursor.fetchall()}

print(f"   TMP:{len(lk_tmp)} LOC:{len(lk_loc)} PAC:{len(lk_pac)} CLS:{len(lk_cls)} EVL:{len(lk_evl)} SNT:{len(lk_snt)}")

Criando lookups...
   TMP:373 LOC:27 PAC:123 CLS:5 EVL:6 SNT:12


In [38]:
print("Preparando tabela fato...")

df_fato = pd.DataFrame()
df_fato['FAT_SRK'] = range(1, len(df_silver) + 1)
df_fato['NUM_NOT'] = df_silver['id_notificacao']
df_fato['TMP_SRK'] = df_silver['data_notificacao'].apply(lambda x: lk_tmp.get(x, -1) if pd.notna(x) else -1)
df_fato['LOC_SRK'] = df_silver['uf_sigla'].apply(lambda x: lk_loc.get(x, -1) if pd.notna(x) else -1)
df_fato['PAC_SRK'] = df_silver['COD_DEM'].apply(lambda x: lk_pac.get(x, -1) if pd.notna(x) else -1)
df_fato['CLS_SRK'] = df_silver['classificacao_desc'].apply(lambda x: lk_cls.get(x, -1) if pd.notna(x) else -1)
df_fato['EVL_SRK'] = df_silver['evolucao_desc'].apply(lambda x: lk_evl.get(x, -1) if pd.notna(x) else -1)
df_fato['SNT_SRK'] = df_silver['COD_SNT'].apply(lambda x: lk_snt.get(x, -1) if pd.notna(x) else -1)
df_fato['VAL_CON'] = df_silver['fl_confirmado'].fillna(0).astype(int)
df_fato['VAL_GRA'] = df_silver['fl_grave'].fillna(0).astype(int)
df_fato['VAL_OBI'] = df_silver['fl_obito'].fillna(0).astype(int)
df_fato['VAL_HOS'] = df_silver['fl_hospitalizado'].fillna(0).astype(int)
df_fato['QTD_SNT'] = df_silver['qtd_sintomas'].fillna(0).astype(int)
df_fato['QTD_ALR'] = df_silver['qtd_alarmes'].fillna(0).astype(int)
df_fato['VAL_IDA'] = df_silver['idade_anos']
df_fato['DAT_NOT'] = df_silver['data_notificacao']
df_fato['DAT_SNT'] = df_silver['data_sintomas']

print(f"Fato preparada: {len(df_fato):,} registros")

Preparando tabela fato...
Fato preparada: 1,661,634 registros


In [39]:
cols = ['FAT_SRK','NUM_NOT','TMP_SRK','LOC_SRK','PAC_SRK','CLS_SRK','EVL_SRK','SNT_SRK',
        'VAL_CON','VAL_GRA','VAL_OBI','VAL_HOS','QTD_SNT','QTD_ALR','VAL_IDA','DAT_NOT','DAT_SNT']

sql = f"INSERT INTO dw.FAT_DEN ({','.join(cols)}) VALUES %s"
total = len(df_fato)
batches = (total + BATCH_SIZE - 1) // BATCH_SIZE

print(f"Carregando {total:,} registros em {batches} batches...")

for i in range(batches):
    start, end = i * BATCH_SIZE, min((i + 1) * BATCH_SIZE, total)
    batch = df_fato.iloc[start:end]
    data = [tuple(row[c] if pd.notna(row[c]) else None for c in cols) for _, row in batch.iterrows()]
    execute_values(cursor, sql, data)
    conn.commit()
    print(f"   Batch {i+1}/{batches}: {end:,} ({(i+1)/batches*100:.0f}%)")

print(f"\nCarga concluida!")

Carregando 1,661,634 registros em 34 batches...
   Batch 1/34: 50,000 (3%)
   Batch 2/34: 100,000 (6%)
   Batch 3/34: 150,000 (9%)
   Batch 4/34: 200,000 (12%)
   Batch 5/34: 250,000 (15%)
   Batch 6/34: 300,000 (18%)
   Batch 7/34: 350,000 (21%)
   Batch 8/34: 400,000 (24%)
   Batch 9/34: 450,000 (26%)
   Batch 10/34: 500,000 (29%)
   Batch 11/34: 550,000 (32%)
   Batch 12/34: 600,000 (35%)
   Batch 13/34: 650,000 (38%)
   Batch 14/34: 700,000 (41%)
   Batch 15/34: 750,000 (44%)
   Batch 16/34: 800,000 (47%)
   Batch 17/34: 850,000 (50%)
   Batch 18/34: 900,000 (53%)
   Batch 19/34: 950,000 (56%)
   Batch 20/34: 1,000,000 (59%)
   Batch 21/34: 1,050,000 (62%)
   Batch 22/34: 1,100,000 (65%)
   Batch 23/34: 1,150,000 (68%)
   Batch 24/34: 1,200,000 (71%)
   Batch 25/34: 1,250,000 (74%)
   Batch 26/34: 1,300,000 (76%)
   Batch 27/34: 1,350,000 (79%)
   Batch 28/34: 1,400,000 (82%)
   Batch 29/34: 1,450,000 (85%)
   Batch 30/34: 1,500,000 (88%)
   Batch 31/34: 1,550,000 (91%)
   Batch 32

## 7. Validacao Final

In [40]:
print("VALIDACAO FINAL")
print("=" * 50)

cursor.execute("SELECT COUNT(*) FROM dw.FAT_DEN")
total_gold = cursor.fetchone()[0]
print(f"\nSilver: {total_silver:,} | Gold: {total_gold:,} | {'OK' if total_gold==total_silver else 'ERRO'}")

cursor.execute("SELECT SUM(VAL_CON), SUM(VAL_GRA), SUM(VAL_OBI), SUM(VAL_HOS) FROM dw.FAT_DEN")
m = cursor.fetchone()
print(f"\nConfirmados: {m[0]:,} | Graves: {m[1]:,} | Obitos: {m[2]:,} | Hosp: {m[3]:,}")
if m[0] > 0:
    print(f"Taxa letalidade: {m[2]/m[0]*100:.4f}% | Taxa gravidade: {m[1]/m[0]*100:.2f}%")

VALIDACAO FINAL

Silver: 1,661,634 | Gold: 1,661,634 | OK

Confirmados: 1,445,765 | Graves: 37,208 | Obitos: 1,773 | Hosp: 72,684
Taxa letalidade: 0.1226% | Taxa gravidade: 2.57%


In [41]:
print("\nTOP 5 UFs:")
cursor.execute("""
    SELECT l.SIG_UNF, l.NOM_UNF, SUM(f.VAL_CON) as casos, SUM(f.VAL_OBI) as obitos
    FROM dw.FAT_DEN f JOIN dw.DIM_LOC l ON f.LOC_SRK = l.LOC_SRK
    GROUP BY l.SIG_UNF, l.NOM_UNF ORDER BY casos DESC LIMIT 5
""")
for r in cursor.fetchall():
    print(f"   {r[0]} ({r[1]}): {r[2]:,} casos, {r[3]:,} obitos")


TOP 5 UFs:
   SP (Sao Paulo): 876,832 casos, 1,118 obitos
   MG (Minas Gerais): 119,016 casos, 149 obitos
   GO (Goias): 96,685 casos, 105 obitos
   PR (Parana): 92,514 casos, 145 obitos
   RS (Rio Grande do Sul): 44,075 casos, 53 obitos


In [42]:
cursor.close()
conn.close()
print("\n" + "=" * 50)
print("ETL SILVER -> GOLD CONCLUIDO!")
print("=" * 50)


ETL SILVER -> GOLD CONCLUIDO!
