## ETL Silver para Gold

Popular o star schema da camada gold a partir dos dados limpos da camada silver.

## 1. Imports

Bibliotecas necessárias para o ETL.

In [1]:
import os
import warnings

import pandas as pd
import psycopg2
from psycopg2.extras import execute_batch

warnings.filterwarnings('ignore', message='.*SQLAlchemy.*')

## 2. Conexão com o banco de dados da silver 

In [2]:
# Keyword arguments passed to psycopg2.connect() for the PostgreSQL instance
# that hosts the silver schema.
# Every value can be overridden with the standard libpq environment variables
# (PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASSWORD). The fallbacks are the
# original local-development values, so the notebook behaves exactly as before
# when none of them is exported.
# NOTE(review): the fallback password is a development default only — export
# PGPASSWORD rather than committing real credentials to the notebook.
DB_CONFIG = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': int(os.environ.get('PGPORT', '5433')),
    'database': os.environ.get('PGDATABASE', 'social_media'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', 'postgres'),
}


## 3. Extração de dados

In [3]:
# Open the connection that every later cell reuses through the name `conn`.
print("Conectando ao PostgreSQL...")
conn = psycopg2.connect(**DB_CONFIG)
print("Conectado")

# Check that the source table has data before anything downstream runs.
# The context manager closes the cursor even when the query raises.
with conn.cursor() as cur_check:
    cur_check.execute("SELECT COUNT(*) FROM silver.user")
    total_rows = cur_check.fetchone()[0]
print(f"\nTotal de registros em silver.user: {total_rows:,}")

if total_rows == 0:
    # Abort instead of only printing: the following cells truncate the DW
    # tables and would then reload them from an empty source.
    raise RuntimeError("Tabela silver.user vazia")

# Column list read from silver.user into the staging DataFrame.
query = """ 
    SELECT user_id, age, gender, country, urban_rural, income_level, 
        employment_status, relationship_status, exercise_hours_per_week, 
        sleep_hours_per_night, diet_quality, smoking, alcohol_frequency, 
        perceived_stress_score, self_reported_happiness, body_mass_index, 
        blood_pressure_systolic, blood_pressure_diastolic, 
        daily_steps_count, weekly_work_hours, social_events_per_month, 
        books_read_per_year, volunteer_hours_per_month, 
        daily_active_minutes_instagram, reels_watched_per_day, 
        stories_viewed_per_day, ads_viewed_per_day, ads_clicked_per_day, 
        time_on_feed_per_day, time_on_explore_per_day, 
        time_on_reels_per_day, followers_count, following_count, 
        content_type_preference, preferred_content_theme, 
        user_engagement_score
        FROM silver.user
        """

# NOTE(review): df_Stg is not referenced by any later cell visible here (the
# inserts read silver.user directly in SQL) — confirm whether loading the
# whole table into memory is still needed.
df_Stg = pd.read_sql_query(query, conn)

Conectando ao PostgreSQL...
Conectado

Total de registros em silver.user: 608,923


## 4. Preparação dos cursores

In [4]:
# Cursor on the module-level connection `conn`; the truncate and insert cells
# below all execute their statements through it.
cur = conn.cursor()
print("Cursor criado")

Cursor criado


## 5. Limpeza das tabelas

In [11]:
# Empty every DW table so the load below starts from a clean slate.
# RESTART IDENTITY also resets the sequences owned by the truncated tables.
# The fact load numbers its dimension keys from 1 with ROW_NUMBER(), so keys
# generated by the dimension tables must start at 1 again on every re-run.
# NOTE(review): assumes the dimension surrogate keys are serial/identity
# columns — confirm against the DW DDL (the clause is a no-op otherwise).
try:
    cur.execute(
        "TRUNCATE TABLE DW.dim_act_inf,DW.dim_hlt_inf,DW.dim_psn_inf,DW.fat_usr "
        "RESTART IDENTITY CASCADE;"
    )
    conn.commit()
except Exception:
    # A failed statement leaves the connection in an aborted transaction;
    # roll back so later cells can still use it, then surface the error.
    conn.rollback()
    raise
print("Tabelas limpas")

Tabelas limpas


## 6. Tabela dimensão de Informações Pessoais

In [13]:
# Load the personal-information dimension: one row per row of silver.user
# (no DISTINCT, so repeated attribute combinations are not collapsed).
# NOTE(review): the fact load links to this table by ROW_NUMBER() over
# silver.user, so it relies on these rows receiving keys 1..N in the same
# scan order — confirm that assumption holds for the DW schema.
insert_dim_psn_inf = """
    INSERT INTO DW.dim_psn_inf(
    ubr_rrl,icm_lvl,ept_stt,rlt_stt,wkl_wrk_hrs,scl_evt_per_mnt,bok_rad_per_yea,
    vlt_hrs_per_mnt
    )
    SELECT
        urban_rural, income_level, employment_status, relationship_status,weekly_work_hours,social_events_per_month,
        books_read_per_year,volunteer_hours_per_month
    FROM silver.user;
"""
try:
    cur.execute(insert_dim_psn_inf)
    conn.commit()
except Exception:
    # A failed statement leaves the connection in an aborted transaction;
    # roll back so later cells can still use it, then surface the error.
    conn.rollback()
    raise
print("Valores inseridos na tabela dim_psn_inf")


Valores inseridos na tabela dim_psn_inf


## 7. Populando a tabela de Informações de Saúde

In [14]:
# Load the health-information dimension: one row per row of silver.user
# (no DISTINCT, so repeated attribute combinations are not collapsed).
# NOTE(review): the fact load links to this table by ROW_NUMBER() over
# silver.user, so it relies on these rows receiving keys 1..N in the same
# scan order — confirm that assumption holds for the DW schema.
insert_dim_hlt_inf = """
    INSERT INTO DW.dim_hlt_inf(
    exr_hrs_per_wek,
    slp_hrs_per_ngt,
    dit_qly,
    smk,
    alc_frq,
    pcd_str_scr,
    slf_rpt_hap,
    bdy_mss_idx,
    bld_prs_sys,
    bld_prs_dis,
    dly_stp_cnt 
            )
    SELECT
            exercise_hours_per_week,sleep_hours_per_night, diet_quality, smoking, alcohol_frequency, 
        perceived_stress_score, self_reported_happiness, body_mass_index, 
        blood_pressure_systolic, blood_pressure_diastolic, 
        daily_steps_count
        FROM silver.user;
            """
try:
    cur.execute(insert_dim_hlt_inf)
    conn.commit()
except Exception:
    # A failed statement leaves the connection in an aborted transaction;
    # roll back so later cells can still use it, then surface the error.
    conn.rollback()
    raise
print("Valores inseridos na tabela dim_hlt_inf")

Valores inseridos na tabela dim_hlt_inf


## 8. Populando a tabela de Informações de Conta

In [15]:

# Load the account-information dimension: one row per row of silver.user
# (no DISTINCT, so repeated attribute combinations are not collapsed).
# NOTE(review): the fact load links to this table by ROW_NUMBER() over
# silver.user, so it relies on these rows receiving keys 1..N in the same
# scan order — confirm that assumption holds for the DW schema.
insert_dim_act_inf = """
    INSERT INTO DW.dim_act_inf(
    dly_atv_mnt_itm,
    rls_wtd_per_day,
    ste_vwd_per_day,
    ads_vwd_per_day,
    ads_clc_per_day,
    tme_on_fed_per_day,
    tme_on_exp_per_day,
    tme_on_rls_per_day,
    fls_cnt,
    flg_cnt,
    ctt_typ_pce,
    prd_ctt_thm,
    usr_egm_scr)
            
    SELECT
        daily_active_minutes_instagram, reels_watched_per_day, 
        stories_viewed_per_day, ads_viewed_per_day, ads_clicked_per_day, 
        time_on_feed_per_day, time_on_explore_per_day, 
        time_on_reels_per_day, followers_count, following_count, 
        content_type_preference, preferred_content_theme, 
        user_engagement_score
        FROM silver.user;
            """
try:
    cur.execute(insert_dim_act_inf)
    conn.commit()
except Exception:
    # A failed statement leaves the connection in an aborted transaction;
    # roll back so later cells can still use it, then surface the error.
    conn.rollback()
    raise
print("Valores inseridos na tabela dim_act_inf")

Valores inseridos na tabela dim_act_inf


## 9. Populando a tabela fato 

In [10]:
# Discards whatever transaction is currently open on `conn`.
# NOTE(review): this cell's execution count (In [10]) is lower than that of
# the cells around it, so it was presumably run by hand to recover from a
# failed statement. It should not be needed on a clean top-to-bottom run —
# confirm and consider removing it.
conn.rollback()

In [16]:
# Load the fact table: one row per row of silver.user, carrying user_id, age,
# gender and country plus the three dimension keys.
# The dimension keys are not looked up; each is generated as ROW_NUMBER(),
# i.e. 1..N in whatever order this query scans silver.user.
# NOTE(review): OVER() has no ORDER BY, so the numbering order is unspecified.
# The keys only point at the right dimension rows if the dimension inserts
# scanned silver.user in the same order and their generated keys started at 1
# — confirm, or add a deterministic ordering to all four statements together.
# The SELECT aliases (skr_act_inf, skr_psn_ifn, ...) are misspelled but have no
# effect: INSERT ... SELECT matches columns by position, not by alias.
insert_fat_usr = """
    INSERT INTO DW.fat_usr (
    srk_act_inf, 
    srk_hlt_inf, 
    srk_psn_inf,
    srk_usr, 
    age, 
    gdr, 
    cty
)
SELECT 
    ROW_NUMBER() OVER() as skr_act_inf,
    ROW_NUMBER() OVER() as skr_hlt_inf,
    ROW_NUMBER() OVER() as skr_psn_ifn,
    user_id, 
    age, 
    gender, 
    country
FROM silver.user;"""
try:
    cur.execute(insert_fat_usr)
    conn.commit()
except Exception:
    # A failed statement leaves the connection in an aborted transaction;
    # roll back so later cells can still use it, then surface the error.
    conn.rollback()
    raise
print("Valores inseridos na tabela fat_usr")

Valores inseridos na tabela fat_usr
