In [None]:
# ==================================================================================================================
# Import bibliotecas
from monitoramento_qualidadde_dados import generate_completeness_report, generate_uniqueness_report, generate_summary_report
from extracao_dados import extract_postgres_data, extract_csv_data_from_drive, transform_data, load_data_to_postgres
from datetime import datetime
import credenciais
import pandas as pd
import os

# ==================================================================================================================
# Reading the data
# Tables read from the SQL database, listed in extraction order
postgres_table_names = (
    "customer_customer_demo",
    "customer_demographics",
    "employee_territories",
    "orders",
    "customers",
    "products",
    "shippers",
    "suppliers",
    "territories",
    "us_states",
    "categories",
    "region",
    "employees",
)

# One extract_postgres_data call per table; each result is bound to a
# variable named after its table (same order as postgres_table_names).
(
    customer_customer_demo,
    customer_demographics,
    employee_territories,
    orders,
    customers,
    products,
    shippers,
    suppliers,
    territories,
    us_states,
    categories,
    region,
    employees,
) = [extract_postgres_data(name) for name in postgres_table_names]

# Extracting data from Google Drive
# The CSV location comes from the CSV_URL environment variable
# (a KeyError is raised if it is not set).
drive_url = os.environ['CSV_URL']
order_details = extract_csv_data_from_drive(drive_url)

# ==================================================================================================================
# Data treatment
# Date-column arguments handed to transform_data
no_date = ''  # passed for tables that have no date columns listed here
date_orders = ['order_date', 'required_date', 'shipped_date']
date_employees = ['birth_date', 'hire_date']

# Run transform_data over every extracted table, in the order listed, and
# rebind each variable to its transformed result.
(
    customer_customer_demo,
    customer_demographics,
    employee_territories,
    orders,
    customers,
    products,
    shippers,
    suppliers,
    territories,
    us_states,
    categories,
    region,
    employees,
    order_details,
) = [
    transform_data(frame, date_columns)
    for frame, date_columns in (
        (customer_customer_demo, no_date),
        (customer_demographics, no_date),
        (employee_territories, no_date),
        (orders, date_orders),
        (customers, no_date),
        (products, no_date),
        (shippers, no_date),
        (suppliers, no_date),
        (territories, no_date),
        (us_states, no_date),
        (categories, no_date),
        (region, no_date),
        (employees, date_employees),
        (order_details, no_date),
    )
]

# Detailed treatment of the order_details CSV for the incremental insert:
# cast each column to its target data type, one column at a time.
order_details_dtypes = {
    'order_id': str,
    'product_id': 'int16',
    'unit_price': float,
    'quantity': 'int16',
    'discount': float,
}
for column_name, target_dtype in order_details_dtypes.items():
    order_details[column_name] = order_details[column_name].astype(target_dtype)

# ==================================================================================================================
# Sending the data to the Data Warehouse
# Dimension tables: target table name -> DataFrame ('data'), unique key
# columns ('keys') and 'column' (always None for dimensions).
# Insertion order is the order in which the tables are loaded.
# NOTE(review): dim_employee_territories is keyed on territory_id only, while
# the other association table (dim_customer_customer_demo) uses a composite
# key - confirm this is intended.
dim_tables = {
    target_table: {'data': frame, 'keys': key_columns, 'column': None}
    for target_table, frame, key_columns in (
        ('dim_shippers', shippers, ['shipper_id']),
        ('dim_us_states', us_states, ['state_id']),
        ('dim_categories', categories, ['category_id']),
        ('dim_suppliers', suppliers, ['supplier_id']),
        ('dim_customers', customers, ['customer_id']),
        ('dim_employees', employees, ['employee_id']),
        ('dim_region', region, ['region_id']),
        ('dim_customer_demographics', customer_demographics, ['customer_type_id']),
        ('dim_territories', territories, ['territory_id']),
        ('dim_customer_customer_demo', customer_customer_demo, ['customer_id', 'customer_type_id']),
        ('dim_employee_territories', employee_territories, ['territory_id']),
        ('dim_products', products, ['product_id']),
    )
}

# Fact tables, same entry layout as the dimension tables.
# fact_order_details has no unique keys; its 'column' list is forwarded as the
# extra argument of load_data_to_postgres (presumably the columns used for the
# incremental insert - confirm against load_data_to_postgres).
fact_tables = {
    'fact_orders': dict(data=orders, keys=['order_id'], column=None),
    'fact_order_details': dict(
        data=order_details,
        keys=None,
        column=['order_id', 'product_id', 'unit_price', 'quantity', 'discount'],
    ),
}

# Accumulators for the monitoring and data-quality reports
completeness_reports = []  # completeness reports, one per table
uniqueness_reports = []  # uniqueness reports, one per table that has unique keys
summary_reports = []  # data summary reports, one per table


def _collect_quality_reports(df, table_name, unique_keys):
    """Generate the quality reports for one table and store them.

    Appends the completeness and summary reports of ``df`` to the module-level
    accumulator lists. The uniqueness report is generated only when
    ``unique_keys`` is defined (non-empty), since it is computed over those
    key columns.

    Args:
        df: Table data that was loaded into the Data Warehouse.
        table_name: Target table name, passed through to the report generators.
        unique_keys: List of key column names, or None when the table has none.
    """
    completeness_reports.append(generate_completeness_report(df, table_name))

    if unique_keys:
        uniqueness_reports.append(
            generate_uniqueness_report(df, table_name, unique_keys)
        )

    summary_reports.append(generate_summary_report(df, table_name))


# Load the dimension tables into the Data Warehouse
for table_name, table_info in dim_tables.items():
    df = table_info['data']
    unique_keys = table_info['keys']
    load_data_to_postgres(df, table_name, unique_keys)
    _collect_quality_reports(df, table_name, unique_keys)

# Load the fact tables into the Data Warehouse
for table_name, table_info in fact_tables.items():
    df = table_info['data']
    unique_keys = table_info['keys']
    column_key = table_info['column']
    # Fact tables additionally forward their 'column' entry to the loader.
    load_data_to_postgres(df, table_name, unique_keys, column_key)
    _collect_quality_reports(df, table_name, unique_keys)

# ==================================================================================================================
# Generation of the monitoring and data-quality reports

# Today's date (day_month_year), used as the suffix of the report file names
current_date = datetime.now().strftime("%d_%m_%Y")

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists; otherwise the run fails with OSError only after
# all the data has already been loaded.
reports_dir = 'relatorios'
os.makedirs(reports_dir, exist_ok=True)

# Combine the per-table reports of each kind into a single DataFrame
completeness_reports_df = pd.concat(completeness_reports, ignore_index=True)
uniqueness_reports_df = pd.concat(uniqueness_reports, ignore_index=True)
summary_reports_df = pd.concat(summary_reports, ignore_index=True)

# Save each combined report to its own dated CSV file
completeness_reports_df.to_csv(f'{reports_dir}/completeness_report_{current_date}.csv', index=False)
uniqueness_reports_df.to_csv(f'{reports_dir}/uniqueness_report_{current_date}.csv', index=False)
summary_reports_df.to_csv(f'{reports_dir}/summary_report_{current_date}.csv', index=False)