# AWS Glue Studio Notebook
##### You are now running an AWS Glue Studio notebook. To start using your notebook, you need to start an AWS Glue Interactive Session.


#### Optional: Run this cell to see available notebook commands ("magics").


In [None]:
%help

####  Run this cell to set up and start your interactive session.


In [1]:
%idle_timeout 2880
%glue_version 5.0
%worker_type G.1X
%number_of_workers 5

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
  
# Bootstrap the Spark / Glue handles used by the rest of the notebook.
# getOrCreate() returns the already-running SparkContext when there is one.
sc = SparkContext.getOrCreate()
# GlueContext wraps the SparkContext; the catalog reads and the JDBC writes
# further down in this notebook go through it.
glueContext = GlueContext(sc)
# SparkSession exposed by the GlueContext.
spark = glueContext.spark_session
# Job handle bound to this GlueContext.
# NOTE(review): `job` is not referenced again in the visible cells — confirm
# whether it is still needed.
job = Job(glueContext)

Welcome to the Glue Interactive Sessions Kernel
For more information on available magic commands, please type %help in any new cell.

Please view our Getting Started page to access the most up-to-date information on the Interactive Sessions kernel: https://docs.aws.amazon.com/glue/latest/dg/interactive-sessions.html
Installed kernel version: 1.0.8 
Current idle_timeout is None minutes.
idle_timeout has been set to 2880 minutes.
Setting Glue version to: 5.0
Previous worker type: None
Setting new worker type to: G.1X
Previous number of workers: None
Setting new number of workers to: 5
Trying to create a Glue session for the kernel.
Session Type: glueetl
Worker Type: G.1X
Number of Workers: 5
Idle Timeout: 2880
Session ID: 14971ea3-7331-41c0-9dbf-6faf5c3359a8
Applying the following default arguments:
--glue_kernel_version 1.0.8
--enable-glue-datacatalog true
Waiting for session 14971ea3-7331-41c0-9dbf-6faf5c3359a8 to get into ready status...
Session 14971ea3-7331-41c0-9dbf-6faf5c3359a8 ha

In [None]:
# Copy three Glue Data Catalog tables into Redshift, one JDBC write per table.
#
# NOTE(review): re-running this cell issues the same writes again; presumably
# that appends the rows a second time — confirm whether the target tables need
# to be truncated first (e.g. via a "preactions" connection option).

# Names that were previously repeated as bare literals in every call.
CATALOG_DATABASE = "ecommerce_central_db"
REDSHIFT_DATABASE = "ecommerce"


def _read_catalog_table(table_name):
    """Return catalog table `table_name` from CATALOG_DATABASE as a DynamicFrame."""
    return glueContext.create_dynamic_frame.from_catalog(
        database=CATALOG_DATABASE,
        table_name=table_name,
    )


# 1. Load the data from the catalog.
products = _read_catalog_table("processed_products")
purchases = _read_catalog_table("processed_purchases")
# Consistent name (no hyphen).
purchase_relations = _read_catalog_table("purchase_relations")

# 2. Settings shared by every Redshift write.
redshift_config = {
    "catalog_connection": "Redshift connection",
    "redshift_tmp_dir": "s3://ecommerce-data-raw-dataengineer/temp/"
}

# 3. Load order matters: the two base tables go first and the relations table
#    must be written after them. Each entry is (Redshift table name, frame).
load_plan = [
    ("processed_products", products),
    ("processed_purchases", purchases),
    ("purchase_relations", purchase_relations),
]

try:
    for target_table, frame in load_plan:
        # Progress line: with a single write call inside a loop, the traceback
        # line number no longer identifies the failing table, so name it here.
        print(f"Cargando tabla: {target_table}")
        glueContext.write_dynamic_frame.from_jdbc_conf(
            frame=frame,
            connection_options={
                "dbtable": target_table,
                "database": REDSHIFT_DATABASE,
            },
            **redshift_config,
        )

    print("Carga completada exitosamente")

except Exception as e:
    # Report the failure in the cell output, then re-raise so the cell still
    # fails instead of silently continuing.
    print(f"Error durante la carga: {e}")
    raise

Carga completada exitosamente
