# AWS Glue Studio Notebook
##### You are now running an AWS Glue Studio notebook. To start using your notebook, you need to start an AWS Glue Interactive Session.


#### Optional: Run this cell to see available notebook commands ("magics").


In [None]:
%help

####  Run this cell to set up and start your interactive session.


In [None]:
%idle_timeout 2880
%glue_version 4.0
%worker_type G.1X
%number_of_workers 2

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
  
# Reuse the SparkContext of the running session, or create one if none exists.
sc = SparkContext.getOrCreate()

# GlueContext is built on top of that SparkContext; later cells use it to
# create the S3 sink and to read the table back from the Data Catalog.
glueContext = GlueContext(sc)

# Job object bound to this GlueContext.
job = Job(glueContext)

# SparkSession exposed by the GlueContext, used for DataFrame and SQL work.
spark = glueContext.spark_session

In [15]:
import boto3
from pyspark.sql.functions import rand
from awsglue.dynamicframe import DynamicFrame

# Look up the caller's AWS account ID; the bucket name is derived from it.
sts_client = boto3.client('sts')
account_id = sts_client.get_caller_identity()["Account"]

# Catalog coordinates and S3 location of the demo table.
database = "default"
table_name = "pushdown_recipe_table"
s3_bucket = f"glue-recipes-{account_id}"
s3_path = f"s3://{s3_bucket}/pushdown_recipe_table/"

# Sample data: 2 ** 8 = 256 rows with an "id" column plus a random "value".
df = spark.range(2 ** 8).withColumn("value", rand())

# S3 sink partitioned by "id" that also registers the table in the catalog.
# NOTE(review): the recorded row count in this notebook (1024) is four times
# the 256 rows generated here, which suggests every run adds files under
# s3_path instead of replacing them -- confirm before re-running.
sink = glueContext.getSink(
    connection_type="s3",
    path=s3_path,
    enableUpdateCatalog=True,
    updateBehavior="UPDATE_IN_DATABASE",
    partitionKeys=["id"],
)
sink.setFormat("avro")
sink.setCatalogInfo(catalogDatabase=database, catalogTableName=table_name)

# Convert the DataFrame to a DynamicFrame and write it through the sink;
# the value returned by writeFrame is what the cell displays.
dyf = DynamicFrame.fromDF(df, glueContext)
sink.writeFrame(dyf)

<awsglue.dynamicframe.DynamicFrame object at 0x7ff9290ab670>


In [16]:
# Total number of rows: read the whole table through the Data Catalog,
# without any partition filter, and count it.
whole_table = glueContext.create_dynamic_frame.from_catalog(
    database=database, table_name=table_name)
whole_table.count()

1024


In [17]:
# Count only the rows stored in the partitions id = 3, 6 and 9.
# Two filters are combined:
#   * push_down_predicate restricts the read to the listed partition values;
#   * catalogPartitionPredicate (passed via additional_options) narrows the
#     partitions requested from the Data Catalog to id < 10 -- per the AWS
#     Glue documentation this filter is evaluated on the catalog side.
partition_filter = "id in (3, 6, 9)"
catalog_options = {"catalogPartitionPredicate": "id < 10"}

selected_partitions = glueContext.create_dynamic_frame.from_catalog(
    database=database,
    table_name=table_name,
    push_down_predicate=partition_filter,
    additional_options=catalog_options,
)
selected_partitions.count()

12


In [18]:
# Same count expressed in Spark SQL: the partition filter is written as a
# plain WHERE clause and the pushdown is left to Spark (no explicit
# predicate options are passed).
query = f"SELECT * FROM {database}.{table_name} WHERE id IN (3, 6, 9)"
spark.sql(query).count()

12


In [19]:
# Clean up everything this recipe created so that it can be re-run from a
# clean state.
#
# Deleting the catalog table alone leaves the Avro files under s3_path in
# place, so the next run of the write cell would add a second copy of every
# row (the recorded count of 1024 rows for a 256-row DataFrame shows this
# accumulation). Purge the data files first; retentionPeriod is given in
# hours and defaults to 168, so it must be 0 to remove freshly written files.
glueContext.purge_s3_path(s3_path, options={"retentionPeriod": 0})

# Remove the table definition from the Glue Data Catalog. Kept as the last
# expression so the cell still displays the API response.
boto3.client('glue').delete_table(
    DatabaseName=database, Name=table_name)

{'ResponseMetadata': {'RequestId': 'ea16bb4f-c0d4-476b-9cbb-351d37e0b622', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Thu, 16 May 2024 13:05:26 GMT', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'connection': 'keep-alive', 'x-amzn-requestid': 'ea16bb4f-c0d4-476b-9cbb-351d37e0b622'}, 'RetryAttempts': 0}}
