#### [Boto3 Documentation: GlueDataBrew](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/databrew.html)

In [None]:
import boto3, botocore
from botocore.exceptions import ClientError
from dotenv import load_dotenv
import os, time, json, shutil, subprocess, zipfile
from datetime import date
from pathlib import Path

from misc import load_from_yaml, save_to_yaml
import s3, iam, lf, glue, lambdafn, rds, dynamodb as ddb, eventbridge as event

load_dotenv(".env")

In [None]:
# Account and network settings, all read from environment variables.
ACCOUNT_ID        = os.environ['AWS_ACCOUNT_ID_ROOT']
REGION            = os.environ['AWS_DEFAULT_REGION']
VPC_ID            = os.environ['AWS_DEFAULT_VPC']
SECURITY_GROUP_ID = os.environ['AWS_DEFAULT_SG_ID']
# AWS_DEFAULT_SUBNET_IDS holds a colon-separated list of subnet ids.
SUBNET_IDS        = os.environ["AWS_DEFAULT_SUBNET_IDS"].split(":")
# The first subnet is used wherever a single subnet id is required.
SUBNET_ID         = SUBNET_IDS[0]
print(SUBNET_ID)

In [15]:
# One boto3 handle per AWS service used in this notebook.
# Only the EC2 handles pin the region explicitly; the others rely on boto3's
# default region resolution.

# Identity and access
sts_client = boto3.client('sts')
iam_client = boto3.client('iam')
lakeformation_client = boto3.client('lakeformation')

# Storage and databases
s3_client = boto3.client('s3')
rds_client = boto3.client('rds')
dynamodb_client = boto3.client('dynamodb')

# Networking
ec2_client = boto3.client('ec2', region_name=REGION)
ec2_resource = boto3.resource('ec2', region_name=REGION)

# Compute, events and data preparation
lambda_client = boto3.client('lambda')
events_client = boto3.client('events')
glue_client = boto3.client('glue')
databrew_client = boto3.client('databrew')

#### Create IAM Role

- Create the AWS Glue role named by `GLUE_ROLE_NAME`.
- Attach the AWS managed policies listed in `policy_arns` to the role (`AdministratorAccess` and `PowerUserAccess` are left commented out).

In [46]:
# Names of the IAM roles created below: one each for Glue, DataBrew and Lambda.
GLUE_ROLE_NAME = 'glue-pipeline-role'
DATABREW_ROLE_NAME = 'databrew-pipeline-role'
LFN_ROLE_NAME = 'lfn-pipeline-role'

In [17]:
# AWS managed policies attached to the pipeline roles.
# The Glue service policy comes first; the remaining entries are general
# data-access policies.
_GLUE_SERVICE_POLICY = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole"
_DATA_ACCESS_POLICIES = (
    "arn:aws:iam::aws:policy/CloudWatchFullAccess",
    "arn:aws:iam::aws:policy/AmazonS3FullAccess",
    "arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess",
)
# Broader policies, intentionally left disabled:
# "arn:aws:iam::aws:policy/AdministratorAccess"
# "arn:aws:iam::aws:policy/PowerUserAccess"
policy_arns = [_GLUE_SERVICE_POLICY, *_DATA_ACCESS_POLICIES]

In [18]:
# Trust policy that lets the AWS Glue service assume the role.
assume_role_policy_doc = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "glue.amazonaws.com"},
            "Action": "sts:AssumeRole",
        }
    ],
}

# Create the role and keep its ARN for the Glue and Lake Formation calls below.
_glue_role_response = iam_client.create_role(
    RoleName=GLUE_ROLE_NAME,
    Description="Glue Service Role",
    AssumeRolePolicyDocument=json.dumps(assume_role_policy_doc),
)
GLUE_ROLE_ARN = _glue_role_response['Role']['Arn']

In [None]:
# Attach each AWS managed policy in `policy_arns` to the Glue role.
# A plain loop is used because the calls are made purely for their side effect;
# a list comprehension would only build a throwaway list of API responses.
for parn in policy_arns:
    iam_client.attach_role_policy(RoleName=GLUE_ROLE_NAME, PolicyArn=parn)

In [None]:
# Trust policy that lets the AWS Glue DataBrew service assume the role.
assume_role_policy_document = {
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "databrew.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}

# Create the IAM role with the assume role policy document
DATABREW_ROLE_ARN = iam_client.create_role(
    RoleName=DATABREW_ROLE_NAME,
    AssumeRolePolicyDocument=json.dumps(assume_role_policy_document)
)['Role']['Arn']


# Managed policies for the DataBrew role. They are kept in their own variable so
# the `policy_arns` list defined for the Glue role is not overwritten.
databrew_policy_arns = [
    'arn:aws:iam::aws:policy/service-role/AWSGlueDataBrewServiceRole',
    'arn:aws:iam::aws:policy/AwsGlueDataBrewFullAccessPolicy',
]

for policy_arn in databrew_policy_arns:
    iam_client.attach_role_policy(RoleName=DATABREW_ROLE_NAME, PolicyArn=policy_arn)

In [64]:
# Inline policy for the DataBrew role: VPC/network describe calls, S3 read
# actions, and all Glue and DataBrew actions, on every resource.
_network_describe_actions = [
    "ec2:DescribeSubnets",
    "ec2:DescribeVpcEndpoints",
    "ec2:DescribeRouteTables",
    "ec2:DescribeVpcs",
]
_s3_read_actions = ["s3:GetObject", "s3:ListBucket"]
_service_wide_actions = ["glue:*", "databrew:*"]

policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": _network_describe_actions + _s3_read_actions + _service_wide_actions,
            "Resource": "*",
        }
    ],
}

# Store the document as an inline policy on the DataBrew role.
response = iam_client.put_role_policy(
    PolicyDocument=json.dumps(policy_document),
    PolicyName='DescribeVpcNetwarkPolicy',
    RoleName=DATABREW_ROLE_NAME,
)

In [22]:
# Trust policy that lets the AWS Lambda service assume the role.
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "lambda.amazonaws.com"},
            "Action": "sts:AssumeRole",
        }
    ],
}

# Create the Lambda role and keep its ARN.
_lfn_role_response = iam_client.create_role(
    AssumeRolePolicyDocument=json.dumps(assume_role_policy_document),
    RoleName=LFN_ROLE_NAME,
)
LFN_ROLE_ARN = _lfn_role_response['Role']['Arn']


In [None]:
# Attach AWS managed policies to the Lambda role.
# The ARNs are listed explicitly rather than sliced out of `policy_arns`, so the
# result does not depend on whatever that shared variable currently holds in the
# notebook session.
lfn_policy_arns = [
    "arn:aws:iam::aws:policy/CloudWatchFullAccess",
    "arn:aws:iam::aws:policy/AmazonS3FullAccess",
    "arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess",
]
for parn in lfn_policy_arns:
    iam_client.attach_role_policy(RoleName=LFN_ROLE_NAME, PolicyArn=parn)

In [None]:
# #### Create IAM Role Policy (SQS, S3, Logs Permissions)
# policy_document = {
#     "Version": "2012-10-17",
#     "Statement": [
#         {
#             "Effect": "Allow",
#             "Action": [
#                 "s3:*",
#                 "s3-object-lambda:*"
#             ],
#             "Resource": "*"
#         },
#         {
#             "Effect": "Allow",
#             "Action": [
#                 "logs:*"
#             ],
#             "Resource": "*"
#         }
#     ]
# }

# policy_name = "s3_logs_policies"

# # Attach the inline policy to the IAM role
# iam_client.put_role_policy(
#     RoleName=LFN_ROLE_NAME,
#     PolicyName=policy_name,
#     PolicyDocument=json.dumps(policy_document)
# )
# print(f"Policy {policy_name} attached to role {LFN_ROLE_NAME}")

#### Create S3 Bucket and Folders

In [25]:
# Bucket names: the data lake bucket, and the bucket that holds Glue temporary
# files and Spark history logs.
S3_BUCKET_DATALAKE = "httx-datalake-bkt"
S3_BUCKET_GLUE_ASSETS = "httx-glue-assets-bkt"

In [None]:
# Folder names handed to the project helper `s3.create_s3_bucket` for each bucket.
# NOTE(review): presumably the helper creates the bucket plus these key prefixes —
# confirm against s3.py.
folders1 = ['raw/employees', 'cleansed/employees']
folders2 = ['temporary', 'sparkHistoryLogs']
folders3 = ['dq', 'output', 'sales']

# Data lake bucket gets folders1 + folders3; the Glue assets bucket gets folders2.
s3.create_s3_bucket(S3_BUCKET_DATALAKE, folders1+folders3)
s3.create_s3_bucket(S3_BUCKET_GLUE_ASSETS, folders2)

In [None]:
# Print the key of every object returned by one listing call on the data lake bucket.
response = s3_client.list_objects_v2(Bucket=S3_BUCKET_DATALAKE)
# 'Contents' is absent from the response when nothing is listed.
listed_objects = response.get('Contents', [])
for obj in listed_objects:
    object_key = obj["Key"]
    print(f'Object: {object_key}')

#### Create RDS Databases & Their Resources

In [29]:
# Database name, credentials (read from the environment) and the RDS subnet group name.
# NOTE(review): USERNAME and PASSWORD are very generic variable names (USERNAME is
# already set by the OS on Windows) — confirm the values really come from .env.
DB_NAME = 'EmployeeDB'
DB_USERNAME = os.environ['USERNAME']
DB_PASSWORD = os.environ['PASSWORD']
SUBNET_GROUP_NAME = 'httx-rds-subnet-group'

In [None]:
## Create the RDS subnet group from all configured subnet ids
response = rds_client.create_db_subnet_group(
    DBSubnetGroupName=SUBNET_GROUP_NAME,
    DBSubnetGroupDescription='Subnet group for RDS instance',
    SubnetIds=SUBNET_IDS
)
print(response)

In [32]:
def _rds_instance_spec(identifier, db_name, engine, engine_version, port):
    """Return the keyword arguments describing one RDS instance.

    Only the identifier, database name, engine, engine version and port differ
    between instances; credentials, sizing, placement, tags and networking are
    the same for all of them. A fresh dict (with fresh nested lists) is built
    on every call.
    """
    return {
        'db_instance_identifier': identifier,
        'db_name': db_name,
        'db_username': DB_USERNAME,
        'db_password': DB_PASSWORD,
        'engine': engine,
        'port': port,
        'engine_version': engine_version,
        'db_instance_class': 'db.t3.micro',
        'allocated_storage': 20,
        'availability_zone': 'us-east-1a',
        'tags': [{'Key': 'Project', 'Value': 'glue-rds-Crawler'}],
        'security_group_ids': [SECURITY_GROUP_ID],
        'db_subnet_group_name': SUBNET_GROUP_NAME,
    }


# MySQL, PostgreSQL and SQL Server Express definitions, in that order.
# The SQL Server entry uses an empty database name.
instances = [
    _rds_instance_spec('httx-rds-mysql', DB_NAME, 'mysql', '8.0.32', 3306),
    _rds_instance_spec('httx-rds-postgresql', DB_NAME, 'postgres', '14.13', 5432),
    _rds_instance_spec('httx-rds-mssql', '', 'sqlserver-ex', '15.00.4153.1.v1', 1433),
]

In [None]:
# Create only the first definition (MySQL); the dict is expanded into keyword arguments.
rds.create_rds_instance(**instances[0])   # 'httx-rds-mysql'

In [None]:
# Look up the MySQL instance and report its endpoint once it is available.
response = rds_client.describe_db_instances(
    DBInstanceIdentifier=instances[0]['db_instance_identifier']
)
db_instances = response['DBInstances']

if not db_instances:
    print("No RDS instance found.")
else:
    instance = db_instances[0]
    status = instance['DBInstanceStatus']

    if status != 'available':
        # The endpoint is only read once the instance reports 'available'.
        print(f"RDS instance is in {status} state, NO ENDPOINT AVAILABLE YET!!")
    else:
        mysql_endpoint = instance['Endpoint']['Address']
        print(f"RDS Endpoint: {mysql_endpoint}")

-   `Gateway` endpoints serve as a target for a route in your route table for traffic destined for the service.

In [None]:
# VPC endpoint parameters
VPC_ENDPOINT_TAG = 'rds-vpc-endpoint' + date.today().strftime('%Y%m%d')  # name tag suffixed with today's date
VPC_ENDPOINT_SERVICE_NAME = f"com.amazonaws.{REGION}.s3"  # the S3 service in this region
SECURITY_GROUP_IDS = [SECURITY_GROUP_ID]  # NOTE(review): not passed to create_vpc_endpoint below
ROUTE_TABLE_IDS = ['rtb-0ec4311296ec952f8']  # NOTE(review): hard-coded route table id — confirm it belongs to VPC_ID

# Create a Gateway endpoint for S3 and keep its id for tagging and cleanup
VPC_ENDPOINT_ID = ec2_client.create_vpc_endpoint(
    VpcEndpointType='Gateway',
    VpcId=VPC_ID,
    ServiceName=VPC_ENDPOINT_SERVICE_NAME,
    RouteTableIds=ROUTE_TABLE_IDS,
    # SubnetIds=sg_id,
    # SecurityGroupIds=security_group_ids,
    PrivateDnsEnabled=False  # private DNS is left disabled for this endpoint
)['VpcEndpoint']['VpcEndpointId']

In [None]:
# Find the endpoints for the S3 service in this VPC and print the first endpoint id.
vpc_endpoints = ec2_client.describe_vpc_endpoints(
    Filters=[
        {'Name': 'vpc-id', 'Values': [VPC_ID]},
        {'Name': 'service-name', 'Values': [VPC_ENDPOINT_SERVICE_NAME]}
    ]
)
# Guard against an empty result: indexing [0] directly would raise IndexError
# when no matching endpoint exists (for example after cleanup).
if vpc_endpoints['VpcEndpoints']:
    print(vpc_endpoints['VpcEndpoints'][0]['VpcEndpointId'])
else:
    print(f"No VPC endpoint found for {VPC_ENDPOINT_SERVICE_NAME} in {VPC_ID}.")

In [None]:
# Give the endpoint a Name tag so it can be recognised in the console.
ec2_client.create_tags(Resources=[VPC_ENDPOINT_ID],Tags=[{'Key': 'Name', 'Value': VPC_ENDPOINT_TAG}])

###### Load sql data from Local Machine to RDS Instance

-   Load into MySQL (TESTED):

    -   `$ mysql -h <rds-endpoint> -P <port> -u <username> -p <dbname>` -> Connect via the command line if needed
    -   `$ mysql -h {mysql_endpoint} -P {mysql_port} -u httxadmin -p'{DB_PASSWORD}' interview_questions < /Users/am/mydocs/Software_Development/Web_Development/aws/aws_rds/interview_questions.sql`

In [None]:
# ! mysql -h {mysql_endpoint} -P {instances[0]['port']} -u {DB_USERNAME} -p'{DB_PASSWORD}' {DB_NAME} < ./glue_etl_pipelines/glue_etl_pipeline/mysql_employees.sql

#### Create Glue Catalog Database

In [43]:
# Name of the Glue Data Catalog database used by the crawlers, DataBrew and Lake Formation grants.
CATALOG_DB_NAME = 'httx-catalog-db'

In [None]:
## Register the Glue catalog database; its location is the root of the data lake bucket.
DATALAKE_LOCATION_URI = f"s3://{S3_BUCKET_DATALAKE}"

_database_input = {
    'Name': CATALOG_DB_NAME,
    'Description': 'A Multi-purpose Database',
    'LocationUri': DATALAKE_LOCATION_URI,
}
create_database_response = glue_client.create_database(
    DatabaseInput=_database_input,
    CatalogId=ACCOUNT_ID,
)
print(create_database_response)

- Grant `CREATE_TABLE` and `DROP` permissions on the catalog database to the Glue role (`GLUE_ROLE_NAME`).

In [45]:
# ARN of the Glue role, used as the Lake Formation principal
lf_principle = GLUE_ROLE_ARN

# Grant the 'CREATE_TABLE' and 'DROP' Lake Formation permissions on the catalog
# database to the Glue role, without grant option.
response = lakeformation_client.grant_permissions(
    Principal={
        'DataLakePrincipalIdentifier': lf_principle
    },
    Resource={
        'Database': {
            'Name': CATALOG_DB_NAME
        }
    },
    Permissions=['CREATE_TABLE', 'DROP'],
    PermissionsWithGrantOption=[]
)

- Grant `ALL` permissions on every table of the catalog database (`TableWildcard`) to the DataBrew role (`DATABREW_ROLE_NAME`).

In [47]:
# ARN of the DataBrew role, used as the Lake Formation principal
lf_principle = DATABREW_ROLE_ARN
# Grant 'ALL' on every table of the catalog database (TableWildcard), without grant option.
response = lakeformation_client.grant_permissions(
    Principal={
        'DataLakePrincipalIdentifier': lf_principle
    },
    Resource={
        'Table': {
            'DatabaseName': f"{CATALOG_DB_NAME}",
            'TableWildcard': {}
        }
    },
    Permissions=['ALL'],
    PermissionsWithGrantOption=[]
)

In [None]:
# lf.grant_table_level_permissions(GLUE_ROLE_ARN, CATALOG_DB_NAME, 'employees', ['DROP'])

In [None]:
# glue_client.update_database(
#     CatalogId=ACCOUNT_ID,
#     Name=CATALOG_DB_NAME,
#     DatabaseInput={
#         'Name': CATALOG_DB_NAME,
#         'UseOnlyIamAccessControl': False
#     }
# )
# lf.register_s3_path_as_data_lake_location(LFDB_LOCATION_URI)

#### Create Glue Resources

**Crawler-0** (Sources): Wait for the RDS instance to reach the AVAILABLE state.

In [None]:
# Names used for the Glue connection and crawler that read from the MySQL instance.
MYSQL_CONNECTION_NAME = 'mysql_connection'
MYSQL_CRAWLER_NAME = "httx-mysqlcrawler"
SOURCE_TABLE_PREFIX = "src_"

# All connection details are taken from the first (MySQL) instance definition.
_mysql_instance = instances[0]
mysql_endpoint = rds.get_rds_endpoint(_mysql_instance['db_instance_identifier'])
mysql_connection_url = f"jdbc:mysql://{mysql_endpoint}:{_mysql_instance['port']}/{_mysql_instance['db_name']}"
RDS_CRAWLER_TARGET_PATH = f"{_mysql_instance['db_name']}/Employee"

In [49]:
# S3 locations inside the Glue assets bucket for temporary files and Spark event logs.
TEM_DIR = f"s3://{S3_BUCKET_GLUE_ASSETS}/temporary/"
SPARK_EVENT_LOG_PATH = f"s3://{S3_BUCKET_GLUE_ASSETS}/sparkHistoryLogs/"

In [None]:
# Create the Glue connection to MySQL through the project helper, passing the
# connection name, JDBC URL, credentials and network settings positionally.
# NOTE(review): argument order must match the helper's signature in glue.py — confirm.
glue.create_glue_connection(
    MYSQL_CONNECTION_NAME, 
    mysql_connection_url, 
    DB_USERNAME, 
    DB_PASSWORD, 
    SECURITY_GROUP_ID, 
    SUBNET_ID, 
    REGION
)

In [None]:
# Create the JDBC crawler through the project helper; it uses the MySQL connection,
# the Glue role, the catalog database and the "<db_name>/Employee" target path.
glue.create_glue_jdbc_crawler(
    MYSQL_CRAWLER_NAME, 
    MYSQL_CONNECTION_NAME, 
    GLUE_ROLE_ARN, 
    CATALOG_DB_NAME, 
    RDS_CRAWLER_TARGET_PATH, 
    table_prefix=SOURCE_TABLE_PREFIX
)

In [None]:
# Start the MySQL crawler run.
glue_client.start_crawler(Name=MYSQL_CRAWLER_NAME)

In [None]:
# lf.grant_table_level_permissions(
#     GLUE_ROLE_ARN, 
#     CATALOG_DB_NAME, 
#     f"{SOURCE_TABLE_PREFIX}{DB_NAME}_employee", 
#     ['SELECT']
# )

### [AWS Tutorials - Working with AWS Glue DataBrew](https://www.youtube.com/watch?v=rHRppriCGvg&list=PLO95rE9ahzRsdzmZ_ZT-3uOn1Nh2eEpWB&index=22)

- [lab](https://aws-dojo.com/ws32/labs/)

- Enable Data API on RDS Instance
- To enable the Data API for an AWS RDS MySQL instance, you need to use boto3 to modify the instance settings. The Data API is a feature specifically available for Amazon Aurora Serverless v1 clusters. If you're using MySQL on a standard RDS instance, the Data API is not supported.
- The primary purpose of enabling Data API is to allow developers to run SQL queries on Amazon Aurora Serverless v1 databases without needing a persistent database connection. Instead, it uses HTTPS requests via an API endpoint, enabling serverless and lightweight interactions.
- `FAILED`: Failed with RDS data sources
- `SUCCEEDED`: Succeeded with S3 data sources

In [None]:
# Crawler over the "sales" folder of the data lake bucket.
S3_SALES_CRAWLER_NAME = "httx-s3_raw_crawler"
S3_CRAWLER_TARGET_PATH = f"s3://{S3_BUCKET_DATALAKE}/sales"

# Local CSV to upload and the object key it is stored under in the bucket.
sales_data_file = f"{os.environ['DATA']}/sales.csv"
object_name1 = "sales/sales.csv"

# Upload the file, create the crawler over the folder, then start it.
s3.upload_file_to_s3(S3_BUCKET_DATALAKE, sales_data_file, object_name1)
glue.create_glue_s3_crawler(
    S3_SALES_CRAWLER_NAME,
    GLUE_ROLE_ARN,
    CATALOG_DB_NAME,
    S3_CRAWLER_TARGET_PATH,
    table_prefix="raw_",
)
glue_client.start_crawler(Name=S3_SALES_CRAWLER_NAME)

In [None]:
# Names of the DataBrew resources created below and the catalog table they read.
TABLE_NAME = "raw_sales"  # catalog table used as the DataBrew dataset source
# TABLE_NAME = f"{SOURCE_TABLE_PREFIX}{DB_NAME}_employee"
DATABREW_PROJECT_NAME = 'httx-databrew-project'  # DataBrew project name
DATASET_NAME = 'databrew-sales-dataset'
RECIPE_NAME = 'httx-databrew-project-recipe'
DATABREW_JOB_NAME = 'httx-databrew-job'
OUTPUT_S3_LOCATION = f"s3://{S3_BUCKET_DATALAKE}/sales/sales" # S3 path where the results will be stored
print(OUTPUT_S3_LOCATION)

In [None]:
# SUCCESS
# Register the Glue Data Catalog table as a DataBrew dataset.
response = databrew_client.create_dataset(
    Name=DATASET_NAME,  # The name of the dataset in DataBrew
    Input={
        'DataCatalogInputDefinition': {  # The source is a Glue Data Catalog table
            'DatabaseName': CATALOG_DB_NAME,
            'TableName': TABLE_NAME
        }
    },
    # Optional settings, currently disabled:
    # Format='CSV',  # Output format, e.g., 'CSV', 'PARQUET'
    # FormatOptions={
    #     'Csv': {
    #         'Delimiter': ',',          # Field delimiter
    #         'HeaderRow': True          # Boolean indicating the first row is a header
    #     }
    # },
    # Tags={
    #     'Project': 'Sales_Dataset'  # Optional: Add tags for resource management
    # }
)
print(response)

In [None]:
# Create the DataBrew project that ties the dataset, the recipe and the role together.
response = databrew_client.create_project(
    Name=DATABREW_PROJECT_NAME,
    DatasetName=DATASET_NAME,  # The dataset you created earlier
    RoleArn=DATABREW_ROLE_ARN,  # IAM role that has permissions to access Glue Data Catalog and DataBrew
    RecipeName=RECIPE_NAME  # NOTE(review): boto3 lists RecipeName as a required parameter of create_project, so it cannot be left out
)
print(response)

In [None]:
# Start the project session
databrew_client.start_project_session(Name=DATABREW_PROJECT_NAME)

In [None]:
# Fetch the project description; the notebook displays it as the cell result.
databrew_client.describe_project(Name=DATABREW_PROJECT_NAME)

In [None]:
# The earlier attempt used Operation 'CREATE_COLUMN' with an 'Expression'
# parameter and was marked FAILED; that operation is not listed in the DataBrew
# recipe action reference. Concatenating columns is done with 'MERGE' instead.
# NOTE(review): first_name / last_name must exist in the dataset the recipe is
# applied to — confirm against the table's columns.
recipe_steps = [
    {
        'Action': {
            'Operation': 'MERGE',  # Merge several columns into one new column
            'Parameters': {
                # Recipe parameters are strings, so the column list is JSON-encoded.
                'sourceColumns': json.dumps(['first_name', 'last_name']),
                'delimiter': ' ',             # Text placed between the merged values
                'targetColumn': 'full_name',  # Name of the new column
            }
        }
    }
]

response = databrew_client.create_recipe(
    Name=RECIPE_NAME,
    Steps=recipe_steps,
    Description='A recipe to add a full_name column by concatenating first_name and last_name'
)

print("Recipe created successfully:", response)


In [None]:
# Split "s3://<bucket>/<key>" into the bucket/key pair that the job output expects.
_output_bucket, _, _output_key = OUTPUT_S3_LOCATION[len("s3://"):].partition("/")

# create_recipe_job has no `Output` or `RecipeName` parameters: outputs are given
# as the `Outputs` list and the recipe as a `RecipeReference` structure.
response = databrew_client.create_recipe_job(
    Name=DATABREW_JOB_NAME,               # The name of the job
    # The role must trust databrew.amazonaws.com, so the DataBrew role is used
    # (the Glue role only trusts glue.amazonaws.com).
    # NOTE(review): the role also needs write access to the output location — confirm.
    RoleArn=DATABREW_ROLE_ARN,
    DatasetName=DATASET_NAME,             # The dataset to use
    # NOTE(review): a job may require a published recipe version; add
    # 'RecipeVersion' here if the service rejects the reference.
    RecipeReference={'Name': RECIPE_NAME},
    Outputs=[
        {
            'Location': {'Bucket': _output_bucket, 'Key': _output_key},
            'Format': 'CSV',              # Output file format
        }
    ],
    MaxCapacity=2,  # Maximum number of nodes the job may use
    Timeout=60      # Job timeout in minutes
)
print(response)

In [None]:
# Start the created DataBrew job
response = databrew_client.start_job_run(
    Name=DATABREW_JOB_NAME  # The name of the job to start
)
# Keep the run id: it is needed to look up this run with describe_job_run.
run_id = response['RunId']
print(response)

In [None]:
# Fetch the details of one job run.
# `run_id` must hold the RunId value returned by start_job_run.
response = databrew_client.describe_job_run(
    Name=DATABREW_JOB_NAME,  # The name of the DataBrew job
    RunId=run_id  # The job run ID
)

### [AWS Tutorials - AWS Glue Data Quality - Automated Data Quality Monitoring](https://www.youtube.com/watch?v=mmLijuT2rLE)

In [74]:
# Crawler name and target path for the "sales" folder of the data lake bucket.
S3_SALES_CRAWLER_NAME = "httx-s3_raw_crawler"
S3_CRAWLER_TARGET_PATH = f"s3://{S3_BUCKET_DATALAKE}/sales"

# Local CSV to upload and the object key it is stored under in the bucket.
sales_data_file = f"{os.environ['DATA']}/sales.csv"
object_name1 = "sales/sales.csv"

In [None]:
# Upload the local sales CSV to the data lake bucket under the "sales/sales.csv" key.
s3.upload_file_to_s3(S3_BUCKET_DATALAKE, sales_data_file, object_name1)

In [None]:
# Create the S3 crawler over the sales folder through the project helper;
# "raw_" is passed as the table prefix.
glue.create_glue_s3_crawler(
    S3_SALES_CRAWLER_NAME,
    GLUE_ROLE_ARN,
    CATALOG_DB_NAME,
    S3_CRAWLER_TARGET_PATH,
    table_prefix="raw_"
)

In [None]:
# Start the sales crawler run.
glue_client.start_crawler(Name=S3_SALES_CRAWLER_NAME)

In [None]:
# Ask Glue Data Quality to recommend rules for the crawled sales table.
# The only data uploaded in this notebook is sales.csv, crawled from the "sales"
# folder with the "raw_" prefix, so the table to analyse is 'raw_sales'
# ('raw_customers' is never created here).
response = glue_client.start_data_quality_rule_recommendation_run(
    DataSource={
        'GlueTable': {
            'DatabaseName': CATALOG_DB_NAME,
            'TableName': 'raw_sales'
        }
    },
    Role=GLUE_ROLE_NAME,
    NumberOfWorkers=2,
    Timeout=123,
    # Name of the ruleset created from the recommendations.
    CreatedRulesetName='customers_dq_ruleset',
    # DataQualitySecurityConfiguration='string',
    # ClientToken='string'
)

print(response)

### Delete Resources

In [None]:
# Delete the Glue catalog database.
glue_client.delete_database(CatalogId=ACCOUNT_ID,Name=CATALOG_DB_NAME)

In [None]:
# Use a dedicated name for the boto3 resource: binding it to `s3` would shadow
# the imported `s3` helper module and break later calls such as
# s3.create_s3_bucket / s3.upload_file_to_s3 when cells are re-run.
s3_resource = boto3.resource('s3')
bucket1 = s3_resource.Bucket(S3_BUCKET_DATALAKE)
bucket2 = s3_resource.Bucket(S3_BUCKET_GLUE_ASSETS)

# Delete all objects in the buckets (a bucket must be empty before it can be deleted)
bucket1.objects.all().delete()
bucket2.objects.all().delete()

# Delete all object versions (if versioning is enabled)
# bucket1.object_versions.all().delete()
# bucket2.object_versions.all().delete()

# Finally, delete the buckets
bucket1.delete()
bucket2.delete()

In [None]:
# Remove the RDS subnet group and the VPC endpoint created earlier.
rds_client.delete_db_subnet_group(DBSubnetGroupName=SUBNET_GROUP_NAME)
ec2_client.delete_vpc_endpoints(VpcEndpointIds=[VPC_ENDPOINT_ID])

In [None]:
# Delete the MySQL instance (the first definition) through the project helper.
rds.delete_rds_instance(instances[0]['db_instance_identifier'])

In [None]:
# Delete the Glue connection and both crawlers created in this notebook.
glue_client.delete_connection(ConnectionName=MYSQL_CONNECTION_NAME)
glue_client.delete_crawler(Name=MYSQL_CRAWLER_NAME)
# The S3 crawler is defined as S3_SALES_CRAWLER_NAME; S3_RAW_CRAWLER_NAME is
# never assigned in this notebook and raised NameError.
glue_client.delete_crawler(Name=S3_SALES_CRAWLER_NAME)

In [None]:
# Delete the DynamoDB table.
# NOTE(review): config_table_name is not assigned anywhere in the visible
# notebook — confirm where it is defined before running this cell.
dynamodb_client.delete_table(TableName=config_table_name)

In [None]:
# Delete the Lambda function.
# NOTE(review): LFN_NAME is not assigned anywhere in the visible notebook —
# confirm where it is defined before running this cell.
lambda_client.delete_function(FunctionName=LFN_NAME)

In [None]:
# List all rules whose name starts with the given prefix
rules = events_client.list_rules(NamePrefix="httx")['Rules']

# For each rule: remove its targets first, then delete the rule.
for rule in rules:
    targets = events_client.list_targets_by_rule(Rule=rule['Name'])['Targets']
    # Skip remove_targets for rules without targets: an empty Ids list is
    # rejected by parameter validation.
    if targets:
        events_client.remove_targets(
            Rule=rule['Name'],
            Ids=[target['Id'] for target in targets],
        )
    events_client.delete_rule(Name=rule['Name'])

In [None]:
# Delete the DataBrew project first, then the dataset it uses.
# NOTE(review): the recipe job and recipe created earlier are not deleted here —
# confirm whether they must be removed before the dataset can be deleted.
databrew_client.delete_project(Name=DATABREW_PROJECT_NAME)
databrew_client.delete_dataset(Name=DATASET_NAME)

In [90]:
## DELETE IAM ROLE AT THE END AFTER DELETING ALL OTHER RESOURCES.
# The project helper is called with the role *name*.
iam.delete_iam_role(LFN_ROLE_NAME)


Detached policy arn:aws:iam::aws:policy/CloudWatchFullAccess from role lfn-pipeline-role
Detached policy arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess from role lfn-pipeline-role
Detached policy arn:aws:iam::aws:policy/AmazonS3FullAccess from role lfn-pipeline-role
Deleted role lfn-pipeline-role


In [None]:
# Delete the remaining roles. The helper is given role *names* everywhere else
# in this notebook (see the LFN and Glue calls), so the DataBrew role is passed
# by name too rather than by ARN.
iam.delete_iam_role(DATABREW_ROLE_NAME)
iam.delete_iam_role(GLUE_ROLE_NAME)
