In [None]:
import boto3
import botocore
from botocore.exceptions import ClientError
import os, time, json, time
from datetime import date

from dotenv import load_dotenv

from misc import load_from_yaml, save_to_yaml
import s3, iam, lf, glue, lambdafn, sns, eventbridge as event

from ads.utils import red

In [None]:
# Load variables from a local .env file (if present) before reading the
# environment. By default load_dotenv() does not override variables that are
# already set in the process environment.
load_dotenv()

# AWS account and network settings. Each variable is required: a missing one
# raises KeyError here instead of failing later inside an AWS call.
ACCOUNT_ID        = os.environ['AWS_ACCOUNT_ID_ROOT']
REGION            = os.environ['AWS_DEFAULT_REGION']
VPC_ID            = os.environ['AWS_DEFAULT_VPC']
SECURITY_GROUP_ID = os.environ['AWS_DEFAULT_SG_ID']
# AWS_DEFAULT_SUBNET_IDS holds a colon-separated list of subnet ids.
SUBNET_IDS        = os.environ["AWS_DEFAULT_SUBNET_IDS"].split(":")
SUBNET_ID         = SUBNET_IDS[0]
print(SUBNET_IDS)

In [None]:
# One boto3 client per AWS service used in this notebook. Only the EC2
# client/resource pass the region explicitly; the others rely on boto3's
# default region resolution.
sts_client           = boto3.client('sts')
rds_client           = boto3.client('rds')
iam_client           = boto3.client('iam')
s3_client            = boto3.client('s3')
glue_client          = boto3.client('glue')
lakeformation_client = boto3.client('lakeformation')
ec2_client           = boto3.client('ec2', region_name=REGION)
ec2_resource         = boto3.resource('ec2', region_name=REGION)
dynamodb_client      = boto3.client('dynamodb')
events_client        = boto3.client('events')
lambda_client        = boto3.client('lambda')
sns_client           = boto3.client('sns')
cw_logs_client       = boto3.client('logs')

# Both names are used further down (log fetching and log-group cleanup).
# They refer to the same CloudWatch Logs client instead of two identical ones.
logs_client = cw_logs_client

### [Knowledge Amplifier: Build Serverless DataLake using Glue , Lambda , Cloudwatch](https://www.youtube.com/watch?v=3f7UY5R9Q9U&t=0s)

<b style="color:green">WORKING AS EXPECTED</b>

<div style="text-align:center" ><img src="./design_diagram.png" width="600" height="300" /></div>

#### Create IAM Role

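- Create the AWS Glue role named by `GLUE_ROLE_NAME` and the Lambda role named by `LFN_ROLE_NAME`.
- Attach the AWS managed policies listed in `policy_arns` (`AWSGlueServiceRole`, `CloudWatchFullAccess`, `AmazonS3FullAccess`) to both roles; `PowerUserAccess` is left commented out.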

In [207]:
# Names of the two IAM roles created below: one for Glue, one for the Lambdas.
GLUE_ROLE_NAME, LFN_ROLE_NAME = "glue-pipeline-role", "lfn-pipeline-role"

In [208]:
# AWS managed policies that get attached to both pipeline roles.
policy_arns = [
    "arn:aws:iam::aws:policy/" + policy_suffix
    for policy_suffix in (
        "service-role/AWSGlueServiceRole",
        "CloudWatchFullAccess",
        "AmazonS3FullAccess",
        # Broader alternatives, intentionally left disabled:
        # "AdministratorAccess",
        # "PowerUserAccess",
    )
]

##### Glue Role

In [202]:
# Trust policy: lets the AWS Glue service assume this role.
assume_role_policy_doc = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "glue.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

# Create the Glue service role and keep its ARN. When the cell is re-run the
# role already exists and IAM answers with EntityAlreadyExists; in that case
# the ARN of the existing role is looked up instead of failing the notebook.
try:
    GLUE_ROLE_ARN = iam_client.create_role(
        RoleName=GLUE_ROLE_NAME,
        AssumeRolePolicyDocument=json.dumps(assume_role_policy_doc),
        Description="Glue Service Role"
    )['Role']['Arn']
except ClientError as err:
    if err.response['Error']['Code'] != 'EntityAlreadyExists':
        raise  # any other IAM error is a real failure
    GLUE_ROLE_ARN = iam_client.get_role(RoleName=GLUE_ROLE_NAME)['Role']['Arn']

In [None]:
# Attach every AWS managed policy in `policy_arns` to the Glue role.
# A plain loop is used because the calls matter only for their side effect;
# the original list comprehension built a throwaway list of API responses.
for parn in policy_arns:
    iam_client.attach_role_policy(RoleName=GLUE_ROLE_NAME, PolicyArn=parn)

In [None]:
# glue_put_event_policy = {
#   "Version": "2012-10-17",
#   "Statement": [
#     {
#       "Effect": "Allow",
#       "Action": [
#         "events:PutEvents"
#       ],
#       "Resource": "*"
#     }
#   ]
# }

# # Attach the inline policy to the IAM role
# iam_client.put_role_policy(
#     RoleName=GLUE_ROLE_NAME,
#     PolicyName="glue_put_event",
#     PolicyDocument=json.dumps(glue_put_event_policy)
# )

##### Lambda Role

In [None]:
# Trust policy: lets the AWS Lambda service assume this role.
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "lambda.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

# Create the Lambda execution role and keep its ARN. When the cell is re-run
# the role already exists and IAM answers with EntityAlreadyExists; in that
# case the ARN of the existing role is looked up instead of failing.
try:
    LFN_ROLE_ARN = iam_client.create_role(
        RoleName=LFN_ROLE_NAME,
        AssumeRolePolicyDocument=json.dumps(assume_role_policy_document)
    )['Role']['Arn']
except ClientError as err:
    if err.response['Error']['Code'] != 'EntityAlreadyExists':
        raise  # any other IAM error is a real failure
    LFN_ROLE_ARN = iam_client.get_role(RoleName=LFN_ROLE_NAME)['Role']['Arn']

In [None]:
# Attach every AWS managed policy in `policy_arns` to the Lambda role.
# A plain loop is used because the calls matter only for their side effect.
for parn in policy_arns:
    iam_client.attach_role_policy(RoleName=LFN_ROLE_NAME, PolicyArn=parn)

In [None]:

# #### Create IAM Role Policy (, S3, Logs Permissions)
# policy_document = {
#     "Version": "2012-10-17",
#     "Statement": [
#         {   # StartCrawler permission
#             "Effect": "Allow",
#             "Action": [
#                 "glue:StartCrawler"
#             ],
#             "Resource": f"arn:aws:glue:region:account-id:crawler/*"
#             # "Resource": f"arn:aws:glue:region:account-id:crawler/{crawler-name}"
#         },
#         {
#             "Effect": "Allow",
#             "Action": [
#                 "glue:StartJobRun",
#                 "glue:GetJob",
#                 "glue:GetJobRun"
#             ],
#             "Resource": f"arn:aws:glue:region:account-id:job/*"
#         },
#         {   # s3 full access
#             "Effect": "Allow",
#             "Action": [
#                 "s3:*",
#                 "s3-object-lambda:*"
#             ],
#             "Resource": "*"
#         },
#         {
#             "Effect": "Allow",
#             "Action": [
#                 "logs:*"
#             ],
#             "Resource": "*"
#         }
#     ]
# }

# policy_name = "s3_logs_policies"

# # Attach the inline policy to the IAM role
# iam_client.put_role_policy(
#     RoleName=LFN_ROLE_NAME,
#     PolicyName=policy_name,
#     PolicyDocument=json.dumps(policy_document)
# )
# print(f"Policy {policy_name} attached to role {LFN_ROLE_NAME}")


#### Create S3 Bucket and Folders

In [212]:
# Bucket holding the raw/processed data, and bucket holding Glue job assets.
S3_BUCKET_DATALAKE, S3_BUCKET_GLUE_ASSETS = (
    "httx-datalake-bkt",
    "httx-glue-assets-bkt",
)

In [None]:
# Bucket options. NOTE(review): these three values are assigned but not
# passed to the helper calls in this cell.
acl, enable_versioning, enable_encryption = 'public-read', False, False

# "Folder" key prefixes to create inside each bucket.
folders1 = [f"{stage}/customers/" for stage in ("raw", "processed")]
folders2 = [f"{name}/" for name in ("temporary", "sparkHistoryLogs")]

# Data-lake bucket first, then the Glue assets bucket (project helper `s3`).
s3.create_s3_bucket(S3_BUCKET_DATALAKE, folders1)
s3.create_s3_bucket(S3_BUCKET_GLUE_ASSETS, folders2)

#### Create Glue Catalog Database

In [214]:
# Name of the Glue Data Catalog database used by the crawlers and the job.
CATALOG_DB_NAME = "httx-catalog-db"

In [None]:
# The catalog database is located at the root of the data-lake bucket.
DATALAKE_LOCATION_URI = "s3://" + S3_BUCKET_DATALAKE

# Create the Glue Data Catalog database in this account's catalog.
create_database_response = glue_client.create_database(
    CatalogId=ACCOUNT_ID,
    DatabaseInput=dict(
        Name=CATALOG_DB_NAME,
        Description='',
        LocationUri=DATALAKE_LOCATION_URI,
    ),
)
print(create_database_response)

- Grant the `CREATE_TABLE` and `DROP` permissions on the Data Catalog database to the Glue role (`GLUE_ROLE_NAME`).

In [216]:
# The Lake Formation principal is the Glue role, identified by its ARN.
lf_principle = GLUE_ROLE_ARN

# Give that principal CREATE_TABLE and DROP on the catalog database,
# with no grant option.
response = lakeformation_client.grant_permissions(
    Principal=dict(DataLakePrincipalIdentifier=lf_principle),
    Resource=dict(Database=dict(Name=CATALOG_DB_NAME)),
    Permissions=['CREATE_TABLE', 'DROP'],
    PermissionsWithGrantOption=[],
)

In [84]:
# lf.grant_table_level_permissions(GLUE_ROLE_ARN, CATALOG_DB_NAME, 'employees', ['DROP'])

#### SNS

In [None]:
# Create the notification topic and keep its ARN for the EventBridge targets.
TOPIC_NAME = 'httx-sns-topic'
JOB_COMPLETE_TOPIC_ARN = sns_client.create_topic(Name=TOPIC_NAME)['TopicArn']

# Email subscription. The address can be overridden through the
# SNS_SUBSCRIPTION_EMAIL environment variable so a personal address does not
# have to be edited into the notebook; the original address stays the default.
protocol = "email"
endpoint = os.environ.get("SNS_SUBSCRIPTION_EMAIL", "bbcredcap3@gmail.com")

sns_client.subscribe(
    TopicArn=JOB_COMPLETE_TOPIC_ARN,
    Protocol=protocol,
    Endpoint=endpoint
)

#### **Lambda 1**:

In [37]:
# ! source setup.sh

In [230]:
LFN_CRAWLER_NAME = "crawler_triggerer"
zip_file = "./lambdas/lfn1/package.zip"  # path of the deployment package

# Read the deployment package into memory.
with open(zip_file, mode='rb') as package:
    zipped_code = package.read()

# Create the Lambda function from the zipped code and keep its ARN.
crawler_fn = lambda_client.create_function(
    FunctionName=LFN_CRAWLER_NAME,
    Runtime='python3.9',
    Role=LFN_ROLE_ARN,
    Handler=f"{LFN_CRAWLER_NAME}.lambda_handler",
    Code=dict(ZipFile=zipped_code),
    Timeout=120,
    Environment=dict(Variables={'foo': 'BAR'}),
)
LFN_CRAWLER_ARN = crawler_fn['FunctionArn']

In [None]:
# Resource-based permission: allow S3 (only the data-lake bucket, only from
# this account) to invoke the crawler-triggering Lambda.
s3_invoke_permission = dict(
    FunctionName=LFN_CRAWLER_NAME,
    StatementId='s3-invoke-permission',  # must be unique per permission statement
    Action='lambda:InvokeFunction',
    Principal='s3.amazonaws.com',
    SourceArn="arn:aws:s3:::" + S3_BUCKET_DATALAKE,
    SourceAccount=ACCOUNT_ID,
)
response = lambda_client.add_permission(**s3_invoke_permission)

print("Lambda permission added:", response)


In [None]:
# Only objects created under this key prefix trigger the Lambda.
raw_prefix_rule = {'Name': 'prefix', 'Value': 'raw/customers/'}

# One Lambda notification: any ObjectCreated event matching the prefix filter.
crawler_lambda_notification = {
    'LambdaFunctionArn': LFN_CRAWLER_ARN,
    'Events': ['s3:ObjectCreated:*'],
    'Filter': {'Key': {'FilterRules': [raw_prefix_rule]}},
}

# Install the notification configuration on the data-lake bucket.
response = s3_client.put_bucket_notification_configuration(
    Bucket=S3_BUCKET_DATALAKE,
    NotificationConfiguration={
        'LambdaFunctionConfigurations': [crawler_lambda_notification],
    },
)

print("S3 bucket notification configuration updated successfully.")


#### **Crawler 1**: Catalog Data from `raw/customers` as a table by the name `raw_customers`

In [None]:
# Crawler over the raw customers data; created tables get the `raw_` prefix.
S3_RAW_CRAWLER_NAME = "httx-s3crawler"
S3_RAW_CRAWLER_TARGET = dict(
    S3Targets=[dict(Path="s3://" + S3_BUCKET_DATALAKE + "/raw/customers")],
)
glue.create_glue_crawler(
    S3_RAW_CRAWLER_NAME,
    GLUE_ROLE_ARN,
    CATALOG_DB_NAME,
    S3_RAW_CRAWLER_TARGET,
    table_prefix='raw_',
)

In [234]:
# glue_client.start_crawler(Name=S3_RAW_CRAWLER_NAME)

#### **Lambda-2**: It Triggers the Glue Job when executed

In [235]:
# ! source setup.sh

In [236]:
LFN_JOB_TRIGGERER_NAME = "glue_job_triggerer"
zip_file = "./lambdas/lfn2/package.zip"  # path of the deployment package

# Read the deployment package into memory.
with open(zip_file, mode='rb') as package:
    zipped_code = package.read()

# Create the Lambda function from the zipped code and keep its ARN.
job_triggerer_fn = lambda_client.create_function(
    FunctionName=LFN_JOB_TRIGGERER_NAME,
    Runtime='python3.9',
    Role=LFN_ROLE_ARN,
    Handler=f"{LFN_JOB_TRIGGERER_NAME}.lambda_handler",
    Code=dict(ZipFile=zipped_code),
    Timeout=120,
    Environment=dict(Variables={'foo': 'BAR'}),
)
LFN_JOB_TRIGGERER_ARN = job_triggerer_fn['FunctionArn']

#### **Job**: Transform data from `raw/customers` and load it into `processed/customers`

In [238]:
# S3 locations used by the Glue job: scratch space, Spark event logs and the
# output prefix for the processed data.
TEM_DIR = "s3://{}/temporary/".format(S3_BUCKET_GLUE_ASSETS)
SPARK_EVENT_LOG_PATH = "s3://{}/sparkHistoryLogs/".format(S3_BUCKET_GLUE_ASSETS)
TARGET = "s3://{}/processed/customers".format(S3_BUCKET_DATALAKE)

In [None]:
# Local Glue script and the S3 key it is stored under in the assets bucket.
# Note the key prefix is "glues_scripts" while the local folder is
# "glue_scripts"; the key is kept as is because the job's script location is
# built from `object_name1`.
file_name1 = './glue_scripts/jb1_s3csv_s3parquet.py'
object_name1 = "glues_scripts/jb1_s3csv_s3parquet.py"
s3.upload_file_to_s3(S3_BUCKET_GLUE_ASSETS, file_name1, object_name1)

In [None]:
# Register the Glue job that runs the uploaded script (project helper `glue`).
JOB_NAME = "jb1_s3csv_s3parquet"
JOB_SCRIPT_LOCATION = "s3://{}/{}".format(S3_BUCKET_GLUE_ASSETS, object_name1)
glue.create_glue_job(
    JOB_NAME,
    JOB_SCRIPT_LOCATION,
    GLUE_ROLE_ARN,
    TEM_DIR,
    SPARK_EVENT_LOG_PATH,
)

In [241]:
# glue.start_glue_job(JOB_NAME)

##### Parameterization of the Job [`NOT TESTED YET`]

In [None]:
# Default arguments stored on the job definition. The three custom arguments
# (--catalog_db_name, --table_name, --target) are the job's parameters.
DEFAULT_ARGS = {
    '--TempDir': TEM_DIR,
    '--spark-event-logs-path': SPARK_EVENT_LOG_PATH,
    '--extra-py-files': '',
    '--catalog_db_name': CATALOG_DB_NAME,
    '--table_name': 'raw_customers',
    '--target': TARGET,
}

# Job definition shared by create_job and update_job.
job_settings = dict(
    Role=GLUE_ROLE_ARN,
    ExecutionProperty={
        'MaxConcurrentRuns': 1
    },
    Command={
        'Name': 'glueetl',
        'ScriptLocation': JOB_SCRIPT_LOCATION,
        'PythonVersion': '3'
    },
    DefaultArguments=DEFAULT_ARGS,
    MaxRetries=0,
    Timeout=5,  # in minutes, max is 2,880 min (48 hours)
    GlueVersion='4.0',
    # Spark ETL ("glueetl") jobs need at least 2 workers; 1 is rejected.
    NumberOfWorkers=2,
    WorkerType='G.1X',  # can be 'Standard', 'G.1X', or 'G.2X'
    # ExecutionClass='STANDARD',  # can be 'STANDARD' or 'FLEX'
    # MaxCapacity=10.0,  # not used together with WorkerType/NumberOfWorkers
)

# A job named JOB_NAME is already created by the previous cell, so a plain
# create_job fails. Create the job when it is missing, otherwise overwrite the
# existing definition with the parameterized one.
try:
    response = glue_client.create_job(Name=JOB_NAME, **job_settings)
except (glue_client.exceptions.AlreadyExistsException,
        glue_client.exceptions.IdempotentParameterMismatchException):
    response = glue_client.update_job(JobName=JOB_NAME, JobUpdate=job_settings)

# Start a run; arguments given here override the job's default arguments.
glue_client.start_job_run(
    JobName=JOB_NAME,
    Arguments={
        '--catalog_db_name': CATALOG_DB_NAME,
        '--table_name': 'raw_customers',
        '--target': TARGET,
    }
)

In [242]:
# ! aws logs tail --follow /aws-glue/jobs --filter-pattern "SUCCEEDED"

#### **Event Rule 1**: It matches "Glue Crawler State Change" pattern with target (LFN_JOB_TRIGGERER_NAME)

-   `Event Source`: AWS Glue Crawler (S3_RAW_CRAWLER_NAME)
-   `Event Type`: "Glue Crawler State Change" (crawler_rule_event_pattern)
-   `Event Targets`: SNS Topic (JOB_COMPLETE_TOPIC_ARN) and Lambda Function (LFN_JOB_TRIGGERER_NAME)

In [243]:
# Name of the EventBridge rule that reacts to the raw crawler finishing.
S3_RAW_CRAWLER_RULE_NAME = "httx-raw-crawler-rule"

In [244]:
# Match only "Succeeded" state-change events of the raw crawler.
crawler_rule_event_pattern = {
    "source": ["aws.glue"],
    "detail-type": ["Glue Crawler State Change"],
    "detail": {"state": ["Succeeded"], "crawlerName": [S3_RAW_CRAWLER_NAME]},
}

# Create (or update) the enabled rule with that pattern and keep its ARN,
# which is needed for the Lambda invoke permission.
crawler_rule = events_client.put_rule(
    Name=S3_RAW_CRAWLER_RULE_NAME,
    EventPattern=json.dumps(crawler_rule_event_pattern),
    State='ENABLED',
    Description='Rule to capture AWS Glue Crawler state changes',
)
S3_RAW_CRAWLER_RULE_ARN = crawler_rule['RuleArn']

In [None]:
# Add the SNS topic as a target of the crawler rule, so each matched crawler
# event is forwarded to the topic.
crawler_rule_sns_target = dict(
    Id=S3_RAW_CRAWLER_RULE_NAME + "_sns_topic",
    Arn=JOB_COMPLETE_TOPIC_ARN,
)
events_client.put_targets(
    Rule=S3_RAW_CRAWLER_RULE_NAME,
    Targets=[crawler_rule_sns_target],
)

- Event data sent to the SNS topic by the "Glue Crawler State Change" event

```json
{
    "version": "0",
    "id": "f971dd0e-4705-d8ba-f46c-7028e8f5e0ab",
    "detail-type": "Glue Crawler State Change",
    "source": "aws.glue",
    "account": "381492255899",
    "time": "2024-10-20T15:44:53Z",
    "region": "us-east-1",
    "resources": [],
    "detail": {
        "tablesCreated": "1",
        "warningMessage": "N/A",
        "partitionsUpdated": "0",
        "tablesUpdated": "0",
        "message": "Crawler Succeeded",
        "partitionsDeleted": "0",
        "accountId": "381492255899",
        "runningTime (sec)": "26",
        "tablesDeleted": "0",
        "crawlerName": "httx-s3crawler",
        "completionDate": "2024-10-20T15:44:53Z",
        "state": "Succeeded",
        "partitionsCreated": "0",
        "cloudWatchLogLink": "https://console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws-glue/crawlers;stream=httx-s3crawler"
    }
}
```

In [None]:
# Add the job-triggering Lambda as a target of the crawler rule created
# earlier, so that the rule can invoke the function.
crawler_rule_lambda_target = dict(
    Id=S3_RAW_CRAWLER_RULE_NAME + "_trigger_lfn",
    Arn=LFN_JOB_TRIGGERER_ARN,
)
events_client.put_targets(
    Rule=S3_RAW_CRAWLER_RULE_NAME,
    Targets=[crawler_rule_lambda_target],
)

In [None]:
# Resource-based permission: allow EventBridge, and only the crawler rule,
# to invoke the job-triggering Lambda.
events_invoke_permission = dict(
    FunctionName=LFN_JOB_TRIGGERER_NAME,
    StatementId=S3_RAW_CRAWLER_RULE_NAME + "-invoke-permission",
    Action="lambda:InvokeFunction",
    Principal="events.amazonaws.com",
    SourceArn=S3_RAW_CRAWLER_RULE_ARN,
)
lambda_client.add_permission(**events_invoke_permission)

#### **Event Rule 2**:

-   `Event Source`: AWS Glue Job (JOB_NAME)
-   `Event Type`: "Glue Job State Change" (job_rule_event_pattern)
-   `Event Target`: SNS Topic (JOB_COMPLETE_TOPIC_ARN)

In [None]:
JOB_COMPLETE_RULE_NAME = "httx-job-complete-rule"

# Match only SUCCEEDED state-change events of the Glue job.
job_rule_event_pattern = {
    "source": ["aws.glue"],
    "detail-type": ["Glue Job State Change"],
    "detail": {"jobName": [JOB_NAME], "state": ["SUCCEEDED"]},
}

# Create (or update) the enabled rule with that pattern.
response = events_client.put_rule(
    Name=JOB_COMPLETE_RULE_NAME,
    EventPattern=json.dumps(job_rule_event_pattern),
    State='ENABLED',
    Description='Rule to capture AWS Glue job state changes',
)

# Add the SNS topic as the rule's target, so a matched event is published
# to the topic.
job_complete_target = dict(
    Id=JOB_COMPLETE_RULE_NAME + "_target",
    Arn=JOB_COMPLETE_TOPIC_ARN,
)
events_client.put_targets(
    Rule=JOB_COMPLETE_RULE_NAME,
    Targets=[job_complete_target],
)

- Example event delivered to the SNS topic for a "Glue Job State Change" (this sample has state `STOPPED`; the rule above only forwards `SUCCEEDED`)

```json
{
    "version": "0",
    "id": "ee14807c-1f89-490e-b4e6-43f071980d95",
    "detail-type": "Glue Job State Change",
    "source": "aws.glue",
    "account": "381492255899",
    "time": "2024-10-20T15:00:35Z",
    "region": "us-east-1",
    "resources": [],
    "detail": {
        "jobName": "jb1_s3csv_s3parquet",
        "severity": "INFO",
        "state": "STOPPED",
        "jobRunId": "jr_d8165d895a8eee53494238118f07659a75b1a0d192048ed8a7a429c9ce176d5c",
        "message": "Job run stopped"
    }
}
```

#### TEST THE PIPELINE

In [None]:
# Delete the object at raw/customers/customers.csv from the data-lake bucket
# (the same key the upload cell below writes), so the test starts clean.
s3_client.delete_object(Bucket=S3_BUCKET_DATALAKE, Key='raw/customers/customers.csv')
# s3_client.delete_object(Bucket=S3_BUCKET_DATALAKE, Key='processed/customers')

In [None]:
# Look up the IAM user's ARN, grant that user DROP on the `raw_customers`
# table through the project helper, then delete the table from the catalog.
iam_user = iam_client.get_user(UserName='AMominNJ')
AMominNJ_arn = iam_user['User']['Arn']
lf.grant_table_level_permissions(
    AMominNJ_arn,
    CATALOG_DB_NAME,
    'raw_customers',
    ['DROP'],
)
response = glue_client.delete_table(
    Name='raw_customers',
    DatabaseName=CATALOG_DB_NAME,
)

In [251]:
# Upload the local customers.csv under the raw/customers/ prefix, which is the
# prefix the bucket notification filters on, so this upload starts the pipeline.
s3_client.upload_file('./customers.csv', S3_BUCKET_DATALAKE, 'raw/customers/customers.csv')

In [97]:
# !aws events list-rules --name-prefix {JOB_COMPLETE_RULE_NAME}

In [None]:
# Log group and (hard-coded) log stream of one Glue job run to inspect.
log_group_name = '/aws-glue/jobs/logs-v2'
log_stream_name = 'jr_5dc2f650f7eb857c598afcf321d168152de0e80aec69b287340abe3f7bbd4a4e-1'

# Fetch up to 200 events; startFromHead=False reads from the end of the
# stream, i.e. the most recent events.
response = cw_logs_client.get_log_events(
    logGroupName=log_group_name,
    logStreamName=log_stream_name,
    startFromHead=False,
    limit = 200
)

# Print the log messages. The loop variable is deliberately not named `event`:
# that name is the alias of the imported `eventbridge` module and a loop
# variable of the same name would overwrite it for the rest of the session.
for log_event in response['events']:
    print(log_event['message'])


#### **Crawler 2**: NOT TESTED !

Catalog data from `processed/customers` as a table named `processed_customers`.

In [None]:
# Crawler over the processed customers data; created tables get the
# `processed_` prefix.
S3_PROCESSED_CRAWLER_NAME = "httx-s3-processed-crawler"
S3_PROCESSED_CRAWLER_TARGET = dict(
    S3Targets=[dict(Path="s3://" + S3_BUCKET_DATALAKE + "/processed/customers")],
)
glue.create_glue_crawler(
    S3_PROCESSED_CRAWLER_NAME,
    GLUE_ROLE_ARN,
    CATALOG_DB_NAME,
    S3_PROCESSED_CRAWLER_TARGET,
    table_prefix='processed_',
)

In [None]:
# glue_client.start_crawler(Name=S3_PROCESSED_CRAWLER_NAME)

In [None]:
# Look up the IAM user's ARN and grant that user DROP on the
# `processed_customers` table through the project helper.
iam_user = iam_client.get_user(UserName='AMominNJ')
AMominNJ_arn = iam_user['User']['Arn']
lf.grant_table_level_permissions(
    AMominNJ_arn,
    CATALOG_DB_NAME,
    'processed_customers',
    ['DROP'],
)

### Delete Resources

In [None]:
# Delete the Glue Data Catalog database created earlier in this notebook.
glue_client.delete_database(CatalogId=ACCOUNT_ID,Name=CATALOG_DB_NAME)

In [None]:
# The boto3 S3 resource gets its own name: `s3` is the project helper module
# imported at the top of the notebook, and rebinding it would break every
# later `s3.create_s3_bucket(...)` / `s3.upload_file_to_s3(...)` call.
s3_resource = boto3.resource('s3')
bucket1 = s3_resource.Bucket(S3_BUCKET_DATALAKE)
bucket2 = s3_resource.Bucket(S3_BUCKET_GLUE_ASSETS)

# A bucket must be empty before it can be deleted: remove all objects first.
bucket1.objects.all().delete()
bucket2.objects.all().delete()

# Delete all object versions (only needed if versioning is enabled)
# bucket1.object_versions.all().delete()
# bucket2.object_versions.all().delete()

# Finally, delete the buckets themselves.
bucket1.delete()
bucket2.delete()

In [None]:
# Delete the raw-data crawler. The processed-data crawler line stays disabled,
# matching the "NOT TESTED" status of the Crawler 2 section.
glue_client.delete_crawler(Name=S3_RAW_CRAWLER_NAME)
# glue_client.delete_crawler(Name=S3_PROCESSED_CRAWLER_NAME)

In [None]:
# Delete the Glue job definition created in this notebook.
glue_client.delete_job(JobName=JOB_NAME)

In [None]:
# Delete both Lambda functions: the crawler trigger and the Glue job trigger.
lambda_client.delete_function(FunctionName=LFN_CRAWLER_NAME)
lambda_client.delete_function(FunctionName=LFN_JOB_TRIGGERER_NAME)

In [None]:
# Delete the notification topic through the project `sns` helper module.
sns.delete_sns_topic(JOB_COMPLETE_TOPIC_ARN)

In [261]:
# EventBridge refuses to delete a rule that still has targets, and this rule
# has two (the SNS topic and the job-triggering Lambda), so every target is
# removed, not just the first one.
targets = events_client.list_targets_by_rule(Rule=S3_RAW_CRAWLER_RULE_NAME)['Targets']
target_ids = [target['Id'] for target in targets]
if target_ids:  # remove_targets requires at least one id
    events_client.remove_targets(Rule=S3_RAW_CRAWLER_RULE_NAME, Ids=target_ids)
time.sleep(1)  # short pause before deleting the rule
response = events_client.delete_rule(Name=S3_RAW_CRAWLER_RULE_NAME,Force=True)

In [263]:
# EventBridge refuses to delete a rule that still has targets. Remove every
# target the rule has; an already-empty target list (e.g. on a re-run) is
# skipped instead of raising IndexError.
targets = events_client.list_targets_by_rule(Rule=JOB_COMPLETE_RULE_NAME)['Targets']
target_ids = [target['Id'] for target in targets]
if target_ids:  # remove_targets requires at least one id
    events_client.remove_targets(Rule=JOB_COMPLETE_RULE_NAME, Ids=target_ids)
time.sleep(1)  # short pause before deleting the rule
response = events_client.delete_rule(Name=JOB_COMPLETE_RULE_NAME,Force=True)

In [None]:
# response = lambda_client.remove_permission(
#     FunctionName=LFN_CRAWLER_NAME,
#     StatementId='s3-invoke-permission'
# )

In [None]:
## DELETE IAM ROLE AT THE END AFTER DELETING ALL OTHER RESOURCES.
# Both roles are removed through the project `iam` helper module.
# NOTE(review): IAM requires attached policies to be detached before a role
# can be deleted; presumably the helper does that — confirm in iam.py.
iam.delete_iam_role(GLUE_ROLE_NAME)
iam.delete_iam_role(LFN_ROLE_NAME)

In [None]:
# CloudWatch log groups written by the crawler, the Glue job and the Lambdas.
lgroups = [
    "/aws-glue/crawlers",
    "/aws-glue/jobs/error",
    "/aws-glue/jobs/logs-v2",
    "/aws/lambda/crawler_triggerer",
    "/aws/lambda/glue_job_triggerer"
]

# Delete each log group. A group that was never created (for example the
# error group when no job failed) is skipped, so one missing group does not
# abort the cleanup of the remaining ones.
for lg in lgroups:
    try:
        logs_client.delete_log_group(logGroupName=lg)
    except logs_client.exceptions.ResourceNotFoundException:
        print(f"Log group {lg} not found; skipping")

### Debug

-   <b style="color:red"> Rule-1 triggers the Crawler roughly every 4 minutes!</b>

The issue with your AWS EventBridge rule triggering an infinite Glue Crawler run at approximately 4-minute intervals could be due to how the event pattern is structured and the interaction between the crawler state and the rule. Here's a breakdown of potential reasons for this behavior:

##### 1. **EventBridge Rule Re-triggering Itself**
   - When your crawler finishes (with a "Succeeded" state), the EventBridge rule is triggered because the pattern matches this state. If the triggered action is causing the crawler to run again (directly or indirectly), this can lead to a feedback loop where each successful run triggers the rule, and the rule subsequently re-triggers the crawler.
   - **Solution**: Ensure that the action triggered by the rule (in this case, likely a Lambda function or SNS topic) does not start the same crawler again or trigger something that ultimately starts it.

##### 2. **Overlapping Event Patterns**
   - There might be multiple events from AWS Glue that match the pattern, including internal crawler retries or status updates. Even though you are filtering for `"state": ["Succeeded"]`, AWS Glue may still emit other events that result in the rule firing.
   - **Solution**: You could add more specificity to the `detail` filter. For example, check for additional fields in the event payload to ensure that only the exact desired event (the final "Succeeded" state) triggers the rule. 

   Example:
   ```json
   {
     "source": ["aws.glue"],
     "detail-type": ["Glue Crawler State Change"],
     "detail": {
       "state": ["Succeeded"],
       "crawlerName": ["httx-s3crawler"],
       "lastUpdatedOn": ["<add timestamp filter here if possible>"]
     }
   }
   ```

##### 3. **Potential EventBridge Retry Mechanism**
   - EventBridge rules may have an internal retry mechanism or errors in the attached target (such as the Lambda or SNS action) that result in re-invocation of the rule. If the target fails or throws errors, EventBridge may retry the action, which could lead to the crawler starting again inadvertently.
   - **Solution**: Check the logs for your target (Lambda or SNS) to ensure that no errors or retries are happening. You can also set a **dead-letter queue (DLQ)** for EventBridge or the Lambda function to monitor failures and prevent retries from causing unwanted side effects.

##### 4. **Check Lambda Function Logic**
   - If the target of your EventBridge rule is a Lambda function that is supposed to trigger something based on the `Succeeded` state, ensure that this function is not inadvertently starting the same crawler again.

##### 5. **Recursive Crawler Triggers**
   - It's possible that the process triggered by the rule involves re-running the same crawler indirectly. If the crawler creates new data that again triggers the crawler, this could lead to a recursive pattern where the crawler keeps running every time it finishes.
   - **Solution**: Review the downstream processes triggered by the EventBridge rule to make sure nothing is recursively triggering the crawler.

In summary, I recommend carefully inspecting:
1. The target (Lambda or SNS) action and ensuring it doesn't start the crawler.
2. Event logs to ensure that only one specific event is triggering the rule.
3. Adding more specificity to the event pattern to catch only the final "Succeeded" state for the crawler.

--------

When a "Glue Job State Change" event is not captured by an EventBridge Rule, debugging the issue involves several steps. Here's a systematic approach to identify and resolve the issue:

##### 1. **Check EventBridge Rule Configuration:**
   - **Event Pattern:**
     - Verify that the event pattern in your rule matches the structure of the event emitted by Glue. Ensure that the source (`"aws.glue"`) and the detail type (e.g., `"Glue Job State Change"`) are correct.
     - Example of an event pattern:
       ```json
       {
         "source": ["aws.glue"],
         "detail-type": ["Glue Job State Change"],
         "detail": {
           "jobName": ["your-glue-job-name"]
         }
       }
       ```
   - **Targets:**
     - Ensure that your EventBridge rule has a valid target configured (Lambda, SNS, Step Functions, etc.).
     - Test the target independently to make sure it works.

##### 2. **Validate Glue Job Events:**
   - **CloudWatch Logs:**
     - Check the CloudWatch Logs for your Glue job to ensure the job is running and state change events are being generated.
     - If the job is failing, investigate any errors in the logs.
   - **Event History:**
     - In AWS CloudTrail, verify if the "Glue Job State Change" event is being recorded. This helps you confirm that Glue is indeed sending events.

##### 3. **Test EventBridge Rule:**
   - **Manual Event:**
     - Use the EventBridge console to create a test event that mimics a Glue job state change. Trigger the rule manually to confirm whether it captures the event and triggers the target.
     - Example test event:
       ```json
       {
         "source": "aws.glue",
         "detail-type": "Glue Job State Change",
         "detail": {
           "jobName": "your-glue-job-name",
           "state": "SUCCEEDED"
         }
       }
       ```

##### 4. **EventBridge Rule Metrics and Monitoring:**
   - **Invocations Count:**
     - In the EventBridge console, check if the rule’s "Invocations" metric shows that the rule is being triggered.
   - **Failure Logs:**
     - Enable logging for your EventBridge rule to capture any invocation failures. This can be done in the EventBridge rule settings.

##### 5. **Permissions Issues:**
   - **IAM Role:**
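     - AWS Glue publishes job state change events to the default event bus automatically; the Glue job's role needs no extra permission for this.
     - Check that EventBridge is allowed to invoke the target: Lambda and SNS targets need a resource-based policy (a Lambda permission or an SNS topic policy) that allows `events.amazonaws.com`; some other target types use an IAM role on the rule instead.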

##### 6. **Event Filtering (If Applicable):**
   - If you are filtering events based on specific states (like `SUCCEEDED`, `FAILED`), make sure those filters are correctly set.
   - For example, if you are only interested in `SUCCEEDED` events but your Glue job fails, the rule won’t trigger.

##### 7. **EventBridge Rule State:**
   - A rule only matches events while its state is `ENABLED`. Confirm that the rule has not been left `DISABLED` (for example with `aws events describe-rule --name <rule-name>`).

##### 8. **Check for EventBus Misconfiguration:**
   - Ensure that the rule is set on the correct EventBus (default or custom). If you're using a custom EventBus, check if events are being routed properly to it.
