From f99af216dcd98001b932eb392ab1df67f91dc03d Mon Sep 17 00:00:00 2001 From: Erin Weisbart <54687786+ErinWeisbart@users.noreply.github.com> Date: Wed, 24 May 2023 11:50:52 -0700 Subject: [PATCH 1/6] fix extra } in monitor --- run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.py b/run.py index 4da71d4..2d60d85 100644 --- a/run.py +++ b/run.py @@ -598,7 +598,7 @@ def startCluster(): createMonitor.write('"MONITOR_QUEUE_NAME" : "'+SQS_QUEUE_NAME+'",\n') createMonitor.write('"MONITOR_BUCKET_NAME" : "'+AWS_BUCKET+'",\n') createMonitor.write('"MONITOR_LOG_GROUP_NAME" : "'+LOG_GROUP_NAME+'",\n') - createMonitor.write('"MONITOR_START_TIME" : "'+ starttime+'"}\n') + createMonitor.write('"MONITOR_START_TIME" : "'+ starttime+'"\n') createMonitor.write('"CLEAN_DASHBOARD" : "'+ CLEAN_DASHBOARD+'"}\n') createMonitor.close() From e0f56d7f42f2c315516f60da5b3f8fb7cee5ee14 Mon Sep 17 00:00:00 2001 From: Erin Weisbart <54687786+ErinWeisbart@users.noreply.github.com> Date: Wed, 24 May 2023 11:52:16 -0700 Subject: [PATCH 2/6] revert to old S3 mount --- worker/run-worker.sh | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/worker/run-worker.sh b/worker/run-worker.sh index 7efa848..9aef692 100644 --- a/worker/run-worker.sh +++ b/worker/run-worker.sh @@ -19,19 +19,13 @@ aws ec2 create-tags --resources $VOL_0_ID --tags Key=Name,Value=${APP_NAME}Worke VOL_1_ID=$(aws ec2 describe-instance-attribute --instance-id $MY_INSTANCE_ID --attribute blockDeviceMapping --output text --query BlockDeviceMappings[1].Ebs.[VolumeId]) aws ec2 create-tags --resources $VOL_1_ID --tags Key=Name,Value=${APP_NAME}Worker -# 2. MOUNT S3 + +# 2. MOUNT S3 +echo $AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY > /credentials.txt +chmod 600 /credentials.txt mkdir -p /home/ubuntu/bucket mkdir -p /home/ubuntu/local_output -if [[ -z "$AWS_ACCESS_KEY_ID" ]] -then - AWS_ACCESS_KEY_ID=$(curl 169.254.170.2$AWS_CONTAINER_CREDENTIALS_RELATIVE_URI | jq '.AccessKeyId') - AWS_SECRET_ACCESS_KEY=$(curl 169.254.170.2$AWS_CONTAINER_CREDENTIALS_RELATIVE_URI | jq '.SecretAccessKey') - echo $AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY > /credentials.txt - chmod 600 /credentials.txt - stdbuf -o0 s3fs $AWS_BUCKET /home/ubuntu/bucket -o passwd_file=/credentials.txt -o dbglevel=info -else - stdbuf -o0 s3fs $AWS_BUCKET /home/ubuntu/bucket -o ecs -o dbglevel=info -fi +stdbuf -o0 s3fs $AWS_BUCKET /home/ubuntu/bucket -o passwd_file=/credentials.txt # 3. SET UP ALARMS From bdd664ece16fdef0423b3131044aa9a8358aa550 Mon Sep 17 00:00:00 2001 From: Erin Weisbart <54687786+ErinWeisbart@users.noreply.github.com> Date: Wed, 24 May 2023 12:02:05 -0700 Subject: [PATCH 3/6] dashboard read app name correctly --- run.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/run.py b/run.py index 2d60d85..99aff33 100644 --- a/run.py +++ b/run.py @@ -398,7 +398,7 @@ def create_dashboard(requestInfo): "x": 12, "type": "log", "properties": { - "query": f"SOURCE {APP_NAME} | fields @message| filter @message like 'cellprofiler -c'| stats count_distinct(@message)\n", + "query": f"SOURCE '{APP_NAME}' | fields @message| filter @message like 'cellprofiler -c'| stats count_distinct(@message)\n", "region": AWS_REGION, "stacked": False, "title": "Distinct Logs with \"cellprofiler -c\"", @@ -412,7 +412,7 @@ def create_dashboard(requestInfo): "x": 0, "type": "log", "properties": { - "query": f"SOURCE {APP_NAME} | fields @message| filter @message like 'cellprofiler -c'| stats count(@message)", + "query": f"SOURCE '{APP_NAME}' | fields @message| filter @message like 'cellprofiler -c'| stats count(@message)", "region": AWS_REGION, "stacked": False, "title": "All Logs \"cellprofiler -c\"", @@ -426,7 +426,7 @@ def create_dashboard(requestInfo): "x": 0, "type": "log", "properties": { - "query": f"SOURCE {APP_NAME} | fields @message | filter @message like \"Error\" | display @message", + "query": f"SOURCE '{APP_NAME}' | fields @message | filter @message like \"Error\" | display @message", "region": AWS_REGION, "stacked": False, "title": "Errors", From 87070809a458a2e69c3cd880da7102d32a823780 Mon Sep 17 00:00:00 2001 From: Erin Weisbart <54687786+ErinWeisbart@users.noreply.github.com> Date: Wed, 24 May 2023 14:55:03 -0700 Subject: [PATCH 4/6] can only delete 100 alarms at a time --- lambda_function.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lambda_function.py b/lambda_function.py index e14ba94..5b9cd3b 100644 --- a/lambda_function.py +++ b/lambda_function.py @@ -27,7 +27,12 @@ def killdeadAlarms(fleetId, monitorapp, project): if eachevent["EventInformation"]["EventSubType"] == "terminated": todel.append(eachevent["EventInformation"]["InstanceId"]) todel = [f"{project}_{x}" for x in todel] - cloudwatch.delete_alarms(AlarmNames=todel) + if len(todel) <= 100: + cloudwatch.delete_alarms(AlarmNames=todel) + while len(todel) > 100: + dellist = todel[:100] + cloudwatch.delete_alarms(AlarmNames=dellist) + todel = todel[100:] print("Old alarms deleted") @@ -115,7 +120,12 @@ def lambda_handler(event, lambda_context): active_instances = [] for instance in active_dictionary["ActiveInstances"]: active_instances.append(instance["InstanceId"]) - cloudwatch.delete_alarms(AlarmNames=active_instances) + if len(active_instances) <= 100: + cloudwatch.delete_alarms(AlarmNames=active_instances) + while len(active_instances) > 100: + dellist = active_instances[:100] + cloudwatch.delete_alarms(AlarmNames=dellist) + active_instances = active_instances[100:] killdeadAlarms(fleetId, monitorapp, project) # Read spot fleet id and terminate all EC2 instances From 9a09a65e68ea6f9d5038ff4a7fd67b149c5653f4 Mon Sep 17 00:00:00 2001 From: Erin Weisbart <54687786+ErinWeisbart@users.noreply.github.com> Date: Wed, 24 May 2023 14:58:39 -0700 Subject: [PATCH 5/6] missed deleting last chunk of instances --- lambda_function.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lambda_function.py b/lambda_function.py index 5b9cd3b..762a32d 100644 --- a/lambda_function.py +++ b/lambda_function.py @@ -27,12 +27,12 @@ def killdeadAlarms(fleetId, monitorapp, project): if eachevent["EventInformation"]["EventSubType"] == "terminated": todel.append(eachevent["EventInformation"]["InstanceId"]) todel = [f"{project}_{x}" for x in todel] - if len(todel) <= 100: - cloudwatch.delete_alarms(AlarmNames=todel) while len(todel) > 100: dellist = todel[:100] cloudwatch.delete_alarms(AlarmNames=dellist) todel = todel[100:] + if len(todel) <= 100: + cloudwatch.delete_alarms(AlarmNames=todel) print("Old alarms deleted") @@ -120,12 +120,12 @@ def lambda_handler(event, lambda_context): active_instances = [] for instance in active_dictionary["ActiveInstances"]: active_instances.append(instance["InstanceId"]) - if len(active_instances) <= 100: - cloudwatch.delete_alarms(AlarmNames=active_instances) while len(active_instances) > 100: dellist = active_instances[:100] cloudwatch.delete_alarms(AlarmNames=dellist) active_instances = active_instances[100:] + if len(active_instances) <= 100: + cloudwatch.delete_alarms(AlarmNames=active_instances) killdeadAlarms(fleetId, monitorapp, project) # Read spot fleet id and terminate all EC2 instances From f57acdcef684f2d2f9966e6f24f30c6396ed1aab Mon Sep 17 00:00:00 2001 From: Erin Weisbart <54687786+ErinWeisbart@users.noreply.github.com> Date: Thu, 15 Jun 2023 11:53:03 -0700 Subject: [PATCH 6/6] missing comma --- run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.py b/run.py index 99aff33..371a93f 100644 --- a/run.py +++ b/run.py @@ -598,7 +598,7 @@ def startCluster(): createMonitor.write('"MONITOR_QUEUE_NAME" : "'+SQS_QUEUE_NAME+'",\n') createMonitor.write('"MONITOR_BUCKET_NAME" : "'+AWS_BUCKET+'",\n') createMonitor.write('"MONITOR_LOG_GROUP_NAME" : "'+LOG_GROUP_NAME+'",\n') - createMonitor.write('"MONITOR_START_TIME" : "'+ starttime+'"\n') + createMonitor.write('"MONITOR_START_TIME" : "'+ starttime+'",\n') createMonitor.write('"CLEAN_DASHBOARD" : "'+ CLEAN_DASHBOARD+'"}\n') createMonitor.close()