The IAM role (or user) whose credentials run this notebook must include these permissions:
* cloudwatch logs create-log-group
* ecs create-cluster
* ecs delete-cluster
* ecs run-task
* ecs create-service
* ecs update-service
* ecs delete-service
* ec2 run-instances
* ec2 describe-instances 
* ec2 terminate-instances

In [None]:
%env REGION eu-west-1

# ECS-optimized AMI with GPU support in eu-west-1
%env AMI_ID_GPU ami-0638eba79fcfe776e
# ECS-optimized AMI with CPU support in eu-west-1
%env AMI_ID_CPU ami-09cd8db92c6bf3a84 

# Adapt these as needed
# The security must allow incoming TCP ports 22 (ssh) and 8888 (jupyter)
%env SECURITY_GROUP_ID sg-0010f9778dc2e6fb2
%env ROLE_NAME ecsInstanceRole
%env KEY_NAME admin
%env KEY_PAIR ~/.ssh/admin.pem
%env SUBNET_ID subnet-cbf5bdbc


In [None]:
%%sh

# Store $REGION as the region in the AWS CLI configuration
aws configure set region ${REGION}

In [None]:
%%sh 

# CloudWatch log group for task logs; the task definitions reference it by name
aws logs create-log-group \
    --log-group-name awslogs-tf-ecs

In [None]:
%%sh 

# Create the ECS cluster used by every task and service in this notebook
aws ecs create-cluster \
    --cluster-name ecs-demo

In [None]:
%%sh

# Print the user-data file that is passed to the EC2 instances at launch
# (see the --user-data option of the run-instances calls)
cat user-data.txt

In [None]:
%%sh 

# GPU capacity: four p2.xlarge spot instances booted from the ECS-optimized
# GPU AMI, in the default VPC.
# The Name=ecs-demo tag is what the clean-up cell later filters on to find
# the instances to terminate.
aws ec2 run-instances \
    --instance-type p2.xlarge \
    --count 4 \
    --image-id $AMI_ID_GPU \
    --instance-market-options '{"MarketType":"spot"}' \
    --key-name $KEY_NAME \
    --security-group-ids $SECURITY_GROUP_ID \
    --iam-instance-profile Name=$ROLE_NAME \
    --user-data file://user-data.txt \
    --tag-specifications 'ResourceType=instance,Tags=[{Key=Name,Value=ecs-demo}]'

In [None]:
%%sh

# CPU capacity: two c5.2xlarge spot instances booted from the ECS-optimized
# AMI, in the default VPC but placed explicitly in the subnet $SUBNET_ID.
# The Name=ecs-demo tag is what the clean-up cell later filters on to find
# the instances to terminate.
# Note: --subnet-id is the option name documented for `aws ec2 run-instances`.
aws ec2 run-instances --image-id $AMI_ID_CPU \
                       --count 2 \
                       --instance-type c5.2xlarge \
                       --instance-market-options '{"MarketType":"spot"}' \
                       --tag-specifications 'ResourceType=instance,Tags=[{Key=Name,Value=ecs-demo}]' \
                       --key-name $KEY_NAME \
                       --security-group-ids $SECURITY_GROUP_ID \
                       --iam-instance-profile Name=$ROLE_NAME \
                       --user-data file://user-data.txt \
                       --subnet-id $SUBNET_ID

In [None]:
%%sh 

# Registration of the two task definitions (training.json, inference.json).
# Both commands are disabled here; later cells refer to revisions training:1
# and inference:1, so presumably the definitions are already registered.
# NOTE(review): uncomment on a first run in a fresh account - confirm.
#aws ecs register-task-definition --cli-input-json file://training.json
#aws ecs register-task-definition --cli-input-json file://inference.json

In [None]:
%%sh

# Point ecs-cli at the demo cluster in $REGION
ecs-cli configure \
    --region $REGION \
    --cluster ecs-demo

# Then list what is currently meant to be running on it
ecs-cli ps \
    --desired-status RUNNING

In [None]:
%%sh 

# Start four copies of the training task. No placement constraint is passed:
# the GPU requirement lives in the task definition itself.
aws ecs run-task \
    --cluster ecs-demo \
    --task-definition training:1 \
    --count 4

In [None]:
%%sh

# List entries whose desired status is RUNNING, to check on the training
# tasks that were just started
ecs-cli ps --desired-status RUNNING

In [None]:
%%sh 

# Paste the ID of one training task (see the `ecs-cli ps` output) after the '='
export TRAINING_TASK_ID=

# Abort with a clear message while the placeholder above is still empty,
# instead of handing ecs-cli a --task-id flag with no value
: "${TRAINING_TASK_ID:?set TRAINING_TASK_ID to a task ID from 'ecs-cli ps'}"

# Fetch that task's logs and drop empty lines
ecs-cli logs --task-id "$TRAINING_TASK_ID" | grep -v '^$'

In [None]:
%%sh

# Inference service, started with a single task.
# The placement constraint restricts it to c5 instances; the placement
# strategy spreads tasks evenly across instances.
aws ecs create-service \
    --cluster ecs-demo \
    --service-name inference-cpu \
    --task-definition inference:1 \
    --desired-count 1 \
    --placement-strategy field="instanceId",type="spread" \
    --placement-constraints type="memberOf",expression="attribute:ecs.instance-type =~ c5.*"

In [None]:
%%sh

# List entries whose desired status is RUNNING, to check on the inference
# service's task
ecs-cli ps --desired-status RUNNING

In [None]:
%%sh 

# Paste the IP address of an instance serving the inference task after the '='
# (presumably taken from the `ecs-cli ps` output)
export INFERENCE_IP=

# Abort with a clear message while the placeholder above is still empty,
# instead of sending the request to the malformed URL http://:8501/...
: "${INFERENCE_IP:?set INFERENCE_IP to the address of an inference instance}"

# POST a prediction request with three input values to the model on port 8501
curl -d '{"instances": [1.0, 2.0, 5.0]}' \
     -X POST "http://$INFERENCE_IP:8501/v1/models/saved_model_half_plus_two_cpu:predict"

In [None]:
%%sh

# Grow the inference service from one task to two
aws ecs update-service \
    --cluster ecs-demo \
    --service inference-cpu \
    --desired-count 2

In [None]:
%%sh

# List entries whose desired status is RUNNING, to check that the service
# was scaled
ecs-cli ps --desired-status RUNNING 

In [None]:
%%sh

# Remove the inference service in two steps: first drain it to zero tasks ...
aws ecs update-service \
    --cluster ecs-demo \
    --service inference-cpu \
    --desired-count 0

# ... then delete the service itself
aws ecs delete-service \
    --cluster ecs-demo \
    --service inference-cpu

In [None]:
%%sh

# List entries whose desired status is RUNNING, to check that the inference
# service's tasks are gone
ecs-cli ps --desired-status RUNNING

In [None]:
%%sh 

# Collect the IDs of all running instances tagged Name=ecs-demo.
# A plain assignment (no `export`) keeps the exit status of the aws call,
# so a failed lookup stops the cell instead of looking like "no instances".
INSTANCE_IDS=$(aws ec2 describe-instances \
    --filters "Name=tag:Name,Values=ecs-demo" "Name=instance-state-name,Values=running" \
    --query "Reservations[*].Instances[*].InstanceId" \
    --output text) || exit 1

echo $INSTANCE_IDS

# Nothing to do when no instance matched (e.g. the cell is run a second time);
# terminate-instances rejects an empty --instance-ids list
if [ -z "$INSTANCE_IDS" ]; then
    echo "No running ecs-demo instances found"
    exit 0
fi

# Terminate the instances. $INSTANCE_IDS is deliberately unquoted so that the
# whitespace-separated IDs are passed as separate arguments.
aws ec2 terminate-instances --instance-ids $INSTANCE_IDS


In [None]:
%%sh

# Final clean-up step: remove the ECS cluster
aws ecs delete-cluster \
    --cluster ecs-demo