Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
445 lines (380 sloc) 12 KB
---
################################################################################
#
# Description
#
# Human-readable summary shown for the stack.
Description: 'Example of 2 ASG, one on Demand the other on Spot'
################################################################################
#
# Parameters
#
Parameters:
  # Environment selector. The value is passed to the cfGetVpcId custom
  # resource and is also embedded in resource names, tags and the IAM
  # instance-profile ARN used by the launch configurations.
  VPCName:
    Type: String
    Description: Name of the VPC in which GPUWorker will be deployed
    Default: Dev
    AllowedValues:
      - Dev
      - Prod
    AllowedPattern: '[\x20-\x7E]*'
    MinLength: '3'
    MaxLength: '255'
    ConstraintDescription: can contain only ASCII characters.
  # Key pair installed on the instances of both launch configurations.
  KeyName:
    Type: AWS::EC2::KeyPair::KeyName
    Description: EC2 KeyPair assigned to the instance
    AllowedPattern: '[\x20-\x7E]*'
    MinLength: '1'
    MaxLength: '255'
    ConstraintDescription: can contain only ASCII characters.
  # Instance type shared by the On-Demand and Spot launch configurations.
  # Only one value is currently allowed.
  InstanceType:
    Type: String
    Description: EC2 instance type
    Default: g2.2xlarge
    AllowedValues:
      - g2.2xlarge
    ConstraintDescription: must be a valid EC2 instance type.
################################################################################
#
# Conditions
#
Conditions:
  # True when the VPCName parameter equals 'Prod'.
  # NOTE(review): nothing in the visible template references this condition;
  # confirm whether it is still needed.
  isProd:
    Fn::Equals:
      - Ref: VPCName
      - Prod
################################################################################
#
# Mappings
#
#Mappings:
################################################################################
#
# Resources
#
Resources:
  ##############################################################################
  #
  # Lambda-backed custom resources
  #
  # Each resource invokes the Lambda function named in its ServiceToken. The
  # attributes read back through !GetAtt elsewhere in this template
  # (FFMPEG-NVENC, FFMPEG-NoNVENC, VpcId, ApplicationSubnet) are whatever those
  # functions return; their implementation is not part of this template.
  # NOTE(review): the function ARNs are pinned to eu-west-1.
  #
  lambdaGetImages:
    Type: AWS::CloudFormation::CustomResource
    Version: '1.0'
    Properties:
      ServiceToken: !Sub 'arn:aws:lambda:eu-west-1:${AWS::AccountId}:function:cfGetImages'
  lambdaGetVpcId:
    Type: AWS::CloudFormation::CustomResource
    Version: '1.0'
    Properties:
      ServiceToken: !Sub 'arn:aws:lambda:eu-west-1:${AWS::AccountId}:function:cfGetVpcId'
      VpcName: !Ref VPCName
  lambdaGetSubnets:
    Type: AWS::CloudFormation::CustomResource
    Version: '1.0'
    Properties:
      # Fn::Sub only substitutes ${...}; the previous '#{AWS::AccountId}' was
      # passed through literally and produced an invalid function ARN.
      ServiceToken: !Sub 'arn:aws:lambda:eu-west-1:${AWS::AccountId}:function:cfGetVpcSubnets'
      VpcId: !GetAtt [lambdaGetVpcId, VpcId]
  ##############################################################################
  #
  # lcGPU - On-Demand launch configuration
  #
  lcGPU:
    Type: AWS::AutoScaling::LaunchConfiguration
    Properties:
      KeyName: !Ref KeyName
      ImageId: !GetAtt [lambdaGetImages, FFMPEG-NVENC]
      InstanceType: !Ref InstanceType
      InstanceMonitoring: false
      IamInstanceProfile: !Sub 'arn:aws:iam::${AWS::AccountId}:instance-profile/ec2MediaWorker${VPCName}'
      SecurityGroups:
        - !GetAtt [sgGPUWorkers, GroupId]
      # Boot script. It installs pip, awscli, boto3 and the cfn bootstrap
      # tools, then re-runs the 'ansible' configSet every 5 seconds until
      # /var/tmp/cf_vars.yml exists. On the pass where ITER reaches TIMEOUT
      # the instance is halted. Finally cfn-init is run without -c, which
      # applies the 'default' configSet.
      # The script is a literal block scalar, so shell quotes are written
      # as-is (no backslash escaping); only ${...} is expanded by Fn::Sub.
      UserData:
        Fn::Base64:
          Fn::Sub: |
            #!/bin/bash
            export PATH=$PATH:/usr/bin:/usr/local/sbin:/usr/local/bin:/opt/aws/bin
            ntpdate -u 0.europe.pool.ntp.org
            pip --version || curl https://bootstrap.pypa.io/get-pip.py | python
            pip install pip --upgrade
            pip install awscli --upgrade
            pip install boto3 --upgrade
            pip install https://s3.amazonaws.com/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz
            find /usr/bin -name "cfn-*" -exec chmod +x {} +
            TIMEOUT=6
            ITER=0
            while ! [ -f /var/tmp/cf_vars.yml ]; do
              echo $ITER
              cfn-init --region ${AWS::Region} --stack ${AWS::StackName} -r lcGPU -c ansible
              if [ $ITER -eq $TIMEOUT ]; then
                halt
              fi
              sleep 5
              ITER=$((ITER+1))
            done
            cfn-init --region ${AWS::Region} \
              --stack ${AWS::StackName} \
              -r lcGPU
    Metadata:
      AWS::CloudFormation::Init:
        ########################################################################
        #
        # Configs
        #
        # ansibleVars writes a helper script that re-runs the 'ansible'
        # configSet and a YAML file exposing the stack name and environment.
        ansibleVars:
          files:
            /var/tmp/cf_vars.sh:
              # cfn-init file modes are six-digit octal strings.
              mode: '000755'
              owner: root
              group: root
              content:
                Fn::Sub: |
                  #!/bin/bash
                  cfn-init --region ${AWS::Region} --stack ${AWS::StackName} -r lcGPU -c ansible
            /var/tmp/cf_vars.yml:
              mode: '000644'
              owner: root
              group: root
              content:
                Fn::Join:
                  - "\n"
                  - - '---'
                    - ''
                    - '#########################################################'
                    - '# Stack Settings'
                    - Fn::Sub: 'stack_name: ${AWS::StackName}'
                    - Fn::Sub: 'env: ${VPCName}'
                    - ''
        ########################################################################
        # Stops the CodeDeploy agent, runs its installer, then starts it again.
        # Commands run in alphabetical order of their keys.
        upgradeCodeDeployAgent:
          commands:
            0001-stop-agent:
              command: service codedeploy-agent stop
            0002-upgrade-agent:
              command: /opt/codedeploy-agent/bin/install auto
            0003-start-agent:
              command: service codedeploy-agent start
        ########################################################################
        #
        # ConfigSets
        #
        configSets:
          ansible:
            - ansibleVars
          default:
            - upgradeCodeDeployAgent
  ##############################################################################
  #
  # asgGPU - On-Demand Auto Scaling group
  #
  asgGPU:
    DependsOn: lcGPU
    Type: AWS::AutoScaling::AutoScalingGroup
    Properties:
      LaunchConfigurationName: !Ref lcGPU
      TerminationPolicies:
        - OldestLaunchConfiguration
        - ClosestToNextInstanceHour
        - NewestInstance
      HealthCheckGracePeriod: 300
      # The group starts empty and can hold at most one instance.
      MinSize: '0'
      MaxSize: '1'
      DesiredCapacity: '0'
      MetricsCollection:
        - Granularity: 1Minute
          Metrics:
            - GroupMinSize
            - GroupMaxSize
      LoadBalancerNames: []
      VPCZoneIdentifier: !GetAtt [lambdaGetSubnets, ApplicationSubnet]
      Tags:
        - Key: Name
          Value: !Sub 'gpu-mediaworker-${VPCName}'
          PropagateAtLaunch: true
        - Key: VPCName
          Value: !Ref VPCName
          PropagateAtLaunch: true
  ##############################################################################
  #
  # asgGPU scaling policies
  #
  # NOTE(review): no CloudWatch alarm in the visible template triggers these
  # policies; confirm they are wired up elsewhere.
  #
  asgGPUScaleUpPolicy:
    Type: AWS::AutoScaling::ScalingPolicy
    Properties:
      AdjustmentType: ChangeInCapacity
      AutoScalingGroupName: !Ref asgGPU
      Cooldown: '300'
      ScalingAdjustment: '1'
  asgGPUScaleDownPolicy:
    Type: AWS::AutoScaling::ScalingPolicy
    Properties:
      AdjustmentType: ChangeInCapacity
      AutoScalingGroupName: !Ref asgGPU
      Cooldown: '300'
      ScalingAdjustment: '-1'
  ##############################################################################
  #
  # SPOT LC / ASG
  #
  lcGPUSpot:
    Type: AWS::AutoScaling::LaunchConfiguration
    Properties:
      # Spot bid; the same value is repeated in the SpotPrice tag of
      # asgGPUSpot and must be kept in sync by hand.
      SpotPrice: '0.35'
      KeyName: !Ref KeyName
      ImageId: !GetAtt [lambdaGetImages, FFMPEG-NoNVENC]
      InstanceType: !Ref InstanceType
      InstanceMonitoring: false
      IamInstanceProfile: !Sub 'arn:aws:iam::${AWS::AccountId}:instance-profile/ec2MediaWorker${VPCName}'
      SecurityGroups:
        - !GetAtt [sgGPUWorkers, GroupId]
      # Boot script: same tool installation as lcGPU, followed by a single
      # cfn-init run of the 'default' configSet. Written as one block scalar;
      # the rendered script is the same text the former Fn::Join produced.
      UserData:
        Fn::Base64:
          Fn::Sub: |
            #!/bin/bash

            export PATH=$PATH:/usr/bin:/usr/local/sbin:/usr/local/bin:/opt/aws/bin

            ntpdate -u 0.europe.pool.ntp.org

            pip --version || curl https://bootstrap.pypa.io/get-pip.py | python
            pip install pip --upgrade
            pip install awscli --upgrade
            pip install boto3 --upgrade
            pip install https://s3.amazonaws.com/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz
            find /usr/bin -name "cfn-*" -exec chmod +x {} +

            # RUN THE CONFIGSETS
            cfn-init --region ${AWS::Region} --stack ${AWS::StackName} -r lcGPUSpot
    Metadata:
      AWS::CloudFormation::Init:
        ########################################################################
        #
        # Configs
        #
        # ansibleVars writes a helper script that re-runs the 'ansible'
        # configSet and a YAML file exposing the stack name and environment.
        ansibleVars:
          files:
            /var/tmp/cf_vars.sh:
              # cfn-init file modes are six-digit octal strings.
              mode: '000755'
              owner: root
              group: root
              content:
                Fn::Sub: |
                  #!/bin/bash
                  cfn-init --region ${AWS::Region} --stack ${AWS::StackName} -r lcGPUSpot -c ansible
            /var/tmp/cf_vars.yml:
              mode: '000644'
              owner: root
              group: root
              content:
                Fn::Join:
                  - "\n"
                  - - '---'
                    - '#########################################################'
                    - '# Stack Settings'
                    - Fn::Sub: 'stack_name: ${AWS::StackName}'
                    - Fn::Sub: 'env: ${VPCName}'
                    - ''
        ########################################################################
        # Stops the CodeDeploy agent, runs its installer, then starts it again.
        # Commands run in alphabetical order of their keys.
        upgradeCodeDeployAgent:
          commands:
            0001-stop-agent:
              command: service codedeploy-agent stop
            0002-upgrade-agent:
              command: /opt/codedeploy-agent/bin/install auto
            0003-start-agent:
              command: service codedeploy-agent start
        ########################################################################
        #
        # ConfigSets
        #
        configSets:
          # 'ansible' is the configSet /var/tmp/cf_vars.sh asks for with
          # '-c ansible'; it was previously missing from this resource.
          ansible:
            - ansibleVars
          default:
            - ansibleVars
            - upgradeCodeDeployAgent
  ##############################################################################
  #
  # ASG GPU Spot
  #
  asgGPUSpot:
    DependsOn: lcGPUSpot
    Type: AWS::AutoScaling::AutoScalingGroup
    Properties:
      LaunchConfigurationName: !Ref lcGPUSpot
      TerminationPolicies:
        - ClosestToNextInstanceHour
        - OldestLaunchConfiguration
        - NewestInstance
      HealthCheckGracePeriod: 300
      # The group starts empty and can hold at most one instance.
      MinSize: '0'
      MaxSize: '1'
      DesiredCapacity: '0'
      MetricsCollection:
        - Granularity: 1Minute
          Metrics:
            - GroupMinSize
            - GroupMaxSize
      VPCZoneIdentifier: !GetAtt [lambdaGetSubnets, ApplicationSubnet]
      Tags:
        - Key: Name
          Value: !Sub 'gpu-mediaworker-${VPCName}'
          PropagateAtLaunch: true
        - Key: VPCName
          Value: !Ref VPCName
          PropagateAtLaunch: true
        - Key: SpotPrice
          Value: '0.35'
          PropagateAtLaunch: true
        - Key: SpotType
          Value: !Ref InstanceType
          PropagateAtLaunch: true
  ##############################################################################
  #
  # asgGPUSpot scaling policies
  #
  # NOTE(review): no CloudWatch alarm in the visible template triggers these
  # policies; confirm they are wired up elsewhere.
  #
  asgGPUSpotScaleUpPolicy:
    Type: AWS::AutoScaling::ScalingPolicy
    Properties:
      AdjustmentType: ChangeInCapacity
      AutoScalingGroupName: !Ref asgGPUSpot
      Cooldown: '300'
      ScalingAdjustment: '1'
  asgGPUSpotScaleDownPolicy:
    Type: AWS::AutoScaling::ScalingPolicy
    Properties:
      AdjustmentType: ChangeInCapacity
      AutoScalingGroupName: !Ref asgGPUSpot
      Cooldown: '300'
      ScalingAdjustment: '-1'
  ##############################################################################
  #
  # Security Groups
  #
  # Shared by both launch configurations. No ingress or egress rules are
  # declared here.
  sgGPUWorkers:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: SG for GPUWorkers
      VpcId: !GetAtt [lambdaGetVpcId, VpcId]
      Tags:
        - Key: Name
          Value: !Sub 'sg-${AWS::StackName}-${VPCName}'
        - Key: VPCName
          Value: !Ref VPCName
################################################################################
#
# Outputs
#
Outputs:
  # Name of the On-Demand Auto Scaling group.
  asgGpuCore:
    Description: ASG GPU Core Name
    Value:
      Ref: asgGPU
  # Name of the Spot Auto Scaling group.
  asgGpuSpot:
    Description: ASG GPU Spot Name
    Value:
      Ref: asgGPUSpot
  # FFMPEG-NVENC attribute returned by the lambdaGetImages custom resource
  # (the image used by lcGPU).
  ImageId:
    Description: ID of the Image Used
    Value:
      Fn::GetAtt:
        - lambdaGetImages
        - FFMPEG-NVENC
################################################################################
#
# Template format version
#
# Kept quoted so the value stays a string rather than being typed as a date.
AWSTemplateFormatVersion: '2010-09-09'