Skip to content

Commit

Permalink
Merged in dev/fabien/MPC-5256-ems-scaling-through-ems-router (pull re…
Browse files Browse the repository at this point in the history
…quest elastic#606)

MPC-5256: ems scaling

* s/arn/name for policy attachment

* Add IAM policy to let EMS Router call autoscaling APIs. Specifically:
* autoscaling:DescribeAutoScalingGroups
* autoscaling:SetDesiredCapacity

Also, rename Terraform resources specific to EMS-Router to include
"router" in the name.

* Add tf- prefix to ems-router-autoscaling resource names

* Merged master into dev/fabien/MPC-5256-ems-scaling-through-ems-router
* swap access keys for irsa as per discussion

* Let EMS Router set MinSize, MaxSize, InstanceType via the AWS Auto Scaling UpdateAutoScalingGroup() API

* Merge branch 'dev/fabien/MPC-5256-ems-scaling-through-ems-router' of bitbucket.org:engageli/devops into dev/fabien/MPC-5256-ems-scaling-through-ems-router

* Fix indentation

* s/namespace-service-account/cluster-service-account/

* s/cr_service_account/cluster_service_account/


Approved-by: Can Yildiz
Approved-by: Hiroshi Ishii
  • Loading branch information
fabien committed Apr 28, 2022
1 parent f36ed4b commit ebce8d3
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 6 deletions.
71 changes: 70 additions & 1 deletion aws/ams-cluster-v1-tf/iam.tf
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ resource "aws_iam_policy" "ems_autoscaling" {
"Effect": "Allow",
"Resource": [
${join(",",
[ for i in range(var.ems_num_clusters) :
[
for i in range(var.ems_num_clusters) :
"\"${aws_autoscaling_group.ems[i].arn}\""
]
)}
Expand All @@ -106,6 +107,74 @@ resource "aws_iam_policy" "ems_autoscaling" {
EOF
}

#
# Gives more access than needed, since the policy below allows the EMS Router
# of one cluster to call autoscaling APIs for the ASG of other clusters
#
resource "aws_iam_policy" "ems_router_autoscaling" {
  name        = "tf-${var.stack_name}-ems-router-autoscaling"
  description = "Allow EMS Router to call some autoscaling APIs"

  # Action/resource reference:
  # https://docs.aws.amazon.com/service-authorization/latest/reference/list_amazonec2autoscaling.html
  #
  # The document is built with jsonencode() rather than a hand-assembled
  # heredoc, so quoting, escaping and comma placement are handled by Terraform
  # instead of string interpolation.
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        # Read-only lookup of ASGs; granted on every resource.
        Action = [
          "autoscaling:DescribeAutoScalingGroups",
        ]
        Effect   = "Allow"
        Resource = "*"
      },
      {
        # Mutating calls are limited to the EMS auto scaling groups of this
        # stack. A single shared policy lists all of them, so each router can
        # act on every EMS ASG, not only its own.
        Action = [
          "autoscaling:SetDesiredCapacity",
          "autoscaling:UpdateAutoScalingGroup",
        ]
        Effect = "Allow"
        # NOTE(review): this list is empty when var.ems_num_clusters is 0;
        # confirm whether IAM accepts a statement with no resources.
        Resource = [
          for i in range(var.ems_num_clusters) :
          aws_autoscaling_group.ems[i].arn
        ]
      },
    ]
  })
}

# One IAM role per EMS cluster, assumable through the EKS OIDC provider by the
# matching "ems-router-XX" Kubernetes service account in the "ems" namespace.
resource "aws_iam_role" "ems_router_autoscaling_role" {
  count = var.ems_num_clusters
  name  = "${local.eks_cluster_name}-ems-router-${format("%02d", count.index)}"

  assume_role_policy = jsonencode({
    "Version" = "2012-10-17"
    "Statement" = [
      {
        # IAM documents the Effect value as case-sensitive: "Allow" or "Deny".
        "Effect" = "Allow"
        "Principal" = {
          "Federated" = module.eks.oidc_provider_arn
        }
        "Action" = "sts:AssumeRoleWithWebIdentity"
        "Condition" = {
          "StringEquals" = {
            # Condition key is "<issuer host/path>:sub"; the scheme is stripped
            # from the issuer URL to form it. Only the service account with the
            # same two-digit index as this role matches.
            "${replace(module.eks.cluster_oidc_issuer_url, "https://", "")}:sub" = "system:serviceaccount:ems:ems-router-${format("%02d", count.index)}"
          }
        }
      }
    ]
  })
}

# Attach the shared EMS Router autoscaling policy to each per-cluster role.
resource "aws_iam_role_policy_attachment" "ems_router_autoscaling" {
  count = var.ems_num_clusters

  # Role with the same index as this attachment.
  role = aws_iam_role.ems_router_autoscaling_role[count.index].name

  # The same policy is attached to every role.
  policy_arn = aws_iam_policy.ems_router_autoscaling.arn
}

resource "aws_iam_policy" "transcription" {
name = "tf-${var.stack_name}-transcription"
description = "Allow backend to transcribe (caption)"
Expand Down
36 changes: 31 additions & 5 deletions aws/ams-cluster-v1-tf/tfmodules/k8s_namespace/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,25 @@ resource "kubernetes_namespace_v1" "namespace" {
}
}

# One Kubernetes service account per app, named after the app key. When the app
# has an IAM role ARN configured, the account is annotated with it.
resource "kubernetes_service_account_v1" "app_service_account" {
  for_each = var.apps

  metadata {
    name      = each.key
    labels    = var.labels
    namespace = kubernetes_namespace_v1.namespace.metadata.0.name

    # Emit the role annotation only when a role ARN is actually set. Apps
    # without a role would otherwise carry an empty
    # "eks.amazonaws.com/role-arn" annotation.
    # NOTE(review): an empty annotation may interfere with credential
    # injection for those pods; confirm against the EKS pod identity webhook.
    # A filtered for-expression is used instead of a conditional so both
    # outcomes have the same type.
    annotations = {
      for key, arn in { "eks.amazonaws.com/role-arn" = each.value.iam_role_arn } :
      key => arn
      if arn != null && arn != ""
    }
  }
}

### EMS specific
### we want to allow EMS to check its current node IPs
### for now, we map this to host_network
resource "kubernetes_service_account_v1" "service_account" {
resource "kubernetes_service_account_v1" "cluster_service_account" {
count = length({ for app, config in var.apps : app => config if config.host_network }) > 0 ? 1 : 0
metadata {
name = "namespace-service-account"
name = "cluster-service-account"
labels = var.labels
namespace = kubernetes_namespace_v1.namespace.metadata.0.name
}
Expand Down Expand Up @@ -49,11 +61,10 @@ resource "kubernetes_cluster_role_binding_v1" "role_binding" {
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account_v1.service_account.0.metadata.0.name
name = kubernetes_service_account_v1.cluster_service_account.0.metadata.0.name
namespace = kubernetes_namespace_v1.namespace.metadata.0.name
}
}

### end of EMS specific block

resource "kubernetes_secret_v1" "secrets" {
Expand Down Expand Up @@ -192,7 +203,7 @@ resource "kubernetes_deployment_v1" "deployment" {
security_context {
run_as_non_root = true
}
service_account_name = each.value.host_network ? kubernetes_service_account_v1.service_account.0.metadata.0.name : "default"
service_account_name = each.value.host_network ? kubernetes_service_account_v1.cluster_service_account.0.metadata.0.name : kubernetes_service_account_v1.app_service_account[each.key].metadata.0.name
host_network = each.value.host_network
container {
# same here, explicitly denying privileges helps me sleep better
Expand All @@ -216,6 +227,11 @@ resource "kubernetes_deployment_v1" "deployment" {
name = kubernetes_secret_v1.secrets.metadata.0.name
}
}
env_from {
secret_ref {
name = kubernetes_secret_v1.app_secret[each.key].metadata.0.name
}
}
env_from {
config_map_ref {
name = kubernetes_config_map_v1.config_map.metadata.0.name
Expand Down Expand Up @@ -274,6 +290,16 @@ resource "kubernetes_config_map_v1" "app_config_map" {
})
}

# Per-app Kubernetes secret, named after the app key and populated from the
# app's "secret" map.
resource "kubernetes_secret_v1" "app_secret" {
  for_each = var.apps

  # Key/value pairs taken from the app definition.
  data = each.value.secret

  metadata {
    namespace = kubernetes_namespace_v1.namespace.metadata[0].name
    name      = each.key
    labels    = local.app_labels[each.key]
  }
}

resource "kubernetes_service_v1" "service" {
for_each = var.apps
metadata {
Expand Down
2 changes: 2 additions & 0 deletions aws/ams-cluster-v1-tf/tfmodules/k8s_namespace/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ variable "apps" {
image_registry = string
image_tag = string
config_map = map(string)
secret = map(string)
create_ingress = bool
host_network = bool
iam_role_arn = string
resources = object(
{
limits = object(
Expand Down
3 changes: 3 additions & 0 deletions aws/ams-cluster-v1-tf/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -753,11 +753,13 @@ locals {
replicas = 0
ingress_annotations = {}
config_map = {}
secret = {}
create_ingress = true
host_network = false
image_registry = var.engageli_image_registry_default
image_repository_suffix = local.image_repository_suffix
image_tag = var.engageli_image_tag_default
iam_role_arn = ""
resources = {
limits = {
cpu = "500m"
Expand Down Expand Up @@ -804,6 +806,7 @@ locals {
replicas = 1
# hardcoded as the app name is ems-router-XX
image_repository = "ems-router"
iam_role_arn = aws_iam_role.ems_router_autoscaling_role[i].arn
config_map = {
EMS_CLUSTER_ID = format("/ems%02d", i)
EMS_ASG_NAME = var.eks_ems_enabled ? "" : aws_autoscaling_group.ems[i].name
Expand Down

0 comments on commit ebce8d3

Please sign in to comment.