Skip to content

Commit

Permalink
Add GKE Security Features to Deployment Manager config (kubeflow#879)
Browse files Browse the repository at this point in the history
* Add GKE Security Features to bootstrapper

Use gke-default oauth scopes

Update to gkeApiVersion

Add securityConfig

Enable iam api in dm

* Add oauth scopes manually. gke-default does not work

* Fix oauthscopes

* Add PodSecurityPolicy only to v1beta1
  • Loading branch information
Ankush Agarwal authored and k8s-ci-robot committed May 30, 2018
1 parent af6e561 commit adbcbba
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 55 deletions.
32 changes: 27 additions & 5 deletions docs/gke/configs/cluster-kubeflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,50 @@ resources:
# 1. Use a single template (.jinja file for all resources) or
# 2. Create two separate deployments and launch the bootstrapper
# after the cluster is created.
#
# Two separate deployments doesn't make much sense; we could just use
#
# Two separate deployments doesn't make much sense; we could just use
# kubectl at that point. So we put all resources in a single deployment.
- name: kubeflow
type: cluster.jinja
properties:
zone: us-east1-d
# Set this to v1beta1 to use beta features such as private clusters,
gkeApiVersion: v1
# An arbitrary string appending to name of nodepools
# bump this if you want to modify the node pools.
# This will cause existing node pools to be deleted and new ones to be created.
# Use prefix v so it will be treated as a string.
pool-version: v1
# Two is small enough to fit within default quota.
cpu-pool-initialNodeCount: 2
gpu-pool-initialNodeCount: 0
gpu-pool-initialNodeCount: 0
# Whether to deploy the new Stackdriver Kubernetes agents
stackdriver-kubernetes: false
securityConfig:
# Whether to use a cluster with private IPs
# Use v1beta1 api
privatecluster: false
# masterIpv4CidrBlock for private clusters, if enabled
# Use v1beta1 api
masterIpv4CidrBlock: 172.16.0.16/28
# Protect worker node metadata from pods
# Use v1beta1 api
secureNodeMetadata: false
# Whether to enable Pod Security Policy Admission Controller
# Use v1beta1 api
podSecurityPolicy: false
masterAuthorizedNetworksConfig:
cidrBlocks:
- cidrBlock: 1.2.3.4/32
- cidrBlock: 5.6.7.8/32
enabled: false
# Path for the bootstrapper image.
bootstrapperImage: gcr.io/kubeflow-images-public/bootstrapper:latest
# This is the name of the GCP static ip address to reserve for your domain.
# This must be different for each Kubeflow deployment in your project.
ipName: kubeflow-ip
# Name of the service account to use for k8s worker node pools
vmServiceAccountName: kubeflow-service-account
# Provide the config for the bootstrapper. This should be a string
# containing the YAML spec for the bootstrapper.
#
Expand All @@ -58,7 +80,7 @@ resources:
# Project - This is the name of your GCP provided project.
bootstrapperConfig: |
# Apps only apply if on GKE
app:
app:
packages:
- name: core
- name: tf-serving
Expand Down Expand Up @@ -90,4 +112,4 @@ resources:
value: kubeflow.endpoints.<Project>.cloud.goog
- component: kubeflow-core
name: jupyterHubAuthenticator
value: iap
value: iap
137 changes: 87 additions & 50 deletions docs/gke/configs/cluster.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ limitations under the License.
{% set RBAC_TYPE_NAME = TYPE_NAME + '-rbac-v1' %}
{% set APPS_TYPE_NAME = TYPE_NAME + '-apps-v1' %}

{# A dictionary mapping type name suffixes to the corresponding
{% set VM_OAUTH_SCOPES = ['https://www.googleapis.com/auth/logging.write', 'https://www.googleapis.com/auth/monitoring'] %}

{# A dictionary mapping type name suffixes to the corresponding
Kubernetes API endpoint.
#}
{% set K8S_ENDPOINTS = {'': 'api/v1', '-v1beta1-extensions': 'apis/extensions/v1beta1', '-rbac-v1': 'apis/rbac.authorization.k8s.io/v1', '-apps-v1': 'apis/apps/v1/'} %}
Expand Down Expand Up @@ -56,15 +58,25 @@ resources:
properties:
accountId: {{ KF_ADMIN_NAME }}
displayName: Service Account used for Kubeflow admin actions.
- name: kubeflow-cluster-vm-service-account
type: iam.v1.serviceAccount
properties:
accountId: {{ properties['vmServiceAccountName'] }}
displayName: GCP Service Account to use as VM Service Account for Kubeflow Cluster VMs

- name: {{ CLUSTER_NAME }}
{% if properties['gkeApiVersion'] == 'v1beta1' %}
type: gcp-types/container-v1beta1:projects.locations.clusters
{% else %}
type: container.v1.cluster
{% endif %}
properties:
parent: projects/{{ env['project'] }}/locations/{{ properties['zone'] }}
zone: {{ properties['zone'] }}
cluster:
name: {{ CLUSTER_NAME }}
# Create a very small minimal pool. Actual nodes will be managed
# as additional node pools. This makes it easier to
# as additional node pools. This makes it easier to
initialNodeCount: 1
{% if properties['stackdriver-kubernetes'] %}
# TODO: remove alpha when 10.2 is public.
Expand All @@ -79,59 +91,83 @@ resources:
monitoringService: monitoring.googleapis.com/kubernetes
{% else %}
initialClusterVersion: 1.9.6-gke.1
{% endif %}
{% endif %}
{% if properties['gkeApiVersion'] == 'v1beta1' %}
podSecurityPolicyConfig:
enabled: {{ properties['securityConfig']['podSecurityPolicy'] }}
{% endif %}
{% if properties['securityConfig']['privatecluster'] %}
ipAllocationPolicy:
createSubnetwork: true
useIpAliases: true
masterIpv4CidrBlock: {{ properties['securityConfig']['masterIpv4CidrBlock'] }}
privateCluster: true
masterAuthorizedNetworksConfig: {{ properties['securityConfig']['masterAuthorizedNetworksConfig'] }}
{% endif %}
nodeConfig:
machineType: n1-standard-1
oauthScopes:
- https://www.googleapis.com/auth/compute
- https://www.googleapis.com/auth/devstorage.read_only
- https://www.googleapis.com/auth/logging.write
- https://www.googleapis.com/auth/monitoring
serviceAccount: {{ properties['vmServiceAccountName'] }}@{{ env['project'] }}.iam.gserviceaccount.com
{% if properties['securityConfig']['secureNodeMetadata'] %}
workloadMetadataConfig:
nodeMetadata: SECURE
{% endif %}
oauthScopes: {{ VM_OAUTH_SCOPES }}
metadata:
dependsOn:
- kubeflow-cluster-vm-service-account

# We manage the node pools as separate resources.
# We do this so that if we want to make changes we can delete the existing resource and then recreate it.
# Updating doesn't work so well because we are limited in what changes GKE's update method supports.

- name: {{ CPU_POOL }}
{% if properties['gkeApiVersion'] == 'v1beta1' %}
type: gcp-types/container-v1beta1:projects.locations.clusters.nodePools
{% else %}
type: container.v1.nodePool
{% endif %}
properties:
parent: projects/{{ env['project'] }}/locations/{{ properties['zone'] }}/clusters/{{ CLUSTER_NAME }}
project: {{ properties['project'] }}
zone: {{ properties['zone'] }}
clusterId: {{ CLUSTER_NAME }}
nodePool:
name: cpu-pool
name: cpu-pool
initialNodeCount: {{ properties['cpu-pool-initialNodeCount'] }}
config:
config:
{% if properties['securityConfig']['secureNodeMetadata'] %}
workloadMetadataConfig:
nodeMetadata: SECURE
{% endif %}
machineType: n1-standard-8
oauthScopes:
- https://www.googleapis.com/auth/compute
- https://www.googleapis.com/auth/devstorage.read_only
- https://www.googleapis.com/auth/logging.write
- https://www.googleapis.com/auth/monitoring

serviceAccount: {{ properties['vmServiceAccountName'] }}@{{ env['project'] }}.iam.gserviceaccount.com
oauthScopes: {{ VM_OAUTH_SCOPES }}
metadata:
dependsOn:
- {{ CLUSTER_NAME }}

- name: {{ GPU_POOL }}
{% if properties['gkeApiVersion'] == 'v1beta1' %}
type: gcp-types/container-v1beta1:projects.locations.clusters.nodePools
{% else %}
type: container.v1.nodePool
{% endif %}
properties:
project: {{ properties['project'] }}
parent: projects/{{ env['project'] }}/locations/{{ properties['zone'] }}/clusters/{{ CLUSTER_NAME }}
project: {{ properties['securityConfig']['project'] }}
zone: {{ properties['zone'] }}
clusterId: {{ CLUSTER_NAME }}
nodePool:
name: gpu-pool
name: gpu-pool
initialNodeCount: {{ properties['gpu-pool-initialNodeCount'] }}
config:
config:
{% if properties['securityConfig']['secureNodeMetadata'] %}
workloadMetadataConfig:
nodeMetadata: SECURE
{% endif %}
machineType: n1-standard-8
oauthScopes:
# Attaching cloud-platform scope to nodes is not good practice
# But it simplifies demos.
- https://www.googleapis.com/auth/cloud-platform
- https://www.googleapis.com/auth/compute
- https://www.googleapis.com/auth/devstorage.read_only
- https://www.googleapis.com/auth/logging.write
- https://www.googleapis.com/auth/monitoring
serviceAccount: {{ properties['vmServiceAccountName'] }}@{{ env['project'] }}.iam.gserviceaccount.com
oauthScopes: {{ VM_OAUTH_SCOPES }}
accelerators:
- acceleratorCount: 1
acceleratorType: nvidia-tesla-k80
Expand All @@ -143,13 +179,13 @@ resources:

{# Project defaults to the project of the deployment. #}
- name: {{ properties['ipName'] }}
type: compute.v1.globalAddress
type: compute.v1.globalAddress
properties:
description: "Static IP for Kubeflow ingress."

{#
Define TypeProviders for different K8s endpoints.
Define TypeProviders for different K8s endpoints.
https://cloud.google.com/deployment-manager/docs/configuration/type-providers/process-adding-api
This allows K8s resources to be created using Deployment manager.
We use this to create the minimal resources needed to startup and deploy Kubeflow via the bootstrapper;
Expand Down Expand Up @@ -192,12 +228,12 @@ e.g. creating namespaces, service accounts, stateful set to run the bootstrapper
If activating multiple APIs you might want to serialize them.
We use an action and not the type deploymentmanager.v2.virtual.enableService
because we only want to create it; we don't want to delete it.
because we only want to create it; we don't want to delete it.
Deleting the service corresponds to deactivating the API and that causes problems.
#}
- name: resource-manager-api
action: 'gcp-types/servicemanagement-v1:servicemanagement.services.enable'
properties:
properties:
consumerId: {{ 'project:' + env['project'] }}
serviceName: cloudresourcemanager.googleapis.com

Expand All @@ -207,30 +243,31 @@ TODO(jlewi): Do we need to serialize API activation
#}
- name: endpoints-api
action: 'gcp-types/servicemanagement-v1:servicemanagement.services.enable'
properties:
properties:
consumerId: {{ 'project:' + env['project'] }}
serviceName: endpoints.googleapis.com

- name: iam-api
action: 'gcp-types/servicemanagement-v1:servicemanagement.services.enable'
properties:
properties:
consumerId: {{ 'project:' + env['project'] }}
serviceName: iam.googleapis.com

{# Get the IAM policy first so that we do not remove any existing bindings. #}
- name: get-iam-policy
action: gcp-types/cloudresourcemanager-v1:cloudresourcemanager.projects.getIamPolicy
properties:
properties:
resource: {{ env['project'] }}

metadata:
dependsOn:
- resource-manager-api
runtimePolicy:
- UPDATE_ALWAYS
- iam-api
runtimePolicy:
- UPDATE_ALWAYS

{# Set the IAM policy patching the existing policy with what ever is currently in the
config.
config.
We need to make the cloudservices account a GKE cluster admin because deployment manager
uses the cloudservices account; so this will be the identity used with the K8s cluster.
Expand All @@ -240,16 +277,16 @@ TODO(jlewi): Do we need to serialize API activation
#}
- name: patch-iam-policy
action: gcp-types/cloudresourcemanager-v1:cloudresourcemanager.projects.setIamPolicy
properties:
properties:
resource: {{ env['project'] }}
policy: $(ref.get-iam-policy)
gcpIamPolicyPatch:
add:
add:
- role: roles/container.admin
members:
- {{ 'serviceAccount:' + env['project_number'] + '@cloudservices.gserviceaccount.com' }}

- role: roles/servicemanagement.admin
- role: roles/servicemanagement.admin
members:
- {{ 'serviceAccount:' + KF_ADMIN_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }}

Expand All @@ -260,8 +297,8 @@ TODO(jlewi): Do we need to serialize API activation
- get-iam-policy
- iam-api
- {{ KF_ADMIN_NAME }}
runtimePolicy:
- UPDATE_ALWAYS
runtimePolicy:
- UPDATE_ALWAYS

{# A note about K8s resources.
The type value should be defined using a reference to the corresponding type provider.
Expand All @@ -278,7 +315,7 @@ the corresponding type provider.
apiVersion: v1
kind: Namespace
metadata:
name: kubeflow-admin
name: kubeflow-admin
spec:

{# The deployment manager uses the cloudservices account. We need to create
Expand Down Expand Up @@ -333,11 +370,11 @@ the corresponding type provider.
properties:
apiVersion: v1
kind: PersistentVolumeClaim
{# Namespace is a property because its used by deployment manager in
{# Namespace is a property because it's used by deployment manager in
the URL #}
namespace: kubeflow-admin
metadata:
name: kubeflow-ksonnet-pvc
name: kubeflow-ksonnet-pvc
labels:
app: kubeflow-ksonnet
spec:
Expand All @@ -356,7 +393,7 @@ the corresponding type provider.
type: {{ env['project'] }}/$(ref.{{ TYPE_NAME }}.name):{{ CM_COLLECTION }}
properties:
apiVersion: v1
{# Namespace is a property because its used bye deployment manager in
{# Namespace is a property because it's used by deployment manager in
the URL #}
kind: ConfigMap
namespace: kubeflow-admin
Expand All @@ -376,7 +413,7 @@ the corresponding type provider.
type: {{ env['project'] }}/$(ref.{{ APPS_TYPE_NAME }}.name):{{ STATEFULSETS_COLLECTION }}
properties:
apiVersion: apps/v1
{# Namespace is a property because its used bye deployment manager in
{# Namespace is a property because it's used by deployment manager in
the URL #}
kind: StatefulSet
namespace: kubeflow-admin
Expand All @@ -398,7 +435,7 @@ the corresponding type provider.
- name: kubeflow-bootstrapper
image: {{ properties["bootstrapperImage"] }}
workingDir: /opt/bootstrap
command:
command:
- /opt/kubeflow/bootstrapper
- --in-cluster=true
- --apply=true
Expand All @@ -414,7 +451,7 @@ the corresponding type provider.
persistentVolumeClaim:
claimName: kubeflow-ksonnet-pvc
- name: kubeflow-bootstrapper
configMap:
configMap:
name: kubeflow-bootstrapper

metadata:
Expand Down

0 comments on commit adbcbba

Please sign in to comment.