From fab2b583f3c36fa179b6388ca7a28fa68b2aad8b Mon Sep 17 00:00:00 2001 From: Tasko Olevski Date: Tue, 20 Jun 2023 11:59:13 +0200 Subject: [PATCH] feat(service): horizontal scaling (#3178) --- chartpress.yaml | 1 + .../renku-core/templates/configmap.yaml | 49 +------------------ .../templates/deployment-nginx.yaml | 41 ---------------- .../renku-core/templates/deployment.yaml | 20 +++++++- helm-chart/renku-core/templates/hpa.yaml | 29 +++++++++++ helm-chart/renku-core/templates/pdb.yaml | 12 +++++ .../renku-core/templates/service-nginx.yaml | 15 ------ helm-chart/renku-core/values.schema.json | 37 +++++++++----- helm-chart/renku-core/values.yaml | 26 +++++----- renku/ui/service/config.py | 5 +- renku/ui/service/controllers/versions_list.py | 36 ++++++++++++++ renku/ui/service/entrypoint.py | 4 +- renku/ui/service/serializers/version.py | 4 +- renku/ui/service/serializers/versions_list.py | 37 ++++++++++++++ renku/ui/service/views/versions_list.py | 45 +++++++++++++++++ 15 files changed, 225 insertions(+), 136 deletions(-) delete mode 100644 helm-chart/renku-core/templates/deployment-nginx.yaml create mode 100644 helm-chart/renku-core/templates/hpa.yaml create mode 100644 helm-chart/renku-core/templates/pdb.yaml delete mode 100644 helm-chart/renku-core/templates/service-nginx.yaml create mode 100644 renku/ui/service/controllers/versions_list.py create mode 100644 renku/ui/service/serializers/versions_list.py create mode 100644 renku/ui/service/views/versions_list.py diff --git a/chartpress.yaml b/chartpress.yaml index 053bb51980..bdab0ab67e 100644 --- a/chartpress.yaml +++ b/chartpress.yaml @@ -15,3 +15,4 @@ charts: buildArgs: CLEAN_INSTALL: "1" BUILD_CORE_SERVICE: "1" + diff --git a/helm-chart/renku-core/templates/configmap.yaml b/helm-chart/renku-core/templates/configmap.yaml index 66e2181ec4..abd664ba8d 100644 --- a/helm-chart/renku-core/templates/configmap.yaml +++ b/helm-chart/renku-core/templates/configmap.yaml @@ -1,54 +1,9 @@ apiVersion: v1 kind: ConfigMap 
metadata: - name: {{ include "renku-core.fullname" . }}-nginx + name: {{ include "renku-core.fullname" . }}-metadata-versions data: - nginx-server-blocks.conf: | - {{- range $version := .Values.versions }} - upstream {{ $version.name }} { - server {{ include "renku-core.fullname" $ }}-{{ $version.name }}; - keepalive 32; - keepalive_timeout 60s; - } - {{ end }} - - server { - listen 8080; - - server_name {{ include "renku-core.fullname" . }}; - - sendfile on; - tcp_nopush on; - client_max_body_size 0; # Required for uploading large files - - location /renku/version { - root /; - add_header Content-Type application/json; - try_files /usr/share/nginx/html/version.json =404; - } - - {{- range $version := .Values.versions }} - location /renku/{{ $version.prefix }} { - rewrite /renku/{{ $version.prefix }}/(.*) /renku/$1 break; - proxy_set_header Host $host; - proxy_pass http://{{ $version.name }}; - proxy_send_timeout {{ $.Values.requestTimeout }}s; - proxy_read_timeout {{ $.Values.requestTimeout }}s; - proxy_http_version 1.1; - proxy_set_header "Connection" ""; - } - {{- end }} - - location /renku { - proxy_set_header Host $host; - proxy_pass http://{{ .Values.versions.latest.name }}; - proxy_send_timeout {{ $.Values.requestTimeout }}s; - proxy_read_timeout {{ $.Values.requestTimeout }}s; - proxy_http_version 1.1; - proxy_set_header "Connection" ""; - } - } - version.json: | + metadata-versions.json: | { "name": "renku-core", "versions": [ diff --git a/helm-chart/renku-core/templates/deployment-nginx.yaml b/helm-chart/renku-core/templates/deployment-nginx.yaml deleted file mode 100644 index 6288c621cb..0000000000 --- a/helm-chart/renku-core/templates/deployment-nginx.yaml +++ /dev/null @@ -1,41 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "renku-core.fullname" $ }}-nginx - labels: - app.kubernetes.io/name: {{ include "renku-core.fullname" $ }}-nginx -spec: - selector: - matchLabels: - app.kubernetes.io/name: {{ include 
"renku-core.fullname" $ }}-nginx - replicas: 1 - template: - metadata: - annotations: - checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} - labels: - app.kubernetes.io/name: {{ include "renku-core.fullname" $ }}-nginx - spec: - automountServiceAccountToken: false - containers: - - name: nginx - image: bitnami/nginx:1.21.4 - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - ports: - - containerPort: 8080 - volumeMounts: - - name: nginx-volume - readOnly: true - mountPath: /opt/bitnami/nginx/conf/server_blocks/nginx-server-blocks.conf - subPath: nginx-server-blocks.conf - - name: nginx-volume - readOnly: true - mountPath: /usr/share/nginx/html/version.json - subPath: version.json - volumes: - - name: nginx-volume - configMap: - name: {{ include "renku-core.fullname" . }}-nginx diff --git a/helm-chart/renku-core/templates/deployment.yaml b/helm-chart/renku-core/templates/deployment.yaml index 7f0313ea26..ef90a93232 100644 --- a/helm-chart/renku-core/templates/deployment.yaml +++ b/helm-chart/renku-core/templates/deployment.yaml @@ -17,7 +17,10 @@ metadata: spec: replicas: {{ $.Values.replicaCount }} strategy: - type: Recreate + type: RollingUpdate + rollingUpdate: + maxUnavailable: 0 + maxSurge: 1 selector: matchLabels: app.kubernetes.io/name: {{ include "renku-core.name" $ }} @@ -44,6 +47,9 @@ spec: volumes: - name: shared-volume emptyDir: {} + - name: metadata-versions + configMap: + name: {{ include "renku-core.fullname" $ }}-metadata-versions {{- include "certificates.volumes" $ | nindent 8 }} initContainers: {{- include "certificates.initContainer" $ | nindent 8 }} @@ -139,10 +145,14 @@ spec: value: {{ $.Values.global.renku.domain }} - name: RENKU_PROJECT_DEFAULT_CLI_VERSION value: {{ $.Values.global.renku.cli_version | default "" | quote }} + - name: METADATA_VERSIONS_LIST + value: /svc/config/metadata-versions/metadata-versions.json {{- include 
"certificates.env.python" $ | nindent 12 }} volumeMounts: - name: shared-volume mountPath: {{ $.Values.cacheDirectory }} + - name: metadata-versions + mountPath: /svc/config/metadata-versions {{- include "certificates.volumeMounts.system" $ | nindent 12 }} ports: - name: http @@ -157,7 +167,7 @@ spec: path: /health port: http resources: - {{- toYaml $.Values.resources | nindent 12 }} + {{- toYaml $.Values.resources.core | nindent 12 }} - name: {{ $.Chart.Name }}-datasets-workers image: "{{ $version.image.repository }}:{{ $version.image.tag }}" imagePullPolicy: {{ $version.image.pullPolicy }} @@ -213,6 +223,8 @@ spec: - name: shared-volume mountPath: {{ $.Values.cacheDirectory }} {{- include "certificates.volumeMounts.system" $ | nindent 12 }} + resources: + {{- toYaml $.Values.resources.datasetsWorkers | nindent 12 }} - name: {{ $.Chart.Name }}-management-workers image: "{{ $version.image.repository }}:{{ $version.image.tag }}" @@ -267,6 +279,8 @@ spec: - name: shared-volume mountPath: {{ $.Values.cacheDirectory }} {{- include "certificates.volumeMounts.system" $ | nindent 12 }} + resources: + {{- toYaml $.Values.resources.managementWorkers | nindent 12 }} - name: {{ $.Chart.Name }}-scheduler image: "{{ $version.image.repository }}:{{ $version.image.tag }}" @@ -307,6 +321,8 @@ spec: {{- include "certificates.env.python" $ | nindent 12 }} volumeMounts: {{- include "certificates.volumeMounts.system" $ | nindent 12 }} + resources: + {{- toYaml $.Values.resources.scheduler | nindent 12 }} {{- with $.Values.nodeSelector }} nodeSelector: {{- toYaml . 
| nindent 8 }} diff --git a/helm-chart/renku-core/templates/hpa.yaml b/helm-chart/renku-core/templates/hpa.yaml new file mode 100644 index 0000000000..3ce0b9ee87 --- /dev/null +++ b/helm-chart/renku-core/templates/hpa.yaml @@ -0,0 +1,29 @@ +{{- range $version := .Values.versions }} +--- +{{- if $.Capabilities.APIVersions.Has "autoscaling/v2" }} +apiVersion: autoscaling/v2 +{{- else if $.Capabilities.APIVersions.Has "autoscaling/v2beta2" }} +apiVersion: autoscaling/v2beta2 +{{- else if $.Capabilities.APIVersions.Has "autoscaling/v2beta1" }} +apiVersion: autoscaling/v2beta1 +{{- else }} + {{- fail "ERROR: You must have at least autoscaling/v2beta1 to use HorizontalPodAutoscaler" }} +{{- end }} +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "renku-core.fullname" $ }}-{{ $version.name }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "renku-core.fullname" $ }}-{{ $version.name }} + minReplicas: {{ $.Values.horizontalPodAutoscaling.minReplicas }} + maxReplicas: {{ $.Values.horizontalPodAutoscaling.maxReplicas }} + metrics: + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ $.Values.horizontalPodAutoscaling.averageMemoryUtilization }} +{{ end }} diff --git a/helm-chart/renku-core/templates/pdb.yaml b/helm-chart/renku-core/templates/pdb.yaml new file mode 100644 index 0000000000..6bbba04907 --- /dev/null +++ b/helm-chart/renku-core/templates/pdb.yaml @@ -0,0 +1,12 @@ +{{- range $version := .Values.versions }} +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "renku-core.fullname" $ }}-{{ $version.name }} +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/deploymentVersion: {{ $version.name }} +{{ end }} diff --git a/helm-chart/renku-core/templates/service-nginx.yaml b/helm-chart/renku-core/templates/service-nginx.yaml deleted file mode 100644 index d0e843fc68..0000000000 --- 
a/helm-chart/renku-core/templates/service-nginx.yaml +++ /dev/null @@ -1,15 +0,0 @@ -kind: Service -apiVersion: v1 -metadata: - name: {{ include "renku-core.fullname" . }} - labels: -{{ include "renku-core.labels" $ | indent 4 }} -spec: - type: ClusterIP - selector: - app.kubernetes.io/name: {{ include "renku-core.fullname" $ }}-nginx - ports: - protocol: TCP - port: 80 - targetPort: 8080 - name: nginx diff --git a/helm-chart/renku-core/values.schema.json b/helm-chart/renku-core/values.schema.json index 1cdd307e65..b814c0f133 100644 --- a/helm-chart/renku-core/values.schema.json +++ b/helm-chart/renku-core/values.schema.json @@ -101,16 +101,6 @@ }, "type": "object" }, - "ingress": { - "description": "Kubernetes ingress definition", - "properties": { - "enabled": { - "description": "whether to enable the ingress or not", - "type": "boolean" - } - }, - "type": "object" - }, "metrics": { "description": "Definition of Redis Queue metrics", "properties": { @@ -141,8 +131,30 @@ "type": "object" }, "nodeSelector": { - "description": "Node selector for deployment", - "type": "object" + "description": "Node selector for deployment", + "type": "object" + }, + "horizontalPodAutoscaling": { + "description": "Setup for scaling the core service", + "type": "object", + "properties": { + "minReplicas": { + "description": "Minimum number of replicas the autoscaler keeps for each core service deployment", + "type": "integer", + "minimum": 1 + }, + "maxReplicas": { + "description": "Maximum number of replicas the autoscaler may scale each core service deployment up to", + "type": "integer", + "minimum": 1 + }, + "averageMemoryUtilization": { + "description": "Target average memory utilization, in percent, that the autoscaler tries to maintain", + "type": "integer", + "exclusiveMinimum": 0, + "exclusiveMaximum": 100 + } + } }, "tolerations": { "description": "Tolerations for deployment", @@ -237,7 +249,6 @@ "gitLFSSkipSmudge", "jwtTokenSecret", "service", - "ingress", "metrics", "sentry", "versions" diff --git a/helm-chart/renku-core/values.yaml b/helm-chart/renku-core/values.yaml index e346f00149..be103418ae 100644 --- 
a/helm-chart/renku-core/values.yaml +++ b/helm-chart/renku-core/values.yaml @@ -1,7 +1,7 @@ # Default values for renku-core. # This is a YAML-formatted file. # Declare variables to be passed into your templates. -replicaCount: 1 +replicaCount: 2 global: ## Specify a secret that containes the certificate ## if you would like to use a custom CA. The key for the secret @@ -56,25 +56,18 @@ fullnameOverride: "" service: type: ClusterIP port: 80 -ingress: - enabled: false metrics: enabled: false image: repository: renku/rqmetrics tag: 0.0.2 pullPolicy: IfNotPresent -resources: {} -# We usually recommend not to specify default resources and to leave this as a conscious -# choice for the user. This also increases chances charts run on environments with little -# resources, such as Minikube. If you do want to specify resources, uncomment the following -# lines, adjust them as necessary, and remove the curly braces after 'resources:'. -# limits: -# cpu: 100m -# memory: 128Mi -# requests: -# cpu: 100m -# memory: 128Mi +resources: + core: {} + rqmetrics: {} + datasetsWorkers: {} + managementWorkers: {} + scheduler: {} # nodeSelector: {} @@ -114,3 +107,8 @@ podSecurityContext: fsGroup: 100 securityContext: allowPrivilegeEscalation: false + +horizontalPodAutoscaling: + minReplicas: 2 + maxReplicas: 10 + averageMemoryUtilization: 50 diff --git a/renku/ui/service/config.py b/renku/ui/service/config.py index 262c247b6f..694761848b 100644 --- a/renku/ui/service/config.py +++ b/renku/ui/service/config.py @@ -1,5 +1,5 @@ # -# Copyright 2020 - Swiss Data Science Center (SDSC) +# Copyright 2022 - Swiss Data Science Center (SDSC) # A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and # Eidgenössische Technische Hochschule Zürich (ETHZ). 
# @@ -78,3 +78,6 @@ # Sentry configuration SENTRY_ENABLED = os.getenv("SENTRY_ENABLED", "false").lower() == "true" SENTRY_SAMPLERATE = float(os.getenv("SENTRY_SAMPLE_RATE", 0.2)) + +# List of all available metadata versions +METADATA_VERSIONS_LIST = os.getenv("METADATA_VERSIONS_LIST", "/svc/config/metadata-versions/metadata-versions.json") diff --git a/renku/ui/service/controllers/versions_list.py b/renku/ui/service/controllers/versions_list.py new file mode 100644 index 0000000000..5027fb8d7b --- /dev/null +++ b/renku/ui/service/controllers/versions_list.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2022 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Renku service versions list controller.""" +import json + +from renku.ui.service import config +from renku.ui.service.controllers.api.abstract import ServiceCtrl +from renku.ui.service.serializers.versions_list import VersionsListResponseRPC +from renku.ui.service.views import result_response + + +class VersionsListCtrl(ServiceCtrl): + """Versions list controller.""" + + RESPONSE_SERIALIZER = VersionsListResponseRPC() + + def to_response(self): + """Serialize to service versions list response.""" + + with open(config.METADATA_VERSIONS_LIST, "r") as f: + return result_response(self.RESPONSE_SERIALIZER, json.load(f)) diff --git a/renku/ui/service/entrypoint.py b/renku/ui/service/entrypoint.py index e90f971a59..d3eadca274 100644 --- a/renku/ui/service/entrypoint.py +++ b/renku/ui/service/entrypoint.py @@ -1,5 +1,5 @@ # -# Copyright 2020 - Swiss Data Science Center (SDSC) +# Copyright 2022 - Swiss Data Science Center (SDSC) # A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and # Eidgenössische Technische Hochschule Zürich (ETHZ). 
# @@ -50,6 +50,7 @@ from renku.ui.service.views.project import project_blueprint from renku.ui.service.views.templates import templates_blueprint from renku.ui.service.views.version import version_blueprint +from renku.ui.service.views.versions_list import versions_list_blueprint from renku.ui.service.views.workflow_plans import workflow_plans_blueprint logging.basicConfig(level=os.getenv("SERVICE_LOG_LEVEL", "WARNING")) @@ -154,6 +155,7 @@ def build_routes(app): app.register_blueprint(templates_blueprint) app.register_blueprint(version_blueprint) app.register_blueprint(apispec_blueprint) + app.register_blueprint(versions_list_blueprint) app = create_app() diff --git a/renku/ui/service/serializers/version.py b/renku/ui/service/serializers/version.py index ad951d1dff..2a9788b794 100644 --- a/renku/ui/service/serializers/version.py +++ b/renku/ui/service/serializers/version.py @@ -1,4 +1,4 @@ -# Copyright 2020 - Swiss Data Science Center (SDSC) +# Copyright 2022 - Swiss Data Science Center (SDSC) # A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and # Eidgenössische Technische Hochschule Zürich (ETHZ). # @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Renku service version controller.""" +"""Renku service version serializers.""" from marshmallow import Schema, fields diff --git a/renku/ui/service/serializers/versions_list.py b/renku/ui/service/serializers/versions_list.py new file mode 100644 index 0000000000..9fa27d2525 --- /dev/null +++ b/renku/ui/service/serializers/versions_list.py @@ -0,0 +1,37 @@ +# Copyright 2022 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service metadata versions list serializers.""" +from marshmallow import Schema, fields + + +class VersionsListVersion(Schema): + """A single instance of renku metadata version information.""" + + version = fields.String() + data = fields.Nested(Schema.from_dict({"metadata_version": fields.String()})) + + +class VersionsListResponse(Schema): + """Lists all available renku metadata versions.""" + + name = fields.String() + versions = fields.List(fields.Nested(VersionsListVersion)) + + +class VersionsListResponseRPC(Schema): + """Versions list response RPC schema.""" + + result = fields.Nested(VersionsListResponse) diff --git a/renku/ui/service/views/versions_list.py b/renku/ui/service/views/versions_list.py new file mode 100644 index 0000000000..f531891fbc --- /dev/null +++ b/renku/ui/service/views/versions_list.py @@ -0,0 +1,45 @@ +# Copyright 2022 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service versions list view.""" +from flask import Blueprint + +from renku.ui.service.config import SERVICE_PREFIX +from renku.ui.service.controllers.versions_list import VersionsListCtrl +from renku.ui.service.views.error_handlers import handle_common_except + +VERSIONS_LIST_BLUEPRINT_TAG = "versions" +versions_list_blueprint = Blueprint("versions", __name__, url_prefix=SERVICE_PREFIX) + + +@versions_list_blueprint.route("/versions", methods=["GET"], provide_automatic_options=False) +@handle_common_except +def versions_list(): + """ + Shows the list of all supported metadata versions. + + --- + get: + description: Metadata versions supported by all deployed core services. + responses: + 200: + description: The list of metadata versions. + content: + application/json: + schema: VersionsListResponseRPC + tags: + - version + """ + return VersionsListCtrl().to_response()