diff --git a/Pipfile b/Pipfile
index b09b5a1c..8bb555a1 100644
--- a/Pipfile
+++ b/Pipfile
@@ -11,7 +11,7 @@ progressbar2 = "*"
 cryptography = {extras = ["security"]}
 six = "*"
 gradient-statsd = "*"
-click = "*"
+click = "7.1.2"
 terminaltables = "*"
 click-didyoumean = "*"
 click-help-colors = "*"
diff --git a/gradient/api_sdk/clients/secret_client.py b/gradient/api_sdk/clients/secret_client.py
index 3122bc0f..7f84cfd0 100644
--- a/gradient/api_sdk/clients/secret_client.py
+++ b/gradient/api_sdk/clients/secret_client.py
@@ -57,3 +57,19 @@
     def delete(self, entity, entity_id, name):
         repository = self.build_repository(repositories.DeleteSecret)
         repository.delete(entity=entity, entity_id=entity_id, name=name)
+
+    def ephemeral(self, key, value, expires_in):
+        """Create an ephemeral secret.
+
+        :param str key: secret key
+        :param str value: secret value
+        :param int expires_in: number of seconds until the secret expires
+
+        :returns: created secret data, mapping the key to an ephemeral secret token
+        :rtype: dict
+        """
+
+        repository = self.build_repository(repositories.EphemeralSecret)
+        ephemeral_secret = repository.create(key=key, value=value, expires_in=expires_in)
+        return ephemeral_secret
+
diff --git a/gradient/api_sdk/repositories/__init__.py b/gradient/api_sdk/repositories/__init__.py
index ac40ea7c..4af8eecd 100644
--- a/gradient/api_sdk/repositories/__init__.py
+++ b/gradient/api_sdk/repositories/__init__.py
@@ -19,7 +19,7 @@
 from .notebooks import CreateNotebook, DeleteNotebook, GetNotebook, ListNotebooks, GetNotebookMetrics, ListNotebookMetrics, \
     StreamNotebookMetrics, StopNotebook, StartNotebook, ForkNotebook, ListNotebookArtifacts, ListNotebookLogs
 from .projects import CreateProject, ListProjects, DeleteProject, GetProject
-from .secrets import ListSecrets, SetSecret, DeleteSecret
+from .secrets import ListSecrets, SetSecret, DeleteSecret, EphemeralSecret
 from .storage_providers import ListStorageProviders, CreateStorageProvider, DeleteStorageProvider, \
     GetStorageProvider, UpdateStorageProvider
 from .tensorboards import CreateTensorboard, GetTensorboard, ListTensorboards, UpdateTensorboard, DeleteTensorboard
diff --git a/gradient/api_sdk/repositories/secrets.py b/gradient/api_sdk/repositories/secrets.py
index 9137e088..a46c4df6 100644
--- a/gradient/api_sdk/repositories/secrets.py
+++ b/gradient/api_sdk/repositories/secrets.py
@@ -52,4 +52,20 @@ def _send_request(self, client, url, json=None, params=None):
 
     def delete(self, **kwargs):
         response = self._get(**kwargs)
-        self._validate_response(response)
\ No newline at end of file
+        self._validate_response(response)
+
+class EphemeralSecret(SecretsMixin, BaseRepository):
+    def get_request_url(self, **kwargs):
+        return "/secrets/ephemeral?expiresIn={}".format(kwargs.get("expires_in"))
+
+    def _get_request_json(self, kwargs):
+        return {kwargs.get("key"): kwargs.get("value")}
+
+    def _send_request(self, client, url, json=None, params=None):
+        response = client.post(url, json=json)
+        return response
+
+    def create(self, **kwargs):
+        response = self._get(**kwargs)
+        self._validate_response(response)
+        return response.data
diff --git a/gradient/cli/datasets.py b/gradient/cli/datasets.py
index adfcb76a..1e49e969 100644
--- a/gradient/cli/datasets.py
+++ b/gradient/cli/datasets.py
@@ -4,6 +4,12 @@
 from gradient.cli.cli import cli
 from gradient.cli.common import ClickGroup, api_key_option
 from gradient.commands import datasets as commands
+from gradient.cli import common
+from gradient.cli.jobs import get_workspace_handler
+from gradient.cli.common import (
+    api_key_option, del_if_value_is_none, ClickGroup, jsonify_dicts,
+    validate_comma_split_option,
+)
 
 EXAMPLE_ID = 'dsr8k5qzn401lb5'
 EXAMPLE_VERSION = 'klfoyy9'
@@ -143,6 +149,65 @@ def update_dataset(
         description=description,
     )
 
+@datasets.command("import", help="Import dataset")
+@click.option(
+    "--clusterId",
+    "cluster_id",
+    help="Cluster ID",
+    cls=common.GradientOption,
+    required=True,
+)
+@click.option(
+    "--machineType",
+    "machine_type",
+    help="Virtual machine type",
+    cls=common.GradientOption,
+    required=True,
+)
+@click.option(
+    "--datasetId",
+    "dataset_id",
+    help="Dataset ID",
+    cls=common.GradientOption,
+    required=True,
+)
+@click.option(
+    "--s3Url",
+    "s3_url",
+    help="S3 URL, e.g. https://s3-us-east-1.amazonaws.com/bucket/path",
+    cls=common.GradientOption,
+)
+@click.option(
+    "--httpUrl",
+    "http_url",
+    help="HTTP/S URL, e.g. https://data.something.org/all_my_data.zip",
+    cls=common.GradientOption,
+)
+@click.option(
+    "--httpAuth",
+    "http_auth",
+    help="HTTP auth in username:password format",
+    cls=common.GradientOption,
+)
+@click.option(
+    "--s3AccessKey",
+    "access_key",
+    help="S3 access key",
+    cls=common.GradientOption,
+)
+@click.option(
+    "--s3SecretKey",
+    "secret_key",
+    help="S3 secret key",
+    cls=common.GradientOption,
+)
+@api_key_option
+@common.options_file
+def import_dataset(cluster_id, machine_type, dataset_id, s3_url, http_url, http_auth, access_key, secret_key, api_key, options_file):
+    validate_dataset_id(dataset_id)
+
+    command = commands.ImportDatasetCommand(api_key=api_key, workspace_handler=get_workspace_handler())
+    command.execute(cluster_id, machine_type, dataset_id, s3_url, http_url, http_auth, access_key, secret_key)
 
 @datasets.command("delete", help="Delete dataset")
 @click.option(
diff --git a/gradient/commands/datasets.py b/gradient/commands/datasets.py
index dcd74454..9cf9ff8b 100644
--- a/gradient/commands/datasets.py
+++ b/gradient/commands/datasets.py
@@ -5,11 +5,13 @@
 import re
 import threading
 import uuid
+import json
 try:
     import queue
 except ImportError:
     import Queue as queue
 from xml.etree import ElementTree
+from urllib.parse import urlparse
 
 import halo
 import requests
@@ -17,11 +19,23 @@
 from gradient import api_sdk
 from gradient.api_sdk.sdk_exceptions import ResourceFetchingError
+from gradient.api_sdk.utils import base64_encode
 from gradient.cli_constants import CLI_PS_CLIENT_NAME
+from gradient.cli.jobs import get_workspace_handler
+from gradient.commands import jobs as jobs_commands
 from gradient.commands.common import BaseCommand, DetailsCommandMixin, ListCommandPagerMixin
+from gradient.commands.jobs import BaseCreateJobCommandMixin, BaseJobCommand, CreateJobCommand
 from gradient.exceptions import ApplicationError
 
 S3_XMLNS = 'http://s3.amazonaws.com/doc/2006-03-01/'
+DATASET_IMPORTER_IMAGE = "paperspace/dataset-importer:latest"
+PROJECT_NAME = "Job Builder"
+SUPPORTED_URL = ['https', 'http']
+IMPORTER_COMMAND = "go-getter"
+HTTP_SECRET = "HTTP_AUTH"
+S3_ACCESS_KEY = "AWS_ACCESS_KEY_ID"
+S3_SECRET_KEY = "AWS_SECRET_ACCESS_KEY"
+S3_REGION_KEY = "AWS_DEFAULT_REGION"
 
 
 class WorkerPool(object):
@@ -676,3 +690,92 @@ def update_status():
         for pre_signed in pre_signeds:
             update_status()
             pool.put(self._delete, url=pre_signed.url)
+
+
+class ImportDatasetCommand(BaseCreateJobCommandMixin, BaseJobCommand):
+    def create_secret(self, key, value, expires_in=86400):
+        client = api_sdk.clients.SecretsClient(
+            api_key=self.api_key,
+            logger=self.logger,
+            ps_client_name=CLI_PS_CLIENT_NAME,
+        )
+
+        response = client.ephemeral(key, value, expires_in)
+        return response
+
+    def get_command(self, s3_url, http_url, http_auth):
+ command = "%s %s /data/output" % (IMPORTER_COMMAND, (s3_url or http_url)) + if s3_url: + command = "%s s3::%s /data/output" % (IMPORTER_COMMAND, s3_url) + + if http_url and http_auth is not None: + url = urlparse(http_url) + command_string = "%s https://${{HTTP_AUTH}}@%s /data/output" % (IMPORTER_COMMAND, url.path) + command = base64_encode(command_string) + + return command + + def get_env_vars(self, s3_url, http_url, secrets): + if s3_url is not None: + if secrets[S3_ACCESS_KEY] is None or secrets[S3_SECRET_KEY] is None: + self.logger.log('s3AccessKey and s3SecretKey required') + return + + access_key_secret = self.create_secret(S3_ACCESS_KEY, secrets[S3_ACCESS_KEY]) + secret_key_secret = self.create_secret(S3_SECRET_KEY, secrets[S3_SECRET_KEY]) + + access_key_value = "secret:ephemeral:%s" % access_key_secret[S3_ACCESS_KEY] + secret_key_value = "secret:ephemeral:%s" % secret_key_secret[S3_SECRET_KEY] + + return { + S3_ACCESS_KEY: access_key_value, + S3_SECRET_KEY: secret_key_value, + } + + if http_url and secrets[S3_ACCESS_KEY] is not None: + http_auth_secret = self.create_secret(HTTP_SECRET, secrets[HTTP_SECRET]) + return { + HTTP_SECRET: http_auth_secret + } + + return "" + + def _create(self, workflow): + client = api_sdk.clients.JobsClient( + api_key=self.api_key, + ps_client_name=CLI_PS_CLIENT_NAME, + ) + return self.client.create(**workflow) + + + def execute(self, cluster_id, machine_type, dataset_id, s3_url, http_url, http_auth, access_key, secret_key): + if s3_url is None and http_url is None: + self.logger.log('Error: --s3Url or --httpUrl required') + return + + workflow = { + "cluster_id": cluster_id, + "container": DATASET_IMPORTER_IMAGE, + "machine_type": machine_type, + "project": PROJECT_NAME, + "datasets": [{ "id": dataset_id, "name": "output", "output": True }], + "project_id": None + } + + dataset_url = s3_url or http_url + + url = urlparse(dataset_url) + if url.scheme not in SUPPORTED_URL: + self.logger.log('Invalid URL format supported [{}] Format:{} URL:{}'.format(','.join(SUPPORTED_URL), url.scheme, dataset_url)) + return + + command = self.get_command(s3_url, http_url, http_auth) + if command: + workflow["command"] = command + + env_vars = self.get_env_vars(s3_url, http_url, { HTTP_SECRET: http_auth, S3_ACCESS_KEY: access_key, S3_SECRET_KEY: secret_key }) + if env_vars: + workflow["env_vars"] = env_vars + + command = CreateJobCommand(api_key=self.api_key, workspace_handler=get_workspace_handler()) + command.execute(workflow) diff --git a/setup.py b/setup.py index 2967559e..dbb77bf9 100644 --- a/setup.py +++ b/setup.py @@ -56,7 +56,7 @@ def run(self): install_requires=[ 'requests[security]', 'six', - 'click>=7.0', + 'click==7.1.2', 'terminaltables', 'click-didyoumean', 'click-help-colors',