Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(dataset): importer #335

Merged
merged 32 commits into from
Dec 3, 2020
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f4e4586
feat(dataset): importer
reynld Dec 1, 2020
2a9f4d6
fix: addressed code review comments
reynld Dec 1, 2020
18ae728
fixup
reynld Dec 1, 2020
db262a1
set clip help colors version color
reynld Dec 2, 2020
1a25173
update package versions
reynld Dec 2, 2020
ebaf29e
update package versions
reynld Dec 2, 2020
a4a277e
update package versions
reynld Dec 2, 2020
4d53fa1
update package versions
reynld Dec 2, 2020
2b457bc
addressed cde review comments
reynld Dec 2, 2020
6cc0f80
reset piplock file
reynld Dec 2, 2020
0a9ce6a
Revert "reset piplock file"
reynld Dec 2, 2020
1f920ce
review comments
reynld Dec 2, 2020
f5099ed
review comments
reynld Dec 2, 2020
8d5e27d
fix
reynld Dec 2, 2020
64761b1
fix address
reynld Dec 3, 2020
ae7467e
fix code review comments
reynld Dec 3, 2020
ed584aa
revert lock file
reynld Dec 3, 2020
a08169c
revert lock file
reynld Dec 3, 2020
89ac9a5
chore: fix spacing
reynld Dec 3, 2020
37ac80d
change click help colors version
reynld Dec 3, 2020
f690faf
change click help colors version
reynld Dec 3, 2020
e207b43
setting specific version
reynld Dec 3, 2020
41fc4be
pls
reynld Dec 3, 2020
6eea3e7
setup.py click number
reynld Dec 3, 2020
8e4802a
reset pipfile
reynld Dec 3, 2020
7144a6a
fix equals signs
reynld Dec 3, 2020
d7a9ed7
fix(dataset): importer command wrapped in JobCommand to make use of j…
reynld Dec 3, 2020
e393d0c
fix: pip lock file
reynld Dec 3, 2020
1340af9
remove toml file
reynld Dec 3, 2020
37fd5ee
fix: http basic auth
reynld Dec 3, 2020
228a2bc
base64 basic auth commands
reynld Dec 3, 2020
3a593d1
error handling
reynld Dec 3, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ progressbar2 = "*"
cryptography = {extras = ["security"]}
six = "*"
gradient-statsd = "*"
click = "*"
click = "7.1.2"
terminaltables = "*"
click-didyoumean = "*"
click-help-colors = "*"
Expand Down
16 changes: 16 additions & 0 deletions gradient/api_sdk/clients/secret_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,19 @@ def delete(self, entity, entity_id, name):

repository = self.build_repository(repositories.DeleteSecret)
repository.delete(entity=entity, entity_id=entity_id, name=name)

def ephemeral(self, key, value, expires_in):
"""Create ephemeral secret.

:param str key: secret key
:param str value: secret value
:param str expires_in: seconds secrets expire in

:returns:
:rtype: str
"""

repository = self.build_repository(repositories.EphemeralSecret)
ephemeral_secret = repository.create(key=key, value=value, expires_in=expires_in)
return ephemeral_secret

2 changes: 1 addition & 1 deletion gradient/api_sdk/repositories/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from .notebooks import CreateNotebook, DeleteNotebook, GetNotebook, ListNotebooks, GetNotebookMetrics, ListNotebookMetrics, \
StreamNotebookMetrics, StopNotebook, StartNotebook, ForkNotebook, ListNotebookArtifacts, ListNotebookLogs
from .projects import CreateProject, ListProjects, DeleteProject, GetProject
from .secrets import ListSecrets, SetSecret, DeleteSecret
from .secrets import ListSecrets, SetSecret, DeleteSecret, EphemeralSecret
from .storage_providers import ListStorageProviders, CreateStorageProvider, DeleteStorageProvider, \
GetStorageProvider, UpdateStorageProvider
from .tensorboards import CreateTensorboard, GetTensorboard, ListTensorboards, UpdateTensorboard, DeleteTensorboard
18 changes: 17 additions & 1 deletion gradient/api_sdk/repositories/secrets.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,20 @@ def _send_request(self, client, url, json=None, params=None):

def delete(self, **kwargs):
response = self._get(**kwargs)
self._validate_response(response)
self._validate_response(response)

class EphemeralSecret(SecretsMixin, BaseRepository):
def get_request_url(self, **kwargs):
return "/secrets/ephemeral?expiresIn={}".format(kwargs.get("expires_in"))

def _get_request_json(self, kwargs):
return { kwargs.get("key"): kwargs.get("value") }

def _send_request(self, client, url, json=None, params=None):
response = client.post(url, json=json)
return response

def create(self, **kwargs):
response = self._get(**kwargs)
self._validate_response(response)
return response.data
60 changes: 60 additions & 0 deletions gradient/cli/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
from gradient.cli.cli import cli
from gradient.cli.common import ClickGroup, api_key_option
from gradient.commands import datasets as commands
from gradient.cli import common
from gradient.cli.jobs import get_workspace_handler
from gradient.cli.common import (
api_key_option, del_if_value_is_none, ClickGroup, jsonify_dicts,
validate_comma_split_option,
)

EXAMPLE_ID = 'dsr8k5qzn401lb5'
EXAMPLE_VERSION = 'klfoyy9'
Expand Down Expand Up @@ -143,6 +149,60 @@ def update_dataset(
description=description,
)

@datasets.command("import", help="Import dataset")
@click.option(
"--clusterId",
"cluster_id",
help="Cluster ID",
cls=common.GradientOption,
required=True,
)
@click.option(
"--machineType",
"machine_type",
help="Virtual machine type",
cls=common.GradientOption,
required=True,
)
@click.option(
"--datasetId",
"dataset_id",
help="Dataset ID",
cls=common.GradientOption,
required=True,
)
@click.option(
"--url",
"dataset_url",
reynld marked this conversation as resolved.
Show resolved Hide resolved
help="URL ",
cls=common.GradientOption,
required=True,
)
@click.option(
"--httpAuth",
"http_auth",
help="Http Auth username:password",
cls=common.GradientOption,
)
@click.option(
"--s3AccessKey",
"access_key",
help="S3 access key",
cls=common.GradientOption,
)
@click.option(
"--s3SecretKey",
"secret_key",
help="S3 secret key",
cls=common.GradientOption,
)
@api_key_option
@common.options_file
def import_dataset(cluster_id, machine_type, dataset_id, dataset_url, http_auth, access_key, secret_key, api_key, options_file):
validate_dataset_id(dataset_id)

command = commands.ImportDatasetCommand(api_key=api_key, workspace_handler=get_workspace_handler())
command.execute(cluster_id, machine_type, dataset_id, dataset_url, http_auth, access_key, secret_key)

@datasets.command("delete", help="Delete dataset")
@click.option(
Expand Down
94 changes: 94 additions & 0 deletions gradient/commands/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
import re
import threading
import uuid
import json
try:
import queue
except ImportError:
import Queue as queue
from xml.etree import ElementTree
from urllib.parse import urlparse

import halo
import requests
Expand All @@ -18,10 +20,21 @@
from gradient import api_sdk
from gradient.api_sdk.sdk_exceptions import ResourceFetchingError
from gradient.cli_constants import CLI_PS_CLIENT_NAME
from gradient.cli.jobs import get_workspace_handler
from gradient.commands import jobs as jobs_commands
from gradient.commands.common import BaseCommand, DetailsCommandMixin, ListCommandPagerMixin
from gradient.commands.jobs import BaseCreateJobCommandMixin, BaseJobCommand, CreateJobCommand
from gradient.exceptions import ApplicationError

S3_XMLNS = 'http://s3.amazonaws.com/doc/2006-03-01/'
DATASET_IMPORTER_IMAGE = "paperspace/dataset-importer:latest"
PROJECT_NAME = "Job Builder"
SUPPORTED_URL = ['s3', 'git', 'https', 'http']
IMPORTER_COMMAND = "go-getter"
HTTP_SECRET = "HTTP_AUTH"
S3_ACCESS_KEY = "AWS_ACCESS_KEY_ID"
S3_SECRET_KEY = "AWS_SECRET_ACCESS_KEY"
S3_REGION_KEY = "AWS_DEFAULT_REGION"


class WorkerPool(object):
Expand Down Expand Up @@ -676,3 +689,84 @@ def update_status():
for pre_signed in pre_signeds:
update_status()
pool.put(self._delete, url=pre_signed.url)


class ImportDatasetCommand(BaseCreateJobCommandMixin, BaseJobCommand):
def create_secret(self, key, value, expires_in=86400):
client = api_sdk.clients.SecretsClient(
api_key=self.api_key,
logger=self.logger,
ps_client_name=CLI_PS_CLIENT_NAME,
)

response = client.ephemeral(key, value, expires_in)
return response

def get_command(self, url, http_auth):
command_url = url.geturl()

if url.scheme == 'https' or url.scheme == 'http' and http_auth:
return "%s https://${{HTTP_AUTH}}@%s /data/output" % (IMPORTER_COMMAND, url.path)

return "%s %s /data/output" % (IMPORTER_COMMAND, command_url)

def get_env_vars(self, url, secrets):
if url.scheme == 's3' and S3_ACCESS_KEY in secrets and S3_SECRET_KEY in secrets:
if not secrets[S3_ACCESS_KEY] or not secrets[S3_SECRET_KEY]:
self.logger.log('s3AccessKey and s3SecretKey required')
return

access_key_secret = self.create_secret(S3_ACCESS_KEY, secrets[S3_ACCESS_KEY])
secret_key_secret = self.create_secret(S3_SECRET_KEY, secrets[S3_SECRET_KEY])

access_key_value = "secret:ephemeral:%s" % access_key_secret[S3_ACCESS_KEY]
secret_key_value = "secret:ephemeral:%s" % secret_key_secret[S3_SECRET_KEY]

return {
S3_ACCESS_KEY: access_key_value,
S3_SECRET_KEY: secret_key_value,
}

if url.scheme == 'https' and url.scheme == 'http' and HTTP_SECRET in secrets:
http_auth_secret = self.create_secret(HTTP_SECRET, secrets[HTTP_SECRET])
return {
HTTP_SECRET: http_auth_secret
}

return ""

def _create(self, workflow):
client = api_sdk.clients.JobsClient(
api_key=self.api_key,
ps_client_name=CLI_PS_CLIENT_NAME,
)
return self.client.create(**workflow)


def execute(self, cluster_id, machine_type, dataset_id, dataset_url, http_auth, access_key, secret_key):
workflow = {
"cluster_id": cluster_id,
"container": DATASET_IMPORTER_IMAGE,
"machine_type": machine_type,
"project": PROJECT_NAME,
"datasets": [{ "id": dataset_id, "name": "output", "output": True }],
"project_id": None
reynld marked this conversation as resolved.
Show resolved Hide resolved
}

url = urlparse(dataset_url)

if url.scheme not in SUPPORTED_URL:
self.logger.log('Invalid URL format supported [{}] Format:{} URL:{}'.format(','.join(SUPPORTED_URL), url.scheme, dataset_url))
return


command = self.get_command(url, http_auth)
if command:
workflow["command"] = command

env_vars = self.get_env_vars(url, { HTTP_SECRET: http_auth, S3_ACCESS_KEY: access_key, S3_SECRET_KEY: secret_key })
if env_vars:
workflow["env_vars"] = env_vars

command = CreateJobCommand(api_key=self.api_key, workspace_handler=get_workspace_handler())
command.execute(workflow)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def run(self):
install_requires=[
'requests[security]',
'six',
'click>=7.0',
'click==7.1.2',
'terminaltables',
'click-didyoumean',
'click-help-colors',
Expand Down