Revamp AWS configuration module #59

Merged: 6 commits, Dec 16, 2019

139 changes: 67 additions & 72 deletions metaflow/main_cli.py
@@ -68,7 +68,7 @@ def main(ctx):
short_help = {'tutorials': 'Browse and access metaflow tutorials.',
'configure': 'Configure metaflow to run remotely.',
'status': 'Display the current working tree.',
'help': 'Shows all available commands to run.'}
'help': 'Show all available commands to run.'}

echo('Commands:', bold=False)

@@ -85,7 +85,7 @@ def main(ctx):
def help(ctx):
print(ctx.parent.get_help())

@main.command(help='Shows flows accessible from the current working tree.')
@main.command(help='Show flows accessible from the current working tree.')
def status():
from metaflow.client import get_metadata
res = get_metadata()
@@ -262,8 +262,7 @@ def info(episode):
METAFLOW_CONFIGURATION_DIR =\
expanduser(os.environ.get('METAFLOW_HOME', '~/.metaflowconfig'))

@main.group(help="Configure Metaflow to "\
"run on our sandbox or an AWS account.")
@main.group(help="Configure Metaflow to run on AWS.")
def configure():
makedirs(METAFLOW_CONFIGURATION_DIR)

@@ -275,7 +274,7 @@ def get_config_path(profile):
def prompt_config_overwrite(profile):
path = get_config_path(profile)
if os.path.exists(path):
if click.confirm('Do you wish to overwrite the existing configuration '
if click.confirm('Overwrite existing configuration '
'in ' + click.style('"%s"' % path, fg='cyan') + '?',
abort=True):
return
@@ -290,7 +289,7 @@ def persist_env(env_dict, profile):
echo('\nConfiguration successfully written to ', nl=False)
echo('"%s"' % path, fg='cyan')

@configure.command(help='Resets the configuration to run locally.')
@configure.command(help='Reset configuration to run locally.')
@click.option('--profile', '-p', default='',
help="Optional user profile to allow storing multiple "
"configurations. Please `export METAFLOW_PROFILE` to "
@@ -305,7 +304,7 @@ def reset(profile):
else:
echo('Configuration is already reset to run locally.')

@configure.command(help='Shows the existing configuration.')
@configure.command(help='Show existing configuration.')
@click.option('--profile', '-p', default='',
help="Optional user profile to allow storing multiple "
"configurations. Please `export METAFLOW_PROFILE` to "
@@ -349,83 +348,79 @@ def sandbox(profile):
# Persist to a file.
persist_env(env_dict, profile)

@configure.command(help='Get Metaflow up and running on your own AWS environment.')
@configure.command(help='Configure Metaflow to access self-managed AWS resources.')

Contributor: Any reason you chose against 'Get Metaflow up and running on your self-managed AWS resources.'? Is it getting trimmed?

Currently the language looks slightly inconsistent between sandbox and aws:

Commands:
  aws      Configure Metaflow to access self-managed AWS resources.
  reset    Reset configuration to run locally.
  sandbox  Get Metaflow up and running on our sandbox.
  show     Show existing configuration.

Collaborator Author: Given that all the statements start with an assertive verb, it made sense to change 'Get...' to 'Configure...'. I will make a similar language change for sandbox.

@click.option('--profile', '-p', default='',
help="Optional user profile to allow storing multiple "
"configurations. Please `export METAFLOW_PROFILE` to "
"switch between profile configuration(s).")
help='Configure a named profile. Activate the profile by setting '
'`METAFLOW_PROFILE` environment variable.')

Contributor: Indentation off by 1 on this line.

Collaborator Author: Will fix.
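
As background for the profile discussion, a minimal sketch of how a named profile could resolve to a config file; the config_<profile>.json naming is an assumption, while the directory resolution mirrors the diff above:

import os
from os.path import expanduser

METAFLOW_CONFIGURATION_DIR = \
    expanduser(os.environ.get('METAFLOW_HOME', '~/.metaflowconfig'))

def get_config_path(profile):
    # Assumed naming: config.json for the default profile,
    # config_<profile>.json when METAFLOW_PROFILE names one.
    config_file = 'config.json' if not profile else 'config_%s.json' % profile
    return os.path.join(METAFLOW_CONFIGURATION_DIR, config_file)

Under this assumed layout, `export METAFLOW_PROFILE=dev` would select config_dev.json, consistent with the help text above.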

def aws(profile):
def cyan(string):
return click.style(string, fg='cyan')

def yellow(string):
return click.style(string, fg='yellow')

prompt_config_overwrite(profile)
if click.confirm('Have you setup your ' +\
click.style('AWS credentials?', fg='cyan')):
if not click.confirm('Already configured ' + cyan('AWS access credentials') + '?',
default=True):
echo('\nSetup your access credentials by following this guide: ', nl=False)
echo('https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html',
fg='cyan')
else:
env_dict = {}
# Datastore configuration.
use_s3 = click.confirm('\nDo you want to use AWS S3 as your datastore?',
use_s3 = click.confirm('\nConfigure Amazon '
'S3 as default datastore (required for AWS Batch)?',
default=True, abort=False)
Comment on lines +368 to 370

Contributor: Can you add 'Amazon S3' in cyan?

Collaborator Author: I tried that; it ended up being too many colors.

if use_s3:
echo('\tAWS S3', fg='cyan')
datastore_s3_root =\
click.prompt('\tPlease enter the bucket prefix to use for your '
'flows')

datatools_s3_root =\
click.prompt('\tPlease enter the bucket prefix to use for your '
'data',
default='%s/data' % datastore_s3_root)

env_dict['METAFLOW_DEFAULT_DATASTORE'] = 's3'
env_dict['METAFLOW_DATASTORE_SYSROOT_S3'] = datastore_s3_root
env_dict['METAFLOW_DATATOOLS_SYSROOT_S3'] = datatools_s3_root

# Batch configuration (only if S3 is being used).
env_dict['METAFLOW_DATASTORE_SYSROOT_S3'] =\
click.prompt('\t' + cyan('METAFLOW_DATASTORE_SYSROOT_S3:') +
' Amazon S3 url for metaflow datastore (s3://<bucket>/<prefix>)')

env_dict['METAFLOW_DATATOOLS_SYSROOT_S3'] =\
click.prompt('\t' + cyan('METAFLOW_DATATOOLS_SYSROOT_S3:') +
yellow(' (optional)') + ' Amazon S3 url for metaflow datatools (s3://<bucket>/<prefix>)',

Contributor: Nit: do you need this again, since you suggest a default in brackets right after? I vote for removing it, since it seems pretty clear with the actual default value:

METAFLOW_DATATOOLS_SYSROOT_S3: (optional) Amazon S3 url for metaflow datatools (s3://<bucket>/<prefix>) [s3://foo/metaflow/data]

Collaborator Author: This is to follow a consistent pattern throughout.

default='%s/data' % env_dict['METAFLOW_DATASTORE_SYSROOT_S3'],
show_default=True)
# AWS Batch configuration (only if Amazon S3 is being used).
use_batch =\
click.confirm('\nDo you want to use AWS Batch for compute?',
click.confirm('\nConfigure AWS Batch for compute?',

Contributor: Same for AWS Batch in cyan? The original intent is to give an idea of what the next set of configuration is going to be about, with the cyan highlight(s).

Collaborator Author: Similar argument as above.

default=True, abort=False)
if use_batch:
echo('\n\tAWS Batch', fg='cyan')
job_queue = click.prompt('\tPlease enter the job queue to use '
'for batch')
default_image =\
click.prompt('\tPlease enter the default container image '
'to use')
container_registry =\
click.prompt('\tPlease enter the container registry')
ecs_s3_role =\
click.prompt('\tPlease enter the IAM role to use for the '
'container to get AWS S3 access')

env_dict['METAFLOW_BATCH_JOB_QUEUE'] = job_queue
env_dict['METAFLOW_BATCH_CONTAINER_IMAGE'] = default_image
env_dict['METAFLOW_BATCH_CONTAINER_REGISTRY'] =\
container_registry
env_dict['METAFLOW_ECS_S3_ACCESS_IAM_ROLE'] = ecs_s3_role

env_dict['METAFLOW_BATCH_JOB_QUEUE'] =\
click.prompt('\t' + cyan('METAFLOW_BATCH_JOB_QUEUE:') +
' AWS Batch Job Queue')

Contributor: Could we be a bit more specific here? I thought there was some confusion about what entry to have here (the ARN, the queue name, or what).

Collaborator Author: Both work equally well. For any of the configs, the user can specify ARNs or names, as they are interchangeable.
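
To make the interchangeability concrete, either of the following would be accepted; the queue name and account id below are hypothetical placeholders:

# Hypothetical values for illustration only.
env_dict['METAFLOW_BATCH_JOB_QUEUE'] = 'metaflow-queue'
# ...or, equivalently, the full ARN:
env_dict['METAFLOW_BATCH_JOB_QUEUE'] = \
    'arn:aws:batch:us-west-2:123456789012:job-queue/metaflow-queue'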

env_dict['METAFLOW_ECS_S3_ACCESS_IAM_ROLE'] =\
click.prompt('\t' + cyan('METAFLOW_ECS_S3_ACCESS_IAM_ROLE:') +
' IAM role granting AWS Batch access to Amazon S3')
metaflow_batch_container_registry =\
click.prompt('\t' + cyan('METAFLOW_BATCH_CONTAINER_REGISTRY:') +
yellow(' (optional)') + ' Default docker image repository for AWS Batch jobs',
default='', show_default=False)
if metaflow_batch_container_registry:
env_dict['METAFLOW_BATCH_CONTAINER_REGISTRY'] = metaflow_batch_container_registry
metaflow_batch_container_image =\
click.prompt('\t' + cyan('METAFLOW_BATCH_CONTAINER_IMAGE:') +
yellow(' (optional)') + ' Default docker image for AWS Batch jobs',
default='', show_default=False)
if metaflow_batch_container_image:
env_dict['METAFLOW_BATCH_CONTAINER_IMAGE'] = metaflow_batch_container_image
# Metadata service configuration.
use_metadata = click.confirm('\nDo you want to use a (remote) metadata '
'service?', default=True, abort=False)
use_metadata = click.confirm('\nConfigure Metadata Service as default metadata provider?',
default=True, abort=False)
if use_metadata:
echo('\tMetadata service', fg='cyan')
service_url = click.prompt('\tPlease enter the URL for your '
'metadata service')
env_dict['METAFLOW_DEFAULT_METADATA'] = 'service'
env_dict['METAFLOW_SERVICE_URL'] = service_url

# Conda (on S3) configuration.
if use_s3:
use_conda = click.confirm('\nDo you want to use conda for '
'dependency management?',
default=True, abort=False)
if use_conda:
echo('\tConda on AWS S3', fg='cyan')
default_val =\
'%s/conda' % env_dict['METAFLOW_DATASTORE_SYSROOT_S3']
package_s3root = \
click.prompt('\tPlease enter the bucket prefix for storing '
'conda packages',
default=default_val)
env_dict['METAFLOW_CONDA_PACKAGE_S3ROOT'] = package_s3root
Comment on lines -422 to -426

Contributor: Just to double-check: have you decided to retire the METAFLOW_CONDA_PACKAGE_S3ROOT configuration?

Collaborator Author: Yes, indeed.

env_dict['METAFLOW_SERVICE_URL'] =\
click.prompt('\t' + cyan('METAFLOW_SERVICE_URL:') +
' URL for Metadata Service (Open to Public Access)')

Contributor: Could you double-check whether sentence case or lower case looks better for the info in the parentheses?

Collaborator Author: I went with sentence case for continuity here.

env_dict['METAFLOW_SERVICE_INTERNAL_URL'] =\
click.prompt('\t' + cyan('METAFLOW_SERVICE_INTERNAL_URL:') +
yellow(' (optional)') + ' URL for Metadata Service (Accessible within VPC)',
default=env_dict['METAFLOW_SERVICE_URL'], show_default=True)
metaflow_service_auth_key =\
click.prompt('\t' + cyan('METAFLOW_SERVICE_AUTH_KEY:') +
yellow(' (optional)') + ' Auth key for Metadata Service',
default='', show_default=False)
if metaflow_service_auth_key:
env_dict['METAFLOW_SERVICE_AUTH_KEY'] = metaflow_service_auth_key
persist_env(env_dict, profile)
else:
echo('\nPlease set them up first through ', nl=False)
echo('"https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html"',
fg='cyan')
7 changes: 5 additions & 2 deletions metaflow/metaflow_config.py
@@ -70,8 +70,10 @@ def from_conf(name, default=None):
###
METADATA_SERVICE_URL = from_conf('METAFLOW_SERVICE_URL')
METADATA_SERVICE_NUM_RETRIES = from_conf('METAFLOW_SERVICE_RETRY_COUNT', 5)
METADATA_SERVICE_AUTH_KEY = from_conf('METAFLOW_SERVICE_AUTH_KEY')
METADATA_SERVICE_HEADERS = json.loads(from_conf('METAFLOW_SERVICE_HEADERS', '{}'))

if METADATA_SERVICE_AUTH_KEY is not None:
METADATA_SERVICE_HEADERS['x-api-key'] = METADATA_SERVICE_AUTH_KEY

###
# AWS Batch configuration
@@ -85,7 +87,8 @@ def from_conf(name, default=None):
# Default container registry for AWS Batch
BATCH_CONTAINER_REGISTRY = from_conf("METAFLOW_BATCH_CONTAINER_REGISTRY")
# Metadata service URL for AWS Batch
BATCH_METADATA_SERVICE_URL = METADATA_SERVICE_URL
BATCH_METADATA_SERVICE_URL = from_conf('METAFLOW_SERVICE_INTERNAL_URL', METADATA_SERVICE_URL)
BATCH_METADATA_SERVICE_HEADERS = METADATA_SERVICE_HEADERS

###
# Conda configuration
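Net effect of the metaflow_config.py changes above, as a hedged sketch: the auth key, when present, is merged into the service headers as x-api-key, and Batch jobs prefer a VPC-internal service URL with a fallback to the public one. The snippet assumes values come from environment variables (from_conf may also consult the config file), and the request at the end is purely illustrative:

import json
import os

# Mirrors the added configuration logic.
auth_key = os.environ.get('METAFLOW_SERVICE_AUTH_KEY')
headers = json.loads(os.environ.get('METAFLOW_SERVICE_HEADERS', '{}'))
if auth_key is not None:
    headers['x-api-key'] = auth_key

# Batch jobs use the internal URL when set, else the public one
# (assumes METAFLOW_SERVICE_URL is configured).
batch_service_url = os.environ.get('METAFLOW_SERVICE_INTERNAL_URL',
                                   os.environ.get('METAFLOW_SERVICE_URL'))

# Illustrative only; '/flows' is a hypothetical endpoint.
import requests
response = requests.get(batch_service_url + '/flows', headers=headers)
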
5 changes: 3 additions & 2 deletions metaflow/plugins/aws/batch/batch.py
@@ -10,7 +10,8 @@
from requests.exceptions import HTTPError
from metaflow.exception import MetaflowException, MetaflowInternalError
from metaflow.metaflow_config import BATCH_METADATA_SERVICE_URL, DATATOOLS_S3ROOT, \
DATASTORE_LOCAL_DIR, DATASTORE_SYSROOT_S3, DEFAULT_METADATA, METADATA_SERVICE_HEADERS
DATASTORE_LOCAL_DIR, DATASTORE_SYSROOT_S3, DEFAULT_METADATA, \
BATCH_METADATA_SERVICE_HEADERS
from metaflow import util

from .batch_client import BatchClient
@@ -143,7 +144,7 @@ def launch_job(
.environment_variable('METAFLOW_CODE_DS', code_package_ds) \
.environment_variable('METAFLOW_USER', attrs['metaflow.user']) \
.environment_variable('METAFLOW_SERVICE_URL', BATCH_METADATA_SERVICE_URL) \
.environment_variable('METAFLOW_SERVICE_HEADERS', json.dumps(METADATA_SERVICE_HEADERS)) \
.environment_variable('METAFLOW_SERVICE_HEADERS', json.dumps(BATCH_METADATA_SERVICE_HEADERS)) \
.environment_variable('METAFLOW_DATASTORE_SYSROOT_LOCAL', DATASTORE_LOCAL_DIR) \
.environment_variable('METAFLOW_DATASTORE_SYSROOT_S3', DATASTORE_SYSROOT_S3) \
.environment_variable('METAFLOW_DATATOOLS_S3ROOT', DATATOOLS_S3ROOT) \
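On the consuming side, launch_job above serializes the headers with json.dumps into the METAFLOW_SERVICE_HEADERS environment variable, so the natural counterpart inside the Batch container is a json.loads; this decoding snippet is illustrative, not part of the PR:

import json
import os

# Inside the Batch container: variables set via .environment_variable(...) above.
service_url = os.environ['METAFLOW_SERVICE_URL']
service_headers = json.loads(os.environ.get('METAFLOW_SERVICE_HEADERS', '{}'))
# service_headers now includes 'x-api-key' whenever an auth key was configured.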