# Distributed Wandb Hyperparameter Search with AWS Batch
Please select the **conda_tensorflow2_p36** kernel in the top right!

In [14]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
!pip install --upgrade wandb -q

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/tensorflow2_p36/bin/python -m pip install --upgrade pip' command.[0m


In [23]:
import wandb
import boto3
import base64
from botocore.exceptions import ClientError

## Define the Sweep Config

In [11]:
def get_sweep_id(model='ResNet-50'):
    """Register the 'Artemis' hyperparameter sweep with wandb and return its ID.

    The sweep uses the 'bayes' search method to maximize the 'accuracy'
    metric reported by 'dockerfile/model.py'.

    Args:
        model: Passed to every run as the fixed (single-value) 'model' parameter.

    Returns:
        Whatever ``wandb.sweep`` returns for the created sweep (its ID).
    """
    def choices(*options):
        # A discrete wandb search dimension: one of the listed values is picked per run.
        return {'values': list(options)}

    hidden_sizes = (256, 512, 1024)
    dropout_rates = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)

    search_space = {
        'learning_rate': choices(1e-2, 1e-3, 1e-4),
        # 32 and 64 are currently disabled; 128 was noted as too big.
        'batch_size': choices(16),
        # 10, 25 and 50 are currently disabled.
        'epochs': choices(5),
        'optimizer': choices('adam', 'rmsprop', 'sgd'),
        'activation': choices('relu', 'tanh', 'sigmoid'),
        'hidden_ly1': choices(*hidden_sizes),
        'hidden_ly2': choices(*hidden_sizes),
        'dropout_ly1': choices(*dropout_rates),
        'dropout_ly2': choices(*dropout_rates),
        # Constant across the whole sweep.
        'model': {'value': model},
    }

    config = {
        'name': 'Artemis',
        'program': 'dockerfile/model.py',
        'method': 'bayes',
        'metric': {'name': 'accuracy', 'goal': 'maximize'},
        'parameters': search_space,
    }

    return wandb.sweep(config, project='satellite-model-and-orientation')

## Login to Wandb

In [7]:
wandb.login()

[34m[1mwandb[0m: [32m[41mERROR[0m Not authenticated.  Copy a key from https://app.wandb.ai/authorize


API Key:  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ec2-user/.netrc


True

## Initialize the Sweep ID

In [12]:
sweep_id = get_sweep_id()

Create sweep with ID: ire9ohrz
Sweep URL: https://app.wandb.ai/rosenblatt/satellite-model-and-orientation/sweeps/ire9ohrz


## Test Sweep Locally
The agent can't be launched from the Python interpreter; it must be run from the shell.

In [13]:
!wandb agent $sweep_id

[34m[1mwandb[0m: Starting wandb agent 🕵️
2020-06-28 19:55:02,756 - wandb.wandb_agent - INFO - Running runs: []
2020-06-28 19:55:02,970 - wandb.wandb_agent - INFO - Agent received command: run
2020-06-28 19:55:02,970 - wandb.wandb_agent - INFO - Agent starting run with config:
	activation: sigmoid
	batch_size: 16
	dropout_ly1: 0.1
	dropout_ly2: 0.6
	epochs: 5
	hidden_ly1: 1024
	hidden_ly2: 512
	learning_rate: 0.001
	model: ResNet-50
	optimizer: rmsprop
2020-06-28 19:55:03,056 - wandb.wandb_agent - INFO - About to run command: /usr/bin/env python new_model.py --activation=sigmoid --batch_size=16 --dropout_ly1=0.1 --dropout_ly2=0.6 --epochs=5 --hidden_ly1=1024 --hidden_ly2=512 --learning_rate=0.001 --model=ResNet-50 --optimizer=rmsprop
[34m[1mwandb[0m: Tracking run with wandb version 0.9.1
[34m[1mwandb[0m: Run data is saved locally in wandb/run-20200628_195502-blidp2je
[34m[1mwandb[0m: Syncing run [33mlogical-sweep-1[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps:/

## Dockerize the Model

In [22]:
!pygmentize 'dockerfile/Dockerfile'

[37m# https://github.com/aws/deep-learning-containers/blob/master/available_images.md[39;49;00m
[34mFROM[39;49;00m [33mtensorflow/tensorflow-training:2.2.0-gpu-py37-cu102-ubuntu18.04[39;49;00m

[34mRUN[39;49;00m pip install wandb -q --upgrade

[34mRUN[39;49;00m wandb login [31m$api_key[39;49;00m

[34mENTRYPOINT[39;49;00m wandb agent [31m$sweep_id[39;49;00m


In [None]:
image_name = 'sweep-model'
!docker build -t $image_name .

## Upload Docker image to ECR

## Obtain Wandb API Key from Secrets Manager

In [45]:
def get_secret(secret_name="wandb_api_key", region_name="us-east-2", client=None):
    """Fetch a secret value from AWS Secrets Manager and return it.

    Args:
        secret_name: Identifier passed as ``SecretId`` to ``get_secret_value``.
        region_name: AWS region used when a client has to be created here.
        client: Optional, already-built Secrets Manager client. When omitted,
            a client is created from a new boto3 session in ``region_name``.

    Returns:
        The ``SecretString`` value when the response contains one; otherwise
        the base64-decoded ``SecretBinary`` payload as ``bytes``.

    Raises:
        botocore.exceptions.ClientError: Propagated unchanged for any failure
            of the ``get_secret_value`` call (e.g. DecryptionFailureException,
            InternalServiceErrorException, InvalidParameterException,
            InvalidRequestException, ResourceNotFoundException).
    """
    if client is None:
        # Create a Secrets Manager client
        session = boto3.session.Session()
        client = session.client(
            service_name='secretsmanager',
            region_name=region_name
        )

    # Deliberately no try/except: every ClientError is allowed to propagate,
    # so a failed lookup can never be mistaken for a successful one that
    # happened to return None.
    # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
    get_secret_value_response = client.get_secret_value(
        SecretId=secret_name
    )

    # Depending on whether the secret is a string or binary, one of these fields will be populated.
    if 'SecretString' in get_secret_value_response:
        return get_secret_value_response['SecretString']
    return base64.b64decode(get_secret_value_response['SecretBinary'])
    
   

## Sweep the Model with AWS Batch

In [None]:
# Submit a single AWS Batch job for the sweep and report its job ID.
batch = boto3.client('batch')

# Environment variables set on the container for this job.
# NOTE(review): api_key is submitted as an empty string here — confirm the
# real wandb API key is supplied before relying on this job.
container_overrides = {
    "environment": [
        {"name": "sweep_id", "value": sweep_id},
        {"name": "api_key", "value": ""},
    ]
}

response = batch.submit_job(
    jobName='model-sweep',
    jobQueue='mixed',  # sufficient for most jobs
    jobDefinition='myJobDef:7',  # placeholder: use a real job definition
    containerOverrides=container_overrides,
)

print("Job ID is {}.".format(response['jobId']))