In [None]:
%%sh
pip -q install pip --upgrade
pip -q install sagemaker --upgrade

In [None]:
import boto3
import sagemaker

# Show which SageMaker SDK version the kernel actually imported.
print(sagemaker.__version__)

# Create a SageMaker session and look up its default bucket; `bucket` is used
# by the next cells to build the S3 input and output paths.
session = sagemaker.Session()
bucket = session.default_bucket()

### Define channels

In [None]:
# Fully replicated, Pipe Mode

from sagemaker.inputs import ShuffleConfig

# All inputs and outputs live under a single prefix in the session's bucket.
prefix = 'imagenet-split'
s3_train_path = 's3://{}/{}/input/training/'.format(bucket, prefix)
s3_val_path = 's3://{}/{}/input/validation/'.format(bucket, prefix)
s3_output = 's3://{}/{}/output/'.format(bucket, prefix)

# Training channel: RecordIO files under the training prefix, with a shuffle
# configuration built from the fixed value 59.
train_data = sagemaker.TrainingInput(
    s3_data=s3_train_path,
    s3_data_type='S3Prefix',
    content_type='application/x-recordio',
    input_mode='Pipe',
    distribution='FullyReplicated',
    shuffle_config=ShuffleConfig(59),
)

# Validation channel: same settings as training, but no shuffle configuration.
validation_data = sagemaker.TrainingInput(
    s3_data=s3_val_path,
    s3_data_type='S3Prefix',
    content_type='application/x-recordio',
    input_mode='Pipe',
    distribution='FullyReplicated',
)

In [None]:
# Echo the three S3 locations, one per line, as a quick sanity check.
print(s3_train_path, s3_val_path, s3_output, sep='\n')

In [None]:
# Map channel names to their input definitions; passed to ic.fit() below.
s3_channels = dict(train=train_data, validation=validation_data)

### Get the container image URI of the image classification algorithm in our region

In [None]:
from sagemaker import image_uris

# Region configured for the default boto3 session.
# NOTE(review): region_name can be None when no region is configured - confirm
# the notebook environment always sets one.
region = boto3.Session().region_name

# Look up the built-in image classification container for that region.
container = image_uris.retrieve(framework='image-classification', region=region)
print(container)

### Configure the training job

In [None]:
# Execution role handed to the estimator.
role = sagemaker.get_execution_role()

# NOTE(review): Spot training is enabled but no checkpoint_s3_uri is set, so an
# interrupted job would presumably restart from scratch - confirm whether
# checkpointing should be configured for a run of this size.
ic = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=8,
    instance_type='ml.p3dn.24xlarge',  # 64 NVIDIA V100 GPUs :)
    use_spot_instances=True,
    max_run=28800,   # 8 * 3600
    max_wait=32400,  # 9 * 3600
    output_path=s3_output,
)

### Set algorithm parameters

In [None]:
ic.set_hyperparameters(
    # Network and dataset
    num_layers=50,                 # ResNet-50
    use_pretrained_model=0,        # start from scratch, no pretrained weights
    num_classes=1000,              # ImageNet has 1000 classes
    num_training_samples=1281167,  # size of the training set
    mini_batch_size=2816,          # found empirically to fill GPU RAM
    # Optimisation schedule: the factor 0.5 is paired with the listed steps
    learning_rate=0.4,
    lr_scheduler_factor=0.5,
    lr_scheduler_step='30,60,90,120,150,180',
    epochs=200,
    kv_store='dist_sync',
    augmentation_type='crop',
    # Early stopping and reporting
    early_stopping=True,
    early_stopping_patience=30,
    top_k=3,
)

### Train the model

In [None]:
# Start training on the 'train' and 'validation' channels defined above.
ic.fit(inputs=s3_channels)

### Deploy the model

In [None]:
# Deploy the trained model to a single-instance endpoint; the returned
# predictor is used below for its endpoint name.
ic_predictor = ic.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
)

### Download a test image

In [None]:
# Dog
# Download a sample photo with wget (shell escape); -O writes it to
# /tmp/test.jpg, replacing any file already at that path.
!wget -O /tmp/test.jpg https://upload.wikimedia.org/wikipedia/commons/b/b7/LabradorWeaving.jpg
# Same path as the wget target above; read again by the prediction cell.
file_name = '/tmp/test.jpg'
from IPython.display import Image
# Last expression of the cell, so the image is rendered as the cell output.
Image(file_name)

### Predict test image

In [None]:
import json

import boto3
import numpy as np  # required by np.argmax below; no earlier cell imports it

# Runtime client used to call the deployed endpoint directly.
runtime = boto3.Session().client(service_name='runtime.sagemaker')

# Read the downloaded test image as raw bytes; they are sent as-is.
with open(file_name, 'rb') as f:
    payload = f.read()

response = runtime.invoke_endpoint(EndpointName=ic_predictor.endpoint_name,
                                   ContentType='application/x-image',
                                   Body=payload)

# The response body is parsed as JSON.
# NOTE(review): presumably a flat list with one score per class - confirm.
result = json.loads(response['Body'].read())

# Position of the highest score; converted to a plain int so it can be used
# directly as a list index in the next cell.
index = int(np.argmax(result))
print(result[index], index)

In [None]:
# Load the class names, one entry per line of classes.txt (each entry keeps
# its trailing newline).
# NOTE(review): classes.txt is not created by any visible cell - it must
# already exist in the working directory.
with open('classes.txt') as f:
    labels = list(f)

# Label at the position of the top score found by the prediction cell.
print(labels[index])

### Delete endpoint

In [None]:
# Left commented out so that "Run All" does not remove the endpoint created by
# ic.deploy() above; uncomment and run this line once you are done with it.
#ic_predictor.delete_endpoint()