In [15]:
# imports
import os
import glob
import numpy as np
import pandas as pd
import pickle
import re

import torch
import torch.utils.data
import torch.optim as optim

from sklearn.utils import shuffle
from sklearn.feature_extraction.text import CountVectorizer

import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import *

from bs4 import BeautifulSoup

import sagemaker
from sagemaker.pytorch import PyTorch

from train.model import LSTMClassifier

In [16]:
def train(model, train_loader, epochs, optimizer, loss_fn, device):
    """Train ``model`` in place and print the mean batch loss after every epoch.

    Parameters
    ----------
    model : callable with a ``train()`` method (e.g. a ``torch.nn.Module``)
        Called as ``model(batch_X)``; its parameters are updated through
        ``optimizer``.
    train_loader : iterable of ``(batch_X, batch_y)`` pairs
        Iterated once per epoch. Both elements must support ``.to(device)``.
    epochs : int
        Number of passes over ``train_loader``; epochs are numbered from 1.
    optimizer : object exposing ``zero_grad()`` and ``step()``
        Optimizer bound to the model's parameters.
    loss_fn : callable
        Called as ``loss_fn(output, batch_y)``; must return a scalar tensor
        that supports ``backward()`` and ``item()``.
    device : torch.device
        Device every batch is moved to before the forward pass.

    Returns
    -------
    None
        Progress is reported on stdout only. The printed label is always
        ``BCELoss``, whatever ``loss_fn`` actually is.
    """
    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0.0
        num_batches = 0
        for batch_X, batch_y in train_loader:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)

            # Forward pass. Gradients accumulate across backward() calls, so
            # they are cleared before computing this batch's loss.
            optimizer.zero_grad()
            output = model(batch_X)
            loss = loss_fn(output, batch_y)

            # Backward pass followed by the parameter update.
            loss.backward()
            optimizer.step()

            # .item() extracts the Python float directly; going through the
            # legacy .data attribute is unnecessary.
            total_loss += loss.item()
            num_batches += 1

        # An empty loader yields no batches; report NaN instead of raising
        # ZeroDivisionError when averaging.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        print("Epoch: {}, BCELoss: {}".format(epoch, mean_loss))

In [20]:
# Run on CUDA when torch reports it as available, otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Positional arguments are presumably (embedding_dim, hidden_dim, vocab_size),
# matching the order printed in the training-job log -- confirm against
# train/model.py. Note this local model uses 100 for the second argument while
# the SageMaker job below is launched with hidden_dim=200.
model = LSTMClassifier(32, 100, 5000)
model = model.to(device)

# Adam with its default settings, paired with binary cross-entropy loss.
optimizer = optim.Adam(model.parameters())
loss_fn = torch.nn.BCELoss()

In [43]:
# Session object used for the S3 upload below.
sagemaker_session = sagemaker.Session()

# Ask the session for its default bucket instead of hard-coding a name that
# embeds one specific AWS account ID and region; this keeps the notebook
# runnable from other accounts/regions and keeps the account ID out of the code.
bucket = sagemaker_session.default_bucket()
# Key prefix under which the training data is stored inside the bucket.
prefix = 'sagemaker/sentiment_rnn'

# IAM role of the notebook instance; passed to the estimator and model below.
role = sagemaker.get_execution_role()

# Local directory whose contents are uploaded to S3 as the training input.
data_dir = '../data/pytorch'


arn:aws:iam::646270609688:role/service-role/AmazonSageMaker-ExecutionRole-20191009T005915


In [44]:
# Upload the local data directory to S3 under `bucket`/`prefix`. The returned
# value is the S3 location that is later handed to `estimator.fit` as the
# 'training' channel; it is printed so the destination is visible in the output.
input_data = sagemaker_session.upload_data(path=data_dir, bucket=bucket, key_prefix=prefix)
print(input_data)

s3://sagemaker-us-west-2-646270609688/sagemaker/sentiment_rnn


In [45]:
# Configure the SageMaker training job: the entry script `train.py` is taken
# from the local `train` directory and run on a single ml.p2.xlarge instance.
# The hyperparameters are forwarded to the training script; the job log reports
# the model being built with hidden_dim 200, i.e. the value set here.
# NOTE(review): `train_instance_count` / `train_instance_type` look like the
# SageMaker Python SDK v1 spellings -- confirm the installed SDK version before
# upgrading.
estimator = PyTorch(entry_point="train.py",
                    source_dir="train",
                    role=role,
                    framework_version='0.4.0',
                    train_instance_count=1,
                    train_instance_type='ml.p2.xlarge',
                    hyperparameters={
                        'epochs': 10,
                        'hidden_dim': 200,
                    })

In [47]:
# training
# Launch the training job, exposing the uploaded S3 data as the 'training'
# channel. The recorded run took 412 billable seconds.
# NOTE(review): the recorded log shows BCELoss flat at ~0.693 (about ln 2) for
# all 10 epochs, i.e. the model did not improve over chance-level predictions.
# Investigate the training script / data before relying on the deployed model.
estimator.fit({'training': input_data})

2020-02-06 01:20:04 Starting - Starting the training job...
2020-02-06 01:20:08 Starting - Launching requested ML instances......
2020-02-06 01:21:08 Starting - Preparing the instances for training.........
2020-02-06 01:22:44 Downloading - Downloading input data...
2020-02-06 01:23:22 Training - Downloading the training image..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2020-02-06 01:23:46,610 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2020-02-06 01:23:46,649 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2020-02-06 01:23:46,653 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2020-02-06 01:23:46,900 sagemaker-containers INFO     Module train does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-02-06 01:23:46,900 sagemaker-containers INFO 

[34mModel loaded with embedding_dim 32, hidden_dim 200, vocab_size 5000.[0m
[34mEpoch: 1, BCELoss: 0.694141549723489[0m
[34mEpoch: 2, BCELoss: 0.693354285493189[0m
[34mEpoch: 3, BCELoss: 0.693320768220084[0m
[34mEpoch: 4, BCELoss: 0.6932922820655667[0m
[34mEpoch: 5, BCELoss: 0.6932701760408829[0m
[34mEpoch: 6, BCELoss: 0.693255293126009[0m
[34mEpoch: 7, BCELoss: 0.6932460106149012[0m
[34mEpoch: 8, BCELoss: 0.693240840824283[0m
[34mEpoch: 9, BCELoss: 0.6932385673328322[0m

2020-02-06 01:29:36 Uploading - Uploading generated training model
2020-02-06 01:29:36 Completed - Training job completed
[34mEpoch: 10, BCELoss: 0.6932378253158258[0m
[34m2020-02-06 01:29:24,882 sagemaker-containers INFO     Reporting training SUCCESS[0m
Training seconds: 412
Billable seconds: 412


In [48]:
# Deploy the trained estimator to one ml.m4.xlarge hosting instance. The
# endpoint keeps running until it is explicitly deleted (see the
# `estimator.delete_endpoint()` cell that follows).
predictor = estimator.deploy(initial_instance_count=1, instance_type = 'ml.m4.xlarge')

-----------------!

In [50]:
# Delete the endpoint associated with this estimator (presumably the one created
# by `estimator.deploy` in the previous cell) before the model is redeployed
# with a custom inference script.
estimator.delete_endpoint()

In [51]:
# deployment
from sagemaker.predictor import RealTimePredictor
from sagemaker.pytorch import PyTorchModel

class StringPredictor(RealTimePredictor):
    """RealTimePredictor whose requests are sent with content type ``text/plain``."""

    def __init__(self, endpoint_name, sagemaker_session):
        """Bind the predictor to ``endpoint_name`` using ``sagemaker_session``."""
        super().__init__(
            endpoint_name,
            sagemaker_session,
            content_type='text/plain',
        )

In [52]:
# Wrap the artifacts produced by the training job (`estimator.model_data`) in a
# PyTorchModel that serves them with `serve/predict.py`, and deploy it behind a
# StringPredictor so requests are sent as plain text.
# NOTE(review): this rebinds `model` (previously the local LSTMClassifier) and
# `predictor` (previously the estimator's default predictor); re-running the
# earlier cells after this one will see the new objects.
model = PyTorchModel(model_data=estimator.model_data,
                     role = role,
                     framework_version='0.4.0',
                     entry_point='predict.py',
                     source_dir='serve',
                     predictor_cls=StringPredictor)
predictor = model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

-----------------!

In [53]:
# Display the name of the deployed endpoint (needed by any external client that
# invokes it). The endpoint stays up -- and billable -- until it is deleted.
predictor.endpoint

'sagemaker-pytorch-2020-02-06-01-44-52-407'