# [Module 4.1] Deploy from Scratch


여기서는 다음과 같은 작업을 합니다.

- 모델 아티팩트(model.tar.gz) 파일을 S3에서 로컬로 다운로드
- TF Saved_Model 의 정의를 확인
- SageMaker Model 생성
- Endpoint 생성
- Inference의 Request Serializer and Deserializer 생성
- 프리딕터 생성
- 샘플 데이터로 추론

---
이 노트북은 실행하는 데 약 10분 정도 소요됩니다.


필요한 프로그램 설치

In [1]:
# Install the pinned library versions this notebook was written against.
# Upgrade pip itself first so the installs below use the newer pip.
!pip install -q --upgrade pip
# Reinstall wrapt unconditionally (--ignore-installed) before TensorFlow.
# NOTE(review): presumably this avoids an uninstall failure of the pre-installed wrapt — confirm.
!pip install -q wrapt --upgrade --ignore-installed
!pip install -q tensorflow==2.1.0
!pip install -q transformers==2.8.0
!pip install -q sagemaker==1.56.1

[31mERROR: After October 2020 you may experience errors when installing or updating packages. This is because pip will change the way that it resolves dependency conflicts.

We recommend you use --use-feature=2020-resolver to test your packages with the new resolver before it becomes the default.

astroid 2.3.3 requires wrapt==1.11.*, but you'll have wrapt 1.12.1 which is incompatible.[0m


In [2]:
import boto3
import sagemaker
import pandas as pd
import tensorflow as tf
import os

# SageMaker session: supplies the default S3 bucket here and is later passed to the Predictor.
sess   = sagemaker.Session()
bucket = sess.default_bucket()
# Execution role; handed to the SageMaker Model when it is created.
role = sagemaker.get_execution_role()
# Region name taken from a default boto3 session.
region = boto3.Session().region_name

# boto3 SageMaker client bound to that region.
# NOTE(review): `sm` is not referenced again in the visible part of this notebook.
sm = boto3.Session().client(service_name='sagemaker', region_name=region)

## Download the Model to the Notebook

In [3]:
# Restore `training_job_name` from IPython's %store database.
# NOTE(review): it must have been saved with `%store` beforehand, presumably by the training notebook — confirm.
%store -r training_job_name

In [4]:
# Show the restored job name; it is used below as the S3 key prefix of the model artifact.
print(training_job_name)

tensorflow-training-2020-07-31-13-03-25-859


In [5]:
# Local directory that receives the downloaded model artifact.
model_download = 'model'
# exist_ok=True keeps a re-run of this cell from failing when the directory already exists.
os.makedirs(model_download, exist_ok=True)

In [6]:
# Copy the training job's model.tar.gz from the default bucket into the local model directory.
!aws s3 cp s3://$bucket/$training_job_name/output/model.tar.gz {model_download}/model.tar.gz

download: s3://sagemaker-us-west-2-057716757052/tensorflow-training-2020-07-31-13-03-25-859/output/model.tar.gz to model/model.tar.gz


In [7]:
# Optional inspection step, left disabled: uncomment to unpack the downloaded artifact
# and print the SavedModel signature definitions with saved_model_cli.
# !tar -xvzf   {model_download}/model.tar.gz
# !saved_model_cli show --all --dir ./tensorflow/saved_model/0/

## SageMaker Model 생성

In [8]:
import os
from sagemaker.tensorflow.serving import Model

# Build a TensorFlow Serving Model that points at the training job's model.tar.gz in S3.
# The serving framework version is pinned to 2.0.0 even though tensorflow==2.1.0 is installed above.
# Original note: Elastic Inference does not yet support TF 2.1.0 as of sagemaker==1.56.1.
# NOTE(review): the deploy() call in this notebook passes no accelerator type — confirm the pin is still needed.
model = Model(model_data='s3://{}/{}/output/model.tar.gz'.format(bucket, training_job_name),
              role=role,
              framework_version='2.0.0')

## Endpoint 생성

In [9]:
# Instance type that hosts the endpoint.
instance_type='ml.m4.xlarge'

# Deploy the model to a single-instance endpoint; wait=True blocks this cell until deploy() returns.
deployed_model = model.deploy(initial_instance_count = 1,
                             instance_type = instance_type,
                             wait=True)



-------------!Endpoint name:  tensorflow-inference-2020-07-31-13-39-35-246


In [13]:
# Keep the endpoint name; the Predictor created later is attached to the endpoint by this name.
endpoint_name = deployed_model.endpoint
print('Endpoint name:  {}'.format(endpoint_name))

Endpoint name:  tensorflow-inference-2020-07-31-13-39-35-246


## Inference Request Serializer and Deserializer 생성

In [10]:
class RequestHandler(object):
    """Serializer that turns raw text instances into a JSON request body.

    Each instance is tokenized with the tokenizer given to the constructor and
    converted into a dict with the keys ``input_ids``, ``input_mask`` and
    ``segment_ids``. The dicts are wrapped as ``{"instances": [...]}`` and
    returned as a JSON string.

    Args:
        tokenizer: Object exposing ``encode_plus(text, pad_to_max_length=...,
            max_length=...)`` that returns a mapping containing ``input_ids``
            and ``attention_mask``.
        max_seq_length: Length passed to the tokenizer as ``max_length``; also
            the length of the all-zero ``segment_ids`` list.
    """

    def __init__(self, tokenizer, max_seq_length):
        self.tokenizer = tokenizer
        self.max_seq_length = max_seq_length

    def __call__(self, instances):
        """Serialize ``instances`` (an iterable of texts) to a JSON string.

        Args:
            instances: Iterable of texts to encode; may be empty.

        Returns:
            str: JSON document of the form ``{"instances": [...]}``.
        """
        # Imported here because a class-body import only creates a class
        # attribute; it is not visible as a bare name inside methods.
        import json

        transformed_instances = []

        for instance in instances:
            # Use the tokenizer given to the constructor, not whatever global
            # happens to be called `tokenizer` in the notebook.
            encode_plus_tokens = self.tokenizer.encode_plus(instance,
                                                            pad_to_max_length=True,
                                                            max_length=self.max_seq_length)

            transformed_instances.append({
                "input_ids": encode_plus_tokens['input_ids'],
                "input_mask": encode_plus_tokens['attention_mask'],
                # Segment ids are always all zeros here.
                "segment_ids": [0] * self.max_seq_length,
            })

        return json.dumps({"instances": transformed_instances})
    
class ResponseHandler(object):
    """Deserializer that maps a JSON prediction response to class labels.

    The response body is expected to be UTF-8 JSON with a ``"predictions"``
    key holding one list of scores per input instance. For each list, the
    index of the highest score is looked up in ``classes``.

    Args:
        classes: Sequence of class labels, indexed by the position of the
            winning score.
    """

    def __init__(self, classes):
        self.classes = classes

    def __call__(self, response, accept_header):
        """Decode ``response`` and return the predicted class per instance.

        Args:
            response: Object whose ``read()`` returns the response body as bytes.
            accept_header: Accepted for interface compatibility; not used.

        Returns:
            list: One entry of ``classes`` per row in ``"predictions"``.
        """
        # Imported here because a class-body import only creates a class
        # attribute; it is not visible as a bare name inside methods.
        import json

        response_body = response.read().decode('utf-8')
        response_json = json.loads(response_body)
        log_probabilities = response_json["predictions"]

        predicted_classes = []

        for log_probability in log_probabilities:
            # Softmax is strictly increasing, so the argmax of the raw scores
            # is the argmax of their softmax. Taking it directly avoids a
            # TensorFlow op per row and yields a plain int index.
            predicted_class_idx = max(range(len(log_probability)),
                                      key=log_probability.__getitem__)
            predicted_classes.append(self.classes[predicted_class_idx])

        return predicted_classes

## Predictor 생성

In [14]:
import json
from sagemaker.tensorflow.serving import Predictor
from transformers import DistilBertTokenizer

# Pretrained DistilBERT tokenizer used to encode tweets on the client side.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Serializer: tokenizes each input and pads it to 128 tokens.
# NOTE(review): 128 presumably has to match the sequence length used in training — confirm.
request_handler = RequestHandler(tokenizer=tokenizer,
                                 max_seq_length=128)

# Deserializer: maps the winning score index onto one of ten class ids (0-9).
response_handler = ResponseHandler(classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# Predictor attached to the deployed endpoint, exchanging JSON via the two handlers above.
# NOTE(review): model_name/model_version presumably correspond to the artifact path
# tensorflow/saved_model/0/ seen in the disabled saved_model_cli command — confirm.
predictor = Predictor(endpoint_name=endpoint_name,
                      sagemaker_session=sess,
                      serializer=request_handler,
                      deserializer=response_handler,
                      content_type='application/json',
                      model_name='saved_model',
                      model_version=0)

## Inference 실행

In [17]:
from TweetUtil import TweetUtil

# Helper from the project-local TweetUtil module; used here to turn a class index into an emoji label.
tweet_util = TweetUtil()
# NOTE(review): the error recorded for this cell reports the path 'data/emoji_to_idx.pickle',
# so the file apparently has to exist under data/ — confirm against TweetUtil.
tweet_util.load_emoji_data('emoji_to_idx.pickle')
# Quick check: look up the label for class index 3.
emoji = tweet_util.get_emo_class_label(3)
print(emoji)

FileNotFoundError: [Errno 2] No such file or directory: 'data/emoji_to_idx.pickle'

In [18]:
# Load the test tweets from CSV; the path is relative to the notebook's working directory.
test_file_path = 'data/test/tweet_file_test.csv'
test_df = pd.read_csv(test_file_path)
# Display the raw tweet texts as the cell output.
test_df.TWEET.values

FileNotFoundError: [Errno 2] File data/test/tweet_file_test.csv does not exist: 'data/test/tweet_file_test.csv'

In [58]:
# Draw 10 random rows (no seed is set, so the sample differs between runs) and show the column names.
sample_df = test_df.sample(10)
sample_df.columns

Index(['TWEET', 'LABEL'], dtype='object')

In [59]:
from TweetUtil import TweetUtil

# Create the TweetUtil helper and load the emoji mapping used to label predictions below.
# NOTE(review): this cell repeats an earlier cell verbatim; only this run recorded a successful output.
tweet_util = TweetUtil()
tweet_util.load_emoji_data('emoji_to_idx.pickle')
# Quick check: look up the label for class index 3.
emoji = tweet_util.get_emo_class_label(3)
print(emoji)

emoji_to_idx is loaded
😂


In [60]:
import tensorflow as tf
import json

# Send 10 randomly sampled tweets to the endpoint one at a time and print the raw result.
# No seed is set, so each run samples different rows.
sample_df = test_df.sample(10)
# NOTE(review): `columns` is assigned but not used in this cell.
columns = ['TWEET', 'LABEL']
for tweet, label in zip(sample_df.TWEET.values, sample_df.LABEL.values):
    # print("label: {}, tweet: {}".format(label, tweet))
    
    # The predictor's serializer iterates over its input, so a single tweet is wrapped in a list.
    reviews = [tweet]

    # Result is a list with one predicted class per submitted tweet.
    predicted_classes = predictor.predict(reviews)
    print(predicted_classes)
        



[3]
[3]
[3]
[3]
[3]
[3]
[3]
[3]
[3]
[3]


In [64]:
import tensorflow as tf
import json

# Same loop as the previous cell, but prints prediction and ground truth side by side with their emoji labels.
# No seed is set, so each run samples different rows.
sample_df = test_df.sample(10)
# NOTE(review): `columns` is assigned but not used in this cell.
columns = ['TWEET', 'LABEL']
for tweet, label in zip(sample_df.TWEET.values, sample_df.LABEL.values):
    # print("label: {}, tweet: {}".format(label, tweet))
    
    reviews = [tweet]

    # One tweet is sent per request, so [0] picks its single predicted class.
    predicted_classes = predictor.predict(reviews)[0]
    # Printed as "<predicted id>:<emoji>, <true id>:<emoji>" followed by the tweet text.
    print('Prediction, Ground_truth- {}:{}, {}:{} \n tweet: {}'.format(
        predicted_classes, 
        tweet_util.get_emo_class_label(predicted_classes),
        label, 
        tweet_util.get_emo_class_label(label),        
        tweet))    

        



Prediction, Ground_truth- 3:😂, 1:💕 
 tweet:  be dere soon gang
Prediction, Ground_truth- 3:😂, 3:😂 
 tweet:  i'm killing u for that one
Prediction, Ground_truth- 3:😂, 2:🔥 
 tweet:  when michael jackson was at diana ross's concert and danced with her on stage
Prediction, Ground_truth- 3:😂, 3:😂 
 tweet: so weird how the only person i get along w in this class is the other carlie dunlap
Prediction, Ground_truth- 3:😂, 7:😭 
 tweet:  bundles 
Prediction, Ground_truth- 3:😂, 8:🙄 
 tweet: real things i say i have so many tv shows too watch it's eternally stressful 
Prediction, Ground_truth- 3:😂, 4:😊 
 tweet:  im a sophomore and i look 12
Prediction, Ground_truth- 3:😂, 3:😂 
 tweet: i just want to be able to do a cartwheel
Prediction, Ground_truth- 3:😂, 6:😩 
 tweet:  i second this
Prediction, Ground_truth- 3:😂, 6:😩 
 tweet:  i be craving sex
