# [Module 4.1] Deploy from Scratch


여기서는 다음과 같은 작업을 합니다.

- 모델 아티팩트(model.tar.gz) 파일을 S3에서 로컬로 다운로드
- TF Saved_Model 의 정의를 확인
- SageMaker Model 생성
- Endpoint 생성
- Inference의 Request Serializer and Deserializer 생성
- 프리딕터 생성
- 샘플 데이터로 추론

---
이 노트북은 약 10분 정도 소요됩니다.


필요한 프로그램 설치

In [93]:
# Install the pinned toolchain this notebook was written against.
# Upgrade pip itself before installing the pinned packages.
!pip install -q --upgrade pip
# --ignore-installed reinstalls wrapt even if a copy is already present
# (presumably to avoid an uninstall conflict with the preinstalled copy — confirm).
!pip install -q wrapt --upgrade --ignore-installed
!pip install -q tensorflow==2.1.0
!pip install -q transformers==2.8.0
# The code below imports sagemaker.tensorflow.serving, so the SDK version is pinned.
!pip install -q sagemaker==1.56.1

[31mERROR: After October 2020 you may experience errors when installing or updating packages. This is because pip will change the way that it resolves dependency conflicts.

We recommend you use --use-feature=2020-resolver to test your packages with the new resolver before it becomes the default.

astroid 2.3.3 requires wrapt==1.11.*, but you'll have wrapt 1.12.1 which is incompatible.[0m


In [94]:
import boto3
import sagemaker
import pandas as pd
import tensorflow as tf
import os

# Region of the default boto3 session, plus a low-level SageMaker API client bound to it.
region = boto3.Session().region_name
sm = boto3.Session().client(service_name='sagemaker', region_name=region)

# SageMaker SDK session, its default S3 bucket, and the execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

## Download the Model to the Notebook

In [95]:
# Restore `training_job_name` that an earlier notebook persisted with %store.
%store -r training_job_name

In [96]:
# Show which training job's artifact will be deployed.
print(training_job_name)

tensorflow-training-2020-08-02-12-18-28-858


In [97]:
# Local directory that receives the model artifact; exist_ok=True makes the cell safe to re-run.
model_download = 'model'
os.makedirs(model_download, exist_ok=True)

In [98]:
# Copy the training job's model.tar.gz from S3 into the local download directory.
# $bucket, $training_job_name and {model_download} are interpolated from Python variables by IPython.
!aws s3 cp s3://$bucket/$training_job_name/output/model.tar.gz {model_download}/model.tar.gz

download: s3://sagemaker-ap-northeast-2-343441690612/tensorflow-training-2020-08-02-12-18-28-858/output/model.tar.gz to model/model.tar.gz


In [100]:
# !tar -xvzf   {model_download}/model.tar.gz
# !saved_model_cli show --all --dir ./tensorflow/saved_model/0/

## SageMaker Model 생성

In [101]:
import os
from sagemaker.tensorflow.serving import Model

# S3 location of the artifact produced by the training job.
model_artifact_uri = 's3://{}/{}/output/model.tar.gz'.format(bucket, training_job_name)

# Elastic Inference does not yet support TF 2.1.0 as of sagemaker==1.56.1
model = Model(
    model_data=model_artifact_uri,
    role=role,
    framework_version='2.0.0',
)

## Endpoint 생성

In [102]:
# Instance type that hosts the endpoint.
instance_type = 'ml.m4.xlarge'

# Deploy one instance and block until the call returns (wait=True).
deployed_model = model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    wait=True,
)



-------------!

In [103]:
# Keep the endpoint name; the Predictor created later connects to the endpoint by this name.
endpoint_name = deployed_model.endpoint
print('Endpoint name:  {}'.format(endpoint_name))

Endpoint name:  tensorflow-inference-2020-08-02-12-37-40-784


## Inference Request Serializer and Deserializer 생성

In [104]:
class RequestHandler(object):
    """Serializer that turns raw text into a JSON request body.

    Args:
        tokenizer: Object exposing ``encode_plus(text, pad_to_max_length=...,
            max_length=...)`` that returns a mapping containing ``input_ids``
            and ``attention_mask``.
        max_seq_length: Value passed to the tokenizer as ``max_length`` and
            used as the length of the all-zero ``segment_ids`` list.
    """

    def __init__(self, tokenizer, max_seq_length):
        self.tokenizer = tokenizer
        self.max_seq_length = max_seq_length

    def __call__(self, instances):
        """Tokenize every text in ``instances`` and serialize the batch.

        Args:
            instances: Iterable of input texts.

        Returns:
            A JSON string of the form ``{"instances": [...]}`` where each
            entry holds ``input_ids``, ``input_mask`` and ``segment_ids``.
        """
        # Imported locally: an import in the class body only creates a class
        # attribute and is not visible as a bare name inside methods.
        import json

        transformed_instances = []

        for instance in instances:
            # Use the tokenizer injected in __init__, not a notebook-level
            # global that may not exist (or may be a different tokenizer).
            encode_plus_tokens = self.tokenizer.encode_plus(
                instance,
                pad_to_max_length=True,
                max_length=self.max_seq_length)

            transformed_instances.append({
                "input_ids": encode_plus_tokens['input_ids'],
                "input_mask": encode_plus_tokens['attention_mask'],
                # Single-sentence input: every position gets segment id 0.
                "segment_ids": [0] * self.max_seq_length,
            })

        return json.dumps({"instances": transformed_instances})
    
class ResponseHandler(object):
    """Deserializer that extracts the ``predictions`` entry from a JSON response.

    Args:
        classes: Class labels. Stored on the instance; ``__call__`` does not
            use them because it returns the raw prediction values.
    """

    def __init__(self, classes):
        self.classes = classes

    def __call__(self, response, accept_header):
        """Decode the response body and return its ``predictions`` value.

        Args:
            response: Object whose ``read()`` returns the UTF-8 encoded JSON
                body.
            accept_header: Not used; accepted so the handler matches the
                two-argument deserializer call.

        Returns:
            The value stored under the ``"predictions"`` key, unchanged. No
            softmax or argmax is applied here; ranking is left to the caller.

        Raises:
            KeyError: If the body has no ``"predictions"`` key.
            ValueError: If the body is not valid JSON.
        """
        # Imported locally: an import in the class body only creates a class
        # attribute and is not visible as a bare name inside methods.
        import json

        response_body = response.read().decode('utf-8')
        response_json = json.loads(response_body)

        return response_json["predictions"]

## Predictor 생성

In [105]:
import json
from sagemaker.tensorflow.serving import Predictor
from transformers import DistilBertTokenizer

# Pretrained DistilBERT tokenizer used to encode tweets before they are sent.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Serializer / deserializer pair plugged into the predictor.
# NOTE(review): max_seq_length=32 presumably matches the training sequence length — confirm.
request_handler = RequestHandler(tokenizer=tokenizer, max_seq_length=32)
response_handler = ResponseHandler(classes=list(range(10)))

predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sess,
    serializer=request_handler,
    deserializer=response_handler,
    content_type='application/json',
    model_name='saved_model',
    model_version=0,
)

## Inference 실행

In [106]:
from TweetUtil import TweetUtil

# Helper for translating class indices to emoji; the mapping is loaded from a local pickle file.
tweet_util = TweetUtil()
tweet_util.load_emoji_data('emoji_to_idx.pickle')
# Sanity check: look up and print the emoji for class index 3.
emoji = tweet_util.get_emo_class_label(3)
print(emoji)

emoji_to_idx is loaded
😂


In [107]:
# Read the test tweets once (the original cell read the same CSV twice).
test_file_path = 'data/test/tweet_file_test.csv'
test_df = pd.read_csv(test_file_path)
# Draw 10 random rows; no random_state is set, so each run samples different tweets.
sample_df = test_df.sample(10)
sample_df

Unnamed: 0,TWEET,LABEL
2260,a likely story,9
8370,african space tour,2
7086,raheem sterling's snapchat story,7
2984,they can't hide the heart eyes,5
7494,for anything enchanted kingdom,0
3603,actor park,1
3170,my grades are not where i want them to be got...,7
549,you are fabulous good morning pipol happy mon...,4
4622,lmao why,6
4374,new video go watch my favourite parts were th...,3


In [108]:
def show_top_N_label(score_list, topN):
    """Return the indices of the ``topN`` highest scores, best first.

    Args:
        score_list: One-dimensional sequence of numeric scores.
        topN: Number of indices to return. Values larger than
            ``len(score_list)`` return every index; values <= 0 return an
            empty list.

    Returns:
        A list of Python ints, ordered from highest to lowest score.
    """
    import numpy as np

    # Guard explicitly: slicing with [-0:] would select the whole array
    # instead of nothing.
    if topN <= 0:
        return []

    # argsort is ascending, so reverse it and keep the leading topN entries.
    descending_idx = np.argsort(score_list)[::-1]
    return descending_idx[:topN].tolist()


In [111]:
import tensorflow as tf
import json

# Number of highest-scoring classes to report per tweet.
topN = 3
for tweet, label in zip(sample_df.TWEET.values, sample_df.LABEL.values):
    # Send a one-element batch and take the single row of per-class scores back.
    class_scores = predictor.predict([tweet])[0]
    top_classes = show_top_N_label(class_scores, topN)

    print('tweet: {} \nGround_truth- {}:{}\n '.format(
        tweet,
        label,
        tweet_util.get_emo_class_label(label)))

    # Build "index,emoji" pairs for however many classes were requested, so
    # changing topN does not require editing a fixed-width format string.
    prediction_text = ','.join(
        '{},{}'.format(class_idx, tweet_util.get_emo_class_label(class_idx))
        for class_idx in top_classes)
    print('Prediction: {} \n '.format(prediction_text))
        



tweet:  a likely story  
Ground_truth- 9:🤔
 
Prediction: 6,😩,7,😭,9,🤔 
 
tweet:  african space tour 
Ground_truth- 2:🔥
 
Prediction: 2,🔥,5,😍,7,😭 
 
tweet:  raheem sterling's snapchat story  
Ground_truth- 7:😭
 
Prediction: 0,❤,5,😍,3,😂 
 
tweet:  they can't hide the heart eyes  
Ground_truth- 5:😍
 
Prediction: 9,🤔,5,😍,7,😭 
 
tweet: for anything enchanted kingdom 
Ground_truth- 0:❤
 
Prediction: 4,😊,6,😩,1,💕 
 
tweet: actor park 
Ground_truth- 1:💕
 
Prediction: 4,😊,7,😭,5,😍 
 
tweet:  my grades are not where i want them to be got 1 week to get them up 
Ground_truth- 7:😭
 
Prediction: 9,🤔,8,🙄,3,😂 
 
tweet:  you are fabulous good morning pipol happy monday aldubikawlang 
Ground_truth- 4:😊
 
Prediction: 0,❤,1,💕,4,😊 
 
tweet:  lmao why 
Ground_truth- 6:😩
 
Prediction: 8,🙄,9,🤔,6,😩 
 
tweet:  new video go watch my favourite parts were the approving your costume the best house wars 
Ground_truth- 3:😂
 
Prediction: 1,💕,5,😍,4,😊 
 


## Past Version

In [50]:
import tensorflow as tf
import json

# NOTE(review): historical cell, last executed before the cells above (see its execution count).
# Its saved output shows single class indices, which suggests it ran against an earlier
# ResponseHandler that returned predicted classes — confirm before re-running.
sample_df = test_df.sample(10)
# `columns` is not referenced anywhere in this cell.
columns = ['TWEET', 'LABEL']
for tweet, label in zip(sample_df.TWEET.values, sample_df.LABEL.values):
    # print("label: {}, tweet: {}".format(label, tweet))
    
    # Send one tweet per request as a one-element list.
    reviews = [tweet]

    predicted_classes = predictor.predict(reviews)
    print(predicted_classes)
        



[1]
[1]
[1]
[1]
[0]
[1]
[0]
[1]
[1]
[1]


In [51]:
import tensorflow as tf
import json

# NOTE(review): historical cell, last executed before the cells above (see its execution count).
# The ResponseHandler defined in this notebook returns the raw "predictions" list, so
# `predicted_classes` below would be a row of scores rather than one class index —
# confirm before re-running.
sample_df = test_df.sample(10)
# `columns` is not referenced anywhere in this cell.
columns = ['TWEET', 'LABEL']
for tweet, label in zip(sample_df.TWEET.values, sample_df.LABEL.values):
    # print("label: {}, tweet: {}".format(label, tweet))
    
    # Send one tweet per request and keep the first element of the result.
    reviews = [tweet]

    predicted_classes = predictor.predict(reviews)[0]
    # Prints prediction and ground truth as index:emoji pairs, followed by the tweet.
    print('Prediction, Ground_truth- {}:{}, {}:{} \n tweet: {}'.format(
        predicted_classes, 
        tweet_util.get_emo_class_label(predicted_classes),
        label, 
        tweet_util.get_emo_class_label(label),        
        tweet))    

        



Prediction, Ground_truth- 1:💕, 9:🤔 
 tweet: why am i still up
Prediction, Ground_truth- 1:💕, 4:😊 
 tweet:  gud eve frm here fe al
Prediction, Ground_truth- 1:💕, 2:🔥 
 tweet:  he throws 95 105 mph for those wondering 
Prediction, Ground_truth- 1:💕, 7:😭 
 tweet: power just pissed me off
Prediction, Ground_truth- 1:💕, 6:😩 
 tweet: feels like i'm getting sick
Prediction, Ground_truth- 1:💕, 1:💕 
 tweet: always been the baddest
Prediction, Ground_truth- 1:💕, 7:😭 
 tweet:  oh my god
Prediction, Ground_truth- 0:❤, 0:❤ 
 tweet:  still following people all day send me a screenshot that you got the song 
Prediction, Ground_truth- 1:💕, 6:😩 
 tweet: to early for college
Prediction, Ground_truth- 1:💕, 6:😩 
 tweet: beh tambak ang assignments
