In [1]:
%%writefile ./inference.py

import torch
import json
import logging
import sys
import os

# Topic labels the classifier can emit. The list length sizes the model's
# output layer and predict_fn maps output position i to target_list[i], so
# the order of the entries must not be changed.
target_list = [
    'autos', 'baseball', 'christian', 'comp', 'crypt', 'electronics',
    'forsale', 'graphics', 'hardware', 'hockey', 'ibm', 'mac', 'med',
    'misc', 'motorcycles', 'ms-windows', 'os', 'pc', 'politics', 'rec',
    'religion', 'sci', 'soc', 'space', 'sport', 'sys', 'talk', 'windows',
    'x',
]

# Hyperparameters
MAX_LEN = 256  # passed to the tokenizer as max_length in predict_fn
# Batch sizes, epoch count and learning rate are not referenced by the
# inference code visible in this script.
TRAIN_BATCH_SIZE = VALID_BATCH_SIZE = TEST_BATCH_SIZE = 32
EPOCHS = 10
LEARNING_RATE = 1e-5
THRESHOLD = 0.5  # threshold for the sigmoid

# Module logger: emit everything from DEBUG upwards to standard output.
logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
logger.setLevel(logging.DEBUG)


# transformers may be missing from the runtime environment; if the import
# fails, install it once and retry.
try:
    from transformers import BertTokenizer, BertModel
except ImportError:
    import subprocess
    # Invoke pip through the running interpreter so the package is installed
    # into the same environment that executes this script; a bare "pip" found
    # on PATH may belong to a different Python installation.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers"])
    from transformers import BertTokenizer, BertModel
    
    
# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class BERTClass(torch.nn.Module):
    """BERT encoder followed by dropout and a linear classification head.

    The head emits one raw score per entry of ``target_list``; no activation
    is applied inside the module.
    """

    def __init__(self):
        super().__init__()
        num_labels = len(target_list)
        # The attribute names below become the state-dict key prefixes, so
        # they have to stay as they are for saved weights to load.
        self.bert_model = BertModel.from_pretrained('bert-base-uncased', return_dict=True)
        self.dropout = torch.nn.Dropout(p=0.3)
        self.linear = torch.nn.Linear(in_features=768, out_features=num_labels)

    def forward(self, input_ids, attn_mask, token_type_ids):
        """Return the classification head's raw scores for the given encoding.

        Args:
            input_ids: Token id tensor forwarded to the BERT encoder.
            attn_mask: Attention mask forwarded as ``attention_mask``.
            token_type_ids: Segment ids forwarded as ``token_type_ids``.

        Returns:
            The output of the linear head applied to the dropped-out
            ``pooler_output`` of the encoder.
        """
        encoded = self.bert_model(
            input_ids,
            attention_mask=attn_mask,
            token_type_ids=token_type_ids,
        )
        pooled = self.dropout(encoded.pooler_output)
        return self.linear(pooled)

def model_fn(model_dir):
    """Build the classifier and restore its weights from ``model_dir``.

    Args:
        model_dir: Directory that contains the ``model.pth`` state dict.

    Returns:
        A ``BERTClass`` instance moved to ``device`` and switched to eval mode.
    """
    logger.info("Loading model...")
    network = BERTClass()

    weights_path = os.path.join(model_dir, 'model.pth')
    # NOTE(review): torch.load unpickles the file, so it should only ever be
    # pointed at a trusted artifact.
    with open(weights_path, 'rb') as weights_file:
        state_dict = torch.load(weights_file, map_location=device)
    network.load_state_dict(state_dict)

    network = network.to(device)
    network = network.eval()

    logger.info("Model loaded.")
    return network

def input_fn(request_body, request_content_type):
    """Decode a JSON request and return the value stored under ``"inputs"``.

    Args:
        request_body: JSON document (``str``, ``bytes`` or ``bytearray``)
            whose top-level object has an ``"inputs"`` key.
        request_content_type: Content type sent with the request. Only
            ``application/json`` is accepted; the comparison ignores case and
            any parameters such as ``; charset=utf-8``.

    Returns:
        The value found under the ``"inputs"`` key.

    Raises:
        ValueError: If the content type is not ``application/json`` or the
            body is not valid JSON.
        KeyError: If the decoded payload has no ``"inputs"`` key.
    """
    # Validate explicitly rather than with ``assert``: assertions are removed
    # under ``python -O`` and fail without telling the client what was wrong.
    media_type = (request_content_type or "").split(";", 1)[0].strip().lower()
    if media_type != "application/json":
        raise ValueError(
            f"Unsupported content type {request_content_type!r}; "
            "expected 'application/json'"
        )
    return json.loads(request_body)["inputs"]


# Tokenizer shared by all predict_fn calls; created on first use so that it
# is not re-loaded for every request.
_TOKENIZER = None


def _get_tokenizer():
    """Return the shared BERT tokenizer, loading it on the first call."""
    global _TOKENIZER
    if _TOKENIZER is None:
        _TOKENIZER = BertTokenizer.from_pretrained('bert-base-uncased')
    return _TOKENIZER


def predict_fn(input_data, model):
    """Classify one text and return the names of the predicted labels.

    Args:
        input_data: Text to classify, as returned by ``input_fn``.
        model: Model returned by ``model_fn``; called with the input ids,
            attention mask and token type ids.

    Returns:
        A string in which every predicted label is preceded by a single
        space (for example ``" politics talk"``), or ``""`` when no label
        scores above ``THRESHOLD``.
    """
    logger.info("Tokenizing input data...")
    tokenizer = _get_tokenizer()

    encoded_text = tokenizer.encode_plus(
        input_data,
        max_length=MAX_LEN,
        add_special_tokens=True,
        return_token_type_ids=True,
        # 'max_length' padding replaces the deprecated pad_to_max_length flag.
        padding='max_length',
        # Without truncation, texts longer than MAX_LEN tokens are passed on
        # at full length and can exceed what the BERT encoder accepts.
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    input_ids = encoded_text['input_ids'].to(device)
    attention_mask = encoded_text['attention_mask'].to(device)
    token_type_ids = encoded_text['token_type_ids'].to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(input_ids, attention_mask, token_type_ids)

    # Sigmoid is applied here because training used BCEWithLogitsLoss, which
    # includes it in the loss rather than in the model.
    probabilities = torch.sigmoid(logits).cpu().flatten()
    # A strict comparison keeps a score of exactly 0.5 unselected, matching
    # the previous round()-based thresholding.
    selected = (probabilities > THRESHOLD).tolist()

    # Each label keeps its leading space so the response format is unchanged.
    return ''.join(' ' + name for name, hit in zip(target_list, selected) if hit)

def output_fn(predictions, content_type):
    """Serialize the prediction as a JSON string.

    Args:
        predictions: JSON-serializable value returned by ``predict_fn``.
        content_type: Content type the response is expected in. Only
            ``application/json`` is accepted; the comparison ignores case and
            any parameters such as ``; charset=utf-8``.

    Returns:
        ``predictions`` encoded with ``json.dumps``.

    Raises:
        ValueError: If the content type is not ``application/json``.
    """
    # Validate explicitly rather than with ``assert``: assertions are removed
    # under ``python -O`` and fail without telling the client what was wrong.
    media_type = (content_type or "").split(";", 1)[0].strip().lower()
    if media_type != "application/json":
        raise ValueError(
            f"Unsupported content type {content_type!r}; "
            "expected 'application/json'"
        )
    return json.dumps(predictions)


Writing ./inference.py


In [2]:
!mkdir code

!cp MLTC_model_state.bin model.pth

In [3]:
!cp inference.py code/inference.py

In [70]:
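# Superseded packaging command (it archived the contents of my_model/); kept for reference only.
# The file listing shown under this cell is left over from an earlier run of that command.
# !tar -czvf model.tar.gz -C my_model .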

./
./code/
./code/inference.py
./model.pth


In [13]:
# Already performed in an earlier cell; left commented out to avoid a redundant copy.
# !cp MLTC_model_state.bin model.pth

In [4]:
!tar -czvf model.tar.gz model.pth code

model.pth
code/
code/inference.py


In [18]:
!aws s3 cp model.tar.gz s3://bert-2/model/

upload: ./model.tar.gz to s3://bert-2/model/model.tar.gz            


In [19]:
import os
import json

import boto3
import sagemaker
from sagemaker.pytorch import PyTorchModel
from sagemaker import get_execution_role, Session

# SageMaker session and the execution role used when creating the model.
sess = Session()
role = get_execution_role()

# Describe the model: the uploaded archive, the entry-point script inside
# code/, and the PyTorch / Python versions to serve it with.
model = PyTorchModel(
    model_data='s3://bert-2/model/model.tar.gz',
    role=role,
    entry_point="inference.py",
    source_dir="code",
    framework_version="2.1.0",
    py_version="py310",
)

In [20]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Leave local_mode as False to deploy on a remote SageMaker instance;
# set it to True to use the "local" instance type instead.
local_mode = False
instance_type = "local" if local_mode else "ml.m5.large"

# JSON in, JSON out: requests are serialized and responses parsed as JSON.
predictor = model.deploy(
    instance_type=instance_type,
    initial_instance_count=1,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

------!

In [21]:
# Sample request: one newsgroup post wrapped in the {"inputs": ...} envelope
# that input_fn in inference.py unpacks.
data = {"inputs": '''From: astein@nysernet.org (Alan Stein)
Subject: Re: Pease without justice cann't last Re: Last Opportunity for Peace
Organization: NYSERNet, Inc.
Lines: 18

It seems that, to keep the peace talks going, Israel has to keep
making goodwill gesture after goodwill gesture, while Palestinian
Arabs continue to go around hunting Jews.

If the peace talks are going to have any realistic chance of success,
the Arabs are going to have to start reciprocating, especially since
they are the ones who will be getting tangible concessions in return
for giving up only intangibles.  If they keep trying to change the
already agreed upon rules, which seems to be one of their favorite
games, the Israelis are not likely to be very confident that the
intangibles they will receive at the bargaining table will be worth
the parchment they're written on.

It takes two to negotiate a peace.  It's time for the Arabs to start
doing their share.

-- 
Alan H. Stein                     astein@israel.nysernet.org

        '''}

In [23]:
# Send the sample payload to the deployed endpoint and print what it returns
# (the label string produced by predict_fn, each label preceded by a space).
res = predictor.predict(data)
print(res)

 politics talk


In [26]:
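# Uncomment and run to delete the endpoint created by model.deploy() once you are finished with it.
# predictor.delete_endpoint()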