In [1]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

# initialize the model architecture and weights
model = T5ForConditionalGeneration.from_pretrained("t5-base")
# initialize the model tokenizer
tokenizer = T5Tokenizer.from_pretrained("t5-base")

  from .autonotebook import tqdm as notebook_tqdm
Downloading: 100%|███████████████████████████| 850M/850M [00:17<00:00, 50.6MB/s]
Downloading: 100%|███████████████████████████| 773k/773k [00:00<00:00, 5.80MB/s]


In [10]:
article = """
A frontal boundary that had been stalled over the mid-Atlantic had lifted north of the region by the morning of 
June 29. Later that day and into the evening, the front once again approached, this time as a strong cold front,
as low pressure tracked through New England and began to intensify offshore in the Gulf of Maine. 
The combination of strong frontal forcing and a warm, unstable environment ahead of the front led to 
widespread severe thunderstorms developing. Numerous reports of damaging wind, as well as some hail, 
were received in association with these storms.
"""

In [11]:
# encode the text into tensor of integers using the appropriate tokenizer
inputs = tokenizer.encode("summarize: " + article, return_tensors="pt", max_length=512, truncation=True)

In [12]:
inputs

tensor([[21603,    10,    71,   851,   138, 20430,    24,   141,   118,     3,
          8407,  1361,   147,     8,  2076,    18,   188,    17,  1618,  1225,
           141, 19464,  3457,    13,     8,  1719,    57,     8,  1379,    13,
          1515,  2838,     5, 10511,    24,   239,    11,   139,     8,  2272,
             6,     8,   851,   728,   541, 15319,     6,    48,    97,    38,
             3,     9,  1101,  2107,   851,     6,    38,   731,  1666, 22679,
           190,   368,  2789,    11,  1553,    12,  9608,  4921, 16667,    16,
             8, 13566,    13, 13905,     5,    37,  2711,    13,  1101,   851,
           138, 19060,    11,     3,     9,  1978,     6, 27644,  1164,  2177,
            13,     8,   851,  2237,    12, 14047,  5274, 31250,     7,  2421,
             5, 25638,   302,  2279,    13, 18488,  2943,     6,    38,   168,
            38,   128, 22690,     6,   130,  1204,    16,  6028,    28,   175,
          5536,     7,     5,     1]])

In [13]:
# generate the summarization output
outputs = model.generate(
    inputs, 
    max_length=150, 
    min_length=40, 
    length_penalty=2.0, 
    num_beams=4, 
    early_stopping=True)
# just for debugging
print(outputs)
print(tokenizer.decode(outputs[0]))

tensor([[    0,     3,     9,   851,    24,   141,   118,     3,  8407,  1361,
           147,     8,  2076,    18,   188,    17,  1618,  1225, 19464,  3457,
            13,     8,  1719,     3,     5,   865,    24,   239,    11,   139,
             8,  2272,     6,     8,   851,   728,   541, 15319,     3,     5,
           731,  1666, 22679,   190,   126,  2789,    11,  1553,    12,  9608,
          4921, 16667,    16,     8, 13566,    13, 13905,     3,     5,     1]])
<pad> a front that had been stalled over the mid-Atlantic lifted north of the region. later that day and into the evening, the front once again approached. low pressure tracked through new England and began to intensify offshore in the Gulf of Maine.</s>


In [None]:
import json
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, AutoConfig

def encode(tokenizer, question, context):
    """encodes the question and context with a given tokenizer"""
    encoded = tokenizer.encode_plus(question, context)
    return encoded["input_ids"], encoded["attention_mask"]

def decode(tokenizer, token):
    """decodes the tokens to the answer with a given tokenizer"""
    answer_tokens = tokenizer.convert_ids_to_tokens(
        token, skip_special_tokens=True)
    return tokenizer.convert_tokens_to_string(answer_tokens)

def serverless_pipeline(model_path='./model'):
    """Initializes the model and tokenzier and returns a predict function that ca be used as pipeline"""
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForQuestionAnswering.from_pretrained(model_path)
    def predict(question, context):
        """predicts the answer on an given question and context. Uses encode and decode method from above"""
        input_ids, attention_mask = encode(tokenizer,question, context)
        a =  model(torch.tensor(
            [input_ids]), attention_mask=torch.tensor([attention_mask]))
        start_scores = a['start_logits']
        end_scores = a['end_logits']    
        print(start_scores)
        ans_tokens = input_ids[torch.argmax(
            start_scores): torch.argmax(end_scores)+1]
        answer = decode(tokenizer,ans_tokens)
        return answer
    return predict

# initializes the pipeline
question_answering_pipeline = serverless_pipeline()

def handler(event, context):
    try:
        # loads the incoming event into a dictonary
        body = json.loads(event['body'])
        # uses the pipeline to predict the answer
        answer = question_answering_pipeline(question=body['question'], context=body['context'])
        return {
            "statusCode": 200,
            "headers": {
                'Content-Type': 'application/json',
                'Access-Control-Allow-Origin': '*',
                "Access-Control-Allow-Credentials": True

            },
            "body": json.dumps({'answer': answer})
        }
    except Exception as e:
        print(repr(e))
        return {
            "statusCode": 500,
            "headers": {
                'Content-Type': 'application/json',
                'Access-Control-Allow-Origin': '*',
                "Access-Control-Allow-Credentials": True
            },
            "body": json.dumps({"error": repr(e)})
        }

In [None]:
import json
from transformers import pipeline

ner = pipeline("ner", model='./model', tokenizer='./model')


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
event = {"body":"{\"context\":\"We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pretrain deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be finetuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial taskspecific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5% (7.7% point absolute improvement), MultiNLI accuracy to 86.7% (4.6% absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).\"}"}

In [51]:
import requests
import json
import ast

event = {"context":"D2 drought conditions developed across portions of Geneva and Coffee counties during the last week of September and continued into October."}
event1 = {"context":"Scattered thunderstorms developed ahead of a cold front on the morning of the 14th, producing isolated wind damage. Throughout the afternoon and evening of the 13th and all day on the 14th, strong south gradient winds associated with a strong low pressure system gusted up to 50 mph at times. The low pressure center tracked northeast across the Plains to the Great Lakes region. Peak gradient wind gusts included 56 mph at the Cape Girardeau airport, 53 mph at the Perryville airport, 48 mph at the Poplar Bluff airport, and 47 mph at the Sikeston airport."}


def NLP_NER_OUTPUT(event):
    url = 'https://0rwjoafjn8.execute-api.us-east-1.amazonaws.com/dev/qa'
    response = requests.post(url,json=event1)
    output = response.content.decode()
    an = json.loads(output)
    a = an['answer']
    print(a)
    b = str(a)
    Final_dict = ''
    list_b = list(b)
    for i in range(1,len(list_b)-1):
        Final_dict = Final_dict +list_b[i]

    es = ast.literal_eval(Final_dict)
    return es

output_dict = NLP_NER_OUTPUT(event)

[{'entity': 'B-LOC', 'score': 0.9885201, 'index': 69, 'word': 'Plains', 'start': 346, 'end': 352}, {'entity': 'B-LOC', 'score': 0.9972579, 'index': 72, 'word': 'Great', 'start': 360, 'end': 365}, {'entity': 'I-LOC', 'score': 0.9981468, 'index': 73, 'word': 'Lakes', 'start': 366, 'end': 371}, {'entity': 'B-LOC', 'score': 0.9952852, 'index': 87, 'word': 'Cape', 'start': 428, 'end': 432}, {'entity': 'I-LOC', 'score': 0.99629253, 'index': 88, 'word': 'G', 'start': 433, 'end': 434}, {'entity': 'I-LOC', 'score': 0.9961098, 'index': 89, 'word': '##ira', 'start': 434, 'end': 437}, {'entity': 'I-LOC', 'score': 0.9663395, 'index': 90, 'word': '##rde', 'start': 437, 'end': 440}, {'entity': 'I-LOC', 'score': 0.9272877, 'index': 91, 'word': '##au', 'start': 440, 'end': 442}, {'entity': 'B-LOC', 'score': 0.9960742, 'index': 98, 'word': 'Perry', 'start': 466, 'end': 471}, {'entity': 'I-LOC', 'score': 0.9977574, 'index': 99, 'word': '##ville', 'start': 471, 'end': 476}, {'entity': 'B-LOC', 'score': 0.

In [19]:
def NLP_SUMMAARIZATION_OUTPUT(event):
    url = 'https://yto0kae7xb.execute-api.us-east-1.amazonaws.com/dev/qa'
    response = requests.post(url,json=event1)
    output = response.content.decode()
    an = json.loads(output)
    a = an['answer']
    return a

output_dict = NLP_SUMMAARIZATION_OUTPUT(event)

In [52]:
output_dict

({'entity': 'B-LOC',
  'score': 0.9885201,
  'index': 69,
  'word': 'Plains',
  'start': 346,
  'end': 352},
 {'entity': 'B-LOC',
  'score': 0.9972579,
  'index': 72,
  'word': 'Great',
  'start': 360,
  'end': 365},
 {'entity': 'I-LOC',
  'score': 0.9981468,
  'index': 73,
  'word': 'Lakes',
  'start': 366,
  'end': 371},
 {'entity': 'B-LOC',
  'score': 0.9952852,
  'index': 87,
  'word': 'Cape',
  'start': 428,
  'end': 432},
 {'entity': 'I-LOC',
  'score': 0.99629253,
  'index': 88,
  'word': 'G',
  'start': 433,
  'end': 434},
 {'entity': 'I-LOC',
  'score': 0.9961098,
  'index': 89,
  'word': '##ira',
  'start': 434,
  'end': 437},
 {'entity': 'I-LOC',
  'score': 0.9663395,
  'index': 90,
  'word': '##rde',
  'start': 437,
  'end': 440},
 {'entity': 'I-LOC',
  'score': 0.9272877,
  'index': 91,
  'word': '##au',
  'start': 440,
  'end': 442},
 {'entity': 'B-LOC',
  'score': 0.9960742,
  'index': 98,
  'word': 'Perry',
  'start': 466,
  'end': 471},
 {'entity': 'I-LOC',
  'score': 

In [30]:
for i in range(len(output_dict)):
    if output_dict[i]['score'] > 0.90:
        print("\033[1;31;40m"+output_dict[i]['entity']+"\033[0;31;40m","\033[1;37;40m"+output_dict[i]['word']+"\033[0;37;40m \n")

[1;31;40mB-LOC[0;31;40m [1;37;40mBoston[0;37;40m 

[1;31;40mI-LOC[0;31;40m [1;37;40mAmerica[0;37;40m 



In [6]:
event = {"context":"Hi My name is Petet"}

In [5]:
an['answer']

"[{'entity': 'B-LOC', 'score': 0.9960104, 'index': 9, 'word': 'Geneva', 'start': 51, 'end': 57}, {'entity': 'B-LOC', 'score': 0.99175394, 'index': 11, 'word': 'Coffee', 'start': 62, 'end': 68}]"

In [89]:
output = response.content.decode()
output

'{"answer": "[{\'entity\': \'B-LOC\', \'score\': 0.9960104, \'index\': 9, \'word\': \'Geneva\', \'start\': 51, \'end\': 57}, {\'entity\': \'B-LOC\', \'score\': 0.99175394, \'index\': 11, \'word\': \'Coffee\', \'start\': 62, \'end\': 68}]"}'

In [90]:
an = json.loads(output)

In [91]:
an

{'answer': "[{'entity': 'B-LOC', 'score': 0.9960104, 'index': 9, 'word': 'Geneva', 'start': 51, 'end': 57}, {'entity': 'B-LOC', 'score': 0.99175394, 'index': 11, 'word': 'Coffee', 'start': 62, 'end': 68}]"}

In [15]:
an['answer'][1]

'{'

In [18]:
an['answer']

"[{'entity': 'B-PER', 'score': 0.95498997, 'index': 5, 'word': 'R', 'start': 14, 'end': 15}, {'entity': 'B-PER', 'score': 0.40459135, 'index': 6, 'word': '##iya', 'start': 15, 'end': 18}, {'entity': 'B-LOC', 'score': 0.73876435, 'index': 11, 'word': 'New', 'start': 33, 'end': 36}, {'entity': 'I-LOC', 'score': 0.7159286, 'index': 12, 'word': 'yo', 'start': 37, 'end': 39}]"

In [95]:
a = an['answer']

In [96]:
a

"[{'entity': 'B-LOC', 'score': 0.9960104, 'index': 9, 'word': 'Geneva', 'start': 51, 'end': 57}, {'entity': 'B-LOC', 'score': 0.99175394, 'index': 11, 'word': 'Coffee', 'start': 62, 'end': 68}]"

In [97]:
print("The original list : " + str(a))

The original list : [{'entity': 'B-LOC', 'score': 0.9960104, 'index': 9, 'word': 'Geneva', 'start': 51, 'end': 57}, {'entity': 'B-LOC', 'score': 0.99175394, 'index': 11, 'word': 'Coffee', 'start': 62, 'end': 68}]


In [98]:
b = str(a)

In [99]:
c = ''
list_b = list(b)
for i in range(1,len(list_b)-1):
    c = c+list_b[i]

In [107]:
c


'{\'entity\': \'B-LOC\', \'score\': 0.9960104, \'index\': 9, \'word\': \'Geneva\', \'start\': 51, \'end\': 57}, {\'entity\': \'B-LOC\', \'score\': 0.99175394, \'index\': 11, \'word\': \'Coffee\', \'start\': 62, \'end\': 68}\'answer\': "[{\'entity\': \'B-LOC\', \'score\': 0.9960104, \'index\': 9, \'word\': \'Geneva\', \'start\': 51, \'end\': 57}, {\'entity\': \'B-LOC\', \'score\': 0.99175394, \'index\': 11, \'word\': \'Coffee\', \'start\': 62, \'end\': 68}]"'

In [87]:
es[3]['word']

'yo'

In [75]:
K = 'word'
res = dict()
for sub in b:
    res.update(sub)
print("The extracted value : " + str(res[K]))

ValueError: dictionary update sequence element #0 has length 1; 2 is required

In [28]:
def Convert(string):
    li = list(string.split(" "))
    return li
  
# Driver code    
str1 = "Geeks for Geeks"
print(Convert(a))

["[{'entity':", "'B-PER',", "'score':", '0.95498997,', "'index':", '5,', "'word':", "'R',", "'start':", '14,', "'end':", '15},', "{'entity':", "'B-PER',", "'score':", '0.40459135,', "'index':", '6,', "'word':", "'##iya',", "'start':", '15,', "'end':", '18},', "{'entity':", "'B-LOC',", "'score':", '0.73876435,', "'index':", '11,', "'word':", "'New',", "'start':", '33,', "'end':", '36},', "{'entity':", "'I-LOC',", "'score':", '0.7159286,', "'index':", '12,', "'word':", "'yo',", "'start':", '37,', "'end':", '39}]']


In [43]:
import json
import sys

#load the data into an element
data = a

#dumps the json object into an element
json_str = json.dumps(data)

#load the json to a string
resp = json.loads(json_str)

#print the resp
#print (resp)

#extract an element in the response
print (a['word'])

TypeError: string indices must be integers

In [57]:
from re import sub

def camel_case(s):
  s = sub(r"(_|-)+", " ", s).title()
  return s

In [58]:
camel_case("Scattered thunderstorms developed ahead of a cold front on the morning of the 14th, producing isolated wind damage. Throughout the afternoon and evening of the 13th and all day on the 14th, strong south gradient winds associated with a strong low pressure system gusted up to 50 mph at times. The low pressure center tracked northeast across the Plains to the Great Lakes region. Peak gradient wind gusts included 56 mph at the Cape Girardeau airport, 53 mph at the Perryville airport, 48 mph at the Poplar Bluff airport, and 47 mph at the Sikeston airport.")

'Scattered Thunderstorms Developed Ahead Of A Cold Front On The Morning Of The 14Th, Producing Isolated Wind Damage. Throughout The Afternoon And Evening Of The 13Th And All Day On The 14Th, Strong South Gradient Winds Associated With A Strong Low Pressure System Gusted Up To 50 Mph At Times. The Low Pressure Center Tracked Northeast Across The Plains To The Great Lakes Region. Peak Gradient Wind Gusts Included 56 Mph At The Cape Girardeau Airport, 53 Mph At The Perryville Airport, 48 Mph At The Poplar Bluff Airport, And 47 Mph At The Sikeston Airport.'