In [22]:
import pandas as pd
import argparse
from tokenization import BertTokenizer
from modeling import BertForSequenceClassification
import torch
import torch.nn.functional as F
import numpy as np
import sys
from nltk.corpus import wordnet
import logging
logger = logging.getLogger(__name__)
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.INFO)

import pandas as pd
import numpy as np
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize

import json

In [2]:
def construct_context_gloss_pairs_through_nltk(input, target_start_id, target_end_id, gloss_dict):
    """
    construct context gloss pairs like sent_cls_ws
    :param input: str, a sentence
    :param target_start_id: int
    :param target_end_id: int
    :param lemma: lemma of the target word
    :return: candidate lists
    """
    sent = input.split(" ")
    assert 0 <= target_start_id and target_start_id < target_end_id  and target_end_id <= len(sent)
    target = " ".join(sent[target_start_id:target_end_id])
    if len(sent) > target_end_id:
        sent = sent[:target_start_id] + ['"'] + sent[target_start_id:target_end_id] + ['"'] + sent[target_end_id:]
    else:
        sent = sent[:target_start_id] + ['"'] + sent[target_start_id:target_end_id] + ['"']

    sent = " ".join(sent)

    candidate = []
    for gloss_def in gloss_dict['synsets']:
        candidate.append((sent, f"{target} : {gloss_def}", target, gloss_def))
    #syns = wordnet.synsets(target)
    #for syn in syns:
    #    gloss = syn.definition()
    #    candidate.append((sent, f"{target} : {gloss}", target, gloss))

    assert len(candidate) != 0, f'there is no candidate sense of "{target}" in WordNet, please check'
    #print(f'there are {len(candidate)} candidate senses of "{target}"')


    return candidate


In [3]:
class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self, input_ids, input_mask, segment_ids):
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids

In [4]:
def convert_to_features(candidate, tokenizer, max_seq_length=512):

    candidate_results = []
    features = []
    for item in candidate:
        text_a = item[0] # sentence
        text_b = item[1] # gloss
        candidate_results.append((item[-2], item[-1])) # (target, gloss)


        tokens_a = tokenizer.tokenize(text_a)
        tokens_b = tokenizer.tokenize(text_b)
        _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
        tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
        segment_ids = [0] * len(tokens)
        tokens += tokens_b + ["[SEP]"]
        segment_ids += [1] * (len(tokens_b) + 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # The mask has 1 for real tokens and 0 for padding tokens. Only real tokens are attended to.
        input_mask = [1] * len(input_ids)

        # Zero-pad up to the sequence length.
        padding = [0] * (max_seq_length - len(input_ids))
        input_ids += padding
        input_mask += padding
        segment_ids += padding

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        features.append(
            InputFeatures(input_ids=input_ids,
                          input_mask=input_mask,
                          segment_ids=segment_ids))


    return features, candidate_results


In [5]:
def _truncate_seq_pair(tokens_a, tokens_b, max_length):
    """Truncates a sequence pair in place to the maximum length."""

    # This is a simple heuristic which will always truncate the longer sequence
    # one token at a time. This makes more sense than truncating an equal percent
    # of tokens from each, since if one sequence is very short then each token
    # that's truncated likely contains more information than a longer sequence.
    while True:
        total_length = len(tokens_a) + len(tokens_b)
        if total_length <= max_length:
            break
        if len(tokens_a) > len(tokens_b):
            tokens_a.pop()
        else:
            tokens_b.pop()

In [45]:
def infer(input, target_start_id, target_end_id, gloss_dict, args):

    sent = input.split(" ")
    assert 0 <= target_start_id and target_start_id < target_end_id  and target_end_id <= len(sent)
    target = " ".join(sent[target_start_id:target_end_id])


    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")


    label_list = ["0", "1"]
    num_labels = len(label_list)
    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=True)
    model = BertForSequenceClassification.from_pretrained(args.bert_model,
                                                          num_labels=num_labels)
    model.to(device)

    print(f"input: {input}\ntarget: {target}")

    examples = construct_context_gloss_pairs_through_nltk(input, target_start_id, target_end_id, gloss_dict)
    eval_features, candidate_results = convert_to_features(examples, tokenizer)
    input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)


    model.eval()
    input_ids = input_ids.to(device)
    input_mask = input_mask.to(device)
    segment_ids = segment_ids.to(device)
    with torch.no_grad():
        logits = model(input_ids=input_ids, token_type_ids=segment_ids, attention_mask=input_mask, labels=None)
    logits_ = F.softmax(logits, dim=-1)
    logits_ = logits_.detach().cpu().numpy()
    output = np.argmax(logits_, axis=0)[1]
    print(f"results:\ngloss: {candidate_results[output][1]}")
    return candidate_results[output][1]


In [7]:
def remove_punctuation(input_string, punc1, punc2):
    """
    remove/ replace unnecessary punctuations from ontext sentence
    
    :param input_string: strs, input string/ token
    :param punc1: str, punctuations to replace
    :param punc2: str, punctuations to remove
    :return input_string:str
    """
    
    for ele in input_string:  
        if ele in punc1:  
            input_string = input_string.replace(ele, " ")
        if ele in punc2:  
            input_string = input_string.replace(ele, "")
    return input_string

In [42]:
### to get context sentense from paragraph
def get_context_sentence(word, para):
    """
    Get context sentense for the given word and paragraph
    
    :param input: str, a sentence
    :param target_start_id: int
    :param target_end_id: int
    :param lemma: lemma of the target word
    :return: candidate lists 
        word (str) target word
        sent (str) context sentence
        target_start_id (int), starting index of target word
        target_end_id (int), end index of target word
    """
    sentences = sent_tokenize(para)
    sentence_lst=[sent.lower() for sent in sentences if word.lower() in sent.lower()]
    
    sent = sentence_lst[0]
    
    word = remove_punctuation(word, punc1, punc2)
    sent = remove_punctuation(sent, punc1, punc2)
    print('sentence: ', sent)
    print('word:', word)

    target_tokens = word_tokenize(word)   # create word tokens for input target string
    sent_tokens = word_tokenize(sent)     # create word tokens for context sentence

    target_start_id = sent_tokens.index(target_tokens[0])
    target_end_id = sent_tokens.index(target_tokens[-1]) + 1     # +1 to get correct ending index
    
    # get POS tag for target word
    tagged_sent = nltk.pos_tag(sent_tokens)           # get POS tags of tokens in the sentence
    target_penn_pos = tagged_sent[target_start_id][1] # get POS tag in Penntree notation for target word
    target_mb_pos = get_pos_MB(target_penn_pos)       # get POS tag in meriam webster notation for target word
    
    print('target_start_id:',target_start_id)
    print('target_end_id:', target_end_id)
    print('target_penn_pos', target_penn_pos)
    print('target_mb_pos:', target_mb_pos)
    
    return word, sent, target_start_id, target_end_id, target_mb_pos

In [43]:
def get_pos_MB(pos_penn_tree):
    """
    Description: Function takes part-of-speech (POS) for target word using Penn Treebank notation 
                and returns part of speech in the form of Merriam webster notation (i.e. 'fl' part in json input)
    
    Input: 
        pos_penn_tree: (str), part of speech (POS) for target word in Penn Treebank notation form
    
    Output: 
        pos_MB: (str), part of speech to match with 'fl' in Merriam Webster dictionary
    """
    pos_MB = [pos for pos, item_list in pos_tag.items() if pos_penn_tree in item_list]
    return pos_MB[0]

In [47]:
if  __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('-f')
    parser.add_argument("--bert_model", default="./Sent_CLS_WS", type=str)
    parser.add_argument("--no_cuda", default=False, action='store_true', help="Whether not to use CUDA when available")

    args = parser.parse_args()

    request_file = open('request1.json', 'r')
    request_data = request_file.read()
    jsonobj = json.loads(request_data)
    
    #jsonobj_test = jsonobj[:2]
    
    #* Define global variables *#
    # Create a dictionary to as a cross reference for part-of-speech between penn tree and Merriam Webster dictionary
    pos_tag = {'verb':['VB','VBD','VBG','VBN','VBP','VBZ'], 
           'noun': ['NN','NNS','NNP', 'NNPS'], 
           'adjective': ['JJ','JJR','JJS'],
           'adverb':['RB', 'RBR', 'RBS', 'RP'],
           'None':['None',''] }

    punc1 = '-—'
    #punc2 =':.,;“”’()?'
    punc2 = '''!()[]{};:'"’“”\,<>./?@#$%^&*_~'''
    word_id_lst=[]
    teacher_vote_lst=[]
    ai_vote_lst=[]
    word_lst=[]

    for item in jsonobj:
        word = item['word'].lower()                                          # get target word 
 
        print('target word:', word)
        # get context sentence for target word and index details of target word in context sentence
        word, sent, target_start_id, target_end_id, target_pos = get_context_sentence(word, item['paragraph']) 
        
        #get_pos_MB()

        gloss_dict={}
        gloss_data =[]
        for gloss in item['dictionaryData']:
            
            if gloss['fl'].lower() == target_pos:
                gloss_data.append(gloss['meaning'])
            
            if gloss['isTeacher'] == 1:
                word_id = gloss['word_id']
                teacher_vote = gloss['meaning']
   
        gloss_dict['synsets']=gloss_data
        ai_vote = infer(sent, target_start_id, target_end_id, gloss_dict, args)

        word_id_lst.append(word_id)
        word_lst.append(word)
        teacher_vote_lst.append(teacher_vote)
        ai_vote_lst.append(ai_vote)
    

12/06/2020 12:01:39 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt
12/06/2020 12:01:39 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:01:39 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



target word: manslaughter
sentence:  the shooter was charged with manslaughter
word: manslaughter
target_start_id: 5
target_end_id: 6
target_penn_pos NN
target_mb_pos: noun
input: the shooter was charged with manslaughter
target: manslaughter


12/06/2020 12:01:44 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt


results:
gloss: the crime of killing a person without intending to do so
target word: bludgeoned
sentence:  there was vincent chin in detroit who in 1982 was bludgeoned to death with a baseball bat a week before his wedding
word: bludgeoned
target_start_id: 10
target_end_id: 11
target_penn_pos VBN
target_mb_pos: verb


12/06/2020 12:01:44 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:01:44 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



input: there was vincent chin in detroit who in 1982 was bludgeoned to death with a baseball bat a week before his wedding
target: bludgeoned


12/06/2020 12:01:50 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt
12/06/2020 12:01:50 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:01:50 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



results:
gloss: to hit with heavy impact
target word: envisioned
sentence:  i envisioned my students galvanized as i had been in eighth grade by martin luther king jrs letter from birmingham jail and mesmerized as i had been in high school by malcolm xs autobiography
word: envisioned
target_start_id: 1
target_end_id: 2
target_penn_pos VBD
target_mb_pos: verb
input: i envisioned my students galvanized as i had been in eighth grade by martin luther king jrs letter from birmingham jail and mesmerized as i had been in high school by malcolm xs autobiography
target: envisioned


12/06/2020 12:01:56 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt
12/06/2020 12:01:56 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:01:56 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



results:
gloss: to picture to oneself
target word: bucket
sentence:  they stacked foot high collections of michael jackson and joan baez records in the living room voted dutifully never missing an election and brought home for dinner the occasional bucket of fried chicken
word: bucket
target_start_id: 29
target_end_id: 30
target_penn_pos NN
target_mb_pos: noun
input: they stacked foot high collections of michael jackson and joan baez records in the living room voted dutifully never missing an election and brought home for dinner the occasional bucket of fried chicken
target: bucket


12/06/2020 12:02:12 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt


results:
gloss: a typically cylindrical vessel for catching, holding, or carrying liquids or solids
target word: acquitted
sentence:  he claimed at court that the kid moved in a strange way his lawyer told the jury that the killer was protecting his property and that he was an average joe one of your neighbors someone who liked sugar in his grits he was acquitted
word: acquitted
target_start_id: 44
target_end_id: 45
target_penn_pos VBN
target_mb_pos: verb


12/06/2020 12:02:12 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:02:12 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



input: he claimed at court that the kid moved in a strange way his lawyer told the jury that the killer was protecting his property and that he was an average joe one of your neighbors someone who liked sugar in his grits he was acquitted
target: acquitted


12/06/2020 12:02:17 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt
12/06/2020 12:02:17 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:02:17 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



results:
gloss: to decide that someone is not guilty of a crime
target word: allegation
sentence:  i took this allegation literally
word: allegation
target_start_id: 3
target_end_id: 4
target_penn_pos NN
target_mb_pos: noun
input: i took this allegation literally
target: allegation


12/06/2020 12:02:22 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt
12/06/2020 12:02:22 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:02:22 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



results:
gloss: the act of alleging something
target word: primed
sentence:  they effected a clandestine evangelization of a kid primed to be a good disciple
word: primed
target_start_id: 8
target_end_id: 9
target_penn_pos VBN
target_mb_pos: verb
input: they effected a clandestine evangelization of a kid primed to be a good disciple
target: primed


12/06/2020 12:02:28 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt
12/06/2020 12:02:28 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:02:28 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



results:
gloss: to make (someone) ready to do something : prepare
target word: laden
sentence:  education for me became laden with a meaning at once specific and spiritual
word: laden
target_start_id: 4
target_end_id: 5
target_penn_pos JJ
target_mb_pos: adjective
input: education for me became laden with a meaning at once specific and spiritual
target: laden


12/06/2020 12:02:33 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt
12/06/2020 12:02:33 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:02:33 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



results:
gloss: loaded heavily with something : having or carrying a large amount of something —usually + with —often used figuratively
target word: rhetoric
sentence:  in suburban michigan in the quiet of my bedroom these iconic readings of antiracist rhetoric cast a spell over me
word: rhetoric
target_start_id: 14
target_end_id: 15
target_penn_pos NN
target_mb_pos: noun
input: in suburban michigan in the quiet of my bedroom these iconic readings of antiracist rhetoric cast a spell over me
target: rhetoric


12/06/2020 12:02:37 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt
12/06/2020 12:02:37 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:02:37 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



results:
gloss: the art of speaking or writing effectively: such as
target word: essential
sentence:  du bois ralph ellison richard wright alice walker maya angelou each of these people seemed as fearless to me as asian americans seemed afraid as essential to american history as we were irrelevant
word: essential
target_start_id: 25
target_end_id: 26
target_penn_pos JJ
target_mb_pos: adjective
input: du bois ralph ellison richard wright alice walker maya angelou each of these people seemed as fearless to me as asian americans seemed afraid as essential to american history as we were irrelevant
target: essential


12/06/2020 12:02:41 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt
12/06/2020 12:02:41 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:02:41 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



results:
gloss: of, relating to, or constituting essence : inherent
target word: steeped
sentence:  i imagined households steeped in conversation
word: steeped
target_start_id: 3
target_end_id: 4
target_penn_pos VBD
target_mb_pos: verb
input: i imagined households steeped in conversation
target: steeped


12/06/2020 12:02:46 - INFO - tokenization -   loading vocabulary file ./Sent_CLS_WS/vocab.txt
12/06/2020 12:02:46 - INFO - modeling -   loading archive file ./Sent_CLS_WS
12/06/2020 12:02:46 - INFO - modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



results:
gloss: to put (something) in a liquid for a period of time
target word: embolden
sentence:  did it embolden you
word: embolden
target_start_id: 2
target_end_id: 3
target_penn_pos VB
target_mb_pos: verb
input: did it embolden you
target: embolden
results:
gloss: to impart boldness or courage to : to instill with boldness, courage, or resolution enough to overcome timidity or misgiving
target word: from scratch
sentence:  i would start from scratch
word: from scratch


IndexError: list index out of range

In [18]:
results_df = pd.DataFrame(list(zip(word_id_lst, word_lst,teacher_vote_lst, ai_vote_lst)),columns=['word_id', 'word','teachers_vote', 'AI_vote'])

In [20]:
np.mean(results_df['teachers_vote']==results_df['AI_vote'])

0.7022058823529411

In [23]:
import nltk

In [28]:
sente = 'patricks warrant stated capital murder meaning lethal injection in arkansas and had since been reduced to first degree murder still an overcharge'

tokens = word_tokenize(sente)
tagged_sent = nltk.pos_tag(tokens)

target_start_id = 6

In [39]:
pos_tag = {'verb':['VB','VBD','VBG','VBN','VBP','VBZ'], 
               'noun': ['NN','NNS','NNP', 'NNPS'], 
               'adjective': ['JJ','JJR','JJS'],
               'adverb':['RB', 'RBR', 'RBS', 'RP'],
               'None':['None',''] }

def add_pos(pos_value):
    # make a dictionary of possible tags (in progress)
    pos_val = [pos for pos, item_list in pos_tag.items() if pos_value in item_list]
    return pos_val

In [40]:
tagged_sent[target_start_id][1]

'JJ'

In [41]:
add_pos(tagged_sent[target_start_id][1])

['adjective']

In [51]:
gfg = ['from scratch']
nltk.pos_tag(gfg)

[('from scratch', 'NN')]

In [30]:
tagged_sent

[('patricks', 'NNS'),
 ('warrant', 'VBP'),
 ('stated', 'VBN'),
 ('capital', 'NN'),
 ('murder', 'NN'),
 ('meaning', 'NN'),
 ('lethal', 'JJ'),
 ('injection', 'NN'),
 ('in', 'IN'),
 ('arkansas', 'NN'),
 ('and', 'CC'),
 ('had', 'VBD'),
 ('since', 'IN'),
 ('been', 'VBN'),
 ('reduced', 'VBN'),
 ('to', 'TO'),
 ('first', 'JJ'),
 ('degree', 'NN'),
 ('murder', 'NN'),
 ('still', 'RB'),
 ('an', 'DT'),
 ('overcharge', 'NN')]

In [27]:
tagged_sent['lethal']

TypeError: list indices must be integers or slices, not str