In [18]:
import nltk
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import pandas as pd
import numpy as np
from nltk.util import ngrams
from collections import Counter

In [4]:
df = pd.read_excel('Attachment ambiguity WITH.xlsx')
df

Unnamed: 0,She walked into the room with a phone in her hand.
0,"The chef cooked dinner, chopping vegetables an..."
1,John entered the cafe with a phone tucked into...
2,"She stood in line, chatting with the person ah..."
3,The teenager sat on the bench with a phone in ...
4,"The researcher conducted experiments, carefull..."
...,...
74,They went to the sports stadium with a phone a...
75,The explorer saw the world with a sense of adv...
76,She went to the planetarium with a phone and s...
77,She saw the silver lining in every cloud with ...


In [6]:
df.columns = ['Sentence']
df.head()

Unnamed: 0,Sentence
0,"The chef cooked dinner, chopping vegetables an..."
1,John entered the cafe with a phone tucked into...
2,"She stood in line, chatting with the person ah..."
3,The teenager sat on the bench with a phone in ...
4,"The researcher conducted experiments, carefull..."


In [10]:
def preprocess_text(text):
    tokens = word_tokenize(text.lower())
    tokens = [token for token in tokens if token.isalpha()]
    return tokens

df['tokens'] = df['Sentence'].apply(preprocess_text)
df.head()

Unnamed: 0,Sentence,tokens
0,"The chef cooked dinner, chopping vegetables an...","[the, chef, cooked, dinner, chopping, vegetabl..."
1,John entered the cafe with a phone tucked into...,"[john, entered, the, cafe, with, a, phone, tuc..."
2,"She stood in line, chatting with the person ah...","[she, stood, in, line, chatting, with, the, pe..."
3,The teenager sat on the bench with a phone in ...,"[the, teenager, sat, on, the, bench, with, a, ..."
4,"The researcher conducted experiments, carefull...","[the, researcher, conducted, experiments, care..."


In [17]:
def bi_gram(tokens):
    return list(ngrams(tokens,2))
df['bigrams'] = df['tokens'].apply(bi_gram)
df.head()

Unnamed: 0,Sentence,tokens,bigrams
0,"The chef cooked dinner, chopping vegetables an...","[the, chef, cooked, dinner, chopping, vegetabl...","[(the, chef), (chef, cooked), (cooked, dinner)..."
1,John entered the cafe with a phone tucked into...,"[john, entered, the, cafe, with, a, phone, tuc...","[(john, entered), (entered, the), (the, cafe),..."
2,"She stood in line, chatting with the person ah...","[she, stood, in, line, chatting, with, the, pe...","[(she, stood), (stood, in), (in, line), (line,..."
3,The teenager sat on the bench with a phone in ...,"[the, teenager, sat, on, the, bench, with, a, ...","[(the, teenager), (teenager, sat), (sat, on), ..."
4,"The researcher conducted experiments, carefull...","[the, researcher, conducted, experiments, care...","[(the, researcher), (researcher, conducted), (..."


In [25]:
all_tokens = []
for idx,row in df.iterrows():
    all_tokens.extend(row['tokens'])
all_tokens
token_count = Counter(all_tokens)
token_count

Counter({'the': 126,
         'chef': 2,
         'cooked': 2,
         'dinner': 2,
         'chopping': 1,
         'vegetables': 1,
         'and': 43,
         'occasionally': 11,
         'referring': 2,
         'to': 27,
         'recipes': 2,
         'on': 23,
         'a': 80,
         'phone': 59,
         'propped': 1,
         'up': 2,
         'counter': 2,
         'john': 4,
         'entered': 6,
         'cafe': 2,
         'with': 65,
         'tucked': 3,
         'into': 4,
         'his': 7,
         'pocket': 6,
         'she': 17,
         'stood': 2,
         'in': 27,
         'line': 2,
         'chatting': 1,
         'person': 3,
         'ahead': 3,
         'of': 15,
         'her': 13,
         'checking': 2,
         'time': 1,
         'hand': 5,
         'teenager': 2,
         'sat': 6,
         'bench': 2,
         'lost': 1,
         'thought': 1,
         'researcher': 2,
         'conducted': 2,
         'experiments': 2,
         'carefully': 1,

In [28]:
all_bigram = []
for idx,row in df.iterrows():
    all_bigram.extend(row['bigrams'])
bigram_count = Counter(all_bigram)
bigram_count

Counter({('the', 'chef'): 2,
         ('chef', 'cooked'): 2,
         ('cooked', 'dinner'): 2,
         ('dinner', 'chopping'): 1,
         ('chopping', 'vegetables'): 1,
         ('vegetables', 'and'): 1,
         ('and', 'occasionally'): 11,
         ('occasionally', 'referring'): 2,
         ('referring', 'to'): 2,
         ('to', 'recipes'): 1,
         ('recipes', 'on'): 1,
         ('on', 'a'): 8,
         ('a', 'phone'): 59,
         ('phone', 'propped'): 1,
         ('propped', 'up'): 1,
         ('up', 'on'): 2,
         ('on', 'the'): 11,
         ('the', 'counter'): 2,
         ('john', 'entered'): 2,
         ('entered', 'the'): 6,
         ('the', 'cafe'): 2,
         ('cafe', 'with'): 1,
         ('with', 'a'): 51,
         ('phone', 'tucked'): 3,
         ('tucked', 'into'): 3,
         ('into', 'his'): 1,
         ('his', 'pocket'): 2,
         ('she', 'stood'): 2,
         ('stood', 'in'): 2,
         ('in', 'line'): 2,
         ('line', 'chatting'): 1,
         ('chat

In [33]:
import math
prep = input("Enter the Preposition : ")
noun = input("Enter the Noun : ")
verb = input("Enter the Verb :")
c_n = token_count.get(noun,0)+1
c_v = token_count.get(verb,0)+1
c_pn = bigram_count.get((prep,noun),0)+1
c_pv = bigram_count.get((prep,noun),0)+1
p_pn = c_pn/c_n
p_pv = c_pv/c_v
p_npn = 1 - p_pn
lamda = math.log((p_pv * p_npn/p_pn),2)
if lamda > 0 :
    print("Verb")
else :
    print("Noun")

Enter the Preposition :  in
Enter the Noun :  cloud
Enter the Verb : saw


Noun
