In [25]:
import numpy as np
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

In [26]:
from sklearn.metrics.pairwise import cosine_similarity as cs

In [55]:
class SimplifiedLesk: 

    def __init__(self):
        self.stopwords = set(stopwords.words('english'))

    def disambiguate(self, word, sentence):       
        word_senses = wordnet.synsets(word)
        best_sense = word_senses[0]  # Assume that first sense is most freq.
        max_overlap = 0
        context = set(word_tokenize(sentence))
        for sense in word_senses:
            signature = self.tokenized_gloss(sense)
            overlap = self.compute_overlap(signature, context)            
            if overlap > max_overlap:
                max_overlap = overlap
                best_sense = sense
        return best_sense 
    
    def find(self, sentence):
        context = set(word_tokenize(sentence))
        context = context.difference(self.stopwords)
        z=[]
        for i in context:            
            if len(wordnet.synsets(i))>2:
                z.append(i)
        return z
    
    def tokenized_gloss(self, sense):        
        tokens = set(word_tokenize(sense.definition()))
        for example in sense.examples():
            tokens.union(set(word_tokenize(example)))
        return tokens

    def compute_overlap(self, signature, context):       
        sig = signature.difference(self.stopwords)
        return len(sig.intersection(context))




In [56]:
s1=("Crickets, are insects somewhat related to grasshoppers and more closely related to katydids or bush crickets.")

In [57]:
model=SimplifiedLesk()
z=model.find(s1)
z

['related', 'Crickets', 'bush', 'crickets', 'closely']

In [58]:
import tkinter as tk

def get_Words():
        sentence = utext.get('1.0', "end").strip()
        
        model=SimplifiedLesk()
        z=model.find(sentence)       
        
        AW.config(state='normal') 
        AW.delete('1.0','end')
        AW.insert('1.0', z)
        AW.config(state='disabled')

def get_sense():
        sentence = utext.get('1.0', "end").strip()       
        word = Word.get('1.0', "end").strip()  
        
        model=SimplifiedLesk()
        sense=model.disambiguate(word,sentence)        
        
        summary.config(state='normal') 
        summary.delete('1.0','end')
        summary.insert('1.0', sense.definition())        

        summary.config(state='disabled')
        

root = tk.Tk()
root.title('Word Sense Predictor')
root.geometry('1200x600')



slabel = tk.Label(root, text='Sense')
slabel.pack()
summary = tk.Text(root, height=18, width=130)
summary.config(state='disabled', bg='#dddddd')
summary.pack()

ulabel = tk.Label(root, text='sentence')
ulabel.pack()
utext = tk.Text(root, height=1, width=130)
utext.pack()


btn1 = tk.Button(root, text='Get_Words', command=get_Words)
btn1.pack()

wlabel = tk.Label(root, text='Ambiguous words')
wlabel.pack()
AW= tk.Text(root, height=5, width=65)
AW.config(state='disabled', bg='#dddddd')
AW.pack()



alabel = tk.Label(root, text='Word To be disambiguated')
alabel.pack()
Word = tk.Text(root, height=1.2, width=20)
Word.pack()


btn2 = tk.Button(root, text='Get_Sense', command=get_sense)
btn2.pack()

root.mainloop()