In [2]:
#let's import all the libraries i.e nltk specially which is used for NLP
import nltk
import codecs
from googletrans import Translator
from nltk.tokenize import PunktSentenceTokenizer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.corpus import wordnet

In [3]:
#in this function of simple filter we'll pass the sentences to filter them by removing the unesscary words i.e 
#'the' 'is' etc.
def simpleFilter(sentence):
    #this list will store only useful words.
    filtered_sent = []   
    #lemmatizer outputs the correct word according to dictionary for eg: cacti would be cactus, rocks would be rock
    lemmatizer = WordNetLemmatizer()
    #the stop words contains the list of all the stop words present in "english" or any mentioned language.
    #stopwords like i,me,the,they,would,wouln't etc
    #stop words also has additional keywords pos for adjectives,verbs but noun is defualt.
    stop_words = set(stopwords.words("english"))
    #word tokenizer tokenizes(breaks downs) the sentence in individual words.
    words = word_tokenize(sentence)
    for w in words:
        if w not in stop_words:
            filtered_sent.append(lemmatizer.lemmatize(w))

    return filtered_sent

In [4]:
#similaritycheck function checks the similarity between the tokens(useful words) returned from the first function
#and the training dataset i.e both .txt files 
#for this synonyms is checked for each word and the word in the .txt file and output is returned on a scale of 0-1
#this is main function for ambiguity check and then normalized similarity between sentences is stored.
def simlilarityCheck(word1, word2):

    word1 = word1 + ".n.01"
    word2 = word2 + ".n.01"
    try:
        w1 = wordnet.synset(word1)
        w2 = wordnet.synset(word2)

        return w1.wup_similarity(w2)

    except:
        return 0

In [5]:
#this function is used to create synonyms from the word passed as a argument.
def synonymsCreator(word):
    synonyms=[]
    #wordnet.synsets creates all possible versions of the word passed in order to create synonyms.
    for syn in wordnet.synsets(word):
        for i in syn.lemmas():
            #syn.lemmas() creates the object inorder to create the relationship between words created by 
            #wordnet.synsets 
            synonyms.append(i.name())

    return synonyms

In [6]:
#we apply the next level filter, function: filteredSentence , to apply lemmatization over stemmed tokens and again removing stop words. 
#In the filtered sentence list, we now store the token word along with its synonyms for more precised matching / similarity check. 
def filteredSentence(sentence):

    filtered_sent = []
    lemmatizer = WordNetLemmatizer()  # lemmatizes the words
    ps = PorterStemmer()  # stemmer stems the root of the word.

    stop_words = set(stopwords.words("english"))
    words = word_tokenize(sentence)

    for w in words:
        if w not in stop_words:
            filtered_sent.append(lemmatizer.lemmatize(ps.stem(w)))
            for i in synonymsCreator(w):
                filtered_sent.append(i)
    return filtered_sent


In [7]:
if __name__ == '__main__':
     
    #this is the first file to which contains info about negative sentiments
    negative_sentiments = codecs.open("/home/dharmendra/Desktop/desk/captsone/senti_outputs/neg_senti.txt", 'r', "utf-8") 
    sent2 = negative_sentiments.read().lower()   
    #this is the second file which contains info about positive sentiments
    positive_sentiments = codecs.open("/home/dharmendra/Desktop/desk/captsone/senti_outputs/pos_senti.txt", 'r', 'utf-8')
    sent1 = positive_sentiments.read().lower()
    sent3 = "start"
    
    while(sent3 != "end"):
        #let's take input of the new sentence over here and it'll be automatically converted into lowercase.
        sent3 = input("Enter Query: ").lower()
        #here the object from google cloud api which transaltes the any language into english
        translator = Translator()
        sent3=translator.translate(sent3).text
        
        filtered_sent1 = []
        filtered_sent2 = []
        filtered_sent3 = []

        counter1 = 0
        counter2 = 0
        sent31_similarity = 0
        sent32_similarity = 0

        #these all three functions will store the filtered sentence from function above.
        #sentence 1 contains txt about cricket bat
        #sentence 2 contains txt about vampire bat
        #sentence 3 is our input query.
        filtered_sent1 = simpleFilter(sent1)
        filtered_sent2 = simpleFilter(sent2)
        filtered_sent3 = simpleFilter(sent3)

        #in the following function we'll pass each and every word of the both the sentences with the
        #each and every word of the input query.
        for i in filtered_sent3:

            for j in filtered_sent1:
                counter1 = counter1 + 1
                sent31_similarity = sent31_similarity + simlilarityCheck(i, j)

            for j in filtered_sent2:
                counter2 = counter2 + 1
                sent32_similarity = sent32_similarity + simlilarityCheck(i, j)

        filtered_sent1 = []
        filtered_sent2 = []
        filtered_sent3 = []

        filtered_sent1 = filteredSentence(sent1)
        filtered_sent2 = filteredSentence(sent2)
        filtered_sent3 = filteredSentence(sent3)

        sent1_count = 0
        sent2_count = 0

        for i in filtered_sent3:

            for j in filtered_sent1:

                if(i == j):
                    sent1_count = sent1_count + 1

            for j in filtered_sent2:
                if(i == j):
                    sent2_count = sent2_count + 1

        if((sent1_count + sent31_similarity) > (sent2_count+sent32_similarity)):
            print("Positive Sentiment")
            #now let's append it to the respective files.
            with open("/home/dharmendra/Desktop/desk/captsone/senti_outputs/pos_senti.txt", "a") as myfile:
                myfile.write(sent3)
        else:
            print("Negative Sentiment")
            with open("/home/dharmendra/Desktop/desk/captsone/senti_outputs/neg_senti.txt", "a") as myfile:
                myfile.write(sent3)

        print("\nTERMINATED")

Enter Query: this shoe is awesome
Positive Sentiment

TERMINATED
Enter Query: end
Positive Sentiment

TERMINATED
