In [1]:
import nltk
import string
import pandas as pd
import pickle
from random import shuffle

from nltk import FreqDist, NaiveBayesClassifier
from nltk.corpus import wordnet, stopwords
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.classify import accuracy


In [2]:
nltk.download("all")

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_eng is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     C:\Users\norbe\AppData\Roaming\nltk_data...
[

True

In [3]:
stemmer = PorterStemmer()
wnl = WordNetLemmatizer()
engStopWords = stopwords.words("english")

In [12]:
def preprocess(doc):
    words = word_tokenize(doc.lower())

    words = [wnl.lemmatize(word) for word in words]
    words = [stemmer.stem(word) for word in words]

    return {word: True for word in words if word not in engStopWords and word.isalpha() and word not in string.punctuation}

def trainModel():
    dataset = pd.read_csv("dataset.csv")

    feature_sets = [(preprocess(text),label) for text,label in zip(dataset["text"], dataset["label"])]

    shuffle(feature_sets)

    split_idx = int(len(feature_sets) * 0.8)
    train_set, test_set = feature_sets[:split_idx], feature_sets[split_idx:]

    classifier = NaiveBayesClassifier.train(train_set)
    acc = accuracy(classifier, test_set)
    print("Accuracy: ", acc)
    
    classifier.show_most_informative_features(5)

    file = open("model.pickle", "wb")
    pickle.dump(classifier,file)
    file.close()

    return classifier

def readModel():
    try:
        file = open("model.pickle", "rb")
        print("Model is Available!")
        print("Loading the model...")
        classifier = pickle.load(file)
        file.close()

        print("Model Loaded Successfully!")
        classifier.show_most_informative_features(5)

    except:
        print("Model Not Available!")
        print("Preparing for model training!")
        classifier = trainModel()

    return classifier

def writeReview():
    while True:
        review = input("Input your review [>= 2 words]: ")
        words = review.split()

        if len(words) >= 2:
            print("Review Added!")
            return review
        else:
            print("Review must consist of atleast 2 words!")

def analyzeReview(review, classifier):
    if len(review) == 0:
        print("Review is Empty!")
        return
    
    words = word_tokenize(review.lower())

    words = FreqDist([word for word in words if word.isalpha() and word not in string.punctuation])

    tagged = pos_tag(words)
    print("Review Part of Speech Tag:")

    for i, word in enumerate(tagged):
        print(f"{i+1}. {word[0]}, {word[1]}")

    for word in words:
        print("==================")
        print(f"{word}")
        print("==================")

        synsets = wordnet.synsets(word)
        synonyms = []
        antonyms = []

        for synset in synsets:
            for lemma in synset.lemmas():
                synonyms.append(lemma.name())
                for antonym in lemma.antonyms():
                    antonyms.append(antonym.name())

        print("Synonyms:")
        if len(synonyms) == 0:
            print("No Synonyms")
        else:
            for syn in synonyms[:5]:
                print(f"(+){syn}")
        
        print("Antonyms:")
        if len(antonyms) == 0:
            print("No Antonyms!")
        else:
            for ant in antonyms[:5]:
                print(f"(-){ant}")

    clean_review = [word for word in word_tokenize(review) if word not in string.punctuation and word not in engStopWords]

    clean_review = [stemmer.stem(wnl.lemmatize(word)) for word in clean_review]

    result = classifier.classify(FreqDist(clean_review))

    print(f"Your Review: {review}")
    print(f"Review Category: {result}")






In [13]:
if __name__ == "__main__":
    classifier = readModel()

    review = ""

    while True:
        print("Food Review Sentiment Analysis")
        print("Your Review:", "Review" if len(review) == 0 else review)
        print("1. Write review")
        print("2. Analyze review")
        print("3. Exit")
        print(">>")
        choice = int(input(">> "))
        if choice == 1:
            review = writeReview()
        elif choice == 2:
            analyzeReview(review,classifier)
        elif choice == 3:
            print("Thanks for using!")
            break
        else:
            print("dont be dumb ty")

Model Not Available!
Preparing for model training!
Accuracy:  0.6363636363636364
Most Informative Features
                 perfect = True           positi : negati =     14.4 : 1.0
                    lack = True           negati : positi =      9.8 : 1.0
                    sign = True           negati : positi =      9.2 : 1.0
                 fantast = True           positi : negati =      8.8 : 1.0
                    rude = True           negati : positi =      8.5 : 1.0
Food Review Sentiment Analysis
Your Review: Review
1. Write review
2. Analyze review
3. Exit
>>
Review Added!
Food Review Sentiment Analysis
Your Review: i dont know, this is kind of sus
1. Write review
2. Analyze review
3. Exit
>>
Review Part of Speech Tag:
1. i, JJ
2. dont, NN
3. know, VBP
4. this, DT
5. is, VBZ
6. kind, NN
7. of, IN
8. sus, NN
i
Synonyms:
(+)iodine
(+)iodin
(+)I
(+)atomic_number_53
(+)one
Antonyms:
No Antonyms!
dont
Synonyms:
No Synonyms
Antonyms:
No Antonyms!
know
Synonyms:
(+)know
(+)know
(+

ValueError: invalid literal for int() with base 10: ''