In [1]:
# Import all the libraries needed
import nltk # Natural Language Processing Library
import pickle # For The Model
import string # For String Operations
import pandas as pd # For DataFrame

# Functions from the libraries
from nltk import FreqDist
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords, wordnet
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tag import pos_tag
from random import shuffle

In [2]:
# Download
nltk.download("stopwords")
nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")
nltk.download("wordnet")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
# Setting variables
stemmer = PorterStemmer()
wnl = WordNetLemmatizer()
eng_stopwords = stopwords.words("english")

In [4]:
def preprocessing(document):
	# Change into lowercase and tokenize
	words = word_tokenize(document.lower())
	
	# Lemmatizing and Stemming
	words = [wnl.lemmatize(word) for word in words]
	words = [stemmer.stem(word) for word in words]

	# Check if words are not in stop_words and only consists of alphabetic
	return {word: True for word in words if word not in eng_stopwords and word.isalpha()}

def trainModel():
	dataset = pd.read_csv("dataset.csv")

	features_sets = [(preprocessing(text), label) for text, label in zip(dataset["text"], dataset["label"])]

	shuffle(features_sets)

	split_index = int(len(features_sets) * .85)
	train_set, test_set = features_sets[:split_index], features_sets[split_index:]

	# Training the model (Naive Bayes)
	classifier = nltk.NaiveBayesClassifier.train(train_set)

	# Testing accuracy
	accuracy = nltk.classify.accuracy(classifier, test_set)
	print("Accuracy: ", accuracy)

	# Print 5 most informative features
	classifier.show_most_informative_features(5)

	# Save the trained model using pickle
	file = open("model.pickle", "wb")
	pickle.dump(classifier, file)
	file.close()

	return classifier

def readModel():
    # Check the model is available or not
    
    # If the model is available
	try:
		file = open("model.pickle", "rb") # Read Binary
		print("Model is available!")
		# Read Model
		print("Loading the model...")
		classifier = pickle.load(file)
		file.close()

		print("Model load succesfully!")
		classifier.show_most_informative_features(5)
  
	# Else (model unvailable)
	except:
		print("Model is not available!")
		print("Preparing for model training!")
		classifier = trainModel()
  
	return classifier

def writeReview():
    while True:
        review = input("Input your review [>= 2 words]: ")
        
        words = review.split()
        
        if len(words) > 1:
            print("Review added!")
            return review
        else:
            print("Your review must consisst of at least 2 words!")
     
def analyzeReview(review, classifier):
    if len(review) == 0:
        print("Review is empty!")
        return
    
    # Tokenizing
    words = word_tokenize(review.lower())
    
    # Frequency Distribution
    words = FreqDist([word for word in words if word.isalpha() and word not in string.punctuation])
    
    # Tagging
    tagged = pos_tag(words)
    
    print("Review Part of Speech Tag: ")
    
    for i, word in enumerate(tagged):
        print(f"{i+1}. {word[0]}, {word[1]}")
    
    # Synonym and Antonym
    for word in words:
        print("==============")
        print(f"Word: {word}")
        print("==============")
        
        # Synsets
        synsets = wordnet.synsets(word)
        synonyms = []
        antonyms = []
        
        for synset in synsets:
            for lemma in synset.lemmas():
                 synonyms.append(lemma.name())
                 for antonym in lemma.antonyms():
                     antonyms.append(antonym.name())
        
        print("Synonyms: ")
        
        if len(synonyms) == 0:
            print("No synonym detected!")
        else:
            for syn in synonyms[:5]:
                print(f"(+){syn}")
                
        print("Antonyms: ")
        
        if len(antonyms) == 0:
            print("No antonym detected!")
        else:
            for antonym in antonyms[:5]:
                print(f"(-){antonym}")
                
        print("===========================")    

	# Predict the review
    
    # Preprocessing to remove punctuation and eng_stopwords and tokenize it
    clean_review = [word for word in word_tokenize(review) if word not in string.punctuation and word not in eng_stopwords]
    
    # clean_review = [wnl.lemmatize(review) for review in clean_review]
    # clean_review = [stemmer.stem(review) for review in clean_review]
    
    # Preprocessing to lemmatize and stemming the words
    clean_review = [wnl.lemmatize(stemmer.stem(word)) for word in clean_review] 	
    
    result = classifier.classify(FreqDist(clean_review))
    
    print(f"Your Review: {review}")
    print(f"Review Category: {result}")
	
        

In [6]:
if __name__ == "__main__":
	# Read Model
	classifier = readModel()
	
	# Review
	review = ""
 
	while True:
		print("Food Review Sentiment Analysis")
		print("Your Review: ", "No Review" if len(review) == 0 else review)
		print("1. Write your review")
		print("2. Analyze your review")
		print("3. Exit")
		print(">> ")
		choice = int(input(">> "))
		if (choice == 1):
			review = writeReview()
		elif (choice == 2):
			analyzeReview(review, classifier)
		elif (choice == 3):
			print("Thanks for using this application!")
			break
		else:
			print("Please only choose the available menu [1-3]!")

Model is available!
Loading the model...
Model load succesfully!
Most Informative Features
                 terribl = True           negati : positi =     12.6 : 1.0
                 horribl = True           negati : positi =     10.0 : 1.0
                  beauti = True           positi : negati =      9.3 : 1.0
                 perfect = True           positi : negati =      8.9 : 1.0
                      ok = True           negati : positi =      8.1 : 1.0
Food Review Sentiment Analysis
Your Review:  No Review
1. Write your review
2. Analyze your review
3. Exit
>> 
Review added!
Food Review Sentiment Analysis
Your Review:  This is just so bad lmao
1. Write your review
2. Analyze your review
3. Exit
>> 
Review Part of Speech Tag: 
1. this, DT
2. is, VBZ
3. just, RB
4. so, RB
5. bad, JJ
6. lmao, NN
Word: this
Synonyms: 
No synonym detected!
Antonyms: 
No antonym detected!
Word: is
Synonyms: 
(+)be
(+)be
(+)be
(+)exist
(+)be
Antonyms: 
(-)differ
Word: just
Synonyms: 
(+)just
(+)equit