# Import Libraries

In [1]:
import nltk
import pickle
import string
import pandas as pd

from nltk import FreqDist
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tag import pos_tag
from random import shuffle

# Download NLTK Data Packages (run if a resource lookup error occurs)

In [2]:
# Fetch the NLTK data packages used by this notebook. Packages that are
# already installed are reported as up-to-date instead of being fetched again.
nltk.download("stopwords")  # stopword lists read through stopwords.words(...)
nltk.download("punkt")  # tokenizer models — presumably what word_tokenize needs
nltk.download("wordnet")  # WordNet data for the lemmatizer and wordnet.synsets(...)
nltk.download("averaged_perceptron_tagger")  # tagger model — presumably what pos_tag needs

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

# Settings Variables

In [None]:
# Shared NLP helpers, created once and reused by the functions defined below.
stemmer = PorterStemmer()  # used through stemmer.stem(word)
wnl = WordNetLemmatizer()  # used through wnl.lemmatize(word)
# English stopword list; in this notebook it is only consulted with `in` tests.
# NOTE(review): this is a list, so each membership test is a linear scan —
# consider a set if nothing else depends on list semantics.
eng_stopwords = stopwords.words("english")

# Preprocessing

In [9]:
def preprocessing(document):
    """Turn a raw text into the bag-of-words feature dict used by the classifier.

    Steps: lowercase and tokenize, drop non-alphabetic tokens and English
    stopwords, lemmatize, then stem.

    Args:
        document: The text to convert (a string).

    Returns:
        A dict mapping each remaining normalized word to ``True``.
    """
    tokens = word_tokenize(document.lower())

    # Filter on the raw tokens. Stopword lists contain surface forms, and
    # normalization mangles many of them (Porter stemming turns "this" into
    # "thi", the noun lemmatizer turns "was" into "wa"), so filtering only
    # after normalization lets those stopwords leak in as features.
    content_tokens = [
        token for token in tokens
        if token.isalpha() and token not in eng_stopwords
    ]

    normalized = (stemmer.stem(wnl.lemmatize(token)) for token in content_tokens)

    # Keep the original guarantee that no feature key is itself a stopword
    # (a content word may normalize into one).
    return {word: True for word in normalized if word not in eng_stopwords}

# Train Model

In [None]:
def train_model(dataset_path="./Dataset/Tweets.csv", model_path="./model.pickle",
                train_ratio=0.85, seed=42):
    """Train a Naive Bayes sentiment classifier and cache it on disk.

    Reads the tweets CSV, converts every ``text`` into features with
    ``preprocessing`` paired with its ``airline_sentiment`` label, trains on
    the first ``train_ratio`` share of the shuffled data, prints the accuracy
    on the remainder together with the five most informative features, and
    pickles the classifier.

    Args:
        dataset_path: CSV file with ``text`` and ``airline_sentiment`` columns.
        model_path: Where the trained classifier is pickled.
        train_ratio: Fraction of the samples used for training.
        seed: Seed for the shuffle; ``None`` gives a different order each run.

    Returns:
        The trained ``nltk.NaiveBayesClassifier``.
    """
    import random  # local import so this cell does not depend on an edited import cell

    dataset = pd.read_csv(dataset_path)

    feature_sets = [
        (preprocessing(text), label)
        for text, label in zip(dataset["text"], dataset["airline_sentiment"])
    ]

    # Shuffle before splitting: otherwise the held-out tail is whatever happens
    # to sit at the end of the CSV, and the reported accuracy depends on the
    # file's row order. A fixed seed keeps the split reproducible.
    random.Random(seed).shuffle(feature_sets)

    split_index = int(len(feature_sets) * train_ratio)
    train_set, test_set = feature_sets[:split_index], feature_sets[split_index:]

    classifier = nltk.NaiveBayesClassifier.train(train_set)
    accuracy = nltk.classify.accuracy(classifier, test_set)
    print(f"Accuracy : {accuracy}")

    classifier.show_most_informative_features(5)

    # `with` closes the file even if pickling raises.
    with open(model_path, "wb") as file:
        pickle.dump(classifier, file)

    return classifier


def read_model():
    """Load the cached classifier from ``model.pickle``, training one if needed.

    If the file cannot be opened or unpickled, falls back to ``train_model()``.
    On a successful load the five most informative features are printed.

    Returns:
        The loaded (or freshly trained) classifier.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only load a
    # model.pickle that this notebook (or another trusted source) produced.
    try:
        # `with` closes the handle even when unpickling fails.
        with open("model.pickle", "rb") as file:
            print("Model is available")
            print("Loading the model...")
            classifier = pickle.load(file)
    except (OSError, pickle.UnpicklingError, EOFError,
            AttributeError, ImportError, IndexError):
        # Missing/unreadable file or a corrupt/incompatible pickle. Only these
        # load failures trigger retraining; Ctrl-C and unrelated bugs are no
        # longer swallowed as they were by the former bare `except:`.
        print("Model not available")
        print("Prepare for training")
        return train_model()

    print("Model load successfully")
    classifier.show_most_informative_features(5)
    return classifier

# Functions

In [6]:
def write_review():
    """Prompt until the user enters a review of at least two words.

    Words are counted by splitting the input on whitespace. Every rejected
    entry prints a hint; an accepted one prints a confirmation.

    Returns:
        The accepted review text, exactly as typed.
    """
    prompt = "Input your review [>= 2 words]"

    answer = input(prompt)
    while len(answer.split()) < 2:
        print ("Input must be more than 1 word")
        answer = input(prompt)

    print ("Review Added")
    return answer

def _collect_synonyms_antonyms(word):
    """Return ``(synonyms, antonyms)`` for ``word`` gathered from its WordNet synsets.

    Names are de-duplicated in first-seen order, so taking the first few
    entries does not show the same lemma repeated across synsets.
    """
    synonyms = {}
    antonyms = {}
    for synset in wordnet.synsets(word):
        for lemma in synset.lemmas():
            synonyms.setdefault(lemma.name())
            for antonym in lemma.antonyms():
                antonyms.setdefault(antonym.name())
    return list(synonyms), list(antonyms)


def analyze_review(review, classifier):
    """Print a linguistic breakdown of ``review`` and its predicted sentiment.

    For the distinct alphabetic words of the review this prints their
    part-of-speech tags and up to five WordNet synonyms and antonyms each,
    then classifies the whole review.

    Args:
        review: The review text. Empty or whitespace-only text is reported
            as empty and nothing else is done.
        classifier: A trained classifier exposing ``classify(featureset)``.

    Returns:
        None. All results are printed.
    """
    if not review or not review.strip():
        print ("Review is empty")
        return

    # Tokenizing
    tokens = word_tokenize(review.lower())

    # Frequency Distribution: distinct alphabetic words in first-seen order.
    # (isalpha() already excludes punctuation tokens.)
    words = FreqDist([token for token in tokens if token.isalpha()])

    tagged = pos_tag(list(words))

    # POS Tagging
    print ("Review Part of Pos tagging:")

    for position, (token, tag) in enumerate(tagged, start=1):
        # The old format string built a tuple containing a set and printed
        # e.g. "('i', {'NN'})"; print the word and its tag plainly instead.
        print (f"{position}. {token} ({tag})")

    # Synonym and Antonym
    for word in words:

        print ("======================")
        print (f"= Words : {word}")
        print ("======================")

        synonyms, antonyms = _collect_synonyms_antonyms(word)

        print ("Synonyms")
        if not synonyms:
            print ("No synonyms detected")
        else:
            for syn in synonyms[:5]:
                print (f"(+) : {syn}")

        print ("Antonyms")
        if not antonyms:
            print ("No antonyms detected")
        else:
            for ant in antonyms[:5]:
                print (f"(-) : {ant}")

        print("===========================")

    # Predict the review
    # Build the features with the same preprocessing() used for training.
    # The previous ad-hoc pipeline skipped lowercasing before the stopword
    # check, stemmed before lemmatizing, and passed word counts instead of
    # True flags, so features could fail to match what the model learned.
    result = classifier.classify(preprocessing(review))

    print (f"Your review: {review}")
    print (f"Your review category: {result}")

# Main Menu

In [7]:
def mainMenu():
    """Run the interactive console menu until the user picks "3" (Exit).

    The classifier is obtained once via ``read_model()``. Option 1 stores a
    new review from ``write_review()``; option 2 passes the stored review and
    the classifier to ``analyze_review``; any other input prints a hint.
    """
    model = read_model()
    current_review = ""

    static_menu_lines = ("1. Input Review", "2. Analyze Your Review", "3. Exit", ">> ")

    while True:
        print ("Tweet Sentiment Analysis")
        print ("Your review: ", current_review if current_review else "No Review")
        for line in static_menu_lines:
            print (line)

        selection = input("Please input your menu choice")

        if selection == '3':
            print ("Thank you :)")
            return

        if selection == '1':
            current_review = write_review()
        elif selection == '2':
            analyze_review(current_review, model)
        else:
            print ("Input Invalid! Please choose menu choice between 1-3")


In [8]:
# Start the interactive menu; it keeps prompting via input() until "3" is chosen.
mainMenu()

Model is available
Loading the model...
Model load successfully
Most Informative Features
                outstand = True           positi : negati =     29.7 : 1.0
                passbook = True           positi : negati =     29.7 : 1.0
                 fantast = True           positi : negati =     28.7 : 1.0
                  beauti = True           positi : negati =     27.2 : 1.0
                 favorit = True           positi : negati =     27.2 : 1.0
Tweet Sentiment Analysis
Your review:  No Review
1. Input Review
2. Analyze Your Review
3. Exit
>> 
Review Added
Tweet Sentiment Analysis
Your review:  I not love you
1. Input Review
2. Analyze Your Review
3. Exit
>> 
Review Part of Pos tagging:
1. ('i', {'NN'})
2. ('not', {'RB'})
3. ('love', {'VB'})
4. ('you', {'PRP'})
= Words : i
Synonyms
(+) : iodine
(+) : iodin
(+) : I
(+) : atomic_number_53
(+) : one
Antonyms
No antonyms detected
= Words : not
Synonyms
(+) : not
(+) : non
Antonyms
No antonyms detected
= Words : love
Synonyms