In [None]:
!wget https://github.com/JawadR7/InfoRetrievalSentimentAnalysis/raw/main/models_and_vectorizer.zip

--2024-04-24 17:25:06--  https://github.com/JawadR7/InfoRetrievalSentimentAnalysis/raw/main/models_and_vectorizer.zip
Resolving github.com (github.com)... 140.82.112.3
Connecting to github.com (github.com)|140.82.112.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/JawadR7/InfoRetrievalSentimentAnalysis/main/models_and_vectorizer.zip [following]
--2024-04-24 17:25:06--  https://raw.githubusercontent.com/JawadR7/InfoRetrievalSentimentAnalysis/main/models_and_vectorizer.zip
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 11914999 (11M) [application/zip]
Saving to: ‘models_and_vectorizer.zip’


2024-04-24 17:25:06 (173 MB/s) - ‘models_and_vectorizer.zip’ saved [11914999/11914999]



In [None]:
!unzip models_and_vectorizer.zip

Archive:  models_and_vectorizer.zip
  inflating: CNB_model.sav           
  inflating: regression_model.sav    
  inflating: rfc_model.sav           
  inflating: svm_model.sav           
  inflating: tfidf_vectorizer.sav    


In [None]:
from sklearn.naive_bayes import ComplementNB
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer

import nltk

# Fetch the NLTK resources used below: WordNet for WordNetLemmatizer, the Punkt
# models for word_tokenize, and the lexicon for SentimentIntensityAnalyzer.
nltk.download('wordnet')
nltk.download('punkt')
# Newer NLTK releases (3.9 and later) load the Punkt tokenizer from 'punkt_tab'
# rather than the pickled 'punkt' models; fetching both keeps word_tokenize
# working on old and new installations.
nltk.download('punkt_tab')
nltk.download('vader_lexicon')
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer

import pickle

class LemmaTokenizer:
  """Callable that tokenizes a document and lemmatizes every kept token.

  Calling an instance splits the text with ``word_tokenize``, drops any token
  listed in ``ignore_tokens`` and returns the WordNet lemma of each remaining
  token, in order.
  """

  # Punctuation / quote tokens that are discarded before lemmatization.
  ignore_tokens = [',', '.', ';', ':', '"', '``', "''", '`']

  def __init__(self):
    # One lemmatizer per tokenizer instance, reused for every call.
    self.wnl = WordNetLemmatizer()

  def __call__(self, doc):
    """Return the list of lemmas for ``doc``, skipping ignored tokens."""
    lemmas = []
    for token in word_tokenize(doc):
      if token in self.ignore_tokens:
        continue
      lemmas.append(self.wnl.lemmatize(token))
    return lemmas

# Module-level LemmaTokenizer instance. Nothing in the visible cells references
# this name; presumably it mirrors the setup used when the vectorizer was
# built -- TODO confirm whether it is still needed here.
tokenizer = LemmaTokenizer()

def predict(x):
  """Map a numeric sentiment score to a label.

  Returns 'pos' when ``x`` is above 0.05, 'neg' when it is below -0.05 and
  'neu' for everything in between (both boundary values included).

  NOTE(review): the VADER README cites inclusive cutoffs (>= 0.05 / <= -0.05);
  the strict comparisons here label a score of exactly +/-0.05 as 'neu' --
  confirm this is intended.
  """
  label = 'neu'
  if x > 0.05:
    label = 'pos'
  elif x < -0.05:
    label = 'neg'
  return label

# VADER analyzer; the demo cell feeds its 'compound' score to predict().
sid = SentimentIntensityAnalyzer()

def _load_pickle(path):
  """Unpickle the object stored at ``path``, closing the file afterwards."""
  # The context manager releases the file handle even if unpickling fails;
  # a bare pickle.load(open(...)) leaves it open until garbage collection.
  with open(path, 'rb') as fh:
    return pickle.load(fh)

# Open models and TFIDF vectorizer from pickled .sav files
# NOTE(security): pickle.load can execute arbitrary code while loading. These
# files come from the archive downloaded earlier in the notebook, so only run
# this against a source you trust.
# NOTE(review): presumably the vectorizer was pickled with a LemmaTokenizer as
# its tokenizer, in which case that class must be defined before this load --
# confirm.
vectorizer = _load_pickle('tfidf_vectorizer.sav')
CNB_demo = _load_pickle('CNB_model.sav')
reg_demo = _load_pickle('regression_model.sav')
svm_demo = _load_pickle('svm_model.sav')
rfc_demo = _load_pickle('rfc_model.sav')


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


In [None]:
# DEMO
# Input your own text (review) and see how each model classifies it

# Take in user input and generate predictions if the input is not empty
text = input("Type text and hit [ENTER]:\n")

# input() always returns a str (never None), so "nothing entered" has to be
# detected as an empty or whitespace-only string.
if not text.strip():
  print("ERROR: NO TEXT ENTERED")
else:
  # The four pickled classifiers all predict from the same TF-IDF vector;
  # VADER scores the raw text directly and predict() turns its compound
  # score into a label.
  text_vector = vectorizer.transform([text])
  CNB_result = CNB_demo.predict(text_vector)
  reg_result = reg_demo.predict(text_vector)
  svm_result = svm_demo.predict(text_vector)
  rfc_result = rfc_demo.predict(text_vector)
  vader_result = predict(sid.polarity_scores(text)['compound'])

  print('\n')
  print('------------------------------------------------')
  print("Predictions")
  print('------------------------------------------------')
  print(f"Complement Naive Bayes: {CNB_result[0]}")
  print(f"Logistic Regression: {reg_result[0]}")
  print(f"SVM: {svm_result[0]}")
  # Report the random forest's own prediction (previously printed CNB_result).
  print(f"Random Forest: {rfc_result[0]}")
  print(f"VADER: {vader_result}")
  print('\n\n')

Type text and hit [ENTER]:
I like to come here all the time!!!


------------------------------------------------
Predictions
------------------------------------------------
Complement Naive Bayes: pos
Logistic Regression: pos
SVM: pos
Random Forest: pos
VADER: pos



