# ANALYSIS

In [6]:
import pandas as pd
import re
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import pickle

In [7]:
# Load the pickled classifier used by getSentiment().
# NOTE: unpickling can execute arbitrary code, so 'modelNB.pkl' must come from
# a trusted source.
# The context manager closes the file handle once the model has been loaded.
with open('modelNB.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

def getSentiment(raw_text: str) -> tuple:
    """
    Classify a piece of raw review text as positive or negative.

    The text is stripped of HTML markup, reduced to ASCII letters, lower-cased,
    filtered against English plus a few domain-specific stopwords, and
    Porter-stemmed before being passed to the module-level ``model``.

    Args:
        raw_text (str): The raw text to analyze.

    Returns:
        tuple: ``(label, probability)``. ``label`` is ``"Negative"`` when the
        model predicts ``1`` and ``"Positive"`` otherwise. ``probability`` is
        whatever ``model.predict_proba`` returns for the same cleaned text.

    Example:
        label, probability = getSentiment("This product is amazing! I love it so much.")
        # label is either "Positive" or "Negative"; the probability values
        # depend on the loaded model.
    """

    # Remove HTML. The parser is named explicitly so the result does not depend
    # on which optional parsers happen to be installed.
    review_text = BeautifulSoup(raw_text, "html.parser").get_text()

    # Replace every character that is not an ASCII letter with a space
    letters_only = re.sub(r"[^a-zA-Z]", " ", review_text)

    # Convert to lower case and split into individual words
    words = letters_only.lower().split()

    # English stopwords, plus words that were appearing frequently in both
    # positive and negative reviews
    stops = set(stopwords.words('english'))
    stops.update(['app','shopee','shoppee','item','items','seller','sellers','bad'])

    # Drop stopwords, then stem what is left
    p_stemmer = PorterStemmer()
    meaningful_words = [p_stemmer.stem(w) for w in words if w not in stops]

    # Join words back into one string and wrap it in a one-element Series
    final_text = pd.Series(" ".join(meaningful_words))

    # Feed the SAME cleaned text to both calls. Rendering the Series with
    # to_string() would prepend its index label to the text being scored.
    pred = model.predict(final_text)[0]
    probability = model.predict_proba(final_text)

    # NOTE(review): assumes the model was trained with 1 = negative — confirm
    # against the training code.
    output = "Negative" if pred == 1 else "Positive"

    return output, probability


In [55]:
# Sample review used by the following cells to walk through the preprocessing
# steps one at a time. It deliberately mixes punctuation, an emoji, a non-ASCII
# letter and non-English words.
text = "This product is amazing! I love it so much. 😋ñ Amaze me ganito pala"

In [56]:
# Stem the whitespace-separated tokens of the raw text (no cleaning yet),
# reusing a single stemmer for every token.
list(map(PorterStemmer().stem, text.split()))

['thi',
 'product',
 'is',
 'amazing!',
 'i',
 'love',
 'it',
 'so',
 'much.',
 '😋ñ',
 'amaz',
 'me',
 'ganito',
 'pala']

In [57]:
# Blank out every character that is not an ASCII letter.
letters_only = re.compile(r"[^a-zA-Z]").sub(" ", text)
letters_only

'This product is amazing  I love it so much     Amaze me ganito pala'

In [58]:
# Split on whitespace, then lower-case each token.
words = [token.lower() for token in letters_only.split()]
words

['this',
 'product',
 'is',
 'amazing',
 'i',
 'love',
 'it',
 'so',
 'much',
 'amaze',
 'me',
 'ganito',
 'pala']

In [59]:
# Collect NLTK's English stopword list into a set for fast membership tests.
stops = {*stopwords.words('english')}
stops

{'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'only',
 'or',
 'other',
 'our',
 'ours',
 'ourselves',
 'out',
 'over',
 'own',
 'r

In [60]:
# Extend the stopword set in place with the extra domain-specific words.
stops |= {'app', 'shopee', 'shoppee', 'item', 'items', 'seller', 'sellers', 'bad'}

In [61]:
# Keep only the tokens that are not stopwords, preserving their order.
meaningful_words = list(filter(lambda token: token not in stops, words))
meaningful_words

['product', 'amazing', 'love', 'much', 'amaze', 'ganito', 'pala']

In [62]:
# Porter-stem each remaining token, reusing a single stemmer instance.
meaningful_words = list(map(PorterStemmer().stem, meaningful_words))
meaningful_words

['product', 'amaz', 'love', 'much', 'amaz', 'ganito', 'pala']

In [65]:
# Re-assemble the stemmed tokens into one space-separated string.
final_text = str.join(" ", meaningful_words)
final_text

'product amaz love much amaz ganito pala'