## NLP with Machine Learning

### 1. Sentiment Analysis

In [1]:
import pandas as pd

# create a list of sentences
# (the emoji and the em dash are real Unicode characters; the emoji matters
#  because VADER scores it along with the words)
data = [
    "When life gives you lemons, make lemonade! 🙂",
    "She bought 2 lemons for $1 at Maven Market.",
    "A dozen lemons will make a gallon of lemonade. [AllRecipes]",
    "lemon, lemon, lemons, lemon, lemon, lemons",
    "He's running to the market to get a lemon — there's a great sale today.",
    "iced tea is my favorite",
    "I didn't like the taste of that lemonade at all.",
    "My lemons went bad before I could use them, unfortunately.",
]

# expand the column width to see the full sentences
pd.set_option('display.max_colwidth', None)

# turn it into a dataframe
data_df = pd.DataFrame(data, columns=["sentence"])

# work on a copy so the original dataframe stays untouched;
# only the last expression of a cell is rendered, so preview the copy once
df = data_df.copy()
df.head()

Unnamed: 0,sentence
0,"When life gives you lemons, make lemonade! 🙂"
1,She bought 2 lemons for $1 at Maven Market.
2,A dozen lemons will make a gallon of lemonade. [AllRecipes]
3,"lemon, lemon, lemons, lemon, lemon, lemons"
4,He's running to the market to get a lemon — there's a great sale today.


In [2]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [4]:
# take the sentence stored at index label 0 as a quick example to score
test = df.loc[0, "sentence"]
test

'When life gives you lemons, make lemonade! 🙂'

In [5]:
# build the VADER analyzer, then score the sample sentence;
# the result is a dict with 'neg', 'neu', 'pos' and 'compound' entries
analyzer = SentimentIntensityAnalyzer()
test_scores = analyzer.polarity_scores(test)
test_scores

{'neg': 0.0, 'neu': 0.75, 'pos': 0.25, 'compound': 0.4587}

In [7]:
# keep only the 'compound' entry, the single overall score for the sentence
compound_score = analyzer.polarity_scores(test)['compound']
compound_score

0.4587

In [8]:
def get_sentiment(text, analyzer=None):
    """Return the overall ('compound') VADER sentiment score for ``text``.

    Parameters
    ----------
    text : str
        The sentence or document to score.
    analyzer : optional
        Any object exposing ``polarity_scores(text)`` that returns a dict
        containing a ``'compound'`` key. When omitted, one shared
        ``SentimentIntensityAnalyzer`` is created on first use and reused on
        later calls, so applying this function to a whole column does not
        construct a new analyzer for every row.

    Returns
    -------
    The value stored under ``'compound'`` in the analyzer's score dict.
    """
    if analyzer is None:
        # Lazily create the shared analyzer and cache it on the function
        # itself, so the cell stays self-contained (no extra global needed).
        analyzer = getattr(get_sentiment, "_shared_analyzer", None)
        if analyzer is None:
            analyzer = SentimentIntensityAnalyzer()
            get_sentiment._shared_analyzer = analyzer
    return analyzer.polarity_scores(text)['compound']

In [11]:
# score every sentence and store the result in a new 'sentiment' column
df['sentiment'] = df['sentence'].map(get_sentiment)

In [12]:
# show every sentence next to its compound sentiment score
df

Unnamed: 0,sentence,sentiment
0,"When life gives you lemons, make lemonade! 🙂",0.4587
1,She bought 2 lemons for $1 at Maven Market.,0.0
2,A dozen lemons will make a gallon of lemonade. [AllRecipes],0.0
3,"lemon, lemon, lemons, lemon, lemon, lemons",0.0
4,He's running to the market to get a lemon — there's a great sale today.,0.6249
5,iced tea is my favorite,0.4588
6,I didn't like the taste of that lemonade at all.,-0.2755
7,"My lemons went bad before I could use them, unfortunately.",-0.7096


### 2. Text Classification

#### GOAL: Predict which reviews are high priority (vs. low priority) so we can address them right away

In [13]:
# import libraries
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [15]:
# load the product reviews from the Excel workbook and preview the first rows
reviews = pd.read_excel('Data/Popchip_Reviews.xlsx')
reviews.head(n=5)

Unnamed: 0,Id,UserId,Rating,Priority,Title,Text
0,23689,A21SYVGVNG8RAS,5,Low,Yummy snacks!,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.
1,23690,AQJYXC0MPRQJL,5,Low,Great chip that is different from the rest,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more."
2,23691,A30NYUHEDLWI0Y,5,Low,Great Alternative to Potato Chips,"I just love these chips! I was always a big fan of potato chips, but haven't had one since I discovered popchips. They are great for dipping or all alone. I am constantly re-ordering them. One note however-if you are on a low salt diet these chips are probably not for you. They are high in sodium. We go through a case every two months. If you love them it pays to join the subscribe and save program through Amazon. You save money and stay supplied!"
3,23692,A2NU55U9LKTB5J,3,High,Not somthing I would crave,"These tasted like potatoe stix, that we got in grade school with our lunches usually on pizza day. They were the bomb then, not so much now. Won't buy again unless I get them for cheap or free."
4,23693,A225F7QFP5LIW2,5,Low,healthy and delicious,"These chips are great! They look almost like a flattened rice cake, but taste so much better, more like a potato chip. The bbq flavor is delicious. They are very low in fat and full of flavor. It is easy to eat an entire bag of these!"


In [16]:
# (rows, columns) of the loaded reviews table
reviews.shape

(564, 6)

In [17]:
# class balance of the target: how many reviews fall under each priority label
reviews['Priority'].value_counts()

Priority
Low     447
High    117
Name: count, dtype: int64

In [19]:
import maven_text_preprocessing

In [22]:
# run the raw review text through the project's cleaning helper and keep the
# result in its own column, leaving the original 'Text' column untouched
# NOTE(review): judging by the preview, the helper lowercases, drops
# punctuation/stop words and lemmatizes -- confirm in maven_text_preprocessing
reviews['Text_Clean'] = maven_text_preprocessing.clean_and_normalize(reviews['Text'])
reviews.head(n=2)

Unnamed: 0,Id,UserId,Rating,Priority,Title,Text,Text_Clean
0,23689,A21SYVGVNG8RAS,5,Low,Yummy snacks!,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.,popchip bomb use parmesan garlic scoop cottage cheese healthy alternative chip dip healthy eat program save
1,23690,AQJYXC0MPRQJL,5,Low,Great chip that is different from the rest,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more.",like puff nature chip make unique chip market order salt vinegar absolutely love flavor hand favorite chip try cheddar regular flavor cheddar 45 regular 35 prefer strong flavor obviously case regular salt vinegar kind weak compare regular sv chip flavorful make want come


In [None]:
# CountVectorizer features + Multinomial Naive Bayes classifier

In [29]:
# bag-of-words features: unigrams and bigrams, English stop words removed,
# with a document-frequency cutoff of min_df=0.2
# NOTE(review): the vectorizer is fit on all reviews before the train/test
# split, so held-out documents also influence which terms are kept
cv = CountVectorizer(
    stop_words='english',
    ngram_range=(1, 2),
    min_df=0.2,
)
X = cv.fit_transform(reviews['Text_Clean'])

# densify into a labelled dataframe so the term counts are easy to inspect
X_df = pd.DataFrame(X.toarray(), columns=cv.get_feature_names_out())
X_df

Unnamed: 0,bag,buy,calorie,chip,eat,flavor,good,great,like,love,popchip,potato,potato chip,salt,snack,taste,try
0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0
1,0,0,0,4,0,3,0,0,1,1,0,0,0,2,0,0,1
2,0,0,0,3,0,0,0,1,0,2,1,1,1,1,0,0,0
3,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0
4,1,0,0,2,1,2,0,1,2,0,0,1,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,0,0,0,3,3,1,1,5,0,1,1,4,3,0,0,1,0
560,1,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1
561,0,0,0,2,0,1,0,2,0,0,0,0,0,0,0,2,0
562,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0


In [30]:
# target for the model: the priority label attached to each review
y = reviews['Priority']
y.head(n=5)

0     Low
1     Low
2     Low
3    High
4     Low
Name: Priority, dtype: object

In [31]:
# hold out 20% of the reviews for evaluation; the fixed seed keeps the split
# reproducible
# NOTE(review): the split is not stratified even though the classes are
# imbalanced (447 Low vs 117 High) -- consider stratify=y
X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y,
    test_size=0.2,
    random_state=42,
)

# fit a multinomial Naive Bayes classifier on the training counts
model_nb = MultinomialNB()
model_nb.fit(X_train, y_train)

# label the held-out reviews
y_pred_nb = model_nb.predict(X_test)

# overall accuracy plus per-class precision / recall / f1
print("Accuracy:", accuracy_score(y_test, y_pred_nb))
print(classification_report(y_test, y_pred_nb))

Accuracy: 0.8407079646017699
              precision    recall  f1-score   support

        High       0.60      0.16      0.25        19
         Low       0.85      0.98      0.91        94

    accuracy                           0.84       113
   macro avg       0.73      0.57      0.58       113
weighted avg       0.81      0.84      0.80       113



In [32]:
# a few hand-written reviews to sanity-check the trained model
new_review_texts = [
    "Pop chips are my favorite! I love these chips so much.",
    "Taste bad. I don't like the flavor options or taste.",
    "Solid snack.",
]
new_reviews = pd.Series(new_review_texts)

new_reviews

0    Pop chips are my favorite! I love these chips so much.
1      Taste bad. I don't like the flavor options or taste.
2                                              Solid snack.
dtype: object

In [34]:
# clean the new reviews with the same helper used for the training text
new_reviews_clean = maven_text_preprocessing.clean_and_normalize(new_reviews)

# reuse the already-fitted vectorizer (transform only, no refit) so the
# columns line up with the features the model was trained on
new_review_counts = cv.transform(new_reviews_clean)
new_reviews_df = pd.DataFrame(
    new_review_counts.toarray(),
    columns=cv.get_feature_names_out(),
)
new_reviews_df

Unnamed: 0,bag,buy,calorie,chip,eat,flavor,good,great,like,love,popchip,potato,potato chip,salt,snack,taste,try
0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,2,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


In [36]:
# predict a priority label for each of the new reviews with the fitted model
model_nb.predict(new_reviews_df)

array(['Low', 'High', 'Low'], dtype='<U4')