# Baby Product Sentiment Predictions 

In [52]:
import pandas as pd 

In [53]:
baby_df = pd.read_csv("amazon_baby.csv")

In [54]:
baby_df.head()

Unnamed: 0,name,review,rating
0,Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3
1,Planetwise Wipe Pouch,it came early and was not disappointed. i love...,5
2,Annas Dream Full Quilt with 2 Shams,Very soft and comfortable and warmer than it l...,5
3,Stop Pacifier Sucking without tears with Thumb...,This is a product well worth the purchase. I ...,5
4,Stop Pacifier Sucking without tears with Thumb...,All of my kids have cried non-stop when I trie...,5


## Building Model Based on Selected Words 

In [55]:
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

In [56]:
import re
def count_words(sentence):
    if not isinstance(sentence, str):
        sentence = str(sentence)
    sentence = re.sub(r'[^\w\s]', '', sentence)
    l = sentence.split()
    dic = {}
    for a in selected_words:
        dic[a] = 0
    for x in l:
        for a in selected_words:
            if a == x:
                dic[a] += 1
                break
    return dic

In [57]:
baby_df['word_count']  = baby_df['review'].apply(count_words)

In [58]:
baby_df.head()

Unnamed: 0,name,review,rating,word_count
0,Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am..."
1,Planetwise Wipe Pouch,it came early and was not disappointed. i love...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am..."
2,Annas Dream Full Quilt with 2 Shams,Very soft and comfortable and warmer than it l...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am..."
3,Stop Pacifier Sucking without tears with Thumb...,This is a product well worth the purchase. I ...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am..."
4,Stop Pacifier Sucking without tears with Thumb...,All of my kids have cried non-stop when I trie...,5,"{'awesome': 0, 'great': 1, 'fantastic': 0, 'am..."


In [59]:
baby_df = baby_df[baby_df['rating'] != 3]

In [60]:
baby_df['sentiment'] = baby_df['rating'] >= 4

In [61]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


In [62]:
X = pd.DataFrame.from_records(baby_df['word_count'].values)
y = baby_df['sentiment']
X.head()
X.sum()

awesome       3078
great        46018
fantastic     1418
amazing       2148
love         32999
horrible       940
bad           3911
terrible       970
awful          565
wow             97
hate           970
dtype: int64

Wow is least used among selected words 

In [63]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state= 0)

In [64]:
model = LogisticRegression()
model.fit(X_train, y_train)

In [65]:
y_pred = model.predict(X_test)

In [66]:
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.843213097058559


#### Confusion Matrix 
[[true negatives   false positives]
 [false negatives  true positives]]

In [67]:
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(cm)

Confusion Matrix:
[[  248  5114]
 [  115 27874]]


In [68]:
feature_names = X.columns
coefficients = model.coef_[0]

# Create a dictionary to store word-weight pairs
word_weights = dict(zip(feature_names, coefficients))

# Print the word-weight pairs
for word, weight in word_weights.items():
    print(word, ':', weight)

awesome : 1.0997484097414962
great : 0.7575463653703902
fantastic : 0.8150913743209977
amazing : 1.024447896157856
love : 1.321700251833197
horrible : -2.2154777850866108
bad : -0.9513980838100635
terrible : -2.0706322192278392
awful : -1.996044612124784
wow : -0.5183419795710714
hate : -1.3858945113925556


### Majority Class Classifier Accuracy 

In [18]:
baby_df['sentiment'].mean()

0.8411233448474381

## Building a complete sentimental model 

In [72]:
# Filling all null reviews 
baby_df['review'].fillna("   ", inplace=True)

In [74]:
from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer()
X = vectorizer.fit_transform(baby_df['review'])


In [75]:
y = baby_df['sentiment']

In [76]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [77]:
model1 = LogisticRegression(max_iter=1900)
model1.fit(X_train, y_train)

In [78]:
accuracy = model1.score(X_test, y_test)
print('Accuracy:', accuracy)

Accuracy: 0.9288776948217444


In [79]:
positive_probabilities = model1.predict_proba(X[:])[:, 1]
baby_df['model1_pos_prob'] = positive_probabilities

## Showing why all words gives better performance 

In [80]:
baby_trend = baby_df[baby_df['name'] == 'Baby Trend Diaper Champ']
baby_trend.head()

Unnamed: 0,name,review,rating,word_count,sentiment,model0_pos_prob,model1_pos_prob
312,Baby Trend Diaper Champ,Ok - newsflash. Diapers are just smelly. We\...,4,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.806312,0.826491
314,Baby Trend Diaper Champ,"My husband and I selected the Diaper ""Champ"" m...",1,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",False,0.806312,0.030813
315,Baby Trend Diaper Champ,Excellent diaper disposal unit. I used it in ...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.806312,0.999923
316,Baby Trend Diaper Champ,We love our diaper champ. It is very easy to ...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.939796,0.9944
317,Baby Trend Diaper Champ,Two girlfriends and two family members put me ...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.558528,0.999312


Sorting the table by model 1 predicted positive probability

In [100]:
baby_trend = baby_trend.sort_values(by = 'model1_pos_prob', ascending=False)


In [101]:
baby_trend.head()

Unnamed: 0,name,review,rating,word_count,sentiment,model0_pos_prob,model1_pos_prob
376,Baby Trend Diaper Champ,"This is absolutely, by far, the best diaper pa...",5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.983203,1.0
320,Baby Trend Diaper Champ,I originally put this item on my baby registry...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.806312,1.0
451,Baby Trend Diaper Champ,"As a first time mother, I wanted to get the be...",5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.939796,1.0
414,Baby Trend Diaper Champ,We have been using our Diaper Champ for almost...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.806312,1.0
420,Baby Trend Diaper Champ,Baby Luke can turn a clean diaper to a dirty d...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.806312,1.0


In [102]:
baby_trend.tail()

Unnamed: 0,name,review,rating,word_count,sentiment,model0_pos_prob,model1_pos_prob
484,Baby Trend Diaper Champ,Worst diaper pale ever!! I\'ve had mine for 2...,1,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",False,0.806312,0.002219
550,Baby Trend Diaper Champ,"Ok, so the idea of the Diaper Champ is awesome...",1,"{'awesome': 0, 'great': 1, 'fantastic': 0, 'am...",False,0.774233,0.001064
590,Baby Trend Diaper Champ,This product is nice in theory but not all tha...,1,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",False,0.806312,0.000742
489,Baby Trend Diaper Champ,I registered for this product after reading th...,2,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",False,0.312328,0.000606
357,Baby Trend Diaper Champ,This is the worst diaper pail ever! It was gr...,1,"{'awesome': 0, 'great': 1, 'fantastic': 0, 'am...",False,0.898782,0.000317


In [93]:
contrasting = baby_trend[baby_trend['model1_pos_prob']-baby_trend['model0_pos_prob']>0.3]

In [94]:
contrasting.head()

Unnamed: 0,name,review,rating,word_count,sentiment,model0_pos_prob,model1_pos_prob
317,Baby Trend Diaper Champ,Two girlfriends and two family members put me ...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.558528,0.999312
318,Baby Trend Diaper Champ,I waited to review this until I saw how it per...,4,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.616525,0.997291
334,Baby Trend Diaper Champ,I recently gave birth to my third child. When...,5,"{'awesome': 0, 'great': 1, 'fantastic': 0, 'am...",True,0.546794,0.999889
360,Baby Trend Diaper Champ,"As new parents, we started out with the Diaper...",5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.616525,0.998087
403,Baby Trend Diaper Champ,I agree it\'s better than the genie. I own th...,5,"{'awesome': 0, 'great': 0, 'fantastic': 0, 'am...",True,0.616525,0.995971


In [95]:
len(contrasting)

13

In [98]:
for a in contrasting['review']:
    print(a)
    print("\n")

Two girlfriends and two family members put me onto this diaper pail.  They each had tried the Diaper Genie and had horrible results with eventual smells, and costliness of buying proprietary DG cartridges.  My family members eventually started bringing every dirty diaper out to the trash and leaving just wet diapers in the DG, that is until they found out about the Diaper Champ!Wow, what a difference, it seals in orders very well and using normal 8 - 13 gallon trash bags makes it economical.  The ease of use factor is amazing, drop a dirty diaper in the hole, grab the handle, give it a flip, the plunger pushes the diaper down and it drops into the can with a little gravitic help.No wrenching, turning, fighting a cartridge bag system.Opening it for the first time was a little hard, but look at that from a child\'s point of view, a toddler is not going to get into it, and neither is a dog.  Also, it needs to be away from the wall a little bit, so that it\'s flip top access lid locks into

There are certain reviews where model based on selected words fails to capture the idea but the one using complete words captures the idea very well.