## Classifying whether feedback left on a website is either positive or negative.

In [266]:
import numpy as np
import pandas as pd
import scipy
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns

In [267]:
amazon = pd.read_csv("amazon_cells_labelled.txt",delimiter="\t",header=None)
amazon.columns = ['feedback','score']
amazon.head(5)
# score 1 (positive), score 0 (negative)

Unnamed: 0,feedback,score
0,So there is no way for me to plug it in here i...,0
1,"Good case, Excellent value.",1
2,Great for the jawbone.,1
3,Tied to charger for conversations lasting more...,0
4,The mic is great.,1


In [268]:
# import negative sentiment words data to create list
# source citation: 
#    Minqing Hu and Bing Liu. "Mining and Summarizing Customer Reviews." 
#;       Proceedings of the ACM SIGKDD International Conference on Knowledge 
#;       Discovery and Data Mining (KDD-2004), Aug 22-25, 2004, Seattle, 
#;       Washington, USA, 
neg_words = pd.read_csv("negative-words.txt",delimiter='\t',encoding="ISO-8859-1",skiprows=34,header=None)
neg_words.columns = ["Negative Words"]
print(neg_words.to_string())

                Negative Words
0                      2-faced
1                      2-faces
2                     abnormal
3                      abolish
4                   abominable
5                   abominably
6                    abominate
7                  abomination
8                        abort
9                      aborted
10                      aborts
11                      abrade
12                    abrasive
13                      abrupt
14                    abruptly
15                     abscond
16                     absence
17               absent-minded
18                    absentee
19                      absurd
20                   absurdity
21                    absurdly
22                  absurdness
23                       abuse
24                      abused
25                      abuses
26                     abusive
27                     abysmal
28                   abysmally
29                       abyss
30                  accidental
31      

In [269]:
# change score to boolean values (looking for instances where negative messages return True)
amazon['score'] = (amazon['score'] == 0)
amazon.head(3)

Unnamed: 0,feedback,score
0,So there is no way for me to plug it in here i...,True
1,"Good case, Excellent value.",False
2,Great for the jawbone.,False


In [270]:
# create list of keywords + "!" 
keywords = list(neg_words.values.flatten())
keywords.append("no")
keywords.append("never")
keywords.append("not")
print(keywords)



In [284]:
#strip punctuations from feedback messages
def strip_punctuation(message):
    from string import punctuation
    return ''.join(m for m in message if m not in punctuation)

# compare two lists to see if feedback contains negative word
def neg_message_check(df,col_name,alist):
    message_list = list(df[col_name].values.flatten())
    new_message_list = []
    for message in message_list:
        new_message = strip_punctuation(message.lower())
        new_message_list.append(new_message)
        
    nm = pd.Series(new_message_list)
    df["modified_feedback"] = nm.values
    
    for key in alist:
        df[str(key)] = df.modified_feedback.str.contains("" + str(key) + "",case=False)
        #amazon[str(key)] = amazon.modified_feedback.apply(lambda sentence: any(word in sentence for word in alist))

In [286]:
#neg_message_check(amazon,"feedback",keywords)
display(amazon)

Unnamed: 0,feedback,score,2-faced,2-faces,abnormal,abolish,abominable,abominably,abominate,abomination,...,unnecessary,unneeded,unnerve,unnerved,unnerving,unnervingly,unnoticed,unobserved,unorthodox,unorthodoxy
0,So there is no way for me to plug it in here i...,True,False,False,False,False,False,False,False,False,...,True,True,True,True,True,True,True,True,True,True
1,"Good case, Excellent value.",False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,Great for the jawbone.,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,Tied to charger for conversations lasting more...,True,False,False,False,False,False,False,False,False,...,True,True,True,True,True,True,True,True,True,True
4,The mic is great.,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5,I have to jiggle the plug to get it to line up...,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
6,If you have several dozen or several hundred c...,True,False,False,False,False,False,False,False,False,...,True,True,True,True,True,True,True,True,True,True
7,If you are Razr owner...you must have this!,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
8,"Needless to say, I wasted my money.",True,False,False,False,False,False,False,False,False,...,True,True,True,True,True,True,True,True,True,True
9,What a waste of money and time!.,True,False,False,False,False,False,False,False,False,...,True,True,True,True,True,True,True,True,True,True


In [None]:
# data = 
target = amazon["score"]

In [None]:
from sklearn.naive_bayes import BernoulliNB

bnb = BernoulliNB()

bnb.fit(target,data)

y_pred = bnb.predict(data)

print("Number of mislabeled points out of total {} points: {}".format(data.shape[0],(target != y_pred).sum()))