In [171]:
from pymongo import MongoClient
from bson.objectid import ObjectId
from pprint import pprint
import string
import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer 
from nltk import word_tokenize, sentiment
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [168]:
import nltk
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/rusherrg/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [2]:
mongo_uri = 'mongodb://localhost:10000/'

In [12]:
# Database Functions
def connect():
    client = MongoClient(mongo_uri)
    return client.products.oneplus6t

def fetch_reviews():
    db = connect().reviews
    return db.find_one()

In [44]:
x = fetch_reviews()

In [51]:
reviews = {}
reviews['all'] = x['all_reviews']

In [56]:
all_reviews = []
reviews_battery = {}
reviews_picture = {}
reviews_value = {}
reviews_sound = {}
reviews_fingerprint = {}

In [57]:
for review in reviews['all']:
    all_reviews.append(review['review'])    

In [106]:
STOP_WORDS = set(stopwords.words('english'))
STOP_WORDS.add('')
 
lemmatizer = WordNetLemmatizer() 

In [129]:
synonyms = {
    'battery': ['battery', 'batterylife', 'batteries'],
    'picture': ['picture', 'camera', 'pictures', 'pic', 'photo', 'photograph', 'photography'],
    'value': ['worth', 'value', 'cheap'],
    'sound': ['sound', 'music', 'speaker', 'loud', 'volume'],
    'fingerprint': ['fingerprint', 'scanner', 'finger'],    
}

In [148]:
clean_reviews = []
for review in all_reviews:
    review = ''.join(ch for ch in review if ch not in string.punctuation)
    words = re.sub('(\n+|\\d|\\W)',' ',review).split()
    review = ' '.join([lemmatizer.lemmatize(word.lower()) for word in words if word.lower() not in STOP_WORDS])
    review = review.split()
    for i in range(len(review)):
        for key in synonyms:
            for val in synonyms[key]:
                if review[i] == val:
                    review[i] = key
    review = ' '.join(review)
    clean_reviews.append(review)

In [150]:
reviews_battery['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i].find('battery')!=-1]
reviews_picture['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i].find('picture')!=-1]
reviews_value['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i].find('value')!=-1]
reviews_sound['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i].find('sound')!=-1]
reviews_fingerprint['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i].find('fingerprint')!=-1]

In [174]:
sid = SentimentIntensityAnalyzer()

In [262]:
def sentiment_analysis(reviews, classs):
    ind = reviews['all']
    pos = []
    neu = []
    neg = []
    for i in ind:
        words = nltk.pos_tag(clean_reviews[i].split())
        score = {'neg': 0, 'neu': 0, 'pos': 0}
        for j in range(len(words)):
            sentence = []
            if words[j][0]==classs:
                for k in range(max(0, j-1), min(len(words),j+2)):
                    sentence.append(words[k][0])
                sentence = ' '.join(sentence)
                pol_score = sid.polarity_scores(sentence)
                if pol_score['neu'] == 1.0:
                    pol_score['neu'] = 0.0
                #print(pol_score, sentence)
                for key in ['neg', 'neu', 'pos']:
                    score[key] += pol_score[key]
        for key in ['neg', 'neu', 'pos']:
            score[key] /= max(1.0, sum(score.values()))
        if sum(score.values()) == 0.0:
            pol_score = sid.polarity_scores(clean_reviews[i])
            for key in ['neg', 'neu', 'pos']:
                score[key] += pol_score[key] 
        tag = max(score, key=score.get)
        #print(score, clean_reviews[i])
        eval(tag).append(i)
        #print("\n\n")
    print("#Pos:{}\n#Neu:{}\n#Neg:{}".format(len(pos), len(neu), len(neg)))
    return pos, neu, neg

In [264]:
reviews_class = {
    'battery': reviews_battery,
    'value': reviews_value,
    'fingerprint': reviews_fingerprint,
    'sound': reviews_sound,
    'picture': reviews_picture,
}
for reviews in reviews_class:
    print("Fetching {} reviews".format(reviews))
    pos, neu, neg  = sentiment_analysis(reviews_class[reviews], reviews)
    reviews_class[reviews]['pos'] = pos
    reviews_class[reviews]['neu'] = neu
    reviews_class[reviews]['neg'] = neg
    print('\n')

Fetching battery reviews
#Pos:760
#Neu:778
#Neg:52


Fetching value reviews
#Pos:471
#Neu:5
#Neg:6


Fetching fingerprint reviews
#Pos:188
#Neu:575
#Neg:41


Fetching sound reviews
#Pos:394
#Neu:496
#Neg:49


Fetching picture reviews
#Pos:1020
#Neu:845
#Neg:79




In [265]:
reviews_battery

{'all': [0,
  2,
  8,
  9,
  13,
  14,
  19,
  20,
  23,
  25,
  27,
  28,
  30,
  31,
  34,
  38,
  41,
  43,
  44,
  49,
  50,
  54,
  57,
  58,
  63,
  64,
  65,
  67,
  68,
  71,
  73,
  77,
  83,
  88,
  89,
  92,
  93,
  94,
  102,
  108,
  111,
  114,
  115,
  116,
  118,
  119,
  122,
  123,
  125,
  129,
  131,
  134,
  136,
  139,
  140,
  141,
  142,
  149,
  151,
  152,
  154,
  155,
  156,
  157,
  158,
  160,
  161,
  162,
  163,
  165,
  172,
  173,
  174,
  176,
  177,
  179,
  180,
  181,
  182,
  184,
  185,
  186,
  187,
  189,
  190,
  191,
  193,
  194,
  195,
  197,
  199,
  200,
  202,
  204,
  205,
  207,
  208,
  209,
  210,
  211,
  215,
  216,
  217,
  219,
  221,
  223,
  224,
  226,
  228,
  229,
  231,
  232,
  234,
  235,
  236,
  237,
  239,
  240,
  241,
  242,
  244,
  245,
  247,
  248,
  249,
  254,
  255,
  261,
  262,
  263,
  264,
  265,
  266,
  267,
  268,
  270,
  271,
  272,
  274,
  275,
  276,
  277,
  278,
  280,
  282,
  284,
  286,
  287,

In [257]:
x

{'a': [0, 1, 2, 3, 4, 15, 6, 7, 8, 9], 'b': [1, 2, 3, 4, 5, 16, 7, 8, 9, 10]}