In [4]:
from pymongo import MongoClient
from bson.objectid import ObjectId
from pprint import pprint
import string
import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer 
from nltk import word_tokenize, sentiment
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [6]:
mongo_uri = 'mongodb://localhost:10000/'

all_reviews = []
reviews_battery = {}
reviews_picture = {}
reviews_value = {}
reviews_sound = {}
reviews_fingerprint = {}

STOP_WORDS = set(stopwords.words('english'))
STOP_WORDS.add('')
 
lemmatizer = WordNetLemmatizer() 

sid = SentimentIntensityAnalyzer()

synonyms = {
    'battery': ['battery', 'batterylife', 'batteries'],
    'picture': ['picture', 'camera', 'pictures', 'pic', 'photo', 'photograph', 'photography'],
    'value': ['worth', 'value', 'cheap'],
    'sound': ['sound', 'music', 'speaker', 'loud', 'volume'],
    'fingerprint': ['fingerprint', 'scanner', 'finger'],    
    'flagship': ['flagship', 'killer'],
}

In [10]:
# Database Functions
def connect(product):
    product = ''.join(product.lower().split())
    client = MongoClient(mongo_uri)
    return client.products[product]

def fetch_product(product):
    db = connect(product).details
    for i in db.find():
        if i['title'].find(product)!=-1:
            return i['_id']
    
def fetch_reviews(product):
    db = connect(product).reviews
    for i in db.find():
        if i['title'].find(product)!=-1:
            return i
    
def insert_reviews_class(product, data):
    db = connect(product).reviews
    pid = fetch_product(product)
    for key in data:
        print(db.update_one({'product_id':pid}, {'$set':{key: data[key]}}))
    return

In [11]:
x = fetch_reviews('OnePlus 6T')

In [12]:
Reviews = {}
Reviews['all'] = x['all_reviews']

In [13]:
for review in Reviews['all']:
    all_reviews.append([review['review'], review['title']])    

In [14]:
def clean_review(review):
    replace = ['flagship']
    review = ''.join(ch for ch in review if ch not in string.punctuation)
    words = re.sub('(\n+|\\d|\\W)',' ',review).split()
    review = ' '.join([lemmatizer.lemmatize(word.lower()) for word in words if word.lower() not in STOP_WORDS])
    review = review.split()
    for i in range(len(review)):
        for key in synonyms:
            for val in synonyms[key]:
                if review[i] == val:
                    review[i] = key
    review = ' '.join(review)
    return review

In [15]:
clean_reviews = []
for review in all_reviews:
    clean = [clean_review(review[0]), clean_review(review[1])]
    clean_reviews.append(clean)

In [16]:
reviews_battery['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i][0].find('battery')!=-1]
reviews_picture['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i][0].find('picture')!=-1]
reviews_value['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i][0].find('value')!=-1]
reviews_sound['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i][0].find('sound')!=-1]
reviews_fingerprint['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i][0].find('fingerprint')!=-1]

In [52]:
def sentiment_analysis(reviews, classs):
    ind = reviews['all']
    pos = []
    neu = []
    neg = []
    for i in ind:
        words = nltk.pos_tag(clean_reviews[i][0].split())
        score = {'neg': 0, 'neu': 0, 'pos': 0}
        for j in range(len(words)):
            sentence = []
            if words[j][0]==classs:
                for k in range(max(0, j-1), min(len(words),j+2)):
                    sentence.append(words[k][0])
                sentence = ' '.join(sentence)
                pol_score = sid.polarity_scores(sentence)
                if pol_score['neu'] == 1.0:
                    pol_score['neu'] = 0.0
                #print(pol_score, sentence)
                for key in ['neg', 'neu', 'pos']:
                    score[key] += pol_score[key]
        
        s = sum(score.values())
        if s!=0:
            for key in ['neg', 'neu', 'pos']:
                score[key] /= s
            
        pol_score = sid.polarity_scores(clean_reviews[i][0])
        for key in ['neg', 'neu', 'pos']:
            score[key] += pol_score[key]
          
        #print(score, pol_score)
        s = sum(score.values())
        for key in ['neg', 'neu', 'pos']:
            score[key] /= s
            
        pol_score = sid.polarity_scores(clean_reviews[i][1])
        for key in ['neg', 'neu', 'pos']:
            score[key] += pol_score[key]
        
        s = sum(score.values())
        for key in ['neg', 'neu', 'pos']:
            score[key] /= s
            
        tag = max(score, key=score.get)
        #print(score, clean_reviews[i])
        eval(tag).append([i, score])
        #print("\n\n")
    print("#Pos:{}\n#Neu:{}\n#Neg:{}".format(len(pos), len(neu), len(neg)))
    return pos, neu, neg

In [53]:
reviews_class = {
    'battery': reviews_battery,
    'value': reviews_value,
    'fingerprint': reviews_fingerprint,
    'sound': reviews_sound,
    'picture': reviews_picture,
}
for reviews in reviews_class:
    print("Fetching {} reviews".format(reviews))
    pos, neu, neg  = sentiment_analysis(reviews_class[reviews], reviews)
    reviews_class[reviews]['pos'] = pos
    reviews_class[reviews]['neu'] = neu
    reviews_class[reviews]['neg'] = neg
    print('\n')

Fetching battery reviews
#Pos:772
#Neu:792
#Neg:26


Fetching value reviews
#Pos:307
#Neu:163
#Neg:12


Fetching fingerprint reviews
#Pos:320
#Neu:469
#Neg:15


Fetching sound reviews
#Pos:421
#Neu:486
#Neg:32


Fetching picture reviews
#Pos:940
#Neu:971
#Neg:48




In [57]:
allreviews = fetch_reviews('OnePlus 6T')

for key in ['battery', 'value', 'fingerprint', 'picture', 'sound']:
    for c in ['pos', 'neu', 'neg']:
        for i in allreviews[key][c]:
            try:
                allreviews['all_reviews'][i[0]]['results']
            except:
                allreviews['all_reviews'][i[0]]['results'] = i[1]
            else:
                for j in ['pos', 'neu', 'neg']:
                    allreviews['all_reviews'][i[0]]['results'][j] += i[1][j]
    for i in allreviews[key]['all']:
        try:
            allreviews['all_reviews'][i]['tags']
        except:
            print('Error')
            allreviews['all_reviews'][i]['tags'] = set([key])
        else:
            allreviews['all_reviews'][i]['tags'].add(key)

for i in range(len(allreviews['all_reviews'])):
    try:
        allreviews['all_reviews'][i]['results']
    except:
        pol_score = sid.polarity_scores(clean_reviews[i][1])
        score = {}
        for key in ['neg', 'neu', 'pos']:
            score[key] = pol_score[key]
        allreviews['all_reviews'][i]['results'] = score

for i in range(len(allreviews['all_reviews'])):
    if sum(allreviews['all_reviews'][i]['results'].values()) == 0.0:
            allreviews['all_reviews'][i]['results']['neu'] = 1.0
    
    s = sum(allreviews['all_reviews'][i]['results'].values())
    for c in ['pos', 'neu', 'neg']:
        allreviews['all_reviews'][i]['results'][c] /= s
    if round(sum(allreviews['all_reviews'][i]['results'].values()),2) != 1.0:
           print(i)
    
    for c in ['pos', 'neu', 'neg']:
        allreviews['all_reviews'][i]['results'][c] = int(allreviews['all_reviews'][i]['results'][c]*100)
    if sum(allreviews['all_reviews'][i]['results'].values())!=100:
        allreviews['all_reviews'][i]['results'][min(allreviews['all_reviews'][i]['results'], key=allreviews['all_reviews'][i]['results'].get)] += 100-sum(allreviews['all_reviews'][i]['results'].values())


AttributeError: 'list' object has no attribute 'add'

In [59]:
insert_reviews_class('OnePlus 6T', reviews_class)

<pymongo.results.UpdateResult object at 0x7f20f9a17b08>
<pymongo.results.UpdateResult object at 0x7f20f9a17e88>
<pymongo.results.UpdateResult object at 0x7f20f9a17c88>
<pymongo.results.UpdateResult object at 0x7f20f9a17b08>
<pymongo.results.UpdateResult object at 0x7f20f9a17388>


In [60]:
fetch_reviews('OnePlus 6T')

{'_id': ObjectId('5cb1eaa0c7959b424999e92d'),
 'title': 'OnePlus 6T (Midnight Black, 8GB RAM, 128GB Storage)',
 'all_reviews': [{'customer': 'Tanmay Shukla',
   'rating': 5.0,
   'title': 'Flagship Killer',
   'review': "I got this phone on Friday evening.\n\nPros:\nGreat battery life\nAmazing performance\nPremium design\nImpressive rear and front camera\nIn display fingerprint scanner is really fast\n\nCons:\nNo headphone jack but you'll get a converter\nNo notification LED\nNo microSD card slot\n\nI will give points on my personal experience of 2 days full usage as below:\n\nWeight: 9/10\nOS: 10/10\nScreen size: 10/10\nCPU: 10/10\nPerformance: 10/10\nStorage: 10/10 (you'll get approx 113 gb for use)\nBattery: 8/10\nRear camera: 10/10\nFront camera: 10/10\nIn display fingerprint scanner: 9/10\nFace unlock: 10/10\n\nI hope this will help full.",
   'results': {'neg': 1, 'neu': 33, 'pos': 66},
   'tags': ['battery',
    'fingerprint',
    'picture',
    'battery',
    'fingerprint',
   

In [51]:
set(['key'])

{'key'}