In [328]:
from pymongo import MongoClient
from bson.objectid import ObjectId
from pprint import pprint
import string
import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer 
from nltk import word_tokenize, sentiment
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [329]:
mongo_uri = 'mongodb://localhost:10000/'

all_reviews = []
reviews_battery = {}
reviews_picture = {}
reviews_value = {}
reviews_sound = {}
reviews_fingerprint = {}

STOP_WORDS = set(stopwords.words('english'))
STOP_WORDS.add('')
 
lemmatizer = WordNetLemmatizer() 

sid = SentimentIntensityAnalyzer()

synonyms = {
    'battery': ['battery', 'batterylife', 'batteries'],
    'picture': ['picture', 'camera', 'pictures', 'pic', 'photo', 'photograph', 'photography'],
    'value': ['worth', 'value', 'cheap'],
    'sound': ['sound', 'music', 'speaker', 'loud', 'volume'],
    'fingerprint': ['fingerprint', 'scanner', 'finger'],    
    'flagship': ['flagship', 'killer'],
}

In [357]:
# Database Functions
def connect(product):
    product = ''.join(product.lower().split())
    client = MongoClient(mongo_uri)
    return client.products[product]

def fetch_product(product):
    db = connect(product).details
    for i in db.find():
        if i['title'].find(product)!=-1:
            return i['_id']
    
def fetch_reviews(product):
    db = connect(product).reviews
    for i in db.find():
        if i['title'].find(product)!=-1:
            return i
    
def insert_reviews_class(product, data):
    db = connect(product).reviews
    pid = fetch_product(product)
    for key in data:
        print(db.update_one({'product_id':pid}, {'$set':{key: data[key]}}))
    return

In [331]:
x = fetch_reviews('OnePlus 6T')

In [332]:
Reviews = {}
Reviews['all'] = x['all_reviews']

In [333]:
for review in Reviews['all']:
    all_reviews.append([review['review'], review['title']])    

In [334]:
def clean_review(review):
    replace = ['flagship']
    review = ''.join(ch for ch in review if ch not in string.punctuation)
    words = re.sub('(\n+|\\d|\\W)',' ',review).split()
    review = ' '.join([lemmatizer.lemmatize(word.lower()) for word in words if word.lower() not in STOP_WORDS])
    review = review.split()
    for i in range(len(review)):
        for key in synonyms:
            for val in synonyms[key]:
                if review[i] == val:
                    review[i] = key
    review = ' '.join(review)
    return review

In [335]:
clean_reviews = []
for review in all_reviews:
    clean = [clean_review(review[0]), clean_review(review[1])]
    clean_reviews.append(clean)

In [336]:
reviews_battery['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i][0].find('battery')!=-1]
reviews_picture['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i][0].find('picture')!=-1]
reviews_value['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i][0].find('value')!=-1]
reviews_sound['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i][0].find('sound')!=-1]
reviews_fingerprint['all'] = [i for i in range(len(clean_reviews)) if clean_reviews[i][0].find('fingerprint')!=-1]

In [337]:
def sentiment_analysis(reviews, classs):
    ind = reviews['all']
    pos = []
    neu = []
    neg = []
    for i in ind:
        words = nltk.pos_tag(clean_reviews[i][0].split())
        score = {'neg': 0, 'neu': 0, 'pos': 0}
        for j in range(len(words)):
            sentence = []
            if words[j][0]==classs:
                for k in range(max(0, j-1), min(len(words),j+2)):
                    sentence.append(words[k][0])
                sentence = ' '.join(sentence)
                pol_score = sid.polarity_scores(sentence)
                if pol_score['neu'] == 1.0:
                    pol_score['neu'] = 0.0
                #print(pol_score, sentence)
                for key in ['neg', 'neu', 'pos']:
                    score[key] += pol_score[key]
        pol_score = sid.polarity_scores(clean_reviews[i][1])
        for key in ['neg', 'neu', 'pos']:
            score[key] += pol_score[key]
            score[key] /= max(1.0, sum(score.values()))
        
        tag = max(score, key=score.get)
        #print(score, clean_reviews[i])
        eval(tag).append(i)
        #print("\n\n")
    print("#Pos:{}\n#Neu:{}\n#Neg:{}".format(len(pos), len(neu), len(neg)))
    return pos, neu, neg

In [338]:
reviews_class = {
    'battery': reviews_battery,
    'value': reviews_value,
    'fingerprint': reviews_fingerprint,
    'sound': reviews_sound,
    'picture': reviews_picture,
}
for reviews in reviews_class:
    print("Fetching {} reviews".format(reviews))
    pos, neu, neg  = sentiment_analysis(reviews_class[reviews], reviews)
    reviews_class[reviews]['pos'] = pos
    reviews_class[reviews]['neu'] = neu
    reviews_class[reviews]['neg'] = neg
    print('\n')

Fetching battery reviews
#Pos:957
#Neu:560
#Neg:73


Fetching value reviews
#Pos:359
#Neu:116
#Neg:7


Fetching fingerprint reviews
#Pos:426
#Neu:318
#Neg:60


Fetching sound reviews
#Pos:519
#Neu:352
#Neg:68


Fetching picture reviews
#Pos:1183
#Neu:668
#Neg:108




dict_keys(['all', 'pos', 'neu', 'neg'])