In [1]:
import pandas as pd
import numpy as np
import warnings 
warnings.filterwarnings('ignore')
import nltk
import ast
import re
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import wordnet
nltk.download('sentiwordnet')
from nltk.corpus import sentiwordnet
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer,PorterStemmer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

[nltk_data] Downloading package sentiwordnet to
[nltk_data]     C:\Users\suraj\AppData\Roaming\nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!


In [2]:
S10rev = pd.read_csv("D:/AMOD/Final Research Project/imp/S10_data.csv").dropna()
XRrev = pd.read_csv("D:/AMOD/Final Research Project/imp/XR_data.csv").dropna()

In [3]:
S10rev

Unnamed: 0,rating,content,subjectivity
0,2,The phone is great but when I got my package t...,0.750000
1,2,Terrible customer care,1.000000
2,1,"Calls drop, doesn’t pick up my data, constan...",0.666667
3,5,Awesome phone,1.000000
4,1,"This one takes time to position, hard press, ...",0.541667
...,...,...,...
1069,4,A brighter more vivid screen as my tab S3 wou...,0.716667
1070,5,"Ok, so I think thst this is a great phone, and...",0.750000
1071,5,The camera on the s10e has some new cool fea...,0.534848
1072,5,"I really enjoy the ultra-wide lens camera, t...",0.633333


In [4]:
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer() 

def preprocess(sentence):
    sentence=str(sentence)
    sentence = sentence.lower()
    sentence=sentence.replace('{html}',"") 
    cleanr = re.compile('<.*?>')
    cleantext = re.sub(cleanr, '', sentence)
    rem_url=re.sub(r'http\S+', '',cleantext)
    rem_num = re.sub('[0-9]+', '', rem_url)
    tokenizer = RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(rem_num)  
    filtered_words = [w for w in tokens if len(w) > 2 if not w in stopwords.words('english')]
    stem_words=[stemmer.stem(w) for w in filtered_words]
    lemma_words=[lemmatizer.lemmatize(w) for w in stem_words]
    return " ".join(filtered_words)


S10rev['cleanContent']=S10rev['content'].map(lambda s:preprocess(s)) 
#S10rev.drop('content', axis=1, inplace=True)
XRrev['cleanContent']=XRrev['content'].map(lambda s:preprocess(s)) 
#XRrev.drop('content', axis=1, inplace=True)

In [5]:
S10rev

Unnamed: 0,rating,content,subjectivity,cleanContent
0,2,The phone is great but when I got my package t...,0.750000,phone great got package protection phone
1,2,Terrible customer care,1.000000,terrible customer care
2,1,"Calls drop, doesn’t pick up my data, constan...",0.666667,calls drop pick data constantly reboot phone h...
3,5,Awesome phone,1.000000,awesome phone
4,1,"This one takes time to position, hard press, ...",0.541667,one takes time position hard press sec read fi...
...,...,...,...,...
1069,4,A brighter more vivid screen as my tab S3 wou...,0.716667,brighter vivid screen tab would also welcome
1070,5,"Ok, so I think thst this is a great phone, and...",0.750000,think thst great phone everyone experience uni...
1071,5,The camera on the s10e has some new cool fea...,0.534848,camera new cool features image quality bit better
1072,5,"I really enjoy the ultra-wide lens camera, t...",0.633333,really enjoy ultra wide lens camera something ...


In [6]:
search_values = ['design','camera','display','battery','fingerprint','finger print','processor','charging','fast charging','face recognition','recognition','face','storage','performance']

S10rev_useful = S10rev[S10rev.content.str.contains('|'.join(search_values ))]
XRrev_useful = XRrev[XRrev.content.str.contains('|'.join(search_values ))]


In [8]:
def pos_tag(sentences):
    wordsList = nltk.word_tokenize(sentences) 
    tagged = nltk.pos_tag(wordsList)   
    return tagged
    
S10rev_useful['tagged']=S10rev_useful['cleanContent'].map(lambda s:pos_tag(s))
XRrev_useful['tagged']=XRrev_useful['cleanContent'].map(lambda s:pos_tag(s))

In [10]:
pos_tag(XRrev_useful['cleanContent'].iloc[3])

[('great', 'JJ'), ('camera', 'NN')]

In [11]:
XRrev_useful = XRrev_useful[['content','cleanContent','tagged']]
S10rev_useful = S10rev_useful[['content','cleanContent','tagged']]

In [12]:
XRrev_useful = XRrev_useful.reset_index(drop=True)
S10rev_useful = S10rev_useful.reset_index(drop=True)

In [13]:
S10data_dict = S10rev_useful['tagged'].to_dict()
S10data_dict

{0: [('far', 'RB'),
  ('experienced', 'JJ'),
  ('glitches', 'NNS'),
  ('continues', 'VBZ'),
  ('run', 'VBP'),
  ('apps', 'RB'),
  ('quickly', 'RB'),
  ('finger', 'RBR'),
  ('print', 'JJ'),
  ('scanner', 'NN'),
  ('works', 'VBZ'),
  ('great', 'JJ'),
  ('gotten', 'JJ'),
  ('hot', 'JJ'),
  ('fast', 'NN'),
  ('charging', 'VBG'),
  ('included', 'VBD'),
  ('fast', 'JJ'),
  ('wall', 'NN'),
  ('charger', 'NN'),
  ('yootech', 'NN'),
  ('wireless', 'NN'),
  ('charger', 'NN'),
  ('stand', 'VBP'),
  ('issue', 'NN'),
  ('turning', 'VBG'),
  ('pocket', 'NN'),
  ('like', 'IN'),
  ('previous', 'JJ'),
  ('phone', 'NN'),
  ('used', 'VBN')],
 1: [('fingerprint', 'NN'),
  ('scanner', 'NN'),
  ('works', 'VBZ'),
  ('great', 'JJ')],
 2: [('yeah', 'NN'),
  ('sometimes', 'RB'),
  ('try', 'VB'),
  ('fingerprint', 'NN'),
  ('twice', 'RB'),
  ('honestly', 'RB'),
  ('bother', 'JJR')],
 3: [('camera', 'NN'), ('superb', 'NN')],
 4: [('face', 'NN'),
  ('recognition', 'NN'),
  ('unlock', 'NN'),
  ('works', 'VBZ'),
  (

In [14]:
#For S10

prevWordS10=''
prevTagS10=''
currWordS10=''
aspectListS10=[]
outputDictS10={}
#Extracting Aspects
for key,value in S10data_dict.items():
    for word,tag in value:
        if(tag=='NN' or tag=='NNP'):
            if(prevTagS10=='NN' or prevTagS10=='NNP'):
                currWordS10= prevWordS10 + ' ' + word
            else:
                aspectListS10.append(prevWordS10.upper())
                currWordS10= word
        prevWordS10=currWordS10
        prevTagS10=tag
#Eliminating aspect which has 1 or less count
for aspect in aspectListS10:
        if(aspectListS10.count(aspect)>1):
                if(outputDictS10.keys()!=aspect):
                        outputDictS10[aspect]=aspectListS10.count(aspect)
outputAspectS10=sorted(outputDictS10.items(), key=lambda x: x[1],reverse = True)

In [15]:
outputAspectS10

[('PHONE', 27),
 ('CAMERA', 23),
 ('BATTERY LIFE', 19),
 ('SCREEN', 9),
 ('BATTERY', 6),
 ('USE', 6),
 ('LIFE', 6),
 ('TIME', 5),
 ('FINGERPRINT SCANNER', 4),
 ('FINGERPRINT', 4),
 ('FAST', 3),
 ('ISSUE', 3),
 ('FACE RECOGNITION', 3),
 ('FINGERPRINT SENSOR', 3),
 ('CASE', 3),
 ('PROCESSOR', 3),
 ('STORAGE', 3),
 ('SUPPORT', 3),
 ('SAMSUNG', 3),
 ('FINGERPRINT READER', 3),
 ('DESIGN', 3),
 ('SIDE', 2),
 ('GET', 2),
 ('DAY', 2),
 ('POWER', 2),
 ('COOL', 2),
 ('GALAXY', 2),
 ('PURCHASE', 2),
 ('THING', 2),
 ('EVERYTHING', 2),
 ('FINGER PRINT READER', 2),
 ('DISPLAY', 2),
 ('USE CAR', 2),
 ('DISPLAY NAVIGATE', 2),
 ('APP SYGIC CAR', 2),
 ('NAVIGATION', 2),
 ('MEMBER CONSORTIUM', 2),
 ('MIRRORLINK', 2),
 ('PHONE MOBILE', 2),
 ('SECURE', 2),
 ('WORTHLESS', 2),
 ('IMPROVEMENT', 2),
 ('CHARGE', 2),
 ('CAMERA EXCELLENT', 2),
 ('SCREEN PROTECTOR', 2),
 ('QUALITY', 2),
 ('PHONE CAMERA', 2)]

In [16]:
#For S10
new=[]
for x in outputAspectS10:
    if x[0] == 'PHONE' or x[0] == 'CAMERA' or x[0] == 'BATTERY LIFE' or x[0] == 'SCREEN' or x[0] == 'BATTERY' or x[0] == 'FINGERPRINT SCANNER' or x[0] == 'FINGERPRINT' or x[0] == 'FINGERPRINT READER' or x[0] == 'FACE RECOGNITION' or x[0] == 'FINGERPRINT SENSOR' or x[0] == 'PROCESSOR' or x[0] == 'STORAGE' or x[0] == 'DESIGN' or x[0] == 'FINGER PRINT READER' or x[0] == 'DISPLAY' or x[0] == 'SECURE' or x[0] == 'PHONE CAMERA' or x[0] == 'SCREEN PROTECTOR':
        new.append(x)

In [17]:
new

[('PHONE', 27),
 ('CAMERA', 23),
 ('BATTERY LIFE', 19),
 ('SCREEN', 9),
 ('BATTERY', 6),
 ('FINGERPRINT SCANNER', 4),
 ('FINGERPRINT', 4),
 ('FACE RECOGNITION', 3),
 ('FINGERPRINT SENSOR', 3),
 ('PROCESSOR', 3),
 ('STORAGE', 3),
 ('FINGERPRINT READER', 3),
 ('DESIGN', 3),
 ('FINGER PRINT READER', 2),
 ('DISPLAY', 2),
 ('SECURE', 2),
 ('SCREEN PROTECTOR', 2),
 ('PHONE CAMERA', 2)]

In [18]:
#For S10

def orientation(inputWord):
    wordSynset=wordnet.synsets(inputWord)
    if(len(wordSynset) != 0):
        word=wordSynset[0].name()
        orientation=sentiwordnet.senti_synset(word)
        if(orientation.pos_score()>orientation.neg_score()):
            return True
        elif(orientation.pos_score()<orientation.neg_score()):
            return False

outputAspectOpinionTuplesS10={}
orientationCacheS10={}
negativeWordSet = {"don't","never", "nothing", "nowhere", "noone", "none", "not",
              "hasn't","hadn't","can't","couldn't","shouldn't","won't",
              "wouldn't","don't","doesn't","didn't","isn't","aren't","ain't"}
for aspect,no in new:
    aspectTokens= word_tokenize(aspect)
    #print(aspectTokens)
    count=0
    for key,value in S10data_dict.items():
        condition=True
        isNegativeSen=False
        for subWord in aspectTokens:
            #print(subWord)
            if(subWord in str(value).upper()):
                condition = condition and True
            else:
                condition = condition and False
        if(condition):
            for negWord in negativeWordSet:
                if(not isNegativeSen):#once senetence is negative no need to check this condition again and again
                    if negWord.upper() in str(value).upper():
                        isNegativeSen=isNegativeSen or True
            outputAspectOpinionTuplesS10.setdefault(aspect,[0,0,0])
            for word,tag in value:
                 if(tag=='JJ' or tag=='JJR' or tag=='JJS'or tag== 'RB' or tag== 'RBR'or tag== 'RBS'):
                    #print(tag)   
                    count+=1
                    if(word not in orientationCacheS10):
                        orien=orientation(word)
                        orientationCacheS10[word]=orien
                    else:
                        orien=orientationCacheS10[word]
                    if(isNegativeSen and orien is not None):
                        orien= not orien
                    if(orien==True):
                        outputAspectOpinionTuplesS10[aspect][0]+=1
                    elif(orien==False):
                        outputAspectOpinionTuplesS10[aspect][1]+=1
                    elif(orien is None):
                        outputAspectOpinionTuplesS10[aspect][2]+=1

    if(count>0):
        #print(count)
        outputAspectOpinionTuplesS10[aspect][0]=round((outputAspectOpinionTuplesS10[aspect][0]/count)*100,2)
        outputAspectOpinionTuplesS10[aspect][1]=round((outputAspectOpinionTuplesS10[aspect][1]/count)*100,2)
        outputAspectOpinionTuplesS10[aspect][2]=round((outputAspectOpinionTuplesS10[aspect][2]/count)*100,2)

S10DF = pd.DataFrame(outputAspectOpinionTuplesS10).T
S10DF.columns = ['Percentage Positive', 'Percentage Negative', 'Percentage Neutral']
S10DF.iloc[:,0:2]
        


Unnamed: 0,Percentage Positive,Percentage Negative
PHONE,31.88,10.48
CAMERA,33.76,11.39
BATTERY LIFE,37.58,5.37
SCREEN,27.68,11.86
BATTERY,37.43,6.7
FINGERPRINT SCANNER,41.18,20.59
FINGERPRINT,26.42,17.61
FACE RECOGNITION,23.53,11.76
FINGERPRINT SENSOR,16.95,15.25
PROCESSOR,16.67,12.5


In [19]:
XRdata_dict = XRrev_useful['tagged'].to_dict()

In [20]:
#For XR
prevWordXR=''
prevTagXR=''
currWordXR=''
aspectListXR=[]
outputDictXR={}
#Extracting Aspects
for key,value in XRdata_dict.items():
    for word,tag in value:
        if(tag=='NN' or tag=='NNP'):
            if(prevTagXR=='NN' or prevTagXR=='NNP'):
                currWordXR= prevWordXR + ' ' + word
            else:
                aspectListXR.append(prevWordXR.upper())
                currWordXR= word
        prevWordXR=currWordXR
        prevTagXR=tag
#Eliminating aspect which has 1 or less count
for aspect in aspectListXR:
        if(aspectListXR.count(aspect)>1):
                if(outputDictXR.keys()!=aspect):
                        outputDictXR[aspect]=aspectListXR.count(aspect)
outputAspectXR=sorted(outputDictXR.items(), key=lambda x: x[1],reverse = True)

In [21]:
outputAspectXR

[('PHONE', 39),
 ('BATTERY LIFE', 39),
 ('CAMERA', 34),
 ('IPHONE', 27),
 ('PERFORMANCE', 25),
 ('BATTERY', 21),
 ('DAY', 16),
 ('BATTERY BACKUP', 13),
 ('LIFE', 13),
 ('DISPLAY', 12),
 ('PRODUCT', 11),
 ('QUALITY', 10),
 ('CAMERA QUALITY', 9),
 ('FACE RECOGNITION', 8),
 ('APPLE', 7),
 ('TIME', 6),
 ('CHARGE', 5),
 ('PROCESSOR', 5),
 ('INTERFACE', 5),
 ('FACE DETECTION', 5),
 ('SIM', 4),
 ('CHARGER', 4),
 ('CAMERA BATTERY LIFE', 4),
 ('PORTRAIT MODE', 4),
 ('FACE', 4),
 ('FACE UNLOCK', 3),
 ('ISSUE', 3),
 ('BIT', 3),
 ('USE', 3),
 ('PROBLEM', 3),
 ('PRICE', 3),
 ('THING', 3),
 ('SCREEN', 3),
 ('PORTRAIT', 3),
 ('EXPERIENCE', 3),
 ('IPHONE CAMERA', 3),
 ('SIDE', 2),
 ('ZOOM', 2),
 ('USE CAMERA', 2),
 ('FRONT CAMERA', 2),
 ('ADAPTER', 2),
 ('BATTERY PERFORMANCE', 2),
 ('CAMERA PERFORMANCE', 2),
 ('QUALITY CAMERA', 2),
 ('JOB', 2),
 ('FINGERPRINT READER', 2),
 ('YOUTUBE', 2),
 ('THINK', 2),
 ('CHIP', 2),
 ('SYSTEM', 2),
 ('USAGE', 2),
 ('SPEAKER', 2),
 ('PHONE CAMERA', 2),
 ('CON', 2),
 (

In [22]:
#For XR
newXR=[]
for x in outputAspectXR:
    if x[0] == 'PHONE' or x[0] == 'CAMERA' or x[0] == 'BATTERY LIFE' or x[0] == 'BATTERY BACKUP' or x[0] == 'BATTERY' or x[0] == 'DISPLAY' or x[0] == 'CAMERA QUALITY' or x[0] == 'PERFORMANCE' or x[0] == 'FACE RECOGNITION' or x[0] == 'PROCESSOR' or x[0] == 'INTERFACE' or x[0] == 'FACE DETECTION' or x[0] == 'FACE UNLOCK' or x[0] == 'SCREEN' or x[0] == 'DISPLAY' or x[0] == 'FRONT CAMERA' or x[0] == 'BATTERY PERFORMANCE' or x[0] == 'CHIP' or x[0] == 'SPEAKER' or x[0] == 'CAMERA PICTURE QUALITY' or x[0] == 'PRICE' or x[0] == 'ADAPTER' or x[0] == 'ANDROID'or x[0] == 'CHIP'or x[0] == 'COLOR':
        newXR.append(x)

In [23]:
newXR

[('PHONE', 39),
 ('BATTERY LIFE', 39),
 ('CAMERA', 34),
 ('PERFORMANCE', 25),
 ('BATTERY', 21),
 ('BATTERY BACKUP', 13),
 ('DISPLAY', 12),
 ('CAMERA QUALITY', 9),
 ('FACE RECOGNITION', 8),
 ('PROCESSOR', 5),
 ('INTERFACE', 5),
 ('FACE DETECTION', 5),
 ('FACE UNLOCK', 3),
 ('PRICE', 3),
 ('SCREEN', 3),
 ('FRONT CAMERA', 2),
 ('ADAPTER', 2),
 ('BATTERY PERFORMANCE', 2),
 ('CHIP', 2),
 ('SPEAKER', 2),
 ('CAMERA PICTURE QUALITY', 2),
 ('ANDROID', 2)]

In [24]:
#For XR

outputAspectOpinionTuplesXR={}
orientationCacheXR={}
negativeWordSet = {"don't","never", "nothing", "nowhere", "noone", "none", "not",
              "hasn't","hadn't","can't","couldn't","shouldn't","won't",
              "wouldn't","don't","doesn't","didn't","isn't","aren't","ain't"}

for aspect,no in newXR:
    aspectTokens= word_tokenize(aspect)
    count=0
    for key,value in XRdata_dict.items():
        condition=True
        isNegativeSen=False
        for subWord in aspectTokens:
            if(subWord in str(value).upper()):
                condition = condition and True
            else:
                condition = condition and False
        if(condition):
            for negWord in negativeWordSet:
                if(not isNegativeSen):#once senetence is negative no need to check this condition again and again
                    if negWord.upper() in str(value).upper():
                        isNegativeSen=isNegativeSen or True
            outputAspectOpinionTuplesXR.setdefault(aspect,[0,0,0])
            for word,tag in value:
                 if(tag=='JJ' or tag=='JJR' or tag=='JJS'or tag== 'RB' or tag== 'RBR'or tag== 'RBS'):
                    count+=1
                    if(word not in orientationCacheXR):
                        orien=orientation(word)
                        orientationCacheXR[word]=orien
                    else:
                        orien=orientationCacheXR[word]
                    if(isNegativeSen and orien is not None):
                        orien= not orien
                    if(orien==True):
                        outputAspectOpinionTuplesXR[aspect][0]+=1
                    elif(orien==False):
                        outputAspectOpinionTuplesXR[aspect][1]+=1
                    elif(orien is None):
                        outputAspectOpinionTuplesXR[aspect][2]+=1
    if(count>0):
        outputAspectOpinionTuplesXR[aspect][0]=round((outputAspectOpinionTuplesXR[aspect][0]/count)*100,2)
        outputAspectOpinionTuplesXR[aspect][1]=round((outputAspectOpinionTuplesXR[aspect][1]/count)*100,2)
        outputAspectOpinionTuplesXR[aspect][2]=round((outputAspectOpinionTuplesXR[aspect][2]/count)*100,2)

XRDF = pd.DataFrame(outputAspectOpinionTuplesXR).T
XRDF.columns = ['Percentage Positive', 'Percentage Negative', 'Percentage Neutral']
XRDF.iloc[:,0:2]
      

Unnamed: 0,Percentage Positive,Percentage Negative
PHONE,31.33,11.66
BATTERY LIFE,37.0,7.95
CAMERA,37.4,9.54
PERFORMANCE,36.26,11.7
BATTERY,34.58,8.47
BATTERY BACKUP,40.0,10.0
DISPLAY,33.08,10.77
CAMERA QUALITY,46.27,6.72
FACE RECOGNITION,36.11,11.11
PROCESSOR,30.77,2.56
