In [1]:
import spacy
import pandas as pd
import numpy as np

In [2]:
text = "The battery life on this phone is really good."

In [3]:
nlp = spacy.load("en_core_web_sm")

In [4]:
doc = nlp(text)

In [5]:
# Print every token of `doc` together with the part-of-speech tag stored in its `pos_` attribute.
for word in doc:
    print(f"Text:{word.text}, POS:{word.pos_}")

Text:The, POS:DET
Text:battery, POS:NOUN
Text:life, POS:NOUN
Text:on, POS:ADP
Text:this, POS:DET
Text:phone, POS:NOUN
Text:is, POS:AUX
Text:really, POS:ADV
Text:good, POS:ADJ
Text:., POS:PUNCT


In [17]:
### Extract all the nouns used in a corpus of review comments
# Read the entire review file (decoded as UTF-8) into one string.
# The path is relative, so it resolves against the notebook's working directory.
with open("../codes/data/samsung.txt","r",encoding='utf-8') as r:
    review_text = r.read()

In [18]:
review_text[0:300]

"I feel so LUCKY to have found this used (phone to us & not used hard at all), phone on line from someone who upgraded and sold this one. My Son liked his old one that finally fell apart after 2.5+ years and didn't want an upgrade!! Thank you Seller, we really appreciate it & your honesty re: said us"

In [19]:
reviews = review_text.split("\n")

In [20]:
reviews[0]

"I feel so LUCKY to have found this used (phone to us & not used hard at all), phone on line from someone who upgraded and sold this one. My Son liked his old one that finally fell apart after 2.5+ years and didn't want an upgrade!! Thank you Seller, we really appreciate it & your honesty re: said used phone.I recommend this seller very highly & would but from them again!!"

In [21]:
len(reviews)

46355

In [22]:
def print_pos(doc):
    """Print one ``Text:<token>, POS:<tag>`` line for every item of ``doc``.

    ``doc`` may be any iterable whose items expose ``text`` and ``pos_``
    attributes (in this notebook, a spaCy ``Doc``). Returns ``None``.
    """
    # Build the lines lazily so each one is formatted right before it is printed.
    lines = (f"Text:{token.text}, POS:{token.pos_}" for token in doc)
    for line in lines:
        print(line)

In [23]:
doc = nlp(reviews[0])

In [29]:
from tqdm import tqdm
# Collect the lemma of every token tagged NOUN in the first 10,000 reviews.
# Each review is passed to `nlp` individually, one call per review.
nouns = []
for review in tqdm(reviews[0:10000]):
    doc = nlp(review)
    for word in doc:
        if word.pos_=="NOUN":
            nouns.append(word.lemma_)

100%|████████████████████████████████| 10000/10000 [01:15<00:00, 132.99it/s]


## Reasons we are slow
- We are processing the reviews one at a time on a single thread; if we process them in parallel (multiple processes), the job will finish faster
- We are also running predictions from models that we don't need (NER, Dependency Parse)

In [30]:
pd.Series(nouns).value_counts().head(10)

phone      10774
price       1174
battery      958
card         952
time         926
screen       903
camera       845
app          836
product      747
problem      671
dtype: int64

In [31]:
### Disable certain models in spacy
nlp = spacy.load("en_core_web_sm",disable = ['ner','parser'])

In [32]:
# Same noun extraction over the first 10,000 reviews, still one `nlp` call per review.
# Intended to run after `nlp` was reloaded with the 'ner' and 'parser' components disabled.
nouns = []
for review in tqdm(reviews[0:10000]):
    doc = nlp(review)
    for word in doc:
        if word.pos_=="NOUN":
            nouns.append(word.lemma_)

100%|████████████████████████████████| 10000/10000 [00:32<00:00, 310.15it/s]


In [37]:
# Stream the first 10,000 reviews through nlp.pipe instead of calling nlp once per review.
# tqdm cannot take a length from the nlp.pipe iterator, so `total` is passed explicitly.
# NOTE(review): n_process=-1 presumably means "use every available CPU core" — confirm in the spaCy docs.
# NOTE(review): batch_size=1 hands texts over one at a time; a larger batch may be faster — confirm by timing.
nouns = []
for doc in tqdm(nlp.pipe(reviews[0:10000],batch_size=1,n_process=-1),total=10000):
    for word in doc:
        if word.pos_=="NOUN":
            nouns.append(word.lemma_)

100%|████████████████████████████████| 10000/10000 [00:18<00:00, 551.05it/s]


In [38]:
# Noun extraction over the complete review list, streamed through nlp.pipe.
# `total=len(reviews)` gives tqdm the length it cannot read from the nlp.pipe iterator.
# NOTE(review): batch_size=1 hands texts over one at a time; a larger batch may be faster — confirm by timing.
nouns = []
for doc in tqdm(nlp.pipe(reviews,batch_size=1,n_process=-1),total=len(reviews)):
    for word in doc:
        if word.pos_=="NOUN":
            nouns.append(word.lemma_)

100%|████████████████████████████████| 46355/46355 [01:05<00:00, 705.03it/s]


In [40]:
pd.Series(nouns).value_counts().head(10)

phone      42945
battery     4261
product     3894
screen      3851
time        3817
card        3376
price       3142
problem     3137
camera      2918
app         2500
dtype: int64

In [41]:
### Now that we have the product features, we want to know in which
## context these features are being talked about, for example:
"The battery life is bad"
"The battery life is awesome"
"The screen resolution is good"
### Find the word before (prefix) and the word after (suffix) each keyword, and then find out
## which prefixes and suffixes are the most frequent.
# How will you find prefixes and suffixes? (Hint: read about regular expressions)

'The screen resolution is good'

In [46]:
"The battery life is awesome"
### Pattern: Prefix<space>keyword<space>Suffix
## \w+ matches one word (a run of letters, digits or underscores)
## \s  matches a single whitespace character (space, tab, newline, ...)
import re
# Raw string: the regex backslashes stay literal instead of being parsed as
# (invalid) string escape sequences, which newer Python versions warn about.
pattern_text = r"\w+\sbattery\s\w+"
pattern = re.compile(pattern_text)

In [47]:
s1 = "The battery life is awesome"

In [52]:
re.findall(pattern,s1)[0].split(" ")

['The', 'battery', 'life']

In [53]:
re.findall(pattern,s1)[0].split(" ")[0]

'The'

In [54]:
re.findall(pattern,s1)[0].split(" ")[-1]

'life'

In [56]:
type(reviews)

list

In [58]:
triplets = re.findall(pattern,review_text)

In [59]:
triplets[0:4]

['that battery life',
 'The battery was',
 'great battery life',
 'removable battery or']

In [62]:
### For all the triplets extract the prefix and suffix in separate lists
prefixes = []
suffixes = []
for triplet in triplets:
    # The pattern's \s also matches newlines and tabs, and review_text still
    # contains "\n" between reviews, so split on any whitespace: split(" ")
    # would leave pieces such as "battery\ngreat" glued together.
    parts = triplet.split()
    pre = parts[0]
    sufix = parts[-1]
    prefixes.append(pre.lower())
    suffixes.append(sufix.lower())

In [64]:
pd.Series(prefixes).value_counts().head()

the      1396
good      122
great      90
and        82
long       60
dtype: int64

In [65]:
pd.Series(suffixes).value_counts().head()

life     1052
is        210
and       146
lasts      83
was        66
dtype: int64

In [66]:
stopwords = ["a","about","above","after","again","against","ain","all","am","an","and","any","are","aren","aren't","as","at","be","because","been","before","being","below","between","both","but","by","can","couldn","couldn't","d","did","didn","didn't","do","does","doesn","doesn't","doing","don","don't","down","during","each","few","for","from","further","had","hadn","hadn't","has","hasn","hasn't","have","haven","haven't","having","he","her","here","hers","herself","him","himself","his","how","i","if","in","into","is","isn","isn't","it","it's","its","itself","just","ll","m","ma","me","mightn","mightn't","more","most","mustn","mustn't","my","myself","needn","needn't","no","nor","not","now","o","of","off","on","once","only","or","other","our","ours","ourselves","out","over","own","re","s","same","shan","shan't","she","she's","should","should've","shouldn","shouldn't","so","some","such","t","than","that","that'll","the","their","theirs","them","themselves","then","there","these","they","this","those","through","to","too","under","until","up","ve","very","was","wasn","wasn't","we","were","weren","weren't","what","when","where","which","while","who","whom","why","will","with","won","won't","wouldn","wouldn't","y","you","you'd","you'll","you're","you've","your","yours","yourself","yourselves","could","he'd","he'll","he's","here's","how's","i'd","i'll","i'm","i've","let's","ought","she'd","she'll","that's","there's","they'd","they'll","they're","they've","we'd","we'll","we're","we've","what's","when's","where's","who's","why's","would","able","abst","accordance","according","accordingly","across","act","actually","added","adj","affected","affecting","affects","afterwards","ah","almost","alone","along","already","also","although","always","among","amongst","announce","another","anybody","anyhow","anymore","anyone","anything","anyway","anyways","anywhere","apparently","approximately","arent","arise","around","aside","ask","asking","auth","available","away","awfully","b","back
","became","become","becomes","becoming","beforehand","begin","beginning","beginnings","begins","behind","believe","beside","besides","beyond","biol","brief","briefly","c","ca","came","cannot","can't","cause","causes","certain","certainly","co","com","come","comes","contain","containing","contains","couldnt","date","different","done","downwards","due","e","ed","edu","effect","eg","eight","eighty","either","else","elsewhere","end","ending","enough","especially","et","etc","even","ever","every","everybody","everyone","everything","everywhere","ex","except","f","far","ff","fifth","first","five","fix","followed","following","follows","former","formerly","forth","found","four","furthermore","g","gave","get","gets","getting","give","given","gives","giving","go","goes","gone","got","gotten","h","happens","hardly","hed","hence","hereafter","hereby","herein","heres","hereupon","hes","hi","hid","hither","home","howbeit","however","hundred","id","ie","im","immediate","immediately","importance","important","inc","indeed","index","information","instead","invention","inward","itd","it'll","j","k","keep","keeps","kept","kg","km","know","known","knows","l","largely","last","lately","later","latter","latterly","least","less","lest","let","lets","like","liked","likely","line","little","'ll","look","looking","looks","ltd","made","mainly","make","makes","many","may","maybe","mean","means","meantime","meanwhile","merely","mg","might","million","miss","ml","moreover","mostly","mr","mrs","much","mug","must","n","na","name","namely","nay","nd","near","nearly","necessarily","necessary","need","needs","neither","never","nevertheless","new","next","nine","ninety","nobody","non","none","nonetheless","noone","normally","nos","noted","nothing","nowhere","obtain","obtained","obviously","often","oh","ok","okay","old","omitted","one","ones","onto","ord","others","otherwise","outside","overall","owing","p","page","pages","part","particular","particularly","past","per","perhaps","placed","please","pl
us","poorly","possible","possibly","potentially","pp","predominantly","present","previously","primarily","probably","promptly","proud","provides","put","q","que","quickly","quite","qv","r","ran","rather","rd","readily","really","recent","recently","ref","refs","regarding","regardless","regards","related","relatively","research","respectively","resulted","resulting","results","right","run","said","saw","say","saying","says","sec","section","see","seeing","seem","seemed","seeming","seems","seen","self","selves","sent","seven","several","shall","shed","shes","show","showed","shown","showns","shows","significant","significantly","similar","similarly","since","six","slightly","somebody","somehow","someone","somethan","something","sometime","sometimes","somewhat","somewhere","soon","sorry","specifically","specified","specify","specifying","still","stop","strongly","sub","substantially","successfully","sufficiently","suggest","sup","sure","take","taken","taking","tell","tends","th","thank","thanks","thanx","thats","that've","thence","thereafter","thereby","thered","therefore","therein","there'll","thereof","therere","theres","thereto","thereupon","there've","theyd","theyre","think","thou","though","thoughh","thousand","throug","throughout","thru","thus","til","tip","together","took","toward","towards","tried","tries","truly","try","trying","ts","twice","two","u","un","unfortunately","unless","unlike","unlikely","unto","upon","ups","us","use","used","useful","usefully","usefulness","uses","using","usually","v","value","various","'ve","via","viz","vol","vols","vs","w","want","wants","wasnt","way","wed","welcome","went","werent","whatever","what'll","whats","whence","whenever","whereafter","whereas","whereby","wherein","wheres","whereupon","wherever","whether","whim","whither","whod","whoever","whole","who'll","whomever","whos","whose","widely","willing","wish","within","without","wont","words","world","wouldnt","www","x","yes","yet","youd","youre","z","zero","a's","ain't","a
llow","allows","apart","appear","appreciate","appropriate","associated","best","better","c'mon","c's","cant","changes","clearly","concerning","consequently","consider","considering","corresponding","course","currently","definitely","described","despite","entirely","exactly","example","going","greetings","hello","help","hopefully","ignored","inasmuch","indicate","indicated","indicates","inner","insofar","it'd","keep","keeps","novel","presumably","reasonably","second","secondly","sensible","serious","seriously","sure","t's","third","thorough","thoroughly","three","well","wonder"]

In [67]:
# Keep only the prefixes / suffixes that are not stopwords.
prefixes = list(filter(lambda token: token not in stopwords, prefixes))
suffixes = list(filter(lambda token: token not in stopwords, suffixes))

In [73]:
pd.Series(prefixes).value_counts().head(5).index.tolist()

['good', 'great', 'long', 'removable', 'replaceable']

In [69]:
pd.Series(suffixes).value_counts().head(5)

life      1052
lasts       83
runs        31
drains      28
charge      21
dtype: int64

In [71]:
keyword = "screen"
# Raw f-string: the keyword is still interpolated, but the regex backslashes are
# kept literal instead of being parsed as (invalid) string escape sequences.
rf"\w+\s{keyword}\s\w+"

'\\w+\\sscreen\\s\\w+'

In [84]:
def get_prefix_suffix(keyword,review_text,stopwords,k=5):
    """Return the most frequent non-stopword neighbours of ``keyword``.

    Finds every ``<word> <keyword> <word>`` occurrence in ``review_text`` and
    ranks the word immediately before the keyword (prefix) and the word
    immediately after it (suffix) by frequency.

    Parameters
    ----------
    keyword : str
        Word or phrase to look for. It is matched literally (regex
        metacharacters are escaped) and case-sensitively.
    review_text : str
        Text to search; newlines are treated as spaces.
    stopwords : iterable of str
        Words to exclude from both rankings. Neighbours are lower-cased
        before the comparison.
    k : int, default 5
        Maximum number of prefixes and of suffixes to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``prefix``, ``kw``, ``suffix``. Row ``i`` holds the i-th most
        frequent prefix and the i-th most frequent suffix; the two rankings
        are independent of each other. If one ranking is shorter than the
        other, its missing cells are NaN. No match at all gives an empty frame.
    """
    review_text = review_text.replace("\n"," ")
    # Set membership is O(1); the caller may pass a long list.
    stopword_set = set(stopwords)
    # Capture the neighbours directly rather than splitting the match on " ":
    # \s can also match a tab, which split(" ") would not separate.
    pattern = re.compile(rf'(\w+)\s{re.escape(keyword)}\s(\w+)')
    neighbours = [(before.lower(), after.lower())
                  for before, after in pattern.findall(review_text)]
    prefixes = [before for before, _ in neighbours if before not in stopword_set]
    suffixes = [after for _, after in neighbours if after not in stopword_set]
    p = pd.Series(prefixes, dtype=object).value_counts().head(k).index.tolist()
    s = pd.Series(suffixes, dtype=object).value_counts().head(k).index.tolist()
    # Build from Series so rankings of different lengths are aligned and padded
    # with NaN; a dict of unequal-length lists makes pandas raise ValueError.
    r = pd.DataFrame({'prefix': pd.Series(p, dtype=object),
                      'suffix': pd.Series(s, dtype=object)})
    r['kw'] = keyword
    return r[['prefix','kw','suffix']]
    
    

In [81]:
get_prefix_suffix("screen",review_text,stopwords,k=5)

Unnamed: 0,prefix,kw,suffix
0,touch,screen,protector
1,big,screen,size
2,great,screen,resolution
3,large,screen,protectors
4,bigger,screen,quality


In [82]:
get_prefix_suffix("battery",review_text,stopwords,k=5)

Unnamed: 0,prefix,kw,suffix
0,good,battery,life
1,great,battery,lasts
2,long,battery,runs
3,removable,battery,drains
4,replaceable,battery,charge


In [85]:
get_prefix_suffix("product",review_text,stopwords,k=5)

Unnamed: 0,prefix,kw,suffix
0,good,product,arrived
1,excellent,product,excellent
2,great,product,good
3,excelent,product,great
4,quality,product,works


In [86]:
def get_nouns(reviews, k=5):
    """Return the ``k`` most frequent noun lemmas found in ``reviews``.

    Uses the notebook-level ``nlp`` pipeline and ``tqdm`` progress bar.

    Parameters
    ----------
    reviews : iterable of str
        Texts to analyse.
    k : int, default 5
        Number of lemmas to return.

    Returns
    -------
    list of str
        Lemmas of the tokens tagged ``NOUN``, most frequent first; empty when
        no noun is found.
    """
    # The length only feeds the progress bar; inputs without len() get an open-ended bar.
    total = len(reviews) if hasattr(reviews, "__len__") else None
    nouns = []
    # nlp.pipe processes the texts as a stream instead of one nlp() call per review.
    for doc in tqdm(nlp.pipe(reviews), total=total):
        for word in doc:
            if word.pos_=="NOUN":
                nouns.append(word.lemma_)
    return pd.Series(nouns, dtype=object).value_counts().head(k).index.tolist()

In [88]:
R = ["Sold kidney bought this, now not feeling well but the number of days I am alive with one kidney will enjoy using this phone. Guys be careful if you rich it’s ok else sell something else but not kidney it hurts",
    '''Worst battery performance.
Iphone 11 is far better den this..
In 4 hour battery will come down from 100 to 15 percent.
Please dont buy this product at this price.''',
    '''Another beauty from Apple. I was upgraded from iPhone X and it was great experience with all the latest specs and flawlessly smooth. Thanks to all new A14 Bionic chip.
Build quality was great as always from Apple.
Phone feels light on hands due to aluminium body. I chose Blue colour because wanted to go for different colour than routine Black and white iPhones so initially I was bit skeptical but after seeing the phone in person I’m happy with the blue colour “ not bad at all and not too flashy”
Grip and feel perfect. Phone comes with iOS 14.1. Camera quality was excellent and sufficient as a normal user.
If you are from iOS ecosystem nothing will go wrong here.
There is nothing to question about the quality and interface from Apple ( it’s my personal opinion as a long time apple user)
As you all know iPhone 12 won’t comes with earphones and charging brick( adapter)
I bought separately 20w apple adapter from new Apple India online store for Rs.1900
I don’t judge here apple because of accessories removal and the price tag. I hope apple can justify this.
I only hope and wanted to see so badly Made in India IPhones so that we all can enjoy lower priced ( at least a better priced) iPhones in the future.
A big thanks to Appario Retail Pvt. Ltd and Amazon for delivering genuine iPhone as always in perfect condition. Delivered way before the promised date.
Update: (01/12/2020) After testing 25 days especially on battery life of this iPhone 12, results were amazing and I’m completely satisfied with the battery performance. On Moderate gaming, Moderate Video Content, heavy on calls and heavy on browsing this iPhone 12 battery life is sufficient for your regular Day.
My conclusion is definitely a better and improved battery performer than the previous iPhone model.
Thank you Apple India
Thank you Appario Retail Pvt. Ltd.
Thank you Amazon India.
( Note: charger brick was shown in my video was bought myself from Apple India online store because it won’t comes with the phone)''',
    '''This was my first switch to an ios device after using an android untill now. And I am not disappointed. The build quality, the responsive touch, the display, the camera, the battery life are just exceptional.
Been using for about 10 days now, and consistently giving around 7h of screen on time.
The camera is just way too good. Espeecially the night mode impressed me a lot.
Only issue when switching from Android to iPhone I encountered was to be not able to restore my whatsapp chats. There are no official/free ways for doing this.
The phone is simple amazing.''',
    '''xtremely disappointed with this phone. It’s a seemingly average phone which is over priced by 4 to 5 times at least. This phone is a joke in comparison with any Samsung flagship phone. I am shocked that so many fools like me are falling for apple’s trickery and gimmickry.

The battery doesn’t last half a day. Screen size is average. Thickness of this phone will constantly remind you that this phone belongs to another era. 64gb space won’t be enough for anyone.

And then there are other apple quirks - won’t come with a SD card, won’t come with a charging brick or a set of head phones. Your headphones with the regular audio pin won’t work with this one. Face ID won’t work with masks on. So your forced to remove your masks when you’re out and vulnerable. This phone won’t allow you set google maps as default. So you continue to suffer using their apple map which I bet even Tim Cook doesn’t use. You won’t be able to set google chrome as your default browser as iOS won’t let you.

Overall the phone is overpriced and overrated. Anybody with half the wit will figure that this phone is a joke and a mere marketing gimmick. An ordinary phone like this shouldn’t cost you more than 12 to 14K.

Apple is going down!

Amazon won’t take this phone back as they have a 10 day return policy. So I am pretty much stuck with this damn excuse for a phone till I get to upgrade to a Samsung phone.''',
    '''If you are planning to buy an iphone then please buy from stores only.
I got a defective product and when I complain about it to amazon customer care and requested for replacement they simply deny to replace it.
Don't waste your money.''',
    '''Hello there,
I am writing this review after 3-4 days of usage, Iphone 12 is a beast in terms of:-
Memory
Build quality
Camera
Processor
Display
Except battery there’s rumour going on that iPhone 12 battery is bad or worst, so i am here to clarify all your doubts,
iPhone battery will last easily a day but not more than that and here’s some condition that you should aware about
1. If you are using your phone on wifi than it will last a day easily without any problem.
2. If you are on 4g than it will not last a day for example, if you fully charged your phone at 8 am than it will last till 8 pm not more than that and this happen if you’re on 4g continuously otherwise it will last about 10 pm.
Here’s some tip you should do after buying iPhone 12, watch video on YouTube to know “How to save battery on iPhone 12” this will help you increase your battery life as well battery health.
For my experience, i think you should go for this device without any second thoughts & those who don’t have any tech knowledge and thinking of buying this then I recommend you to go for android because iOS has complicate settings than android...you will regret after buying it.

Got original product Btw!!!!!''']

In [None]:
# Most frequent noun lemmas across the sample reviews collected in R.
get_nouns(R)