In [0]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import re
import nltk
from nltk.stem import WordNetLemmatizer 
from nltk.tokenize import word_tokenize 

In [0]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [3]:
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

## Load Data

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
data = pd.read_csv('/content/drive/My Drive/Algoscale/NLP/Data/amazon_phone_dataset.csv')

In [6]:
data.head()

Unnamed: 0,Product_name,by_info,Product_url,Product_img,Product_price,rating,total_review,ans_ask,prod_des,feature,cust_review
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",Samsung,https://www.amazon.in/Samsung-Galaxy-Ocean-Blu...,https://images-na.ssl-images-amazon.com/images...,"₹ 7,990.00",4.0 out of 5 stars,"7,353 customer reviews",1000+ answered questions,The Samsung Galaxy M10 is especially created f...,13MP+5MP ultra-wide angle dual camera | 5MP f2...,"Well, I was a bit sceptical before buying this..."
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)",Mi,https://www.amazon.in/Redmi-Pro-Black-64GB-Sto...,https://images-na.ssl-images-amazon.com/images...,,4.1 out of 5 stars,"32,250 customer reviews",1000+ answered questions,"Qualcomm Snapdragon 625, 2.0 GHz processor wit...",12MP+5MP dual rear camera | 5MP front facing c...,"Display quality is top notch, overall the qual..."
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...",Coolpad,https://www.amazon.in/Coolpad-Cool-Plus-Ocean-...,https://images-na.ssl-images-amazon.com/images...,"₹ 5,999.00",3.1 out of 5 stars,76 customer reviews,69 answered questions,"Coolpad Cool 3 Plus-Designed for all, 5.71'' H...","13MP primary camera with bokeh mode, timelapse...",Low bagget high kwality***It's not good Phone ...
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)",Mi,https://www.amazon.in/Redmi-Pro-Black-32GB-Sto...,https://images-na.ssl-images-amazon.com/images...,,4.1 out of 5 stars,"32,250 customer reviews",1000+ answered questions,"Qualcomm Snapdragon 625, 2.0 GHz processor wit...",12MP+5MP dual rear camera | 5MP front facing c...,"Display quality is top notch, overall the qual..."
4,Nokia 105 (Black),Nokia,https://www.amazon.in/Nokia-105-Black/dp/B0745...,https://images-na.ssl-images-amazon.com/images...,,4.1 out of 5 stars,"6,474 customer reviews",1000+ answered questions,The design Shaped for your palm Featuring a cu...,4.572 centimeters (1.8-inch) display with 240 ...,Using the mobile phone for last 3 months. I am...


In [0]:
data = pd.DataFrame({'Product_name':data['Product_name'],'prod_des':data['prod_des']}).dropna().reset_index(drop = True)

In [8]:
data

Unnamed: 0,Product_name,prod_des
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit..."
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H..."
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit..."
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...
...,...,...
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see..."


#1. Search Appropriate Keywords
Search engine appropriate keywords can be:</br>
a. Specific : eg. Samsung Galaxy, Flip cover, Redmi note 3, etc. i.e. proper nouns</br>
b. General :Features such as: Smartphone, earphone, tangle free earphone (nouns other than proper)

Task: To extract search engine appropriate keywords for a given product description

## 1. POS tagging approach

In [0]:
def extract_noun(doc):
  return ([i for i in nlp(doc) if i.pos_=='NOUN'],[i for i in nlp(doc) if i.pos_=='PROPN'])

In [0]:
data['noun_keywords'] =  data['prod_des'].apply(extract_noun)

In [0]:
df_noun_out = pd.DataFrame(data = {'Product_name': data['Product_name'],'prod_des':data['prod_des'],'keywords':data['noun_keywords']})

In [14]:
df_noun_out.head()

Unnamed: 0,Product_name,prod_des,keywords
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...,"([millennials, edge, infinity, V, display, ang..."
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","([GHz, processor, nm, architecture, battery, c..."
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H...","([HD, dewdrop, display, GB, ROM, upto, sensor,..."
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","([GHz, processor, nm, architecture, battery, c..."
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...,"([design, palm, body, island, layout, dialling..."


In [15]:
df_noun_out.tail()

Unnamed: 0,Product_name,prod_des,keywords
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...,"([], [TokyoTon, Mobile, Battery, Moto, M, XT16..."
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...,"([USB], [Fast, Charging, Nylon, Braided, LED, ..."
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...,"([Product, Metal, plastic, tools, scrapers, sc..."
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see...","([folio, complement, edge, calls, alarms, even..."
3413,TheGiftKart Full Body 3 in 1 Slim Fit 360 Degr...,"DESCRIPTION:br>Beautiful design, elegant appea...","([DESCRIPTION, design, appearance, protection,..."


In [0]:
df_noun_out.to_csv('df_pos_output.csv',index = False)

## 2. POS + TF-IDF approach

In [0]:
def tuple_to_list(t):
  l1,l2 = t
  l1.extend(l2)
  return [str(i) for i in l1]

In [0]:
data['keywords'] = df_noun_out['keywords'].apply(tuple_to_list)

In [19]:
data['keywords']

0       [millennials, edge, infinity, V, display, angl...
1       [GHz, processor, nm, architecture, battery, ca...
2       [HD, dewdrop, display, GB, ROM, upto, sensor, ...
3       [GHz, processor, nm, architecture, battery, ca...
4       [design, palm, body, island, layout, dialling,...
                              ...                        
3409    [TokyoTon, Mobile, Battery, Moto, M, XT1662, X...
3410    [USB, Fast, Charging, Nylon, Braided, LED, Cab...
3411    [Product, Metal, plastic, tools, scrapers, scr...
3412    [folio, complement, edge, calls, alarms, event...
3413    [DESCRIPTION, design, appearance, protection, ...
Name: keywords, Length: 3414, dtype: object

In [0]:
df = [' '.join(i) for i in data['keywords']] 

### Trial 1:

In [24]:
vectorizer1 = TfidfVectorizer(df,lowercase = True, analyzer='word', stop_words='english', min_df = 0.1,max_df = 0.9)
tfidfmat1 = vectorizer1.fit_transform(df)
print("TFIDF shape: ", tfidfmat1.shape)
print("Terms in TFIDF: ",vectorizer1.get_feature_names())

TFIDF shape:  (3414, 27)
Terms in TFIDF:  ['access', 'android', 'battery', 'buttons', 'camera', 'case', 'cover', 'design', 'device', 'display', 'experience', 'features', 'gb', 'material', 'mobile', 'music', 'phone', 'phones', 'power', 'product', 'protection', 'quality', 'scratches', 'screen', 'smartphone', 'technology', 'time']


### Trial 1.2 : Top 5

#### simple topn 

In [0]:
def topn(tfidfmat,n):
  lst = []
  lst = [tfidfmat.todense()[i].argsort()[:,-n:] for i in range(tfidfmat.shape[0])]
  return lst

In [0]:
def get_topn_multi_keywords(tfidfmat,n,terms):
  topn_lst_indices = topn(tfidfmat,n)
  topn_lst_indices = np.array(topn_lst_indices).reshape((tfidfmat.shape[0],n))
  doc_keywords = [[terms[i] for i in l] for l in topn_lst_indices]
  return doc_keywords

In [27]:
doc_keywords = get_topn_multi_keywords(tfidfmat1,3,terms)
df_tfidf_out1 = pd.DataFrame(data = {'Product_name': data['Product_name'],'prod_des':data['prod_des'],'multi_keywords':doc_keywords})
df_tfidf_out1

Unnamed: 0,Product_name,prod_des,multi_keywords
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...,"[camera, display, smartphone]"
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[android, camera, gb]"
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H...","[display, android, gb]"
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[android, camera, gb]"
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...,"[quality, battery, time]"
...,...,...,...
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...,"[time, battery, mobile]"
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...,"[android, gb, time]"
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...,"[product, phones, cover]"
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see...","[features, access, cover]"


### Trial 2: TFIDF + POS tagging on Preprocessed

In [0]:
def preprocessing(s):
  s = re.sub('[!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~]','',s)
  s = re.sub(r"http\S+",'', s)  
  d = nlp(s)
  lemmas = [token.lemma_ for token in d if not token.is_stop]
  return lemmas

In [0]:
pre_df = [preprocessing(i) for i in df]

In [31]:
pre_df[0]

['millennial',
 'edge',
 'infinity',
 'V',
 'display',
 'angle',
 'camera',
 'processor',
 'smartphone',
 'Samsung',
 'Galaxy',
 'M10',
 'Galaxy',
 'M10']

In [0]:
docs = [' '.join(i) for i in pre_df]

In [34]:
vectorizer2 = TfidfVectorizer(docs,lowercase = True, min_df = 0.05,max_df = 0.95)
tfidfmat2 = vectorizer2.fit_transform(docs)
tfidfmat2.shape

(3414, 116)

In [36]:
vectorizer2.get_feature_names()[:5]

['access', 'android', 'audio', 'bass', 'battery']

In [37]:
max_ind = [np.argmax(i) for i in tfidfmat2.todense()]
terms2 = vectorizer2.get_feature_names()
doc_keywords = [terms2[i] for i in max_ind]
df_tfidf_out2 = pd.DataFrame(data = {'Product_name': data['Product_name'],'prod_des':data['prod_des'],'keywords':doc_keywords})
df_tfidf_out2

Unnamed: 0,Product_name,prod_des,keywords
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...,galaxy
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...",mp
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H...",gb
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...",mp
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...,day
...,...,...,...
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...,mobile
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...,iphone
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...,cover
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see...",view


In [39]:
doc_keywords = get_topn_multi_keywords(tfidfmat2,3,terms2)
df_tfidf_out2 = pd.DataFrame(data = {'Product_name': data['Product_name'],'prod_des':data['prod_des'],'multi_keywords':doc_keywords})
df_tfidf_out2

Unnamed: 0,Product_name,prod_des,multi_keywords
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...,"[processor, edge, galaxy]"
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[flash, mode, mp]"
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H...","[hd, sensor, gb]"
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[flash, mode, mp]"
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...,"[color, hour, day]"
...,...,...,...
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...,"[wireless, battery, mobile]"
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...,"[cable, usb, iphone]"
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...,"[weight, iphone, cover]"
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see...","[samsung, galaxy, view]"


### Trial3

In [40]:
vectorizer3 = TfidfVectorizer(docs,lowercase = True, min_df = 0.1,max_df = 0.9)
tfidfmat3 = vectorizer3.fit_transform(docs)
tfidfmat3.shape

(3414, 32)

In [41]:
max_ind = [np.argmax(i) for i in tfidfmat3.todense()]
terms3 = vectorizer3.get_feature_names()
doc_keywords = [terms3[i] for i in max_ind]
df_tfidf_out3 = pd.DataFrame(data = {'Product_name': data['Product_name'],'prod_des':data['prod_des'],'keywords':doc_keywords})
df_tfidf_out3

Unnamed: 0,Product_name,prod_des,keywords
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...,smartphone
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...",gb
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H...",gb
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...",gb
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...,time
...,...,...,...
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...,mobile
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...,access
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...,cover
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see...",cover


### Trial 3.2

In [42]:
doc_multi_keywords = get_topn_multi_keywords(tfidfmat3,3,terms3)
df_tfidf_out3 = pd.DataFrame(data = {'Product_name': data['Product_name'],'prod_des':data['prod_des'],'keywords':doc_multi_keywords})
df_tfidf_out3

Unnamed: 0,Product_name,prod_des,keywords
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...,"[camera, display, smartphone]"
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[android, camera, gb]"
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H...","[display, android, gb]"
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[android, camera, gb]"
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...,"[quality, battery, time]"
...,...,...,...
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...,"[video, battery, mobile]"
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...,"[android, time, video]"
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...,"[product, phone, cover]"
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see...","[screen, access, cover]"


## 3. Final Approach: POS+ TFIDF+ ngram with preprocessing

In [44]:
vectorizer4 = TfidfVectorizer(docs,lowercase = True, min_df = 0.05,max_df = 0.95,ngram_range=(1,2))
tfidfmat4 = vectorizer4.fit_transform(docs)
tfidfmat4.shape

(3414, 121)

In [0]:
terms4 =  vectorizer4.get_feature_names()
doc_multi_keywords4 = get_topn_multi_keywords(tfidfmat4,3,terms4)

In [47]:
terms4[:10]

['access',
 'android',
 'audio',
 'bass',
 'battery',
 'bluetooth',
 'brand',
 'button',
 'cable',
 'call']

In [48]:
df_tfidf_multi_out4 = pd.DataFrame(data = {'Product_name': data['Product_name'],'prod_des':data['prod_des'],'multi_keywords':doc_multi_keywords4})
df_tfidf_multi_out4

Unnamed: 0,Product_name,prod_des,multi_keywords
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...,"[edge, samsung galaxy, galaxy]"
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[flash, mode, mp]"
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H...","[hd, sensor, gb]"
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[flash, mode, mp]"
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...,"[color, hour, day]"
...,...,...,...
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...,"[wireless, battery, mobile]"
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...,"[cable, usb, iphone]"
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...,"[weight, iphone, cover]"
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see...","[samsung, galaxy, view]"


### Trial 3.2

In [49]:
vectorizer5 = TfidfVectorizer(docs,lowercase = True, min_df = 0.02,max_df = 0.95,ngram_range=(1,2))
tfidfmat5 = vectorizer5.fit_transform(docs)
tfidfmat5.shape

(3414, 373)

In [0]:
terms5 =  vectorizer5.get_feature_names()
doc_multi_keywords5 = get_topn_multi_keywords(tfidfmat5,3,terms5)

In [51]:
df_tfidf_multi_out5 = pd.DataFrame(data = {'Product_name': data['Product_name'],'prod_des':data['prod_des'],'multi_keywords':doc_multi_keywords5})
df_tfidf_multi_out5

Unnamed: 0,Product_name,prod_des,multi_keywords
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...,"[samsung galaxy, angle, galaxy]"
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[mode, portrait, mp]"
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H...","[upto, id, gb]"
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[mode, portrait, mp]"
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...,"[talk, companion, nokia]"
...,...,...,...
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...,"[year, battery, mobile]"
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...,"[led, ipad, charging]"
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...,"[phone cover, cover phone, cover]"
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see...","[flip cover, galaxy, view]"


In [0]:
def get_tuned_keywords(docs,min_df,max_df,ngram_range,no_of_keywords):
  vectorizer5 = TfidfVectorizer(docs,lowercase = True, min_df =min_df,max_df = max_df,ngram_range=ngram_range)
  tfidfmat5 = vectorizer5.fit_transform(docs)
  print("Tfidf shape:",tfidfmat5.shape)
  terms5 =  vectorizer5.get_feature_names()
  print(terms5)
  doc_multi_keywords5 = get_topn_multi_keywords(tfidfmat5,no_of_keywords,terms5)
  df_tfidf_multi_out5 = pd.DataFrame(data = {'Product_name': data['Product_name'],'prod_des':data['prod_des'],'multi_keywords':doc_multi_keywords5})
  return(df_tfidf_multi_out5)

In [53]:
final_out_task1_df = get_tuned_keywords(docs,0.02,0.95,(1,3),7)
final_out_task1_df

Tfidf shape: (3414, 374)
['absorption', 'accelerometer', 'access', 'access button', 'access port', 'accessory', 'adapter', 'ai', 'air', 'alarm', 'android', 'angle', 'anti', 'app', 'appearance', 'apple', 'armor', 'audio', 'auto', 'band', 'bass', 'battery', 'battery capacity', 'battery life', 'beauty', 'black', 'bluetooth', 'body', 'box', 'brand', 'bump', 'bumper', 'button', 'button camera', 'button port', 'cable', 'call', 'camera', 'camera camera', 'camera flash', 'camera speaker', 'capacity', 'car', 'card', 'case', 'case case', 'case cover', 'case phone', 'case protection', 'case quality', 'cases', 'cell', 'cell phone', 'charge', 'charger', 'charging', 'choice', 'clarity', 'class', 'clip', 'cm', 'coating', 'color', 'colour', 'comfort', 'companion', 'compatibility', 'connectivity', 'connector', 'construction', 'control', 'convenience', 'core', 'core processor', 'corner', 'cover', 'cover case', 'cover phone', 'crystal', 'customer', 'cut', 'cut out', 'cutout', 'damage', 'day', 'degree', '

Unnamed: 0,Product_name,prod_des,multi_keywords
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...,"[smartphone, samsung, processor, edge, samsung..."
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[proximity sensor, oreo, gb, flash, mode, port..."
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H...","[fingerprint, core processor, ram gb, gb ram g..."
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[proximity sensor, oreo, gb, flash, mode, port..."
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...,"[shell, month, moment, standby, talk, companio..."
...,...,...,...
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...,"[finish, fingerprint, finger, frequency, year,..."
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...,"[year, cable, usb, iphone, led, ipad, charging]"
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...,"[iphone, plastic, metal, damage, phone cover, ..."
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see...","[flip, edge, cover, samsung, flip cover, galax..."


# Key Selling Points
1. Key selling points can be treated as general, but quantified features eg. 1.3ghz processor, bezel less screen, 24 hr battery backup, octa core processor


In [54]:
i=1
print("Original PDP:\n",data['prod_des'][i])
print("After POS:\n",docs[i])
print("Final Keywords:\n",final_out_task1_df['multi_keywords'][i])

Original PDP:
 Qualcomm Snapdragon 625, 2.0 GHz processor with 14nm architecture. 4000mAh battery capacity. 14.83 cm (5.84") FHD+ (1080x2280) Display,. 4GB + 64GB Flash Memory. Stock Android Oreo 8.1. 12 MP + 5 MP dual rear camera with portrait mode PDAF, HDR, LED flash. 5 MP front camera with portrait mode. Proximity sensor, E compass, Gyroscope, Accelerometer, IR Blaster.
After POS:
 GHz processor nm architecture battery capacity cm camera portrait mode flash camera portrait mode Proximity sensor e compass Qualcomm Snapdragon FHD 1080x2280 Display GB GB Flash Memory Stock Android Oreo MP MP PDAF HDR LED MP Gyroscope Accelerometer IR Blaster
Final Keywords:
 ['proximity sensor', 'oreo', 'gb', 'flash', 'mode', 'portrait', 'mp']


In [55]:
i = 0
print("Original PDP:\n",data['prod_des'][i]+"\n")
doc = nlp(data['prod_des'][i])
for chunk in doc.noun_chunks:
    print("ct:"+chunk.text+"\ncrt:"+ chunk.root.text+"\ncrd:"+ chunk.root.dep_+
            "\ncrh:"+chunk.root.head.text+"\n\n")

Original PDP:
 The Samsung Galaxy M10 is especially created for the millennials who live on the edge and do not compromise on anything. With a beautiful infinity V-cut display, a wide angle camera and a powerful processor, the Galaxy M10 is a powerful smartphone.

ct:The Samsung Galaxy M10
crt:M10
crd:nsubjpass
crh:created


ct:the millennials
crt:millennials
crd:pobj
crh:for


ct:who
crt:who
crd:nsubj
crh:live


ct:the edge
crt:edge
crd:pobj
crh:on


ct:anything
crt:anything
crd:pobj
crh:on


ct:a beautiful infinity V-cut display
crt:display
crd:pobj
crh:With


ct:the Galaxy M10
crt:M10
crd:nsubj
crh:is


ct:a powerful smartphone
crt:smartphone
crd:attr
crh:is




In [56]:
chunk_text = [i.text.lower() for i in doc.noun_chunks]
chunk_root_text = [i.root.text.lower() for i in doc.noun_chunks]
print(chunk_text)
print(chunk_root_text)
l = chunk_text
lst_kw = final_out_task1_df['multi_keywords'][1]
print(lst_kw)

['the samsung galaxy m10', 'the millennials', 'who', 'the edge', 'anything', 'a beautiful infinity v-cut display', 'the galaxy m10', 'a powerful smartphone']
['m10', 'millennials', 'who', 'edge', 'anything', 'display', 'm10', 'smartphone']
['proximity sensor', 'oreo', 'gb', 'flash', 'mode', 'portrait', 'mp']


In [0]:
def match(lst_kw,lst_crt_ct):
  l = []
  for i in lst_kw:
    for j in lst_crt_ct:
      if i in j.split():
        #print(i,":",j)
        l.append(j)
  return list(set(l))

In [0]:
prod_des_lst = [i for i in final_out_task1_df['prod_des']]
def extract_ksp(col1,col2):
  lst_keywords = col1
  prod_des = col2
  doc = nlp(prod_des)
  lst_chunk_text = [i.text.lower() for i in doc.noun_chunks]
  lst_chunk_text.extend([i.root.text.lower() for i in doc.noun_chunks])
  return(match(lst_keywords,lst_chunk_text))

In [0]:
out = final_out_task1_df.apply(lambda x: extract_ksp(x.multi_keywords,x.prod_des),axis = 1)

In [0]:
df_ksp= pd.DataFrame(data = {'Product_name': data['Product_name'],'prod_des':data['prod_des'],'search_engine_appt_keywords':final_out_task1_df['multi_keywords'],'key_selling_points':out})
df_ksp.to_csv('final_out.csv',index=False)

In [73]:
df_ksp

Unnamed: 0,Product_name,prod_des,search_engine_appt_keywords,key_selling_points
0,"Samsung Galaxy M10 (Ocean Blue, 3+32GB)",The Samsung Galaxy M10 is especially created f...,"[smartphone, samsung, processor, edge, samsung...","[the samsung galaxy m10, the edge, smartphone,..."
1,"Redmi 6 Pro (Black, 4GB RAM, 64GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[proximity sensor, oreo, gb, flash, mode, port...","[64gb flash memory, led flash, 12 mp, 5 mp dua..."
2,"Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB...","Coolpad Cool 3 Plus-Designed for all, 5.71'' H...","[fingerprint, core processor, ram gb, gb ram g...","[expandable upto 128gb, fingerprint sensor, gr..."
3,"Redmi 6 Pro (Black, 3GB RAM, 32GB Storage)","Qualcomm Snapdragon 625, 2.0 GHz processor wit...","[proximity sensor, oreo, gb, flash, mode, port...","[12 mp, led flash, 5 mp dual rear camera, stoc..."
4,Nokia 105 (Black),The design Shaped for your palm Featuring a cu...,"[shell, month, moment, standby, talk, companio...","[your everyday companion, its polycarbonate sh..."
...,...,...,...,...
3409,TokyoTon Mobile Battery for Moto M XT1662 XT16...,TokyoTon Mobile Battery for Moto M XT1662 XT16...,"[finish, fingerprint, finger, frequency, year,...","[battery, tokyoton mobile battery]"
3410,AM Safe x Cable for iPhone LED Fast Charging D...,Fast Charging Nylon Braided USB LED Cable for ...,"[year, cable, usb, iphone, led, ipad, charging]","[ipad, iphone, ipad, usb led cable, fast charg..."
3411,Ktrack Metal and Plastic Open Pry Screwdriver ...,Product: Tool Metal and plastic. 8 in 1 tools ...,"[iphone, plastic, metal, damage, phone cover, ...","[the phone cover, iphone, metal, tool metal, d..."
3412,Samsung S-View Flip Cover for Samsung Galaxy S...,"The S-View Flip Cover, Clear allows you to see...","[flip, edge, cover, samsung, flip cover, galax...","[the samsung s-view flip cover, the samsung ga..."


In [74]:
i=2
print("Product_name\n",df_ksp['Product_name'][i])
print("Original PDP:\n",data['prod_des'][i])
print("After POS:\n",docs[i])
print("Search Engine Apppropriate Keywords:\n",final_out_task1_df['multi_keywords'][i])
print("Key selling Points:\n",df_ksp['key_selling_points'][i])

Product_name
 Coolpad Cool 3 Plus (Ocean Blue, 2GB RAM, 16GB Storage)
Original PDP:
 Coolpad Cool 3 Plus-Designed for all, 5.71'' HD dewdrop display, 2GB RAM, 16GB ROM with expandable upto 128GB, Fingerprint sensor, Faceunlock, Gradient ID, Helio A22 Quad Core Processor, Android Pie,OTG Support.
After POS:
 hd dewdrop display GB rom upto sensor support Coolpad Cool GB RAM GB Fingerprint Faceunlock Gradient ID Helio A22 Quad Core Processor Android Pie OTG
Search Engine Apppropriate Keywords:
 ['fingerprint', 'core processor', 'ram gb', 'gb ram gb', 'upto', 'id', 'gb']
Key selling Points:
 ['expandable upto 128gb', 'fingerprint sensor', 'gradient id', 'id', 'gb']
