In [1]:
!pip install textblob
!python -m textblob.download_corpora

Defaulting to user installation because normal site-packages is not writeable
Finished.


[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\dheeraj\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\dheeraj\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\dheeraj\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\dheeraj\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package conll2000 to
[nltk_data]     C:\Users\dheeraj\AppData\Roaming\nltk_data...
[nltk_data]   Package conll2000 is already up-to-date!
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\dheeraj\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_re

## Project 4: Auto Keyword Extraction from Articles Text Using TextBlob

In [4]:
import pandas as pd

In [5]:
df = pd.read_csv(r"C:\Users\dheeraj\Downloads\NLP-Project\redmi6.csv", encoding='ISO-8859-1')
df = df[['Customer name','Comments']]
df.head()

Unnamed: 0,Customer name,Comments
0,Rishikumar Thakur,Another Midrange killer Smartphone by Xiaomi\n...
1,Raza ji,All ok but vry small size mobile
2,Vaibhav Patel,Quite good
3,Amazon Customer,Redmi has always have been the the king of bud...
4,Sudhakaran Wadakkancheri,worst product from MI. I am a hardcore fan of ...


## clean text

In [6]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re

stopwords = set(stopwords.words('english'))

In [7]:
def clean(text):
    # lower casing
    text = text.lower()
    
    # removing punctuations, special chars etc
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)
    
    # tokinzation
    text = word_tokenize(text)
    
    # stopwords removing
    text = [w for w in text if w not in stopwords]
    return " ".join(text)

In [8]:
text = "Machine! learning enables 890808 @#$#@$#@ computers to? learn from data without being explicitly programmed."

clean(text)

'machine learning enables 890808 computers learn data without explicitly programmed'

In [9]:
df['cleaned_comments'] = df['Comments'].apply(clean)

df

Unnamed: 0,Customer name,Comments,cleaned_comments
0,Rishikumar Thakur,Another Midrange killer Smartphone by Xiaomi\n...,another midrange killer smartphone xiaomi majo...
1,Raza ji,All ok but vry small size mobile,ok vry small size mobile
2,Vaibhav Patel,Quite good,quite good
3,Amazon Customer,Redmi has always have been the the king of bud...,redmi always king budget segmentand yet anothe...
4,Sudhakaran Wadakkancheri,worst product from MI. I am a hardcore fan of ...,worst product mi hardcore fan mi one really di...
...,...,...,...
275,Rahul,"I like This Phone, Awesome look and design.\nI...",like phone awesome look design im using phone
276,Sunil Soni,Product is avasome but invoice is note include...,product avasome invoice note include
277,D.C.Padhi,"Redmi Note4, Note5, now 6pro..It seems the old...",redmi note4 note5 6proit seems older model bet...
278,Mahesh,I love mi,love mi


Explanation (bellow code)

Nouns: Captures both common and proper nouns (singular and plural).

Adjectives: Includes comparative (JJR) and superlative (JJS) forms in addition to base adjectives (JJ).

Verbs: Includes various forms (base, past, gerund, participle, etc.).

Adverbs: Includes comparative (RBR) and superlative (RBS) adverbs as well as standard adverbs (RB).

In [10]:
from textblob import TextBlob
from collections import Counter


def get_keywords(text):
    # Create a TextBlob object
    blob = TextBlob(text)

    # Extract a broader range of keywords based on POS tagging
    keywords = [word for word, tag in blob.tags if tag in ('NN', 'NNS', 'NNP', 'NNPS',  # Nouns
                                                           'JJ', 'JJR', 'JJS',          # Adjectives
                                                           'RB', 'RBR', 'RBS')]         # Adverbs

    # Count the most common keywords
    keyword_counts = Counter(keywords)
    most_common_keywords = keyword_counts.most_common(5)  # Get the top 5 most common keywords
    
    return most_common_keywords

In [11]:
df['keywords'] = df['cleaned_comments'].apply(get_keywords)
df

Unnamed: 0,Customer name,Comments,cleaned_comments,keywords
0,Rishikumar Thakur,Another Midrange killer Smartphone by Xiaomi\n...,another midrange killer smartphone xiaomi majo...,"[(pro, 10), (phone, 9), (xiaomi, 8), (redmi, 8..."
1,Raza ji,All ok but vry small size mobile,ok vry small size mobile,"[(ok, 1), (vry, 1), (small, 1), (size, 1), (mo..."
2,Vaibhav Patel,Quite good,quite good,"[(quite, 1), (good, 1)]"
3,Amazon Customer,Redmi has always have been the the king of bud...,redmi always king budget segmentand yet anothe...,"[(redmi, 2), (useage, 2), (give, 2), (always, ..."
4,Sudhakaran Wadakkancheri,worst product from MI. I am a hardcore fan of ...,worst product mi hardcore fan mi one really di...,"[(worst, 1), (product, 1), (mi, 1), (hardcore,..."
...,...,...,...,...
275,Rahul,"I like This Phone, Awesome look and design.\nI...",like phone awesome look design im using phone,"[(phone, 2), (awesome, 1), (look, 1), (design,..."
276,Sunil Soni,Product is avasome but invoice is note include...,product avasome invoice note include,"[(product, 1), (avasome, 1), (invoice, 1), (no..."
277,D.C.Padhi,"Redmi Note4, Note5, now 6pro..It seems the old...",redmi note4 note5 6proit seems older model bet...,"[(models, 3), (note4, 2), (note5, 2), (swipe, ..."
278,Mahesh,I love mi,love mi,"[(love, 1), (mi, 1)]"
