In [1]:
from textblob import TextBlob
import pandas as pd
import re

In [2]:
def clean_text(text): 
        ''' 
        Utility function to clean text by removing links,
        special characters using simple regex statements. 
        '''
        return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", text).split()) 

def get_text_sentiment(text):
    ''' 
    Utility function to classify sentiment of passed
    text using textblob's sentiment method 
    '''
    # create TextBlob object of passed text 
    analysis = TextBlob(clean_text(text)) 
    # set sentiment 
    if analysis.sentiment.polarity > 0: 
        return 'positive'
    elif analysis.sentiment.polarity == 0: 
        return 'neutral'
    else: 
        return 'negative'

In [3]:
def feature_extraction(text): 
        blob = TextBlob(text)
        return blob.noun_phrases

In [4]:
dataset = pd.read_csv("G:/PuthonRun/Amazon_Review/data/Amazon_Unlocked_Mobile.csv")
dataset.head()

Unnamed: 0,Product Name,Brand Name,Price,Rating,Reviews,Review Votes
0,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,5,I feel so LUCKY to have found this used (phone...,1.0
1,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,4,"nice phone, nice up grade from my pantach revu...",0.0
2,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,5,Very pleased,0.0
3,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,4,It works good but it goes slow sometimes but i...,0.0
4,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,4,Great phone to replace my lost phone. The only...,0.0


In [5]:
#Selecting a subset from dataset
subset_sample=dataset.loc[dataset['Product Name'] == "Samsung Convoy U640 Phone for Verizon Wireless Network with No Contract (Gray) Rugged"]
subset_sample.head()

Unnamed: 0,Product Name,Brand Name,Price,Rating,Reviews,Review Votes
413473,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,5,I loved it. It was in good shape. I wanted an ...,0.0
413474,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,5,Great phone to use with page plus cellular min...,0.0
413475,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,3,This is probably a phone that was used very ha...,0.0
413476,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,1,The Pnone was a piece of junkIt was all scratc...,1.0
413477,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,5,"While it was listed as refurbished, it appears...",0.0


In [6]:
#subset_sample['Reviews']
#Extracting Features from subset
features = []
for reviews in subset_sample['Reviews']:
    features.append(feature_extraction(reviews))
    
subset_sample["features"] = features
subset_sample.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_sample["features"] = features


Unnamed: 0,Product Name,Brand Name,Price,Rating,Reviews,Review Votes,features
413473,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,5,I loved it. It was in good shape. I wanted an ...,0.0,"[good shape, perfect fit]"
413474,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,5,Great phone to use with page plus cellular min...,0.0,"[great phone, cellular minutes, esn, long batt..."
413475,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,3,This is probably a phone that was used very ha...,0.0,[]
413476,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,1,The Pnone was a piece of junkIt was all scratc...,1.0,"[pnone, ownernot hapy]"
413477,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,5,"While it was listed as refurbished, it appears...",0.0,[excellent condition]


In [7]:
#subset_sample['Reviews']
sentiment = []
for reviews in subset_sample['Reviews']:
    sentiment.append(get_text_sentiment(reviews))
    
subset_sample["Sentiment"] = sentiment
subset_sample.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_sample["Sentiment"] = sentiment


Unnamed: 0,Product Name,Brand Name,Price,Rating,Reviews,Review Votes,features,Sentiment
413473,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,5,I loved it. It was in good shape. I wanted an ...,0.0,"[good shape, perfect fit]",positive
413474,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,5,Great phone to use with page plus cellular min...,0.0,"[great phone, cellular minutes, esn, long batt...",positive
413475,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,3,This is probably a phone that was used very ha...,0.0,[],negative
413476,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,1,The Pnone was a piece of junkIt was all scratc...,1.0,"[pnone, ownernot hapy]",neutral
413477,Samsung Convoy U640 Phone for Verizon Wireless...,Samsung,79.95,5,"While it was listed as refurbished, it appears...",0.0,[excellent condition],positive


In [8]:
#Features With their counts
counts = {}
def feature_count(feature):
        if  feature in counts:
            counts[feature] += 1
        else:
            counts[feature] = 1
for feature_list in subset_sample['features']:
    for feature in feature_list:
        feature_count(feature)
        

In [9]:
print(counts)

