## Objective :

Scrape the customer reviews for 10 mobile phones listed on Amazon and perform
sentiment analysis on those customer reviews.


In [7]:
#Installing beatutifulsoup
#Beautiful Soup is a pure Python library for extracting structured data from a website. It allows us to parse data from HTML and XML files. It acts as a helper module and interacts with HTML in a similar and better way as to how you would interact with a web page using other available developer tools.
!pip3 install beautifulsoup4



In [8]:
#Importing some important libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import re
import time
from datetime import datetime
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests

In [9]:
#Function which will scrape each mobile model along with its price, reviews, overall rating, individual ratings, etc.

#Number of bestseller listing pages to scrape; get_data is called once per page number from 1 to no_pages.
no_pages = 1

def _tag_text(tag, default='NONE'):
    """Return ``tag.text``, or ``default`` when the lookup found no tag."""
    return tag.text if tag is not None else default


def _nested_span_text(tag, default='NONE'):
    """Return the text of the first ``<span>`` inside ``tag``, or ``default``."""
    if tag is None:
        return default
    return _tag_text(tag.find('span'), default)


def _review_page_url(base_url, page):
    """Return ``base_url`` extended with a ``pageNumber`` query parameter."""
    # The reviews link may or may not already carry a query string.
    separator = '&' if '?' in base_url else '?'
    return base_url + separator + 'pageNumber=' + str(page)


def get_data(pageNo, max_products=10, no_review_pages=10):
    """Scrape one bestseller listing page and the reviews of its products.

    Parameters
    ----------
    pageNo : int
        Number of the bestseller listing page to fetch.
    max_products : int, optional
        Maximum number of products taken from the listing page (default 10).
    no_review_pages : int, optional
        Number of review pages requested per product, starting at page 1
        (default 10).

    Returns
    -------
    list of list
        One row per scraped review, in the order
        ``[model name, price, overall rating, reviews count, review title,
        reviewer, reviewer rating, review comment]``. Products without a
        reviews link contribute no rows. Missing product fields fall back to
        ``'unknown-product'`` / ``'0'`` / ``'-1'``; missing review fields fall
        back to ``'NONE'``.

    Notes
    -----
    HTTP status codes are not checked: a blocked or failed page simply yields
    no matching tags and therefore no rows. A request that exceeds the timeout
    raises the ``requests`` timeout exception.
    """
    #Defining a user-agent which will help in bypassing the detection as a scraper
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0", "Accept-Encoding":"gzip, deflate", "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "DNT":"1","Connection":"close", "Upgrade-Insecure-Requests":"1"}
    #Seconds to wait for a response before giving up, so a stalled connection cannot hang the notebook
    timeout = 30

    #Specifying the URL to requests.get and passing the user-agent header as an argument
    listing_url = 'https://www.amazon.in/gp/bestsellers/electronics/1389432031/ref=zg_bs_pg_'+str(pageNo)+'?ie=UTF8&pg='+str(pageNo)
    r = requests.get(listing_url, headers=headers, timeout=timeout)

    #Parsing the page with an explicitly named parser so results do not depend on which parsers are installed
    soup = BeautifulSoup(r.content, 'html.parser')

    alls = []
    #Identifying the parent tag under which all the data we need will reside.
    products = soup.find_all('div', attrs={'class':'a-section a-spacing-none aok-relative'})
    for d in products[:max_products]:
        name = d.find('span', attrs={'class':'zg-text-center-align'})
        rating = d.find('span', attrs={'class':'a-icon-alt'})
        users_rated = d.find('a', attrs={'class':'a-size-small a-link-normal'})
        price = d.find('span', attrs={'class':'p13n-sc-price'})

        #The product name is the alt text of the image inside the name span;
        #only look for it once the span itself is known to exist.
        modelName = "unknown-product"
        if name is not None:
            images = name.find_all('img', alt=True)
            if images:
                modelName = images[0]['alt']
        modelRatings = _tag_text(rating, "-1")
        modelPrice = _tag_text(price, '0')

        #Without a reviews link there is nothing to scrape for this product.
        if users_rated is None:
            continue
        href = users_rated.get('href')
        if not href:
            continue
        reviews_url = 'https://www.amazon.in' + href

        for i in range(1, no_review_pages+1):
            #Build each page URL from the unmodified base link and the loop index,
            #so every iteration requests a different review page.
            reviewR = requests.get(_review_page_url(reviews_url, i), headers=headers, timeout=timeout)
            reviewSoup = BeautifulSoup(reviewR.content, 'html.parser')
            for dd in reviewSoup.find_all('div', attrs={'class':'a-section review aok-relative'}):
                reviewTitle = dd.find('a', attrs={'class':'a-size-base a-link-normal review-title a-color-base review-title-content a-text-bold'})
                reviewUserName = dd.find('span', attrs={'class':'a-profile-name'})
                reviewUserRating = dd.find('span', attrs={'class':'a-icon-alt'})
                reviewData = dd.find('span', attrs={'class':'a-size-base review-text review-text-content'})

                alls.append([
                    modelName,
                    modelPrice,
                    modelRatings,
                    users_rated.text,
                    _nested_span_text(reviewTitle),
                    _tag_text(reviewUserName),
                    _tag_text(reviewUserRating),
                    _nested_span_text(reviewData),
                ])
    return alls

In [10]:
#Scrape every listing page, from page 1 up to and including no_pages;
#each get_data call contributes one list of review rows.
results = [get_data(page) for page in range(1, no_pages+1)]

#The result is a list of per-page row lists, so it is flattened into a single list of rows before building the DataFrame.
def flatten(l):
    """Concatenate the sub-lists of ``l`` into one flat list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

column_names = [
    'Mobile Model',
    'Price',
    'Overall Ratings',
    'Reviews Count',
    'Review Title',
    'Reviewer',
    'Reviewer Rating',
    'Reviewe Comment',
]
df = pd.DataFrame(flatten(results), columns=column_names)
#Persist the scraped rows as a CSV file (without the index column).
df.to_csv('amazon_products.csv', index=False, encoding='utf-8')

In [11]:
#Reading the scraped reviews back from the amazon_products.csv file into a dataframe
df = pd.read_csv("amazon_products.csv")

In [12]:
#Checking the shape of the dataframe as (rows, columns)
print(df.shape)
#Checking the number of unique mobile models in the dataframe
df['Mobile Model'].nunique()


(1000, 8)


10

In [13]:
#From the above we can see that we have extracted 100 reviews for each mobile model. Let's see them in tabular format.

In [14]:
#Display the first 101 rows of the dataframe
df.head(101)

Unnamed: 0,Mobile Model,Price,Overall Ratings,Reviews Count,Review Title,Reviewer,Reviewer Rating,Reviewe Comment
0,"Redmi Note 9 (Pebble Grey, 4GB RAM 64GB Storag...","₹ 10,999.00",4.2 out of 5 stars,9966,Dont buy.. after 10 to.15 mins gaming phone la...,Arpit,1.0 out of 5 stars,\n Dont buy.. after 10 to.15 mins gaming phon...
1,"Redmi Note 9 (Pebble Grey, 4GB RAM 64GB Storag...","₹ 10,999.00",4.2 out of 5 stars,9966,Overall a good phone 👌,Ujjwal,5.0 out of 5 stars,\n Good phoneDecent look and looks stylishCam...
2,"Redmi Note 9 (Pebble Grey, 4GB RAM 64GB Storag...","₹ 10,999.00",4.2 out of 5 stars,9966,Not good,Sam singh,2.0 out of 5 stars,\n Not good phone stopped working after some ...
3,"Redmi Note 9 (Pebble Grey, 4GB RAM 64GB Storag...","₹ 10,999.00",4.2 out of 5 stars,9966,Just Read before buying. My opinion,PRABAHARAN,3.0 out of 5 stars,\n Here my product review after usage of 2 we...
4,"Redmi Note 9 (Pebble Grey, 4GB RAM 64GB Storag...","₹ 10,999.00",4.2 out of 5 stars,9966,Worst,Malik,2.0 out of 5 stars,"\n Worst product, not as expected its not eve..."
...,...,...,...,...,...,...,...,...
96,"Redmi Note 9 (Pebble Grey, 4GB RAM 64GB Storag...","₹ 10,999.00",4.2 out of 5 stars,9966,Not good like other phones of xiaomi.,Ansh satoeya,3.0 out of 5 stars,\n My Mobile phone has some screen issue. On ...
97,"Redmi Note 9 (Pebble Grey, 4GB RAM 64GB Storag...","₹ 10,999.00",4.2 out of 5 stars,9966,Everything nice but only body vibration proble...,Surya K.,4.0 out of 5 stars,\n Very nice Phone.. smooth performance.. cha...
98,"Redmi Note 9 (Pebble Grey, 4GB RAM 64GB Storag...","₹ 10,999.00",4.2 out of 5 stars,9966,Good phone..,Siddhanth Reddy,5.0 out of 5 stars,\n Totally impressed by the built and looks. ...
99,"Redmi Note 9 (Pebble Grey, 4GB RAM 64GB Storag...","₹ 10,999.00",4.2 out of 5 stars,9966,Pathetic service by Amazon and worst quality m...,Rakesh kumar,1.0 out of 5 stars,\n Worst mobile... Ordered mobile on August 0...


In [15]:
#After every 100 records the mobile model changes