In [33]:
# Scraping the Review Content from Amazon

In [1]:
''' Importing Beautiful Soup Library for Web Scraping and requests to get data from the URL'''

from bs4 import BeautifulSoup
import requests

In [2]:
# Fetch the product-review page.
# `timeout` keeps the cell from hanging indefinitely on a stalled connection
# (requests applies no timeout by default), and raise_for_status() makes a
# blocked or failed request fail here instead of silently producing empty
# result lists in the parsing cells further down.
r = requests.get(
    "https://www.amazon.in/Apple-iPhone-11-128GB-Black/product-reviews/B07XVLW7YK/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews",
    timeout=30,
)
r.raise_for_status()

# Print the URL of the response, including its query string.
print(r.url)

https://www.amazon.in/Apple-iPhone-11-128GB-Black/product-reviews/B07XVLW7YK/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews


In [3]:
# Dump the raw response body exactly as received. Printed this way it does not
# read like formatted HTML; a later cell uses prettify() to show it indented.
raw_body = r.content
print(raw_body)



In [4]:
# Parse the response text into a BeautifulSoup tree, naming the parser
# explicitly ('html.parser').
soup = BeautifulSoup(markup=r.text, features='html.parser')

# prettify() renders the parsed tree as an indented string, which is easier
# to read than the raw response body.
pretty_html = soup.prettify()
print(pretty_html)

<!DOCTYPE doctype html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-in">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11HCPocBs0L._RC|01xMIxnQ9BL.css,017DsKjNQJL.css,01NL3TVKoYL.css,01XYrMlv7eL.css,41EWOOlBJ9L.css,11A-+6fHBeL.css,01ElnPiDxWL.css,11QxHU4QYaL.css,01Sp8sB1HiL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,01evdoiemkL.css,01K+Ps1DeEL.css,31pdJv9iSzL.css,01W6EiNzKkL.css,51AZ-Jz5kmL.css,11UGC+GXOPL.css,21LK7jaicML.css,11L58Qpo0GL.css,21kyTi1FabL.css,01ruG+gDPFL.css,01YhS3Cs-hL.css,21GwE3cR-yL.css,01

In [37]:
# Collect every <span class="a-profile-name"> element on the page.
# find_all() is the supported method name; findAll is a deprecated alias.
Name = soup.find_all("span", {"class": "a-profile-name"})

# Extract the reviewer name text from each <span>.
# The first two matches are skipped: they belong to the "Top Positive" and
# "Top Critical" highlight reviews, which would otherwise be repeated.
Reviewer = [tag.get_text() for tag in Name[2:]]

print(Reviewer)

['Suman Biswas', 'Kaushik Bajaj', 'Sunny Kumar', 'shanu Kumar', 'Amazon Customer', 'Satyapal singh', 'Ghar Sayan', 'Gurmeet singh', 'Krusshna', 'Vinay Kumar Gupta']


In [38]:
# Collect every <i class="review-rating"> element on the page.
# find_all() is the supported method name; findAll is a deprecated alias.
Product_Rating = soup.find_all("i", {"class": "review-rating"})

# Extract the rating text (e.g. "5.0 out of 5 stars") from each <i> element.
# The first two matches are skipped: they belong to the "Top Positive" and
# "Top Critical" highlight reviews, which would otherwise be repeated.
Rating = [tag.get_text() for tag in Product_Rating[2:]]

print(Rating)

['1.0 out of 5 stars', '5.0 out of 5 stars', '5.0 out of 5 stars', '5.0 out of 5 stars', '1.0 out of 5 stars', '1.0 out of 5 stars', '5.0 out of 5 stars', '1.0 out of 5 stars', '1.0 out of 5 stars', '1.0 out of 5 stars']


In [30]:
# Collect every <a class="review-title-content"> element on the page.
# find_all() is the supported method name; findAll is a deprecated alias.
Review_Title = soup.find_all("a", {"class": "review-title-content"})

# Extract the title text from each <a> element and drop the newline
# characters that surround it in the page markup.
Review_Summary = [tag.get_text().strip('\n') for tag in Review_Title]

print(Review_Summary)

["Keep away from Amazon when it's Apple.", 'Amazing', 'First Time iPhone User Review :-)', 'Solid premium phone from iphone', 'Worst Experience Ever.!', 'iPhone 11', 'Best iPhone ever | Super Night Mode | Super Camera | Yellow', 'Defective Iphone 11', 'Don’t buy it from this seller', 'Too much heat on normal use']


In [32]:
# Collect every <span class="review-text-content"> element on the page.
# find_all() is the supported method name; findAll is a deprecated alias.
Review_Description = soup.find_all("span", {"class": "review-text-content"})

# Extract the review body text from each <span> and drop the newline
# characters that surround it in the page markup.
Description = [tag.get_text().strip('\n') for tag in Review_Description]

print(Description)

["May be my first negative review about the product & Amazon both. I was much elated to receive the iPhone 11 so fast, next day of dispatch i.e. 28/09/19, but the thing I got started heating up every now and then. Contacted Applecare, just to be consoled that it's quite normal. As it continued, tried to return the product by speaking to Amazon customer support but in vain. Some body called me back to convey that only Apple will decide which one to take back. Why is then Amazon took up the sacred duty of selling such an item which they can't exchange/ have no control ? The product developed new issues like proximity sensor malfunction and last but most importantly loosing mobile network every other minute(even had two software updates). It was handed over to the Apple ASP as the return window closed on 10/10/19 (what use it was for??) and diagnosed as having issues and has further been sent to Apple repair facility at Bengaluru. So I'm here w/out my first iPhone after using it(suffering

In [34]:
# Saving the Scraped Content to a .csv file

In [35]:
import pandas as pd

In [36]:
# Create an empty DataFrame; a later cell assigns the scraped review fields
# to it as columns.
Data = pd.DataFrame()

In [41]:
# Attach the scraped review fields to `Data` as columns.
#
# The four lists are scraped independently (names and ratings skip their first
# two matches, titles and descriptions do not), so they can end up with
# different lengths. Check that up front: a mismatch then fails with a message
# showing every length, before `Data` has been partially modified.
review_columns = {
    "Reviewer": Reviewer,
    "Rating": Rating,
    "Review Title": Review_Summary,
    "Review Description": Description,
}

column_lengths = {label: len(values) for label, values in review_columns.items()}
if len(set(column_lengths.values())) > 1:
    raise ValueError(f"Scraped review lists are misaligned: {column_lengths}")

for label, values in review_columns.items():
    Data[label] = values

In [45]:
# Show the assembled review table (displayed as the cell's last expression).
Data

Unnamed: 0,Reviewer,Rating,Review Title,Review Description
0,Suman Biswas,1.0 out of 5 stars,Keep away from Amazon when it's Apple.,May be my first negative review about the prod...
1,Kaushik Bajaj,5.0 out of 5 stars,Amazing,It's very expensive but the quality you get is...
2,Sunny Kumar,5.0 out of 5 stars,First Time iPhone User Review :-),The iPhone design is good and the camera quali...
3,shanu Kumar,5.0 out of 5 stars,Solid premium phone from iphone,Awesome Phone. Nice upgrade from iPhone 6s to ...
4,Amazon Customer,1.0 out of 5 stars,Worst Experience Ever.!,My Phone is Producing Too Much Heat Even Didn’...
5,Satyapal singh,1.0 out of 5 stars,iPhone 11,"Defective product,got heat up within 5 minutes..."
6,Ghar Sayan,5.0 out of 5 stars,Best iPhone ever | Super Night Mode | Super Ca...,"In my opinion, don’t think much about the pho..."
7,Gurmeet singh,1.0 out of 5 stars,Defective Iphone 11,The product i got was defective . The face id ...
8,Krusshna,1.0 out of 5 stars,Don’t buy it from this seller,Bought the mobile from appario retail ltd. Mob...
9,Vinay Kumar Gupta,1.0 out of 5 stars,Too much heat on normal use,"Just after i switched it on, it started produc..."


In [47]:
# Write the review table to a CSV file, keeping the row index as the first
# column. `index` takes a boolean; the string 'True' only worked because any
# non-empty string is truthy (so 'False' would have written the index too).
# NOTE(review): the destination is an absolute path on one specific Windows
# machine; change it before running the notebook anywhere else.
Data.to_csv(r'C:\Users\lenovo\python\reviews.csv', index=True)