In [286]:
from pymongo import MongoClient
from bson.objectid import ObjectId
import requests
from selenium import webdriver
import bs4
import re
from pprint import pprint
import time
import pickle

In [246]:
mongo_uri = 'mongodb://localhost:10000/'
url = 'https://www.amazon.in/dp/B07DJHY82F/ref=gbph_img_m-5_d182_b23b14bf?smid=A23AODI1X2CEAE&pf_rd_p=a3a8dc53-aeed-4aa1-88bb-72ce9ddad182&pf_rd_s=merchandised-search-5&pf_rd_t=101&pf_rd_i=1389401031&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_r=P3FSQH2KEB3B5QQ1NQD5'

- [X] Product Title
- [ ] Product Description
- [X] Product Enlarge Image
- [X] Product Price (With Exchange and Without Exchange)
- [X] Product Colours
- [X] No. of Reviews
- [X] Star Rating
- [X] Technical Details
- [X] Most Recent 100 Reviews
- [X] All Reviews

In [248]:
#Scraping Data
def scrape_selenium():
    data = {}
    browser = webdriver.Firefox()
    print("Opening Browser")
    browser.get(url)
    print("Scraping Product Details")
    data = {
        'title': browser.find_element_by_id('productTitle').text,
        'price': {
            'with_exchange': re.sub(' +',' ',browser.find_element_by_id('maxBuyBackDiscountSection').text),
            'mrp': re.sub(' +','',browser.find_element_by_class_name('priceBlockStrikePriceString').text),
            'deal_price': re.sub(' +','',browser.find_element_by_id('priceblock_dealprice').text),
        },
        'colors': [i.get_attribute('alt') for i in browser.find_elements_by_class_name('imgSwatch')],
        'no_of_reviews': int(re.sub(',','',browser.find_element_by_id('acrCustomerReviewText').text.split()[0])),
        'images': [re.search('\"(.+?)\"',i.get_attribute('style'))[0] for i in browser.find_elements_by_class_name('ivThumbImage')  if i.get_attribute('style')!=''],
        'ratings': float(browser.find_element_by_class_name('arp-rating-out-of-text').text.split()[0]), 
        'tech_details':{},
        'all_reviews': [],
        'most_recent_reviews': [],
    }

    print("Scraping Technical Details")
    # scraping Technical Details
    for i in browser.find_element_by_class_name('pdTab').find_elements_by_tag_name('tr'):
        key_value = i.find_elements_by_tag_name('td')
        if key_value[0].text.split():
            data['tech_details'][key_value[0].text.strip()] = ''.join(key_value[1].text.strip())
    
    for i in browser.find_elements_by_css_selector("span[data-action='main-image-click']"):
        try:
            i.click()
        except Exception as e:
            continue
    data['images'] = [re.search('\"(.+?)\"',i.get_attribute('style'))[0] for i in browser.find_elements_by_class_name('ivThumbImage')  if i.get_attribute('style')!='']
 
    reviews_url = browser.find_element_by_css_selector("a[data-hook='see-all-reviews-link-foot']").get_attribute('href')
    return data, reviews_url, browser

In [250]:
def most_recent(rurl):
    # Fetch 100 most recent reviews
    print("Fetching 100 most recent reviews")
    most_recent_reviews = []
    revurl = rurl + '&sortBy=recent&pageNumber='
    for page in range(1,11):
        reurl = revurl + str(page)
        resp = requests.get(reurl)
        count = 0
        print("Page :",page,end='\r')
        while resp.status_code!=200:
            count += 1
            print(count,end='\r')
        soup = bs4.BeautifulSoup(resp.text)
        rev_list = soup.select_one('#cm_cr-review_list').select('.review')
        #print(rev_list)
        for rev in rev_list:
            review = {
                'customer': rev.select_one('.a-profile-name').text.strip(),
                'rating': float(re.search('\d(\.\d)?',rev.select_one('.review-rating')['class'][2])[0]),
                'title': rev.select_one('.review-title').text.strip(),
                'review': rev.select_one('.review-text').get_text(separator=' ').strip(),
            }
            #print(review)
            most_recent_reviews.append(review)
    return most_recent_reviews

def all_reviews(rurl):
    # Fetch all reviews
    print("Fetching all Reviews")
    reviews = []
    revurl = rurl + '&sortBy=helpful&pageNumber='
    pages = data['no_of_reviews']//10 + (data['no_of_reviews']%10 + 9)//10  
    for page in range(1,pages+1):
        reurl = revurl + str(page)
        resp = requests.get(reurl)
        count = 0
        print("Page :",page,end='\r')
        while resp.status_code!=200:
            count += 1
            print(count,end='\r')
            time.sleep(5)
        soup = bs4.BeautifulSoup(resp.text)
        rev_list = soup.select_one('#cm_cr-review_list').select('.review')
        #print(rev_list)
        for rev in rev_list:
            review = {
                'customer': rev.select_one('.a-profile-name').text.strip(),
                'rating': float(re.search('\d(\.\d)?',rev.select_one('.review-rating')['class'][2])[0]),
                'title': rev.select_one('.review-title').text.strip(),
                'review': rev.select_one('.review-text').get_text(separator=' ').strip(),
            }
            #print(review)
            reviews.append(review)
    return reviews

# Scraping All Reviews
def fetch_all_reviews(browser, rurl):
    print("Fetching All Reviews")
    reviews = []
    pages = data['no_of_reviews']//10 + (data['no_of_reviews']%10 + 9)//10
    revurl = rurl + '&sortBy=helpful&pageNumber='
    for page in range(1,pages+1):
        print("Page :",page,end='\r')
        browser.get(revurl+str(page))
        try:
            browser.find_element_by_class_name("no-reviews-section")
        except Exception as e:
            pass
        else:
            break
        review_list = browser.find_element_by_id('cm_cr-review_list').find_elements_by_class_name('review')
        for rev in review_list:
            review = {
                'customer': rev.find_element_by_class_name('a-profile-name').text,
                'rating': float(re.search('\d(\.\d)?',rev.find_element_by_class_name('review-rating').get_attribute('class'))[0]),
                'title': rev.find_element_by_class_name('review-title').text.strip(),
                'review': ''.join(rev.find_element_by_class_name('review-text').text.strip()),
            }
            #print(review)
            reviews.append(review)
    return reviews

In [304]:
# Database Functions
def connect():
    client = MongoClient(mongo_uri)
    return client.products.oneplus6t

def insert_product(data):
    doc = {}
    for i in ['title', 'price', 'colors', 'images', 'tech_details', 'no_of_reviews', 'ratings']:
        doc[i] = data[i]
    db = connect().details
    if not db.find_one({'title':doc['title']}):
        return db.insert(doc)
    print("Already Present")
    return False

def insert_reviews(data, pid):
    doc = {}
    db = connect().reviews
    for i in ['title', 'all_reviews', 'most_recent_reviews']:
        doc[i] = data[i]
    doc['product_id'] = pid
    if not db.find_one({'product_id': pid}):
        return db.insert_one(doc)
    print("Already Present")
    return False

def insert_reviews_classified(data, pid):
    doc = {}
    db = connect().classified_reviews
    for i in ['title', 'Battery Life', 'Picture Quality', 'Value for Money', 'Sound Quality', 'Fingerprint']:
        doc[i] = data[i]
    doc['product_id'] = pid
    if not db.find_one({'product_id': pid}):
        return db.insert_one(data)
    print("Already Present")
    return False


In [249]:
data, rurl, browser = scrape_selenium()
data['most_recent_reviews'] = most_recent(rurl)
data['all_reviews'] = fetch_all_reviews(browser, rurl)
pprint(data)

Opening Browser
Scraping Product Details
Scraping Technical Details
{'all_reviews': [],
 'colors': ['Mirror Black', 'Midnight Black', 'Speed Orange'],
 'images': ['"https://images-na.ssl-images-amazon.com/images/I/41VkqnrF85L._AA50_.jpg"',
            '"https://images-na.ssl-images-amazon.com/images/I/31zrxJ74RHL._AA50_.jpg"',
            '"https://images-na.ssl-images-amazon.com/images/I/31-GdZkkyUL._AA50_.jpg"',
            '"https://images-na.ssl-images-amazon.com/images/I/41YJBZcf4UL._AA50_.jpg"',
            '"https://images-na.ssl-images-amazon.com/images/I/21bXdUPe8fL._AA50_.jpg"',
            '"https://images-na.ssl-images-amazon.com/images/I/21uJnr-B7TL._AA50_.jpg"',
            '"https://images-na.ssl-images-amazon.com/images/I/31ohyErZTPL._AA50_.jpg"',
            '"https://images-na.ssl-images-amazon.com/images/I/21oIB5yMVgL._AA50_.jpg"',
            '"https://images-na.ssl-images-amazon.com/images/I/214iFzKjoXL._AA50_.jpg"',
            '"https://images-na.ssl-images-amazo

In [280]:
#saving the data in pickle
with open('data.pickle', 'wb') as f:
    pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)

#loading the pickle data
with open('data.pickle', 'rb') as f:
    b = pickle.load(f)

In [305]:
pid = insert_product(data)
insert_reviews(data, pid)

  if sys.path[0] == '':


<pymongo.results.InsertOneResult at 0x7f8ee57abbc8>

In [306]:
db = connect()
for doc in db.reviews.find():
    pprint(doc)

{'_id': ObjectId('5cb1eaa0c7959b424999e92d'),
 'all_reviews': [{'customer': 'Tanmay Shukla',
                  'rating': 5.0,
                  'review': 'I got this phone on Friday evening.\n'
                            '\n'
                            'Pros:\n'
                            'Great battery life\n'
                            'Amazing performance\n'
                            'Premium design\n'
                            'Impressive rear and front camera\n'
                            'In display fingerprint scanner is really fast\n'
                            '\n'
                            'Cons:\n'
                            "No headphone jack but you'll get a converter\n"
                            'No notification LED\n'
                            'No microSD card slot\n'
                            '\n'
                            'I will give points on my personal experience of 2 '
                            'days full usage as below:\n'
                 

                  'rating': 2.0,
                  'review': 'Lot of issues,\n'
                            '1. Camera gets crashed frequently.\n'
                            '2. Apps crash sometimes.\n'
                            '3. Camera app is dead slow while loading.\n'
                            "4. Picture qlty isn't as expected.\n"
                            '5. Speakers are not good... Voice cracks with '
                            'half the volume.\n'
                            '6. Gallery crashes sometimes.',
                  'title': 'Lot f issues and full of bugs.'},
                 {'customer': 'Atul Chounde',
                  'rating': 2.0,
                  'review': 'Device was working fine till 15days but yesterday '
                            'all Physical buttons(except alert slider) stopped '
                            'working suddenly on oneplus 6T.\n'
                            'I have raised the complaint in one plus site but '
                     

                 {'customer': 'Sangeeth C Radh',
                  'rating': 5.0,
                  'review': 'It took almost 2 weeks for Flipkart to deliver my '
                            'phone. the bullet earphones are really nice. the '
                            'is good at performance but oxygen os needs some '
                            'improvements like when a group message '
                            'notification is shown it highlights the phone '
                            'number of a contact rather than the group name. '
                            'so we might misunderstand its a message from some '
                            "unknown number if the contact isn't saved in the "
                            'phone. Regarding the active display, the os could '
                            'be tuned to show a preview of notification shown, '
                            'when selected on that icon in active display\n'
                            '\n'
                   

                            'Instead they could have provided the earphpne in '
                            'box...',
                  'title': 'OnePlus 6T McLaren edition is simply good...'},
                 {'customer': 'Techie_Guy',
                  'rating': 5.0,
                  'review': "I have been using the device and it's super "
                            'snappy... The night mode in camera does a decent '
                            'job since some photos were not as good as the '
                            'others clicked in the night mode. The onscreen '
                            'fingerprint sensor is ok. Not as fast as the '
                            "physical one. Overall it's a superb product. Any "
                            "who doesn't have OP6 and planning to buy a mobile "
                            'should definitely consider this one.',
                  'title': 'Super Snappy....'},
                 {'customer': 'tejaskathane',
                  'r

                  'title': '1 plus lacks knowledge on what should be packed '
                           'for a limited edition mobile'},
                 {'customer': 'Pradeep K',
                  'rating': 3.0,
                  'review': 'Phone is good but not up to the mark.\n'
                            'Design is also not much good. I can say that not '
                            'at all stylish.\n'
                            'Previous versions of one plus are far better than '
                            'this compare to design.\n'
                            'And Coming to touch sensors, I can not feel much '
                            'faster. Even one plus 3t is better compare to '
                            'this.\n'
                            "Even charging is not fast it's taking much time "
                            'to fully charge.\n'
                            'I personally feel like, they should have '
                            'concentrated on phone not o

                  'title': 'One plus 6T ..Things Must Know Before Buy'},
                 {'customer': 'Amazon Customer',
                  'rating': 4.0,
                  'review': "It's a do anything phone according to me. Photos "
                            ', sound , display and many things. You ask it it '
                            "will do it in speed of course. One thing that's "
                            'really good about it is the charging speed and '
                            "also don't have any complains about the batter "
                            'life for my mediocre use it comes for a whole day '
                            'and under heavy use till evening without any '
                            'issue (is this what I have experienced from my '
                            'use )\n'
                            'Real good product wanna keep your phone for 2 '
                            'year or so you can buy this without any second '
                      

                 {'customer': 'Adv. Shekhar Anand',
                  'rating': 5.0,
                  'review': 'This is my 2nd phone of One Plus, previously used '
                            '5T. Honestly, I am very impressed with this phone '
                            'as there is no problem in last 8 days.\n'
                            '\n'
                            'Everything is superb.',
                  'title': 'The beast in killer mode.'},
                 {'customer': 'Soumen guchhait',
                  'rating': 5.0,
                  'review': 'Very good camera quality and excellent night '
                            'camera.just awesome.\n'
                            'Battery performance excellent\n'
                            'Mostly security features .ohhhhhh\n'
                            'I used Samsung note 8 but 6T to much faster.\n'
                            'Face unlock and finger print scanner to much '
                            'faster than any ph

                  'title': 'This is awesome mobile'},
                 {'customer': 'vijay menon',
                  'rating': 5.0,
                  'review': 'Bhai kya phone hai !!! . At this price point such '
                            "an amazing phone it's crazy. I was afraid I will "
                            "miss the head phone jack but I don't actually. "
                            'After just a few days of use I am perfectly fine '
                            'with it.',
                  'title': 'Super se bhi upar'},
                 {'customer': 'Amazon Customer',
                  'rating': 5.0,
                  'review': 'I was using Samsung S9+ for last few months and '
                            'after using this since last few days i can say '
                            'that oneplus 6t experience is really good and '
                            'better than Samsung in many ways be it new '
                            'gestures or things like fingerprint senso

                  'rating': 4.0,
                  'review': 'Look is fabulous...back camera is great...selfi '
                            'is ok ok type.. battery life is great... ringtone '
                            'sound is bit lower side.. performance is buttery '
                            'smooth...touch response is great...dash charge is '
                            'mind blowing....i feel rs37999 is not '
                            'justified... little disappointed by oneplus this '
                            'time',
                  'title': 'Normal user review'},
                 {'customer': 'ritesh',
                  'rating': 4.0,
                  'review': 'This smartphone is amazing. But after few months '
                            'i got an issue with calling. When i gave it to '
                            "service centre for repair they said there's no "
                            'issue with microphone. But after they clean up my '
                     

                            'for 3 days. it has a very good camera as well as '
                            'battery back up apart from other features. Face '
                            'unlock is very fast and fingerprint also works '
                            'fast. overall good phone (One plus 6T). Happy to '
                            'have it. Prompt delivery by amazon.',
                  'title': 'excellent mobile with good battery backup'},
                 {'customer': 'yogesh',
                  'rating': 1.0,
                  'review': "Don't buy one plus 6t bcoz camera is disgusting "
                            'axcept night mode n spcly camera is blur during '
                            'slow motion vedios.battery back up is not at '
                            'good. absolutely waste of money',
                  'title': "Don't buy anyone this product"},
                 {'customer': 'Pankaj Bansal',
                  'rating': 1.0,
                  'review': 'One

                 {'customer': 'SP',
                  'rating': 5.0,
                  'review': 'This is an awesome phone. Display, camera, '
                            'performance, UI, battery, looks, packaging '
                            'everything is premium quality, a class above.',
                  'title': 'Best OnePlus yet. Loving it!'},
                 {'customer': 'Amazon Customer',
                  'rating': 1.0,
                  'review': 'Just 10 days since i have bought this product . '
                            'it hangs frequently',
                  'title': 'Product hangs frequently'},
                 {'customer': 'jeet sharma',
                  'rating': 5.0,
                  'review': 'Everything is appropriate, even my mob was putted '
                            'down to my hand but there was no screech and '
                            'damage now you can imagine that what qualities '
                            'they are providing us. But this cell

                  'title': 'Killer mobile. Superb phone'},
                 {'customer': 'Amazon Customer',
                  'rating': 5.0,
                  'review': 'Oneplus 6t is best than any high price mobiles '
                            'like s9+ and others',
                  'title': 'Oneplus 6t is killer'},
                 {'customer': 'Vimal vishal',
                  'rating': 4.0,
                  'review': 'The mobile comes with good package. And the '
                            'mobile also looks awesome and performance well. '
                            'The battery drains so fast',
                  'title': 'Need to but'},
                 {'customer': 'Agfamal',
                  'rating': 4.0,
                  'review': 'But I faced some problem with play store means '
                            "it's showing download pending and then it won't "
                            'download or else it would take much time '
                            'remaining ev

                  'title': 'Nice one'},
                 {'customer': 'Avinash',
                  'rating': 5.0,
                  'review': 'Except the onscreen finger print sensor ... '
                            'Everything is awesome ....',
                  'title': 'The best from one plus till date'},
                 {'customer': 'Rajendra',
                  'rating': 1.0,
                  'review': 'Beware, The radiation level of this phone is very '
                            'high, called SAR value. Wish i had known this '
                            'before buying.\n'
                            'Highly overrated mobile.',
                  'title': 'Beware, High Radiation levels'},
                 {'customer': 'Karthikraja',
                  'rating': 5.0,
                  'review': 'Battery life amazing 10/10\n'
                            'Camera 8/10\n'
                            'Sound quality 9/10',
                  'title': 'Best mobile for 38k'},
          

                  'review': 'Love you Amazon and Love You OnePlus... Superb .. '
                            'Awesome... Excellent... Overall proud and happy '
                            'with Amazon teaming up with OnePlus..',
                  'title': 'Amazon and OnePlus awesome team'},
                 {'customer': 'Customer',
                  'rating': 5.0,
                  'review': 'One of the best premium Android phones in the '
                            'market. Worth the buy.',
                  'title': 'Best Phone in the market'},
                 {'customer': 'Shubham',
                  'rating': 5.0,
                  'review': 'Overall it is amazing however the battery drains '
                            'very fast when you are playing PUBG. And if you '
                            'use charger with earfones then it might be '
                            'difficult to use and charger is superfast.',
                  'title': 'Phone is best... Battery takes more r

                 {'customer': 'Jikendra',
                  'rating': 5.0,
                  'review': 'An awesome phone.....can beat iPhone x and note 9 '
                            'in speed...photos comes really well....very '
                            'satisfied with this phone ..loved it..became one '
                            'plus fan',
                  'title': 'Great flagship phone'},
                 {'customer': 'Amazon Customer',
                  'rating': 5.0,
                  'review': 'The phone is a good package from all aspects. '
                            'However, feels a little fragile because of the '
                            'glass back.\n'
                            '\n'
                            'Overall a very good buy. Go for it.',
                  'title': 'No brainer for the price point.'},
                 {'customer': 'Bhupinder',
                  'rating': 5.0,
                  'review': 'Just got for it. No question to ask. Best '
    

                  'title': 'Front camera issue'},
                 {'customer': 'Pawan Kumar',
                  'rating': 3.0,
                  'review': 'Battery life is less than expected, a full charge '
                            "don't even last 12 hrs. Apps keep crashing, "
                            'internet connectivity is also poor compared to my '
                            'previous device',
                  'title': 'Not satisfied'},
                 {'customer': 'Saritha',
                  'rating': 5.0,
                  'review': 'Gifted to my brother. He loved it so much. '
                            'Awesome phone. Awesome features',
                  'title': 'Superb'},
                 {'customer': 'Great looking HDD',
                  'rating': 5.0,
                  'review': 'Great phone, brilliant camera and build quality '
                            'top rung',
                  'title': 'Great Phone'},
                 {'customer': 'mahipal sharma',


                  'rating': 5.0,
                  'review': "It's looking good,. Battery back up superb. It's "
                            'always one plus rock killer',
                  'title': 'One of the best phone - One Plus'},
                 {'customer': 'kalyan',
                  'rating': 5.0,
                  'review': 'Nice phone.. sound quality and picture quality '
                            'and all very good but battery life is not good as '
                            "I expected.. it's worth a phone to buy..",
                  'title': 'One plus!! Again you killed everyone with amazing '
                           'smart phone!!'},
                 {'customer': 'Amazon Customer',
                  'rating': 4.0,
                  'review': 'The 6t is good and fast. However, the functioning '
                            'of fingerprint scanner is suboptimal.',
                  'title': 'Can be Improved!'},
                 {'customer': 'Narendra Gudapati',
    

                  'title': 'Great phone'},
                 {'customer': 'Karandeep',
                  'rating': 2.0,
                  'review': 'Lots of issues including hanging and fingure lock '
                            'accessibility.',
                  'title': 'Slow performance'},
                 {'customer': 'Apoorv',
                  'rating': 5.0,
                  'review': 'Best phone',
                  'title': 'Budget phone go for it'},
                 {'customer': 'Anand Gautam',
                  'rating': 5.0,
                  'review': 'I had hoped that the next model would take some '
                            'time but they launched the McLaren Edition pretty '
                            'fast.',
                  'title': 'Best Phone Ever!!!'},
                 {'customer': 'Ashish jangir',
                  'rating': 5.0,
                  'review': 'This is my third OnePlus device after, I have '
                            'already used 3t, 5t and n

In [296]:
all_reviews = data['all_reviews']

In [297]:
all_reviews

[{'customer': 'Tanmay Shukla',
  'rating': 5.0,
  'title': 'Flagship Killer',
  'review': "I got this phone on Friday evening.\n\nPros:\nGreat battery life\nAmazing performance\nPremium design\nImpressive rear and front camera\nIn display fingerprint scanner is really fast\n\nCons:\nNo headphone jack but you'll get a converter\nNo notification LED\nNo microSD card slot\n\nI will give points on my personal experience of 2 days full usage as below:\n\nWeight: 9/10\nOS: 10/10\nScreen size: 10/10\nCPU: 10/10\nPerformance: 10/10\nStorage: 10/10 (you'll get approx 113 gb for use)\nBattery: 8/10\nRear camera: 10/10\nFront camera: 10/10\nIn display fingerprint scanner: 9/10\nFace unlock: 10/10\n\nI hope this will help full."},
 {'customer': 'Surbhi Garg',
  'rating': 2.0,
  'title': 'Camera quality is very poor.',
  'review': 'Camera quality is not upto the mark. I visited one plus store and the store representative checked my phone and compared the camera quality with his demo phone. He notic