In [None]:
# importing the libraries

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import numpy as np 
import time
from bs4 import BeautifulSoup

# display every column when a wide DataFrame is rendered (None removes the column limit)
pd.set_option('display.max_columns', None)

In [None]:
# function to check and return if a particular text exists on the website

def get_text(source, class_name, attr):
    """Best-effort text extraction from the first element with a given class.

    Parameters
    ----------
    source : object or None
        Anything exposing ``find(class_=...)`` (e.g. a BeautifulSoup tag).
        ``None`` is tolerated and yields ``np.nan``.
    class_name : str
        Value passed to ``source.find(class_=...)``.
    attr : str
        Selects what is extracted from the matched element:

        * ``'text'``      -- the element's full ``.text``.
        * ``'alt_text1'`` -- first whitespace-separated token of the text of
          the third child (``contents[2]``) of the element's ``span``.
        * ``'alt_text2'`` -- first whitespace-separated token of the text of
          the first child (``contents[0]``) of the element's ``span``.

    Returns
    -------
    str or float
        The extracted text, or ``np.nan`` when the element is missing, the
        ``attr`` selector is unknown, or the expected structure is absent.
    """
    try:
        # Look the element up once instead of re-running find() per branch.
        node = source.find(class_=class_name)
        if node is None:
            return np.nan

        if attr == 'text':
            return node.text
        if attr == 'alt_text1':
            return node.span.contents[2].text.split()[0]
        if attr == 'alt_text2':
            return node.span.contents[0].text.split()[0]

        # Unknown selector: treated as "no value" rather than an error.
        return np.nan
    except Exception:
        # Deliberate best-effort: any structural surprise (source is None,
        # no <span>, too few children, empty text) maps to a missing value.
        # `Exception` rather than a bare `except` so Ctrl-C / SystemExit
        # still interrupt a long scrape. `np.nan` (not `np.NaN`, which was
        # removed in NumPy 2.0) keeps this path from raising itself.
        return np.nan

In [None]:
# launching the webdriver and the webpage to be scraped

# start a Selenium-controlled Chrome session
driver = webdriver.Chrome()
# implicit wait: element lookups wait up to 5 seconds before raising NoSuchElementException
driver.implicitly_wait(5)
driver.maximize_window()
driver.get("https://www.flipkart.com")

# locate the search box by its CSS class and submit the query "smartphone"
# NOTE(review): "Pke_EE" looks like a generated class name -- confirm it still matches the live site
search_box = driver.find_element(By.CLASS_NAME, "Pke_EE")
search_box.click()
search_box.send_keys("smartphone")
# pressing ENTER submits the search
search_box.send_keys(Keys.ENTER)

In [None]:
# one independent accumulator list per scraped field; the scraping loop appends to these

names = []
features = []
sp = []
mrp = []
discount = []
star = []
ratings = []
reviews = []

In [None]:
# specification fields to keep for each phone; only these keys are stored in the features list

cols = [
    'Display Size',
    'Resolution',
    'Resolution Type',
    'Display Type',
    'Operating System',
    'Processor Brand',
    'Processor Type',
    'Processor Core',
    'Internal Storage',
    'RAM',
    'Primary Camera',
    'Secondary Camera',
    'Hybrid Sim Slot',
    'Network Type',
    'Micro USB Version',
    'Bluetooth Version',
    'Wi-Fi Version',
    'NFC',
    'Audio Jack',
    'Battery Capacity',
    'Width',
    'Height',
    'Depth',
    'Weight',
]

In [None]:
# scraping data from multiple pages
#
# For every search-results page: collect the product links, visit each product
# page, extract price/rating fields and the spec table, then return to the
# results page and follow "Next" until there is no further page.

PAGE_SETTLE_SECONDS = 2    # pause before parsing a results page
NEXT_PAGE_TIMEOUT = 5      # max seconds to wait for the URL to change after clicking "Next"

while True:
    time.sleep(PAGE_SETTLE_SECONDS)
    srch_rslts = driver.current_url
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = BeautifulSoup(driver.page_source, "html.parser")
    phones_src = soup.find_all(class_="CGtC98")

    for src in phones_src:

        href = src.get('href')
        if not href:
            # matched element carries no link -- nothing to visit
            continue

        url = "https://www.flipkart.com" + href
        driver.get(url)

        # The product title doubles as a "page is usable" check: the implicit
        # wait gives it time to appear, and pages without it are skipped.
        try:
            driver.find_element(By.XPATH, '//*[@class="VU-ZEz"]').click()
        except NoSuchElementException:
            continue

        # Expand the specification section when the button exists. A product
        # page without it must not abort the whole scrape.
        try:
            driver.find_element(By.XPATH, '//button[@class="QqFHMw _4FgsLt"]').click()
        except NoSuchElementException:
            pass

        # Parse the page only AFTER the clicks above, so the soup reflects the
        # expanded specification table rather than the pre-click DOM.
        phone_soup = BeautifulSoup(driver.page_source, "html.parser")
        phone_details = phone_soup.find(class_="DOjaWF gdgoEp col-8-12")
        if phone_details is None:
            # details container not found -- skip rather than record an empty row
            continue

        param = [x.text for x in phone_details.find_all(class_="+fFi1w col col-3-12")]
        value = [x.text for x in phone_details.find_all(class_="HPETK2")]
        specs = {k: v for k, v in zip(param, value) if k in cols}

        # All appends happen together, after every step that can fail, so the
        # eight lists always stay the same length (row i describes one phone).
        names.append(get_text(phone_details, "VU-ZEz", "text"))
        sp.append(get_text(phone_details, "Nx9bqj CxhGGd", "text"))
        mrp.append(get_text(phone_details, "yRaY8j A6+E6v", "text"))
        discount.append(get_text(phone_details, "UkUFwK WW8yVX", "text"))
        star.append(get_text(phone_details, "XQDdHH", "text"))
        ratings.append(get_text(phone_details, "Wphh3N", 'alt_text2'))
        reviews.append(get_text(phone_details, "Wphh3N", 'alt_text1'))
        features.append(specs)

    # go back to the results page we were processing
    driver.get(srch_rslts)

    try:
        driver.find_element(By.XPATH, '//*[text()="Next"]').click()
    except NoSuchElementException:
        print("NoSuchElementException")
        print("No next page")
        break

    # The URL may not change the instant the click returns; poll briefly
    # before concluding that there is no further page.
    deadline = time.monotonic() + NEXT_PAGE_TIMEOUT
    while driver.current_url == srch_rslts and time.monotonic() < deadline:
        time.sleep(0.25)

    nxt_srch_rslts = driver.current_url
    if srch_rslts == nxt_srch_rslts:
        print("No next page")
        break

In [None]:
# assemble the per-phone listing fields (one list per column) into a DataFrame

data = dict(
    name=names,
    sp=sp,
    mrp=mrp,
    discount=discount,
    star=star,
    ratings=ratings,
    reviews=reviews,
)

df_data = pd.DataFrame(data)
df_data.head()

In [None]:
# (rows, columns) of the listing DataFrame
df_data.shape

In [None]:
# creating a dataframe of the features: `features` is a list holding one spec dict per phone,
# so each dict becomes a row and each spec name a column

fts = pd.DataFrame(features)
fts.head()

In [None]:
# (rows, columns) of the features DataFrame
fts.shape

In [None]:
# joining the main dataframe and the features dataframe
# join() aligns rows on the index; how='inner' keeps only index labels present in both frames

df = df_data.join(fts, how='inner')
df.head()

In [None]:
# exporting data for analysis
# index=False leaves the DataFrame's row index out of the written sheet

df.to_excel("mobiles_data.xlsx", index=False)