In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
import undetected_chromedriver as uc
import time
import re
import numpy as np
import pandas as pd

In [2]:
def wait_for_page_to_load(driver, wait):
    """Wait until the current page reports ``document.readyState == 'complete'``.

    A one-line status message naming the page title is printed either way.
    Ordinary exceptions raised by the wait (typically its timeout) are
    reported through that message instead of being raised.

    Args:
        driver: WebDriver whose ``title`` is read for the status message.
        wait: Object exposing ``until(condition)`` (e.g. a ``WebDriverWait``);
            how long this call may block is decided by that object.
    """
    # Title is captured up front so both messages refer to the same page.
    page_title = driver.title
    try:
        wait.until(
            lambda d: d.execute_script("return document.readyState") == 'complete'
        )
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt /
        # SystemExit (e.g. interrupting the kernel) still propagate.
        print(f"The page \"{page_title}\" did not get fully loaded within the given time duration\n")
    else:
        print(f"The page \"{page_title}\" got fully loaded within the given time duration\n")

In [9]:
def scrape_data(url, target_rows=1000, timeout=10):
    """Scrape the product cards of one listing page into a list of dicts.

    A fresh ``undetected_chromedriver`` Chrome session is opened for the call
    and is always shut down again, even when scraping fails part-way.

    Args:
        url: Listing page to open.
        target_rows: Stop clicking "load more" once at least this many product
            cards are present. Every card present at that point is returned,
            so the result may hold more than ``target_rows`` entries.
        timeout: Seconds each explicit wait may block (page load, "load more"
            button presence, "load more" button clickability).

    Returns:
        list[dict]: one dict per product card with the keys ``name``, ``link``,
        ``price``, ``rating``, ``spec_score``, ``connectivity_details``,
        ``processor_details``, ``memory_details``, ``battery_details``,
        ``display_details``, ``camera_details``, ``expandable_memory`` and
        ``android_version``. Fields that cannot be read are ``'NA'`` for
        ``name``/``link`` and ``np.nan`` for everything else.
    """
    driver = uc.Chrome()
    try:
        driver.maximize_window()
        wait = WebDriverWait(driver, timeout)

        # accessing the website
        driver.get(url)
        wait_for_page_to_load(driver, wait)
        time.sleep(3)

        _load_product_rows(driver, target_rows, timeout)

        rows = driver.find_elements(By.CSS_SELECTOR, _PRODUCT_ROW_SELECTOR)
        data = [_parse_product_row(row) for row in rows]

        time.sleep(2)
        return data
    finally:
        # Always release the browser; without this any error above would
        # leave an orphaned Chrome process behind.
        driver.quit()


# CSS selector that matches one product card on the listing page.
_PRODUCT_ROW_SELECTOR = ".sm-product.has-tag.has-features.has-actions"


def _squash_whitespace(text):
    """Collapse each whitespace run in ``text`` to one space and trim the ends."""
    return re.sub(r'\s+', ' ', text).strip()


def _load_product_rows(driver, target_rows, timeout):
    """Click "load more" until ``target_rows`` cards exist or the button is gone."""
    while True:
        time.sleep(1)
        rows = driver.find_elements(By.CSS_SELECTOR, _PRODUCT_ROW_SELECTOR)
        if len(rows) >= target_rows:
            break
        try:
            # wait for the load button to be present
            load_more_button = WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.CLASS_NAME, "sm-load-more"))
            )

            # wait for the load button to be clickable
            WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable(load_more_button)
            )
        except Exception:
            # Not a bare ``except``: a kernel interrupt must still stop the loop.
            print("Load more option is not clickable anymore. We have navigated to the most bottom of the list")
            break
        else:
            # scroll to it to ensure it's in the viewport
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_button)

            # click via JavaScript to avoid any overlay/hidden issues
            driver.execute_script("arguments[0].click();", load_more_button)


def _child_value(row, by, selector, read):
    """Return ``read(child)`` for the matching child of ``row``, or NaN if unavailable."""
    try:
        return read(row.find_element(by, selector))
    except Exception:
        return np.nan


def _detail_text(details, index, normalize=True):
    """Return the text of ``details[index]`` (optionally whitespace-normalised) or NaN."""
    try:
        text = details[index].text
        return _squash_whitespace(text) if normalize else text
    except Exception:
        return np.nan


def _parse_product_row(row):
    """Extract every scraped field of a single product card into a dict."""
    # name and link come from the same anchor, so they succeed or fail together
    try:
        anchor = row.find_element(By.CSS_SELECTOR, "a.name.clamp-2")
        name = anchor.text
        link = anchor.get_attribute("href")
    except Exception:
        name = 'NA'
        link = 'NA'

    price = _child_value(row, By.CLASS_NAME, "price", lambda el: el.text)
    # the rating is read from the element's inline style attribute
    rating = _child_value(row, By.CSS_SELECTOR, "span.sm-rating",
                          lambda el: el.get_attribute("style"))
    spec_score = _child_value(row, By.CSS_SELECTOR, "div.score > b", lambda el: el.text)

    # rest of the details are positional <li> entries
    details = row.find_elements(By.TAG_NAME, "li")

    # expandable_memory and android_version may appear at position 6 or 7,
    # so they are identified by keyword rather than by index
    expandable_memory = np.nan
    android_version = np.nan
    for item in details[6:8]:
        try:
            label = _squash_whitespace(item.text).lower()
        except Exception:
            continue
        if 'memory' in label:
            expandable_memory = label
        elif 'android' in label:
            android_version = label

    return {
        "name" : name,
        "link" : link,
        "price" : price,
        "rating" : rating,
        "spec_score" : spec_score,
        # connectivity text is kept verbatim (not whitespace-normalised)
        "connectivity_details" : _detail_text(details, 0, normalize=False),
        "processor_details" : _detail_text(details, 1),
        "memory_details" : _detail_text(details, 2),
        "battery_details" : _detail_text(details, 3),
        "display_details" : _detail_text(details, 4),
        "camera_details" : _detail_text(details, 5),
        "expandable_memory" : expandable_memory,
        "android_version" : android_version,
    }

In [10]:
# Listing pages to scrape, one entry per price band.
URLs = [
    "https://www.smartprix.com/mobiles/price-5000_to_10000",
    "https://www.smartprix.com/mobiles/price-10000_to_15000",
    "https://www.smartprix.com/mobiles/price-15000_to_20000",
    "https://www.smartprix.com/mobiles/price-20000_to_30000",
    "https://www.smartprix.com/mobiles/price-above_30000",
]

In [11]:
# Scrape every listing in URLs into one flat list of records.
smartphones = []
for position, url in enumerate(URLs):
    if position > 0:
        # Pause between consecutive scrapes only; a delay after the final
        # URL would just hold the cell open for no benefit.
        time.sleep(10)
    smartphones.extend(scrape_data(url))

The page "Mobile Phones Between ₹5,000 and ₹10,000 | Smartprix" got fully loaded within the given time duration

The page "Mobile Phones Between ₹10,000 and ₹15,000 | Smartprix" got fully loaded within the given time duration

The page "Mobile Phones Between ₹15,000 and ₹20,000 | Smartprix" got fully loaded within the given time duration

The page "Mobile Phones Between ₹20,000 and ₹30,000 | Smartprix" got fully loaded within the given time duration

The page "Mobile Phones Above ₹30,000 | Smartprix" got fully loaded within the given time duration



In [12]:
# Number of records collected across all scraped URLs (duplicates included).
len(smartphones)

5000

In [15]:
# Build the frame from the list of per-product dicts; the dict keys become the columns.
smartprix_smartphones = pd.DataFrame(smartphones)

In [17]:
# Count rows that repeat an earlier row in every column.
smartprix_smartphones.duplicated().sum()

np.int64(513)

In [18]:
# Keep the first occurrence of each fully identical row.
smartprix_smartphones = smartprix_smartphones.drop_duplicates()

In [19]:
# Show the frame; pandas elides the middle rows in the rendered output.
smartprix_smartphones

Unnamed: 0,name,link,price,rating,spec_score,connectivity_details,processor_details,memory_details,battery_details,display_details,camera_details,expandable_memory,android_version
0,Xiaomi Redmi A5 5G,https://www.smartprix.com/mobiles/xiaomi-redmi...,"₹9,990",--rating: 4.15;,68,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 4 Gen3, Octa Core Processor","4 GB RAM, 128 GB inbuilt",5500 mAh Battery with 33W Fast Charging,"6.78 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 8 MP Front C...,"memory card supported, upto 1 tb",android v14
1,POCO C71,https://www.smartprix.com/mobiles/poco-c71-ppd...,"₹6,499",--rating: 4;,68,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","Unisoc T7250, Octa Core, 1.8 GHz Processor","4 GB RAM, 64 GB inbuilt",5200 mAh Battery with 15W Fast Charging,"6.88 inches, 720 x 1640 px, 120 Hz Display wit...",32 MP Dual Rear & 8 MP Front Camera,"memory card supported, upto 2 tb",android v15
2,Samsung Galaxy M06 5G,https://www.smartprix.com/mobiles/samsung-gala...,"₹9,199",--rating: 4;,73,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Dimensity 6300, Octa Core, 2.4 GHz Processor","4 GB RAM, 128 GB inbuilt",5000 mAh Battery with 25W Fast Charging,"6.74 inches, 720 x 1600 px, 90 Hz Display with...",50 MP + 2 MP Dual Rear & 8 MP Front Camera,"memory card (hybrid), upto 1.5 tb",android v15
3,Realme C75,https://www.smartprix.com/mobiles/realme-c75-p...,"₹9,990",--rating: 4.05;,78,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi, NFC","Helio G92 Max, Octa Core, 2 GHz Processor","8 GB RAM, 128 GB inbuilt",6000 mAh Battery with 45W Fast Charging,"6.72 inches, 1080 x 2400 px Display with Punch...",50 MP Dual Rear & 8 MP Front Camera,memory card supported,android v14
4,Vivo Y19e,https://www.smartprix.com/mobiles/vivo-y19e-pp...,"₹7,999",--rating: 4.25;,65,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","Unisoc T7225, Octa Core, 1.8 GHz Processor","4 GB RAM, 64 GB inbuilt",5500 mAh Battery with 15W Fast Charging,"6.74 inches, 720 x 1600 px Display with Water ...",13 MP + 0.08 MP Dual Rear & 5 MP Front Camera,memory card supported,android v14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,Nubia Red Magic 7S,https://www.smartprix.com/mobiles/nubia-red-ma...,"₹46,990",--rating: 4.35;,83,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8+ Gen1, Octa Core, 3.2 GHz Processor","8 GB RAM, 128 GB inbuilt",4500 mAh Battery with 120W Fast Charging,"6.8 inches, 1080 x 2400 px, 165 Hz Display",64 MP + 8 MP + 2 MP Triple Rear & 8 MP Front C...,,android v12
4996,Motorola Moto X30 Pro,https://www.smartprix.com/mobiles/motorola-mot...,"₹41,990",--rating: 4.75;,91,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8+ Gen1, Octa Core, 3.2 GHz Processor","8 GB RAM, 128 GB inbuilt",4450 mAh Battery with 125W Fast Charging,"6.67 inches, 1080 x 2400 px, 144 Hz Display wi...",200 MP + 50 MP + 12 MP Triple Rear & 60 MP Fro...,,android v12
4997,Nubia Red Magic 7S Pro,https://www.smartprix.com/mobiles/nubia-red-ma...,"₹60,990",--rating: 4.25;,86,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8+ Gen1, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 135W Fast Charging,"6.8 inches, 1080 x 2400 px, 120 Hz Display",64 MP + 8 MP + 2 MP Triple Rear & 8 MP Front C...,,android v12
4998,Lenovo Legion Y70,https://www.smartprix.com/mobiles/lenovo-legio...,"₹34,990",--rating: 4.15;,84,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8+ Gen1, Octa Core, 3.2 GHz Processor","8 GB RAM, 128 GB inbuilt",5100 mAh Battery with 68W Fast Charging,"6.67 inches, 1080 x 2400 px, 144 Hz Display wi...",50 MP + 13 MP + 2 MP Triple Rear & 16 MP Front...,,android v12


In [23]:
# Shuffle all rows with a fixed seed, then renumber the index from 0.
smartprix_smartphones = (
    smartprix_smartphones
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [24]:
# Preview the first rows of the shuffled frame.
smartprix_smartphones.head()

Unnamed: 0,name,link,price,rating,spec_score,connectivity_details,processor_details,memory_details,battery_details,display_details,camera_details,expandable_memory,android_version
0,Samsung Galaxy A01 Core,https://www.smartprix.com/mobiles/samsung-gala...,"₹4,999",--rating: 4.1;,50,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","(28 nm), Quad Core, 1.5 GHz Processor","1 GB RAM, 16 GB inbuilt",3000 mAh Battery,"5.3 inches, 720 x 1480 px Display",8 MP Rear & 5 MP Front Camera,memory card supported,android v10
1,Doogee S119,https://www.smartprix.com/mobiles/doogee-s119-...,"₹29,999",--rating: 4.4;,89,"Dual Sim, 3G, 4G, Wi-Fi, NFC","Helio MT8788, Octa Core, 2 GHz Processor","8 GB RAM, 512 GB inbuilt",10200 mAh Battery with 33W Fast Charging,"6.72 inches, 1080 x 2400 px Display",Dual Display,memory card supported,
2,Realme Narzo 80x 5G (8GB RAM + 128GB),https://www.smartprix.com/mobiles/realme-narzo...,"₹14,499",--rating: 4.05;,80,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Dimensity 6400, Octa Core, 2.5 GHz Processor","8 GB RAM, 128 GB inbuilt",6000 mAh Battery with 45W Fast Charging,"6.72 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 2 MP Dual Rear & 8 MP Front Camera,memory card (hybrid),android v15
3,OnePlus 5,https://www.smartprix.com/mobiles/oneplus-5-p1...,"₹22,999",--rating: 4.4;,74,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi, NFC","Snapdragon 835, Octa Core, 2.45 GHz Processor","8 GB RAM, 128 GB inbuilt",3300 mAh Battery,"5.5 inches, 1080 x 1920 px Display",20 MP + 16 MP Dual Rear & 16 MP Front Camera,memory card not supported,android v7.1.1 (nougat)
4,Samsung Galaxy J2 Pro (2018),https://www.smartprix.com/mobiles/samsung-gala...,"₹7,999",--rating: 4.1;,49,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","Snapdragon 425, Quad Core, 1.4 GHz Processor","1.5 GB RAM, 16 GB inbuilt",2600 mAh Battery,"5 inches, 540 x 960 px Display",8 MP Rear & 5 MP Front Camera,memory card supported,android v7.1 (nougat)


In [25]:
# (rows, columns) of the frame at this point in the notebook.
smartprix_smartphones.shape

(4487, 13)

In [28]:
# Write the frame to a CSV at a relative path; index = False leaves the row index out of the file.
smartprix_smartphones.to_csv("smartprix_smartphones.csv", index = False)