In [14]:
# Importing necessary Libraries
import csv
import time
from urllib.parse import quote_plus

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from tqdm import tqdm

In [15]:
def get_url(search_item, page_number):
    '''
    Build the Flipkart search-results URL for a query and a results page.

    Parameters
    ----------
    search_item : str
        Free-text search query, e.g. ``'mobile phones'``.
    page_number : int
        Value placed in the URL's ``page`` query parameter.

    Returns
    -------
    str
        The search URL with the query percent-encoded.
    '''
    template = 'https://www.flipkart.com/search?q={}&otracker=search&otracker1=search&marketplace=FLIPKART&page={}'
    # quote_plus still turns spaces into '+', and additionally escapes characters
    # such as '&', '#', '=' and '+' that would otherwise be read as URL syntax
    # and silently change or truncate the query.
    return template.format(quote_plus(search_item), page_number)


In [16]:
def extract_phone_model_info(item):
    """
    Extract the details of one phone from a Flipkart search-result card.

    Parameters
    ----------
    item : bs4.element.Tag
        The card element. Only its ``find`` and ``find_all`` methods are used.

    Returns
    -------
    tuple of str
        ``(model, star, num_ratings, reviews, ram, storage, expandable, display,
        camera, battery, processor, warranty, price)``. Every value is stripped
        of surrounding whitespace. A field whose element is absent from the card
        is ``""``, except ``warranty``, which is ``"N/A"``.
    """
    def class_text(tag_name, css_class):
        # Stripped text of the first matching element, or "" when it is absent.
        node = item.find(tag_name, {'class': css_class})
        return node.text.strip() if node is not None else ""

    model = class_text('div', "_4rR01T")
    star = class_text('div', "_3LWZlK")

    # A single span holds both counts, e.g. "1,26,429 Ratings\xa0&\xa07,352 Reviews".
    # partition() leaves `reviews` empty when there is no '&', instead of
    # chopping the last character off the ratings text.
    counts = class_text('span', "_2_R_DZ").replace('\xa0', ' ')
    num_ratings, _, reviews = (part.strip() for part in counts.partition('&'))

    # First spec line, e.g. "4 GB RAM | 64 GB ROM | Expandable Upto 1 TB".
    # Splitting on '|' (rather than slicing at fixed character offsets) keeps
    # values such as "128 GB ROM" intact whatever their width.
    spec_parts = [part.strip() for part in class_text('li', "rgWa7D").split('|', 2)]
    spec_parts += [""] * (3 - len(spec_parts))
    ram, storage, expandable = spec_parts

    # The remaining details are read positionally from the card's <li> elements.
    bullets = item.find_all('li')

    def bullet_text(index):
        # Stripped text of the index-th <li>, or "" when the card has fewer.
        return bullets[index].text.strip() if len(bullets) > index else ""

    display = bullet_text(1)
    camera = bullet_text(2)
    battery = bullet_text(3)
    processor = bullet_text(4)

    warranty_elem = item.find("span", {"class": "warrantyText"})
    warranty = warranty_elem.text.strip() if warranty_elem is not None else "N/A"

    price = class_text('div', '_30jeq3 _1_WHN1')

    return (model, star, num_ratings, reviews, ram, storage, expandable,
            display, camera, battery, processor, warranty, price)

In [17]:
def main(search_item, num_batches=3, pages_per_batch=20, page_delay=5,
         batch_delay=30, output_path='Flipkart_results.csv'):
    '''
    Scrape Flipkart search results for ``search_item`` and save them to a CSV file.

    Pages are fetched in batches with pauses in between to mimic manual
    browsing. Each distinct model name is recorded once.

    Parameters
    ----------
    search_item : str
        Search query passed to ``get_url``.
    num_batches : int, optional
        Number of batches of pages to fetch (default 3).
    pages_per_batch : int, optional
        Number of result pages in each batch (default 20).
    page_delay : float, optional
        Seconds to wait after loading each page (default 5).
    batch_delay : float, optional
        Seconds to wait between batches (default 30); not applied after the last.
    output_path : str, optional
        CSV file to write (default ``'Flipkart_results.csv'``).

    Returns
    -------
    None
    '''
    driver = webdriver.Chrome()
    records = []
    seen_models = set()  # model names already recorded, used to skip duplicates
    try:
        for batch in range(1, num_batches + 1):
            first_page = (batch - 1) * pages_per_batch + 1
            last_page = batch * pages_per_batch
            for page in tqdm(range(first_page, last_page + 1)):
                driver.get(get_url(search_item, page))
                time.sleep(page_delay)  # delay to mimic manual browsing
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                for item in soup.find_all('a', {'class': "_1fQZEK"}):
                    model_elem = item.find('div', {'class': "_4rR01T"})
                    if model_elem is None:
                        # A card without a model name cannot be de-duplicated
                        # or identified, so skip it instead of crashing the run.
                        continue
                    model = model_elem.text
                    if model in seen_models:
                        continue
                    records.append(extract_phone_model_info(item))
                    seen_models.add(model)
            # additional delay between batches to mimic manual browsing behavior
            if batch < num_batches:
                time.sleep(batch_delay)
    finally:
        # Always end the browser session, even if a page load or parse fails;
        # quit() shuts the whole session down rather than only closing the window.
        driver.quit()

    # Saving the data into a csv file
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Model', 'Stars', 'Num_of_Ratings', 'Reviews', 'Ram', 'Storage', 'Expandable',
                         'Display', 'Camera', 'Battery', 'Processor', 'Warranty', 'Price'])
        writer.writerows(records)


In [18]:
%%time
# Run the full scrape for the query 'mobile phones'; %%time reports how long this cell took.
main('mobile phones')

100%|██████████| 20/20 [02:13<00:00,  6.70s/it]
100%|██████████| 20/20 [02:14<00:00,  6.72s/it]
100%|██████████| 20/20 [02:06<00:00,  6.31s/it]


CPU times: total: 5.2 s
Wall time: 7min 37s


In [19]:
# Load the CSV that main() wrote. main() writes 'Flipkart_results.csv' relative to
# the working directory, so read it the same way instead of through an absolute
# path that exists only on one machine.
mobile_df = pd.read_csv("Flipkart_results.csv")
mobile_df.head()

Unnamed: 0,Model,Stars,Num_of_Ratings,Reviews,Ram,Storage,Expandable,Display,Camera,Battery,Processor,Warranty,Price
0,"SAMSUNG Galaxy F13 (Nightsky Green, 64 GB)",4.4,"1,26,429 Ratings","7,352 Reviews",4 GB RAM,64 GB ROM,Expandable Upto 1 TB,16.76 cm (6.6 inch) Full HD+ Display,50MP + 5MP + 2MP | 8MP Front Camera,6000 mAh Lithium Ion Battery,Exynos 850 Processor,,"₹9,699"
1,"SAMSUNG Galaxy F13 (Sunrise Copper, 64 GB)",4.4,"1,26,429 Ratings","7,352 Reviews",4 GB RAM,64 GB ROM,Expandable Upto 1 TB,16.76 cm (6.6 inch) Full HD+ Display,50MP + 5MP + 2MP | 8MP Front Camera,6000 mAh Lithium Ion Battery,Exynos 850 Processor,,"₹9,699"
2,"POCO C50 (Country Green, 32 GB)",4.4,"3,353 Ratings",160 Reviews,2 GB RAM,32 GB ROM,Expandable Upto 512 GB,16.56 cm (6.52 inch) HD+ Display,8MP Dual Camera | 5MP Front Camera,5000 mAh Lithium Polymer Battery,"Mediatek Helio A22 Processor, Upto 2.0 GHz Pro...",,"₹5,749"
3,"POCO C50 (Royal Blue, 32 GB)",4.4,"3,353 Ratings",160 Reviews,2 GB RAM,32 GB ROM,Expandable Upto 512 GB,16.56 cm (6.52 inch) HD+ Display,8MP Dual Camera | 5MP Front Camera,5000 mAh Lithium Polymer Battery,"Mediatek Helio A22 Processor, Upto 2.0 GHz Pro...",,"₹5,749"
4,"SAMSUNG Galaxy F13 (Waterfall Blue, 64 GB)",4.4,"1,26,429 Ratings","7,352 Reviews",4 GB RAM,64 GB ROM,Expandable Upto 1 TB,16.76 cm (6.6 inch) Full HD+ Display,50MP + 5MP + 2MP | 8MP Front Camera,6000 mAh Lithium Ion Battery,Exynos 850 Processor,,"₹9,699"


In [20]:
# Distinct model names present in the scraped data.
mobile_df["Model"].unique()

array(['SAMSUNG Galaxy F13 (Nightsky Green, 64 GB)',
       'SAMSUNG Galaxy F13 (Sunrise Copper, 64 GB)',
       'POCO C50 (Country Green, 32 GB)', 'POCO C50 (Royal Blue, 32 GB)',
       'SAMSUNG Galaxy F13 (Waterfall Blue, 64 GB)',
       'REDMI 10 (Caribbean Green, 64 GB)',
       'REDMI 10 (Midnight Black, 64 GB)',
       'REDMI 10 (Pacific Blue, 64 GB)',
       'realme C35 (Glowing Black, 64 GB)',
       'realme C35 (Glowing Green, 64 GB)',
       'SAMSUNG Galaxy F13 (Waterfall Blue, 128 GB)',
       'realme C35 (Glowing Green, 128 GB)',
       'REDMI 10 (Shadow Black, 64 GB)',
       'realme C35 (Glowing Black, 128 GB)',
       'SAMSUNG Galaxy F13 (Sunrise Copper, 128 GB)',
       'Infinix HOT 20 Play (Fantasy Purple, 64 GB)',
       'Infinix HOT 20 Play (Racing Black, 64 GB)',
       'Infinix HOT 20 Play (Aurora Green, 64 GB)',
       'Infinix HOT 20 Play (Luna Blue, 64 GB)',
       'SAMSUNG Galaxy F13 (Nightsky Green, 128 GB)',
       'SAMSUNG Galaxy F23 5G (Forest Green, 128 GB

In [21]:
# (rows, columns) of the loaded results table.
mobile_df.shape

(702, 13)