# Selenium and BeautifulSoup

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import csv

In [2]:
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

In [3]:
def generate_url(search_term):
    """Build the Flipkart search URL for *search_term*.

    The term is URL-encoded before being placed in the ``q`` query parameter,
    so spaces and reserved characters such as ``&`` or ``#`` cannot break or
    truncate the query string.

    Args:
        search_term: The text to search for, e.g. ``"gaming laptop"``.

    Returns:
        The search URL as a string.
    """
    # Local import: keeps this definition usable on its own.
    from urllib.parse import quote_plus

    base_template = "https://www.flipkart.com/search?q={}&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
    return base_template.format(quote_plus(search_term))

### Creating Chrome Driver

In [4]:
def create_driver() -> webdriver.Chrome:
    """Create a Chrome WebDriver that is started with the ``--headless`` flag.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    chrome_options = Options()
    # Headless: the browser runs in the background without opening a window.
    chrome_options.add_argument("--headless")
    return webdriver.Chrome(options=chrome_options)

## Generate File Name

In [5]:
def generate_filename(search_item):
    """Derive the CSV file name for *search_item*.

    Every space in the search text becomes an underscore and ``.csv`` is
    appended, e.g. ``"gaming laptop"`` -> ``"gaming_laptop.csv"``.
    """
    stem = search_item.replace(' ', '_')
    return '{}.csv'.format(stem)

## Collect the Products List from Current Page

In [6]:
def collect_products_list(driver):
    """Return the product elements on the page currently loaded in *driver*.

    Args:
        driver: The browser driver to query via ``find_elements``.

    Returns:
        Whatever ``driver.find_elements`` yields for the class name
        ``"tUxRFH"``.
    """
    return driver.find_elements(By.CLASS_NAME, "tUxRFH")

### Extract info from a single product

In [7]:
def extract_product_data(product):
    """Extract the title, price, review and link from one product element.

    Args:
        product: A single element from the list returned by
            ``collect_products_list``.

    Returns:
        A ``(title, price, review, link)`` tuple. ``review`` is the string
        ``"NULL"`` when the product has no review element.

    Raises:
        NoSuchElementException: If the title, price or link element is
            missing from the product.
    """
    # EXTRACTING TITLE/DESCRIPTION
    title = product.find_element(By.CLASS_NAME, "KzDlHZ").text

    # EXTRACTING PRICE
    price = product.find_element(By.XPATH, ".//div[@class='Nx9bqj _4b5DiR']").text

    # EXTRACTING REVIEW
    # The review is optional. Only the "element not found" case is treated as
    # "no review", so unrelated failures (driver errors, Ctrl-C) still
    # propagate instead of being silently recorded as "NULL".
    try:
        review = product.find_element(By.CLASS_NAME, "XQDdHH").text
    except NoSuchElementException:
        review = "NULL"

    # EXTRACTING LINK
    link = product.find_element(By.CLASS_NAME, "CGtC98").get_attribute('href')

    return title, price, review, link
    

### Export data to CSV

In [8]:
def save_data_to_csv(record, filename, new_file=False):
    """Write the CSV header and/or one record to *filename*.

    Args:
        record: Row values in the order Title, Price, Review, Link. May be
            ``None`` only when ``new_file`` is true (header-only write).
        filename: Path of the CSV file.
        new_file: When true, the file is created or truncated and the header
            row is written first; a non-``None`` ``record`` is then written
            after the header instead of being discarded. When false, the
            record is appended to the file.
    """
    header = ['Title', 'Price', 'Review', 'Link']
    mode = 'w' if new_file else 'a'
    with open(filename, mode, newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if new_file:
            writer.writerow(header)
            if record is None:
                # Header-only initialisation of a fresh file.
                return
        writer.writerow(record)

In [9]:

# header = ['Title', 'Price', 'Review', 'Link']
# with open('laptop_list.csv', 'w', newline='', encoding='utf-8') as f:
#     writer = csv.writer(f)
#     writer.writerow(header)
#     writer.writerow(record)

In [10]:
# for page in range(1,21):
#     base_url = generate_url("laptop") + "&page={}"
#     f_url = base_url.format(page)
    
#     driver.get(f_url)
#     products = driver.find_elements(By.CLASS_NAME, "tUxRFH")
    
#     with open('laptop_list.csv', 'a+', newline='', encoding='utf-8') as f:
#         writer = csv.writer(f)
#         for product in products:
#             # EXTRACTING TITLE/DESCRIPTIONS
#             title = product.find_element(By.CLASS_NAME, "KzDlHZ").text
        
#             # EXTRACTING PRICE
#             price = product.find_element(By.XPATH, ".//div[@class='Nx9bqj _4b5DiR']").text
        
#             # EXTRACTING REVIEW
#             try:
#                 review = product.find_element(By.CLASS_NAME, "XQDdHH").text
#             except:
#                 review="NULL"
        
#             # EXTRACTING LINK
#             link = product.find_element(By.CLASS_NAME, "CGtC98").get_attribute('href')
    
#             writer.writerow([title, price, review, link])
#     driver.quit()

In [13]:
def run(search_item, max_pages=20):
    """Scrape the search results for *search_item* into a CSV file.

    The output file name comes from ``generate_filename(search_item)``. The
    file is (re)created with a header row, then every product found on result
    pages 1 through ``max_pages`` is appended as one row.

    Args:
        search_item: The text to search for, e.g. ``'laptop'``.
        max_pages: Number of result pages to visit (defaults to 20).
    """
    filename = generate_filename(search_item)
    # Start from a fresh file that contains only the header row.
    save_data_to_csv(None, filename, new_file=True)

    base_url = generate_url(search_item)
    driver = create_driver()
    try:
        for page in range(1, max_pages + 1):
            # Add the page number; without it every iteration would reload
            # the first results page and write duplicate rows.
            driver.get("{}&page={}".format(base_url, page))

            # Extract the products info
            for product in collect_products_list(driver):
                record = extract_product_data(product)
                save_data_to_csv(record, filename)
    finally:
        # Always shut the browser down, even when a page fails to load or a
        # product cannot be parsed.
        driver.quit()
        

In [14]:
if __name__ == "__main__":
    # Sample search term used when this is executed as a script.
    item = "laptop"
    run(search_item=item)