## Flipkart Web Scraper

In [1]:
import csv
from bs4 import BeautifulSoup
from selenium import webdriver


def get_url_flipkart(search_term):
    """Build the Flipkart search URL template for ``search_term``.

    The search term is URL-encoded and inserted into the query string, and
    the results are requested sorted by ascending price. The returned string
    still holds one ``{}`` placeholder for the page number, so callers finish
    it with ``url.format(page)``.

    Args:
        search_term: Free-text query string.

    Returns:
        The search URL with a single ``{}`` placeholder for the page number.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    template = 'https://www.flipkart.com/search?q={}&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off'

    # quote_plus still maps spaces to '+', and additionally percent-encodes
    # characters such as '&', '#', '=' and '+' that would otherwise corrupt
    # the query string. It also encodes '{' and '}', which would otherwise
    # break the caller's later ``.format(page)`` call.
    url = template.format(quote_plus(search_term))

    # The page placeholder is appended after formatting so it survives as '{}'.
    return url + '&sort=price_asc&page={}'


def extract_record_flipkart(item):
    """Extract one product record from a Flipkart search-result element.

    Args:
        item: Parsed element for a single result card. It must offer a
            BeautifulSoup-style ``find(name, attrs)`` method.

    Returns:
        A tuple ``(description, price, rating, review_count, url)``, or
        ``None`` when the card has no title, no link or no price. ``rating``
        and ``review_count`` are each ``''`` when their element is absent.
    """
    # Title, link and price are mandatory. A card missing any of them is
    # skipped instead of raising, so one odd card cannot abort the scrape.
    title_tag = item.find('div', attrs={'class': '_4rR01T'})
    link_tag = item.find('a')
    price_tag = item.find('div', attrs={'class': '_30jeq3 _1_WHN1'})
    if title_tag is None or link_tag is None or price_tag is None:
        return None

    href = link_tag.get('href')
    if not href:
        return None

    description = title_tag.text.strip()
    url = 'https://www.flipkart.com' + href
    price = price_tag.text

    # Rating and review count are optional and resolved independently, so a
    # missing review count does not discard a rating that was found.
    rating_tag = item.find('div', attrs={'class': '_3LWZlK'})
    review_tag = item.find('span', {'class': '_2_R_DZ'})
    rating = rating_tag.text if rating_tag is not None else ''
    review_count = review_tag.text if review_tag is not None else ''

    return (description, price, rating, review_count, url)


def main_flipkart(search_term, max_pages=20):
    """Scrape Flipkart search results and append them to ``result.csv``.

    Loads result pages 1..``max_pages`` in a Chrome WebDriver session, parses
    each page with BeautifulSoup and collects one record per result card via
    ``extract_record_flipkart``.

    The CSV is opened in append mode with ``|`` as the delimiter, so rows
    already in the file are kept. ``main_amazon`` writes the same file.

    Args:
        search_term: Free-text query string.
        max_pages: Number of result pages to load, starting at page 1.
    """
    # Function-scope import keeps this block self-contained.
    import os

    records = []
    url = get_url_flipkart(search_term)

    # startup the webdriver
    driver = webdriver.Chrome()
    try:
        for page in range(1, max_pages + 1):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            for item in soup.find_all('div', {'class': '_2kHMtA'}):
                record = extract_record_flipkart(item)
                if record:
                    records.append(record)
    finally:
        # quit() ends the whole WebDriver session, whereas close() only
        # closes the current window. The finally clause releases the browser
        # even when a page load or parse step raises.
        driver.quit()

    # save the data to csv file
    output_path = 'result.csv'
    # In append mode the header is written only for a new or empty file, so
    # repeated runs do not leave header rows in the middle of the data.
    needs_header = not os.path.exists(output_path) or os.path.getsize(output_path) == 0
    with open(output_path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter='|')
        if needs_header:
            writer.writerow(['Description', 'Price', 'Rating', 'ReviewCount', 'Url'])
        writer.writerows(records)

## Amazon Web Scraper

In [2]:
import csv
from bs4 import BeautifulSoup
from selenium import webdriver


def get_url_amazon(search_term):
    """Build the Amazon India search URL template for ``search_term``.

    The search term is URL-encoded and inserted into the query string, and
    the results are requested sorted by ascending price. The returned string
    still holds one ``{}`` placeholder for the page number, so callers finish
    it with ``url.format(page)``.

    Args:
        search_term: Free-text query string.

    Returns:
        The search URL with a single ``{}`` placeholder for the page number.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    # NOTE(review): the crid and sprefix values look like leftovers copied
    # from one browser session (sprefix still names "ultrawide monitor").
    # They are kept unchanged here; confirm whether they are needed.
    template = 'https://www.amazon.in/s?k={}&crid=1TC4WK4BF59QM&sprefix=ultrawide+monitor%2Caps%2C364&ref=nb_sb_noss_1'

    # quote_plus still maps spaces to '+', and additionally percent-encodes
    # characters such as '&', '#', '=' and '+' that would otherwise corrupt
    # the query string. It also encodes '{' and '}', which would otherwise
    # break the caller's later ``.format(page)`` call.
    url = template.format(quote_plus(search_term))

    # The page placeholder is appended after formatting so it survives as '{}'.
    return url + '&s=price-asc-rank&page={}'


def extract_record_amazon(item):
    """Extract one product record from an Amazon search-result element.

    Args:
        item: Parsed element for a single search result. It must offer
            BeautifulSoup-style ``.h2`` access and a ``find(name, attrs)``
            method.

    Returns:
        A tuple ``(description, price, rating, review_count, url)``, or
        ``None`` when the result has no title link or no price. ``rating``
        is the first three characters of the rating text, and both
        ``rating`` and ``review_count`` are each ``''`` when their element
        is absent.
    """
    # description and url: the title link is looked up as <h2><a>. A result
    # without that structure is skipped instead of raising, so one odd card
    # cannot abort the scrape.
    heading = item.h2
    link_tag = heading.a if heading is not None else None
    if link_tag is None:
        return None

    href = link_tag.get('href')
    if not href:
        return None

    description = link_tag.text.strip()
    url = 'https://www.amazon.in' + href

    # price: a result without a price is not recorded.
    price_parent = item.find('span', 'a-price')
    price_tag = price_parent.find('span', 'a-offscreen') if price_parent is not None else None
    if price_tag is None:
        return None
    price = price_tag.text

    # Rating and review count are optional and resolved independently, so a
    # missing review count does not discard a rating that was found.
    rating_tag = item.find('span', {'class': 'a-icon-alt'})
    review_tag = item.find('span', {'class': 'a-size-base puis-light-weight-text s-link-centralized-style'})
    rating = rating_tag.text[0:3] if rating_tag is not None else ''
    review_count = review_tag.text if review_tag is not None else ''

    return (description, price, rating, review_count, url)


def main_amazon(search_term, max_pages=20):
    """Scrape Amazon India search results and write them to ``result.csv``.

    Loads result pages 1..``max_pages`` in a Chrome WebDriver session, parses
    each page with BeautifulSoup and collects one record per search result
    via ``extract_record_amazon``.

    The CSV is opened in write mode with ``|`` as the delimiter, so any
    previous content of the file is replaced. A header row is always written
    first.

    Args:
        search_term: Free-text query string.
        max_pages: Number of result pages to load, starting at page 1.
    """
    records = []
    url = get_url_amazon(search_term)

    # startup the webdriver
    driver = webdriver.Chrome()
    try:
        for page in range(1, max_pages + 1):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            for item in soup.find_all('div', {'data-component-type': 's-search-result'}):
                record = extract_record_amazon(item)
                if record:
                    records.append(record)
    finally:
        # quit() ends the whole WebDriver session, whereas close() only
        # closes the current window. The finally clause releases the browser
        # even when a page load or parse step raises.
        driver.quit()

    # save the data to csv file
    with open('result.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter='|')
        writer.writerow(['Description', 'Price', 'Rating', 'ReviewCount', 'Url'])
        writer.writerows(records)

## GUI Framework

In [None]:
from tkinter import *
from tkinter import ttk
import csv

# Root window that hosts every widget of the application.
main_window = Tk()
main_window.title("Web Scrapper")

# Search inputs: a caption and a text box per store, all on the top row.
Label(main_window, text="Amazon").grid(column=0, row=0)
amazon = Entry(main_window, borderwidth=5, width=50)
amazon.grid(column=1, row=0)
Label(main_window, text="Flipkart").grid(column=2, row=0)
flipkart = Entry(main_window, borderwidth=5, width=50)
flipkart.grid(column=3, row=0)

# Results table; its data columns are declared when the widget is created.
my_tree = ttk.Treeview(
    main_window,
    columns=('Description', 'Price', 'Rating', 'ReviewCount', 'Url'),
)

# Column layout: '#0' gets zero width and no stretch, then each data column
# is configured from a (name, cell anchor, width) entry.
my_tree.column('#0', width=0, stretch=NO)
for _column_name, _cell_anchor, _column_width in (
    ('Description', W, 120),
    ('Price', CENTER, 80),
    ('Rating', CENTER, 80),
    ('ReviewCount', W, 120),
    ('Url', W, 120),
):
    my_tree.column(_column_name, anchor=_cell_anchor, width=_column_width)

# Headings: each data column is captioned with its own name.
my_tree.heading('#0', text="", anchor=W)
for _column_name, _heading_anchor in (
    ('Description', W),
    ('Price', CENTER),
    ('Rating', W),
    ('ReviewCount', W),
    ('Url', W),
):
    my_tree.heading(_column_name, text=_column_name, anchor=_heading_anchor)

def on_click():
    """Run the requested searches and show the saved results in the table.

    Reads the Amazon and Flipkart entry boxes and runs the matching scraper
    for each non-empty one, Amazon first. It then loads ``result.csv``, the
    file both scrapers write, and fills the results table from it. When both
    boxes are empty, whatever the file already holds is shown.
    """
    # Same relative path the scrapers write to, so the table always shows the
    # file that was just produced.
    result_path = 'result.csv'
    header = ['Description', 'Price', 'Rating', 'ReviewCount', 'Url']

    am = amazon.get()
    fl = flipkart.get()

    if am or fl:
        # main_flipkart appends to the file, so start every new search from
        # an empty file; otherwise a Flipkart-only search would be shown
        # together with rows left over from a previous search.
        with open(result_path, 'w', encoding='utf-8'):
            pass
    if am:
        main_amazon(am)
    if fl:
        main_flipkart(fl)

    rows = []
    try:
        with open(result_path, 'r', newline='', encoding='utf-8') as file:
            for row in csv.reader(file, delimiter='|'):
                # Skip header lines (the table has its own headings) and
                # rows too short to fill every column.
                if len(row) < len(header) or row == header:
                    continue
                rows.append(row)
    except FileNotFoundError:
        # Nothing has been scraped yet: show an empty table.
        rows = []

    # Item ids restart at 0 on every click, so the previous rows must be
    # removed first; inserting an id that already exists raises TclError.
    existing = my_tree.get_children()
    if existing:
        my_tree.delete(*existing)

    for count, record in enumerate(rows):
        my_tree.insert(parent='', index='end', iid=count, text="", values=tuple(record[:5]))
    my_tree.grid(row=2, column=1, rowspan=10)
    
# Buttons: one "Search" button on row 1 in columns 1 and 3; both call on_click.
for _button_column in (1, 3):
    Button(main_window, text='Search', command=on_click).grid(row=1, column=_button_column)

# Start the Tk event loop.
main_window.mainloop()