# Web Scraping Using Selenium

In [1]:
!pip install selenium



In [2]:
# Importing all the required libraries
import selenium
import pandas as pd
from selenium import webdriver
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException

### Q1: Write a python program to scrape data for “Data Analyst” Job position in “Bangalore” location. You have to scrape the job-title, job-location, company_name,experience_required. You have to scrape first 10 jobs data.

In [3]:
# Connecting to the webdriver
driver = webdriver.Chrome('chromedriver.exe')

In [4]:
# Getting the webpage of mentioned url
url = 'https://www.naukri.com/'
driver.get(url)

In [5]:
# Searching the web element for job title
search_job = driver.find_element_by_id('qsb-keyword-sugg')
search_job

<selenium.webdriver.remote.webelement.WebElement (session="4abfca05b45cd1d2056a3030d11bce9a", element="c6edf9d0-cd37-42ee-8c3b-fd9933ada951")>

In [6]:
# Entering “Data Analyst” in “Skill,Designations,Companies” field 
search_job.send_keys('Data Analyst')

In [7]:
# Searching the web element for job location
search_location = driver.find_element_by_id('qsb-location-sugg')
search_location

<selenium.webdriver.remote.webelement.WebElement (session="4abfca05b45cd1d2056a3030d11bce9a", element="59a885db-b6df-4f43-9c58-e3ef602314e6")>

In [8]:
# Entering "Bangalore" in “enter the location” field
search_location.send_keys('Bangalore')

In [9]:
# Clicking on Search button
search_btn = driver.find_element_by_xpath("/html/body/div[1]/div[3]/div[2]/section/div/form/div[3]/button")
search_btn.click()

In [10]:
# Fetching all URLs
url_tag = driver.find_elements_by_xpath("//a[@class='title fw500 ellipsis']")

urls=[]   #empty list
for i in url_tag:
    urls.append(i.get_attribute("href"))
urls

['https://www.naukri.com/job-listings-data-analyst-rapido-bike-taxi-bangalore-bengaluru-2-to-4-years-290721501591?src=jobsearchDesk&sid=16287834287142920&xp=1&px=1',
 'https://www.naukri.com/job-listings-process-data-analyst-hitachi-abb-power-grids-bangalore-bengaluru-2-to-6-years-280521501722?src=jobsearchDesk&sid=16287834287142920&xp=2&px=1',
 'https://www.naukri.com/job-listings-data-analyst-innovsource-services-private-limited-bangalore-bengaluru-1-to-6-years-290721002866?src=jobsearchDesk&sid=16287834287142920&xp=3&px=1',
 'https://www.naukri.com/job-listings-data-analyst-icf-next-bangalore-bengaluru-1-to-2-years-220721901048?src=jobsearchDesk&sid=16287834287142920&xp=4&px=1',
 'https://www.naukri.com/job-listings-data-analyst-ii-customer-support-vimeo-technologies-private-limited-bangalore-bengaluru-1-to-5-years-210621500212?src=jobsearchDesk&sid=16287834287142920&xp=5&px=1',
 'https://www.naukri.com/job-listings-product-data-analyst-sql-tableau-qlikview-connexions-bangalore-beng

In [11]:
# Extracting the data for the first 10 jobs results
# Scraping data for job titles
job_titles = []   # empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        title_tag = driver.find_element_by_xpath("//h1[@class='jd-header-title']")
        job_titles.append(title_tag.text)
    except:
        job_titles.append('-')
job_titles = job_titles[:10]
job_titles

['Data Analyst',
 'Process Data Analyst',
 'Data Analyst',
 'Data Analyst',
 'Data Analyst II - Customer Support',
 'Product/Data Analyst - SQL/Tableau/Qlikview',
 'Data Analyst _ 2-5 Years _ Bangalore',
 'Data Analyst',
 'Data Analyst',
 'Data Analyst - Transportation']

In [12]:
# Scraping data for job location 
job_location = []   #empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        location_tag = driver.find_element_by_xpath("//span[@class='location ']//a")
        job_location.append(location_tag.text)
    except:
            job_location.append('-')
job_location = job_location[:10]
job_location

['Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru']

In [13]:
# Scraping data for companies name
company_name = []   #empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        company_tag = driver.find_element_by_xpath("//a[@class='pad-rt-8']")
        company_name.append(company_tag.text)
    except:
        company_name.append('-')
company_name = company_name[:10]
company_name

['Rapido -bike taxi',
 'Hitachi ABB Power Grids',
 'Innovsource Services Private Limited',
 'ICF Next',
 'VIMEO TECHNOLOGIES PRIVATE LIMITED',
 'Connexions',
 'Teamware Solutions ( A division of Quantum Leap Co nsulting Private LTD).',
 'Bion',
 'CAREERLABS TECHNOLOGIES PRIVATE LIMITED',
 'Gojek Tech']

In [14]:
# Scraping data for experience required
experience_req = []   #empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        experience_tag = driver.find_element_by_xpath("//div[@class='exp']/span")
        experience_req.append(experience_tag.text)
    except:
        experience_req.append('-')
experience_req = experience_req[:10]
experience_req

['2 - 4 years',
 '2 - 6 years',
 '1 - 6 years',
 '1 - 2 years',
 '1 - 5 years',
 '1 - 4 years',
 '2 - 5 years',
 '1 - 3 years',
 '0 - 3 years',
 '2 - 5 years']

In [15]:
# Checking length of the collected data
print(len(job_titles),len(job_location),len(company_name),len(experience_req))

10 10 10 10


In [16]:
# Creating the DataFrame for the scraped data

jobs = pd.DataFrame({})
jobs["Job Title"] = job_titles
jobs["Job Location"] = job_location
jobs["Company Name"] = company_name
jobs["Experience Required"] = experience_req
jobs

Unnamed: 0,Job Title,Job Location,Company Name,Experience Required
0,Data Analyst,Bangalore/Bengaluru,Rapido -bike taxi,2 - 4 years
1,Process Data Analyst,Bangalore/Bengaluru,Hitachi ABB Power Grids,2 - 6 years
2,Data Analyst,Bangalore/Bengaluru,Innovsource Services Private Limited,1 - 6 years
3,Data Analyst,Bangalore/Bengaluru,ICF Next,1 - 2 years
4,Data Analyst II - Customer Support,Bangalore/Bengaluru,VIMEO TECHNOLOGIES PRIVATE LIMITED,1 - 5 years
5,Product/Data Analyst - SQL/Tableau/Qlikview,Bangalore/Bengaluru,Connexions,1 - 4 years
6,Data Analyst _ 2-5 Years _ Bangalore,Bangalore/Bengaluru,Teamware Solutions ( A division of Quantum Lea...,2 - 5 years
7,Data Analyst,Bangalore/Bengaluru,Bion,1 - 3 years
8,Data Analyst,Bangalore/Bengaluru,CAREERLABS TECHNOLOGIES PRIVATE LIMITED,0 - 3 years
9,Data Analyst - Transportation,Bangalore/Bengaluru,Gojek Tech,2 - 5 years


### Q2: Write a python program to scrape data for “Data Scientist” Job position in “Bangalore” location. You have to scrape the job-title, job-location,company_name, full job-description. You have to scrape first 10 jobs data.

In [97]:
driver = webdriver.Chrome("chromedriver.exe")

In [98]:
# Getting the webpage of mentioned url
url = 'https://www.naukri.com/'
driver.get(url)

In [99]:
# Searching the web element for job title
search_job = driver.find_element_by_id('qsb-keyword-sugg')
search_job

<selenium.webdriver.remote.webelement.WebElement (session="4d4c39db14b87b3da1aa314401764db3", element="ce23afa0-16ce-449b-aa18-e947bcca6295")>

In [100]:
# Entering “Data Scientist” in “Skill,Designations,Companies” field 
search_job.send_keys('Data Scientist')

In [101]:
# Searching the web element for job location
search_location = driver.find_element_by_id('qsb-location-sugg')
search_location

<selenium.webdriver.remote.webelement.WebElement (session="4d4c39db14b87b3da1aa314401764db3", element="140f621f-fe93-46d5-8a68-7a4bc18c3e77")>

In [102]:
# Entering "Bangalore" in “enter the location” field
search_location.send_keys('Bangalore')

In [103]:
# Clicking on Search button
search_btn = driver.find_element_by_xpath("/html/body/div[1]/div[3]/div[2]/section/div/form/div[3]/button")
search_btn.click()

In [104]:
# Fetching all URLs
url_tag = driver.find_elements_by_xpath("//a[@class='title fw500 ellipsis']")

urls=[]   #empty list
for i in url_tag:
    urls.append(i.get_attribute("href"))

In [105]:
# Extracting the data for the first 10 jobs results
# Scraping data for job titles
job_titles = []   # empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        title_tag = driver.find_element_by_xpath("//h1[@class='jd-header-title']")
        job_titles.append(title_tag.text)
    except:
        job_titles.append('-')
job_titles = job_titles[:10]
job_titles

['Data Scientist- Senior Business Analyst/Lead Analyst',
 '-',
 'Senior/ Lead Data Scientist',
 '-',
 '-',
 'Senior Data Scientist (Analytics),',
 '-',
 'Senior Data Scientist',
 'Senior Data Scientist',
 'Senior Data Scientist']

In [21]:
# Scraping data for job location 
job_location = []   #empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        location_tag = driver.find_element_by_xpath("//span[@class='location ']//a")
        job_location.append(location_tag.text)
    except:
            job_location.append('-')
job_location = job_location[:10]
job_location

['Gurgaon/Gurugram',
 '-',
 'Bangalore/Bengaluru',
 '-',
 '-',
 'Bangalore/Bengaluru',
 '-',
 'Chennai',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru']

In [22]:
# Scraping data for companies name
company_name = []   #empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        company_tag = driver.find_element_by_xpath("//a[@class='pad-rt-8']")
        company_name.append(company_tag.text)
    except:
        company_name.append('-')
company_name = company_name[:10]
company_name

['Evalueserve.com Pvt. Ltd',
 '-',
 'Superior Group',
 '-',
 '-',
 'Luxoft',
 '-',
 'Tiger Analytics India LLP',
 'Signify',
 'Cognizer India Private Limited']

In [26]:
# Scraping data for job discription
job_description = []  #empty list
for i in urls[:10]:
    driver.get(i)
    time.sleep(5)
    try:
        description_tag = driver.find_element_by_xpath("//section[@class='job-desc']")
        job_description.append(description_tag.text)
    except:
            job_description.append("-")

In [27]:
# Checking length of the scraped data
print(len(job_titles),len(job_location),len(company_name),len(job_description))

10 10 10 10


In [28]:
# Creating DataFrame for scraped data
jobs = pd.DataFrame({})
jobs["Job Title"] = job_titles
jobs["Job Location"] = job_location
jobs["company Name"] = company_name
jobs["Job Description"] = job_description
jobs

Unnamed: 0,Job Title,Job Location,company Name,Job Description
0,Data Scientist- Senior Business Analyst/Lead A...,Gurgaon/Gurugram,Evalueserve.com Pvt. Ltd,Job description\nJob Description\nUnderstand a...
1,-,-,-,-
2,Senior/ Lead Data Scientist,Bangalore/Bengaluru,Superior Group,Job description\nProvide advanced analytical c...
3,-,-,-,-
4,-,-,-,-
5,"Senior Data Scientist (Analytics),",Bangalore/Bengaluru,Luxoft,Job description\n\n Data and Analytics Propos...
6,-,-,-,-
7,Senior Data Scientist,Chennai,Tiger Analytics India LLP,Job description\nRoles and Responsibilities\nT...
8,Senior Data Scientist,Bangalore/Bengaluru,Signify,"Job description\nWhat you ll do\nResearch, des..."
9,Senior Data Scientist,Bangalore/Bengaluru,Cognizer India Private Limited,Job description\n\nRoles and Responsibilities\...


### Q3:Scraping the data using filters available on the webpage
- **Use the location and salary filter.**
- **Scrape data for “Data Scientist” designation for first 10 job results.**
- **Scrape the job-title, job-location, company_name,experience_required.**

In [29]:
# Connecting to the webdriver
driver = webdriver.Chrome('chromedriver.exe')

In [30]:
# Getting the webpage of mentioned url
url = 'https://www.naukri.com/'
driver.get(url)

In [31]:
# Searching the web element for job title
search_job = driver.find_element_by_id('qsb-keyword-sugg')
search_job

<selenium.webdriver.remote.webelement.WebElement (session="d112399884b581e6c721e35220756569", element="32b51f08-5cbe-48eb-ba83-17b6fdfde273")>

In [32]:
# Entering “Data Scientist” in “Skill,Designations,Companies” field 
search_job.send_keys('Data Scientist')

In [33]:
# Clicking on Search button
search_btn = driver.find_element_by_xpath("//button[@class='btn']")
search_btn.click()

In [34]:
# Applying location filter for the location "Delhi/NCR"
location_filter = driver.find_element_by_xpath('/html/body/div[1]/div[3]/div[2]/section[1]/div[2]/div[3]/div[2]/div[3]/label/p/span[1]')
location_filter.click()

In [35]:
# Applying salary filter 3-6 lakhs
salary_filter = driver.find_element_by_xpath('/html/body/div[1]/div[3]/div[2]/section[1]/div[2]/div[4]/div[2]/div[2]/label/p/span[1]')
salary_filter.click()

In [36]:
# Fetching all URLs
url_tag = driver.find_elements_by_xpath("//a[@class='title fw500 ellipsis']")

urls=[]   #empty list
for i in url_tag:
    urls.append(i.get_attribute("href"))

In [37]:
# Extracting the data for “Data Scientist” designation for first 10 job results.
# Scraping data for job titles
job_titles = []   # empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        title_tag = driver.find_element_by_xpath("//h1[@class='jd-header-title']")
        job_titles.append(title_tag.text)
    except:
        job_titles.append('-')
job_titles = job_titles[:10]
job_titles

['Data Scientist Internship',
 'Data Scientist',
 'Immediate Openings For DATA Scientist with 6 To 7 yrs of Experience',
 'Data Scientist',
 'Data Scientist / Sr. Data Scientist',
 'Only Fresher / Data Scientist / Data Analyst / Analytics - MNC Jobs',
 'Senior Data Scientist - Noida',
 'Data Scientist',
 'Data Scientist',
 '-']

In [38]:
# Scraping data for job location 
job_location = []   #empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        location_tag = driver.find_element_by_xpath("//span[@class='location ']/a")
        job_location.append(location_tag.text)
    except:
            job_location.append('-')
job_location = job_location[:10]
job_location

['New Delhi',
 'Gurgaon/Gurugram',
 'Kolkata',
 'Noida',
 'Noida',
 'Noida',
 'Noida',
 'Gurgaon/Gurugram',
 'Mumbai',
 '-']

In [39]:
# Scraping data for companies name
company_name = []   #empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        company_tag = driver.find_element_by_xpath("//a[@class='pad-rt-8']")
        company_name.append(company_tag.text)
    except:
        company_name.append('-')
company_name = company_name[:10]
company_name

['iHackers Inc',
 'CBRE South Asia Pvt Ltd',
 'Entune IT Consulting Private Limited',
 'Agreeya',
 'WEGARNER SOLUTIONS PRIVATE LIMITED',
 'GABA Consultancy services',
 'Optum Global Solutions (India) Private Limited',
 'Decimal Technologies Pvt Ltd.',
 'Fractal Analytics',
 '-']

In [40]:
# Scraping data for experience required
experience_req = []   #empty list
for i in urls:
    driver.get(i)
    time.sleep(5)
    try:
        experience_tag = driver.find_element_by_xpath("//div[@class='exp']/span")
        experience_req.append(experience_tag.text)
    except:
        experience_req.append('-')
experience_req = experience_req[:10]
experience_req

['0 - 1 years',
 '2 - 4 years',
 '5 - 8 years',
 '3 - 6 years',
 '0 - 5 years',
 '0 years',
 '2 - 6 years',
 '1 - 3 years',
 '3 - 7 years',
 '-']

In [41]:
# Checking the length of scraped data
print(len(job_titles),len(job_location),len(company_name),len(experience_req))

10 10 10 10


In [42]:
# Creating DataFrame for the scraped data
jobs = pd.DataFrame({})
jobs["Job Title"] = job_titles
jobs["Job Location"] = job_location
jobs["company Name"] = company_name
jobs["Experience Required"] = experience_req
jobs

Unnamed: 0,Job Title,Job Location,company Name,Experience Required
0,Data Scientist Internship,New Delhi,iHackers Inc,0 - 1 years
1,Data Scientist,Gurgaon/Gurugram,CBRE South Asia Pvt Ltd,2 - 4 years
2,Immediate Openings For DATA Scientist with 6 T...,Kolkata,Entune IT Consulting Private Limited,5 - 8 years
3,Data Scientist,Noida,Agreeya,3 - 6 years
4,Data Scientist / Sr. Data Scientist,Noida,WEGARNER SOLUTIONS PRIVATE LIMITED,0 - 5 years
5,Only Fresher / Data Scientist / Data Analyst /...,Noida,GABA Consultancy services,0 years
6,Senior Data Scientist - Noida,Noida,Optum Global Solutions (India) Private Limited,2 - 6 years
7,Data Scientist,Gurgaon/Gurugram,Decimal Technologies Pvt Ltd.,1 - 3 years
8,Data Scientist,Mumbai,Fractal Analytics,3 - 7 years
9,-,-,-,-


### Q4: Write a python program to scrape data for first 10 job results for Data scientist Designation in Noida location. You have to scrape company_name, No. of days ago when job was posted, Rating of the company.

In [53]:
# Connecting to the webdriver
driver = webdriver.Chrome('chromedriver.exe')

In [35]:
# Getting the webpage of mentioned url
url = 'https://www.glassdoor.co.in/index.htm'
driver.get(url)

In [36]:
#Searching web elemnt for job title
search_job = driver.find_element_by_id('scKeyword')
search_job

<selenium.webdriver.remote.webelement.WebElement (session="5472d449150eef19d255ab17b0b5ce30", element="95dc920e-f940-4399-a0c5-067a7a41261f")>

In [37]:
# Entering “Data Scientist” in “Job Title,Keyword,Company” field
search_job.send_keys('Data Scientist')

In [38]:
# Searching the web element for job location
search_location = driver.find_element_by_id('scLocation')
search_location

<selenium.webdriver.remote.webelement.WebElement (session="5472d449150eef19d255ab17b0b5ce30", element="71a970a1-ed53-4030-9e6e-1cc5a8be114f")>

In [41]:
# Entering “Noida” in “location” field
search_location.send_keys('Noida')

In [42]:
# Clicking on Search button
search_btn = driver.find_element_by_xpath("//button[@class='pl-0 pr-xsm SearchStyles__searchKeywordSubmit']")
search_btn.click()

In [43]:
# Scraping data for first 10 jobs

# Scraping data for company name
company_tag = driver.find_elements_by_xpath("//div[@class='d-flex justify-content-between align-items-start']")

company_name = []   #empty list
for i in company_tag:
    company_name.append(i.text)
company_name = company_name[:10]
company_name

['Liberin Technologies Private Limited',
 'Pixel Vision',
 'Newgen Software',
 'Crowe',
 'Salasar New Age Technologies',
 'NatWest Group',
 'Ericsson',
 'Techlive',
 'Biz2Credit Inc',
 'Uncodemy']

In [46]:
# Scraping data for No of days ago when job was posted
job_posted = driver.find_elements_by_xpath("//div[@class='d-flex align-items-end pl-std css-mi55ob']")

No_of_Days = []  #empty list
for i in job_posted:
    No_of_Days.append(i.text)
No_of_Days = No_of_Days[:10]
No_of_Days

['9d', '19d', '20d', '30d+', '30d+', '2d', '4d', '30d+', '30d+', '24h']

In [48]:
# Scraping data for rating of the company
company_rating = driver.find_elements_by_xpath("//span[@class='css-19pjha7 e1cjmv6j1']")

Ratings = []  #empty list
for i in company_rating:
    Ratings.append(i.text)
Ratings = Ratings[:10]
Ratings

['3.3', '3.8', '3.9', '4.1', '5.0', '4.1', '3.8', '3.8', '3.1', '3.7']

In [50]:
# Checking length of scraped data
print(len(company_name),len(No_of_Days),len(Ratings))

10 10 10


In [53]:
# Creating DataFrame for scraped data
glass_door_jobs = pd.DataFrame({})
glass_door_jobs["Company Name"] = company_name
glass_door_jobs["No of Days Ago"] = No_of_Days
glass_door_jobs["Company Ratings"] = Ratings
glass_door_jobs

Unnamed: 0,Company Name,No of Days Ago,Company Ratings
0,Liberin Technologies Private Limited,9d,3.3
1,Pixel Vision,19d,3.8
2,Newgen Software,20d,3.9
3,Crowe,30d+,4.1
4,Salasar New Age Technologies,30d+,5.0
5,NatWest Group,2d,4.1
6,Ericsson,4d,3.8
7,Techlive,30d+,3.8
8,Biz2Credit Inc,30d+,3.1
9,Uncodemy,24h,3.7


### Q5:Write a python program to scrape the salary data for Data Scientist designation in Noida location.

### You have to scrape Company name, Number of salaries, Average salary, Minsalary, Max Salary and ratings of the company

In [17]:
# Connecting to the webdriver
driver = webdriver.Chrome('chromedriver.exe')

In [18]:
# Getting the webpage of mentioned url
url = 'https://www.glassdoor.co.in/Salaries/index.htm'
driver.get(url)

In [19]:
#Searching web elemnt for job title
search_job = driver.find_element_by_id('scKeyword')
search_job

<selenium.webdriver.remote.webelement.WebElement (session="180a114d09bb9d193308d1a8342ab92b", element="aed00610-15c9-4dc5-ad4b-f0c7c9668509")>

In [20]:
# Entering “Data Scientist” in “Job Title,Keyword,Company” field
search_job.send_keys('Data Scientist')

In [21]:
# Searching the web element for job location
search_location = driver.find_element_by_id('scLocation')
search_location

<selenium.webdriver.remote.webelement.WebElement (session="180a114d09bb9d193308d1a8342ab92b", element="2b7d1316-ebbe-437b-9db7-58d1011b296c")>

In [22]:
# Entering “Noida” in “location” field
search_location.send_keys('Noida')

In [23]:
# Clicking on Search button
search_btn = driver.find_element_by_xpath("//button[@class='pl-0 pr-xsm SearchStyles__searchKeywordSubmit']")

search_btn.click()

In [27]:
# Scraping data for first 10 companies

company_names=[]
companies = driver.find_elements_by_xpath("//h3[@class='m-0 css-g261rn']/a")
for i in companies:
    company_names.append(i.text)
company_names = company_names[:10]
company_names

['Tata Consultancy Services',
 'IBM',
 'Accenture',
 'Delhivery',
 'Ericsson-Worldwide',
 'UnitedHealth Group',
 'Valiance Solutions',
 'EXL Service',
 'Optum',
 'Optum Global Solutions']

In [29]:
# Scraping data for number of salaries
salary = driver.find_elements_by_xpath("//span[@class='m-0 css-1b6bxoo']")

No_of_salaries = []   #empty list
for i in salary:
    No_of_salaries.append(i.text)
No_of_salaries = No_of_salaries[:10]
No_of_salaries

['18 salaries',
 '18 salaries',
 '15 salaries',
 '15 salaries',
 '14 salaries',
 '14 salaries',
 '10 salaries',
 '9 salaries',
 '9 salaries',
 '9 salaries']

In [30]:
# extracting Average salary data
salary=driver.find_elements_by_xpath('//div[@class="col-12 col-lg-4 px-lg-0 d-flex align-items-baseline"]/h3')

average_salary=[]
for i in salary:
    average_salary.append(i.text)
average_salary = average_salary[:10]
average_salary

['₹6,12,205',
 '₹9,00,000',
 '₹11,63,336',
 '₹12,18,244',
 '₹7,39,238',
 '₹12,80,000',
 '₹8,63,750',
 '₹11,10,000',
 '₹14,23,677',
 '₹13,28,697']

In [37]:
# extracting minimum and maximum salary data
salary=driver.find_elements_by_xpath('//div[@class="d-flex mt-xxsm css-79elbk epuxyqn0"]')

min_max_salary=[]
for i in salary:
    min_max_salary.append(i.text.replace("\n","--"))
min_max_salary = min_max_salary[:10]
min_max_salary

['₹3L--₹13L',
 '₹6L--₹27L',
 '₹6L--₹22L',
 '₹5L--₹1Cr',
 '₹4L--₹16L',
 '₹8L--₹15L',
 '₹5L--₹15L',
 '₹6L--₹15L',
 '₹8L--₹20L',
 '₹4L--₹22L']

In [60]:
# extracting minimum salary data
minsalary=driver.find_elements_by_xpath('//div[@class="d-flex mt-xxsm css-79elbk epuxyqn0"]/p[1]')

min_salary=[]
for i in minsalary:
    min_salary.append(i.text.replace("\n","--"))
min_salary = min_salary[:12]
min_salary

['₹3L',
 '₹6L',
 '₹6L',
 '₹5L',
 '₹4L',
 '₹8L',
 '₹5L',
 '₹6L',
 '₹8L',
 '₹4L',
 '₹2L',
 '₹6L']

In [46]:
# extracting maximun salary data
maxsalary=driver.find_elements_by_xpath('//div[@class="d-flex mt-xxsm css-79elbk epuxyqn0"]/p[2]')

max_salary=[]
for i in maxsalary:
    max_salary.append(i.text.replace("\n","--"))
max_salary = max_salary[:12]
max_salary

['₹13L',
 '₹27L',
 '₹22L',
 '₹1Cr',
 '₹16L',
 '₹15L',
 '₹15L',
 '₹15L',
 '₹20L',
 '₹22L',
 '₹18L',
 '₹17L']

In [49]:
# extracting rating of the company data 
company_rating=driver.find_elements_by_xpath('//span[@class="m-0 css-kyx745"]')

Rating=[]
for i in company_rating:
    Rating.append(i.text)
Rating = Rating[:10]
Rating

['3.9', '3.9', '4.1', '3.9', '4', '3.6', '4.2', '3.6', '3.7', '3.9']

In [61]:
# Creating DataFrame for scraped data
glass_door_jobs = pd.DataFrame({})
glass_door_jobs["Company Name"] = company_names[:10]
glass_door_jobs["No of Salaries"] = No_of_salaries[:10]
glass_door_jobs["Average Salary"] = average_salary[:10]
glass_door_jobs["Min_Max Salary"] = min_max_salary[:10]
glass_door_jobs["Min Salary"] = min_salary[:10]
glass_door_jobs["Max Salary"] = max_salary[:10]
glass_door_jobs["Ratings"] = Rating[:10]
glass_door_jobs

Unnamed: 0,Company Name,No of Salaries,Average Salary,Min_Max Salary,Min Salary,Max Salary,Ratings
0,Tata Consultancy Services,18 salaries,"₹6,12,205",₹3L--₹13L,₹3L,₹13L,3.9
1,IBM,18 salaries,"₹9,00,000",₹6L--₹27L,₹6L,₹27L,3.9
2,Accenture,15 salaries,"₹11,63,336",₹6L--₹22L,₹6L,₹22L,4.1
3,Delhivery,15 salaries,"₹12,18,244",₹5L--₹1Cr,₹5L,₹1Cr,3.9
4,Ericsson-Worldwide,14 salaries,"₹7,39,238",₹4L--₹16L,₹4L,₹16L,4.0
5,UnitedHealth Group,14 salaries,"₹12,80,000",₹8L--₹15L,₹8L,₹15L,3.6
6,Valiance Solutions,10 salaries,"₹8,63,750",₹5L--₹15L,₹5L,₹15L,4.2
7,EXL Service,9 salaries,"₹11,10,000",₹6L--₹15L,₹6L,₹15L,3.6
8,Optum,9 salaries,"₹14,23,677",₹8L--₹20L,₹8L,₹20L,3.7
9,Optum Global Solutions,9 salaries,"₹13,28,697",₹4L--₹22L,₹4L,₹22L,3.9


### Q6 : Scrape data of first 100 sunglasses listings on flipkart.com. You have to scrape four attributes:

**1. Brand**

**2. Product Description**

**3. Price**

**4. Discount %**

In [126]:
# Connecting to the web driver
driver = webdriver.Chrome("chromedriver.exe")

In [129]:
# Getting the webpage of mentioned url
url = 'https://www.flipkart.com/'
driver.get(url)

In [131]:
#Searching web elemnt for job title
search_glasses = driver.find_element_by_xpath("//input[@class='_3704LK']")
search_glasses

<selenium.webdriver.remote.webelement.WebElement (session="a6335d882b841e6158802f677f54679d", element="3ec87d79-fa85-4897-848a-37b8eb65299a")>

In [132]:
#Enter “sunglasses” in the search field where “search for products, brands and more"
search_glasses.send_keys("sunglasses")

In [138]:
# Clicking on Search button
search_btn = driver.find_element_by_xpath("//button[@class='L0Z3Pu']")

search_btn.click()

In [141]:
# Fetching all the URLs 
url = driver.find_elements_by_xpath("//nav[@class='yFHi8N']/a")

urls = []  #empty list
for i in url:
    urls.append(i.get_attribute('href'))
urls

['https://www.flipkart.com/search?q=sunglasses&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=1',
 'https://www.flipkart.com/search?q=sunglasses&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=2',
 'https://www.flipkart.com/search?q=sunglasses&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=3',
 'https://www.flipkart.com/search?q=sunglasses&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=4',
 'https://www.flipkart.com/search?q=sunglasses&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=5',
 'https://www.flipkart.com/search?q=sunglasses&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=6',
 'https://www.flipkart.com/search?q=sunglasses&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=7',
 'https://www.flipkart.com/search?q=sunglasses&otracker=search&otracker1=search&marketplace=FLIPKART&as-

In [148]:
# Scraping data for sunglasses brand
glass_brand = []  #empty list
for i in urls:
    driver.get(i)
    Brand= driver.find_elements_by_xpath("//div[@class='_2WkVRV']")
    for i in Brand:
        glass_brand.append(i.text)
glass_brand = glass_brand[:100]
glass_brand

['Wrogn',
 'hipe',
 'Elligator',
 'Fastrack',
 'Fastrack',
 'Fastrack',
 'kingsunglasses',
 'Villain',
 'PIRASO',
 'Fastrack',
 'kingsunglasses',
 'HAMIW COLLECTION',
 'PIRASO',
 'Fastrack',
 'kingsunglasses',
 'ROYAL SON',
 'Fastrack',
 'ROZZETTA CRAFT',
 'Fastrack',
 'ROYAL SON',
 'Silver Kartz',
 'PHENOMENAL',
 'ROZZETTA CRAFT',
 'ROYAL SON',
 'kingsunglasses',
 'Elligator',
 'Singco India',
 'Wrogn',
 'Fastrack',
 'hipe',
 'Singco India',
 'ROYAL SON',
 'Elligator',
 'Fastrack',
 'Elligator',
 'ROYAL SON',
 'Fastrack',
 'Elligator',
 'NuVew',
 'ROYAL SON',
 'PHENOMENAL',
 'ROZZETTA CRAFT',
 'Fastrack',
 'Wrogn',
 'GANSTA',
 'PIRASO',
 'DEIXELS',
 'Villain',
 'Singco',
 'Fastrack',
 'HIPPON',
 'HAMIW COLLECTION',
 'NuVew',
 'Fastrack',
 'AISLIN',
 'hipe',
 'Fastrack',
 'Fastrack',
 'GANSTA',
 'ROYAL SON',
 'PIRASO',
 'hipe',
 'hipe',
 'ROYAL SON',
 'ROYAL SON',
 'Singco India',
 'DEIXELS',
 'ROYAL SON',
 'hipe',
 'Singco India',
 'GANSTA',
 'Wrogn',
 'kingsunglasses',
 'ROYAL SON',


In [149]:
len(glass_brand)

100

In [156]:
# Scraping data for product discription
product_desc = []  #empty list
for i in urls:
    driver.get(i)
    Product = driver.find_elements_by_xpath("//a[@class='IRpwTa']")
    for i in Product:
        product_desc.append(i.text)
product_desc = product_desc[:100]
product_desc

['Mirrored Wayfarer Sunglasses (51)',
 'UV Protection Round Sunglasses (Free Size)',
 'UV Protection Round Sunglasses (54)',
 'Gradient, UV Protection Wayfarer Sunglasses (Free Size)',
 'UV Protection Rectangular Sunglasses (Free Size)',
 'UV Protection Wayfarer Sunglasses (Free Size)',
 'Mirrored, UV Protection Wayfarer Sunglasses (Free Size)',
 'Mirrored Wayfarer Sunglasses (55)',
 'UV Protection Aviator Sunglasses (54)',
 'Mirrored, UV Protection Wayfarer Sunglasses (Free Size)',
 'UV Protection Aviator Sunglasses (58)',
 'Polarized, UV Protection Round Sunglasses (49)',
 'UV Protection Aviator Sunglasses (54)',
 'UV Protection Aviator Sunglasses (Free Size)',
 'UV Protection Rectangular Sunglasses (Free Size)',
 'UV Protection Aviator Sunglasses (60)',
 'UV Protection Wayfarer Sunglasses (56)',
 'UV Protection Retro Square Sunglasses (Free Size)',
 'UV Protection Round Sunglasses (54)',
 'UV Protection Rectangular Sunglasses (58)',
 'UV Protection Wayfarer Sunglasses (Free Size)',


In [158]:
len(product_desc)

100

In [160]:
# Scraping data for product price
product_price = []  #empty list
for i in urls:
    driver.get(i)
    Price = driver.find_elements_by_xpath("//div[@class='_30jeq3']")
    for i in Price:
        product_price.append(i.text)
product_price = product_price[:100]
product_price

['₹739',
 '₹549',
 '₹295',
 '₹455',
 '₹509',
 '₹669',
 '₹299',
 '₹219',
 '₹225',
 '₹299',
 '₹969',
 '₹599',
 '₹225',
 '₹539',
 '₹210',
 '₹179',
 '₹599',
 '₹499',
 '₹225',
 '₹664',
 '₹599',
 '₹246',
 '₹969',
 '₹1,009',
 '₹379',
 '₹404',
 '₹331',
 '₹474',
 '₹599',
 '₹214',
 '₹210',
 '₹699',
 '₹334',
 '₹426',
 '₹349',
 '₹219',
 '₹398',
 '₹949',
 '₹379',
 '₹664',
 '₹379',
 '₹609',
 '₹474',
 '₹739',
 '₹295',
 '₹257',
 '₹1,479',
 '₹179',
 '₹239',
 '₹225',
 '₹224',
 '₹219',
 '₹629',
 '₹609',
 '₹969',
 '₹664',
 '₹516',
 '₹239',
 '₹309',
 '₹664',
 '₹319',
 '₹299',
 '₹211',
 '₹699',
 '₹331',
 '₹202',
 '₹474',
 '₹1,009',
 '₹209',
 '₹217',
 '₹269',
 '₹426',
 '₹249',
 '₹599',
 '₹225',
 '₹664',
 '₹314',
 '₹225',
 '₹669',
 '₹219',
 '₹189',
 '₹430',
 '₹499',
 '₹739',
 '₹255',
 '₹487',
 '₹639',
 '₹179',
 '₹282',
 '₹293',
 '₹209',
 '₹219',
 '₹295',
 '₹599',
 '₹399',
 '₹599',
 '₹349',
 '₹426',
 '₹559',
 '₹664']

In [161]:
len(product_price)

100

In [166]:
# Scraping data for product Discount %
Product_Discount= []  #empty list
for i in urls:
    driver.get(i)
    Discount = driver.find_elements_by_xpath("//div[@class='_3Ay6Sb']/span")
    for i in Discount:
        Product_Discount.append(i.text)
Product_Discount =Product_Discount[:100]
Product_Discount

['71% off',
 '88% off',
 '88% off',
 '43% off',
 '36% off',
 '25% off',
 '88% off',
 '45% off',
 '85% off',
 '88% off',
 '25% off',
 '78% off',
 '85% off',
 '32% off',
 '82% off',
 '66% off',
 '33% off',
 '77% off',
 '79% off',
 '66% off',
 '33% off',
 '83% off',
 '25% off',
 '68% off',
 '81% off',
 '79% off',
 '74% off',
 '68% off',
 '25% off',
 '69% off',
 '85% off',
 '53% off',
 '72% off',
 '80% off',
 '76% off',
 '77% off',
 '73% off',
 '26% off',
 '81% off',
 '78% off',
 '81% off',
 '32% off',
 '78% off',
 '71% off',
 '85% off',
 '78% off',
 '26% off',
 '88% off',
 '76% off',
 '85% off',
 '62% off',
 '78% off',
 '30% off',
 '32% off',
 '25% off',
 '66% off',
 '66% off',
 '84% off',
 '66% off',
 '53% off',
 '83% off',
 '76% off',
 '78% off',
 '68% off',
 '83% off',
 '83% off',
 '68% off',
 '66% off',
 '88% off',
 '78% off',
 '82% off',
 '78% off',
 '79% off',
 '33% off',
 '85% off',
 '55% off',
 '80% off',
 '85% off',
 '25% off',
 '78% off',
 '81% off',
 '82% off',
 '66% off',
 '71

In [168]:
len(Product_Discount)

100

In [169]:
# Checking length of scraped data
print(len(glass_brand),len(product_desc),len(product_price),len(Product_Discount))

100 100 100 100


In [170]:
# Creating DataFrame for the scraped data
sunglasses = pd.DataFrame({})
sunglasses["Brand"] = glass_brand
sunglasses["Product Description"] = product_desc
sunglasses["Price"] = product_price
sunglasses["Discount%"] = Product_Discount
sunglasses

Unnamed: 0,Brand,Product Description,Price,Discount%
0,Wrogn,Mirrored Wayfarer Sunglasses (51),₹739,71% off
1,hipe,UV Protection Round Sunglasses (Free Size),₹549,88% off
2,Elligator,UV Protection Round Sunglasses (54),₹295,88% off
3,Fastrack,"Gradient, UV Protection Wayfarer Sunglasses (F...",₹455,43% off
4,Fastrack,UV Protection Rectangular Sunglasses (Free Size),₹509,36% off
...,...,...,...,...
95,Villain,Others Retro Square Sunglasses (Free Size),₹599,66% off
96,Flizz,"UV Protection, Riding Glasses Rectangular, Ret...",₹349,76% off
97,ROYAL SON,"Gradient, UV Protection Round Sunglasses (Free...",₹426,78% off
98,ROZZETTA CRAFT,UV Protection Wayfarer Sunglasses (Free Size),₹559,30% off


### Q7: Scrape 100 reviews data from flipkart.com for iphone11 phone. You have to go the 
link: https://www.flipkart.com/apple-iphone-11-black-64-gb-includesearpods-poweradapter/p/itm0f37c2240b217?pid=MOBFKCTSVZAXUHGR&lid=LSTMOBFKCTSVZAXUHGREPBFGI&marketplace

**1. Rating** 

**2. Review_summary** 

**3. Full review**

**You have to scrape this data for first 100 reviews**

In [None]:
# Connecting to the webdriver
driver = webdriver.Chrome('chromedriver.exe')

In [179]:
# Getting the webpage of mentioned url
url = ' https://www.flipkart.com/apple-iphone-11-black-64-gb-includesearpods-poweradapter/p/itm0f37c2240b217?pid=MOBFKCTSVZAXUHGR&lid=LSTMOBFKCTSVZAXUHGREPBFGI&marketplace'
driver.get(url)

In [180]:
#Searching web elemnt for reviews
search_reviews = driver.find_element_by_xpath("//div[@class='_3UAT2v _16PBlm']")
search_reviews

<selenium.webdriver.remote.webelement.WebElement (session="497c58cabe896bd59367ec735137ede7", element="64272a4b-2b04-415b-9772-5959f0a6ae74")>

In [181]:
# Cliking on view all
search_all = driver.find_element_by_xpath("//div[@class='_3UAT2v _16PBlm']/span")

search_all.click()

In [182]:
# Fetching all the URLs 
url = driver.find_elements_by_xpath("//nav[@class='yFHi8N']/a")

urls = []  #empty list
for i in url:
    urls.append(i.get_attribute('href'))
urls

['https://www.flipkart.com/apple-iphone-11-black-64-gb-includes-earpods-power-adapter/product-reviews/itm0f37c2240b217?pid=MOBFKCTSVZAXUHGR&lid=LSTMOBFKCTSVZAXUHGREPBFGI&marketplace=FLIPKART&page=1',
 'https://www.flipkart.com/apple-iphone-11-black-64-gb-includes-earpods-power-adapter/product-reviews/itm0f37c2240b217?pid=MOBFKCTSVZAXUHGR&lid=LSTMOBFKCTSVZAXUHGREPBFGI&marketplace=FLIPKART&page=2',
 'https://www.flipkart.com/apple-iphone-11-black-64-gb-includes-earpods-power-adapter/product-reviews/itm0f37c2240b217?pid=MOBFKCTSVZAXUHGR&lid=LSTMOBFKCTSVZAXUHGREPBFGI&marketplace=FLIPKART&page=3',
 'https://www.flipkart.com/apple-iphone-11-black-64-gb-includes-earpods-power-adapter/product-reviews/itm0f37c2240b217?pid=MOBFKCTSVZAXUHGR&lid=LSTMOBFKCTSVZAXUHGREPBFGI&marketplace=FLIPKART&page=4',
 'https://www.flipkart.com/apple-iphone-11-black-64-gb-includes-earpods-power-adapter/product-reviews/itm0f37c2240b217?pid=MOBFKCTSVZAXUHGR&lid=LSTMOBFKCTSVZAXUHGREPBFGI&marketplace=FLIPKART&page=5',


In [183]:
# Scraping data for iphone ratings
iphone_ratings= []  #empty list
for i in urls:
    driver.get(i)
    Ratings = driver.find_elements_by_xpath("//div[@class='_3LWZlK _1BLPMq']")
    for i in Ratings:
        iphone_ratings.append(i.text)
iphone_ratings =iphone_ratings[:100]
iphone_ratings

['5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '4',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '4',
 '5',
 '5',
 '5',
 '5',
 '4',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '4',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '4',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '4',
 '5',
 '5',
 '4',
 '5',
 '4',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '4',
 '5',
 '5',
 '5',
 '4',
 '5',
 '3',
 '5',
 '5',
 '5',
 '4',
 '5',
 '5',
 '5',
 '5',
 '3',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5',
 '5']

In [186]:
len(iphone_ratings)

100

In [184]:
# Scraping data for iphone review summary
review_sum= []  #empty list
for i in urls:
    driver.get(i)
    rev_sum = driver.find_elements_by_xpath("//p[@class='_2-N8zT']")
    for i in rev_sum:
        review_sum.append(i.text)
review_sum =review_sum[:100]
review_sum

['Simply awesome',
 'Brilliant',
 'Fabulous!',
 'Perfect product!',
 'Worth every penny',
 'Great product',
 'Worth every penny',
 'Good choice',
 'Highly recommended',
 'Perfect product!',
 'Perfect product!',
 'Best in the market!',
 'Highly recommended',
 'Classy product',
 'Highly recommended',
 'Worth every penny',
 'Perfect product!',
 'Simply awesome',
 'Worth every penny',
 'Terrific',
 'Nice product',
 'Wonderful',
 'Classy product',
 'Brilliant',
 'Must buy!',
 'Good choice',
 'Perfect product!',
 'Wonderful',
 'Terrific purchase',
 'Great product',
 'Simply awesome',
 'Mind-blowing purchase',
 'Terrific purchase',
 'Simply awesome',
 'Good quality product',
 'Very poor',
 'Perfect product!',
 'Must buy!',
 'Fabulous!',
 'Mind-blowing purchase',
 'Mind-blowing purchase',
 'Wonderful',
 'Worth every penny',
 'Great product',
 'Very Good',
 'Perfect product!',
 'Classy product',
 'Perfect product!',
 'Just wow!',
 'Awesome',
 'Terrific',
 'Terrific purchase',
 'Awesome',
 'Clas

In [187]:
len(review_sum)

100

In [185]:
# Scraping data for iphone full review summary
full_review_sum= []  #empty list
for i in urls:
    driver.get(i)
    Review = driver.find_elements_by_xpath("//div[@class='t-ZTKy']")
    for i in Review:
        full_review_sum.append(i.text)
full_review_sum = full_review_sum[:100]
full_review_sum

['Really satisfied with the Product I received... It’s totally genuine and the packaging was also really good so if ur planning to buy just go for it.',
 'The Best Phone for the Money\n\nThe iPhone 11 offers superb cameras, a more durable design and excellent battery life for an affordable price.\n\nCompelling ultra-wide camera\nNew Night mode is excellent\nLong battery life',
 'This is my first iOS phone. I am very happy with this product. Very much satisfied with this. I love this phone.',
 'Amazing phone with great cameras and better battery which gives you the best performance. I just love the camera .',
 'Previously I was using one plus 3t it was a great phone\nAnd then I decided to upgrade I am stuck between Samsung s10 plus or iPhone 11\nI have seen the specs and everything were good except the display it’s somewhere between 720-1080 and it’s not even an amoled it’s an LCD display\nBut I decided to go with iPhone because I have never used an IOS device I have Been an android use

In [188]:
len(full_review_sum)

100

In [189]:
# Creating DataFrame for scraped data
iphone_review = pd.DataFrame({})
iphone_review["Ratings"] = iphone_ratings
iphone_review["Review Summary"] = review_sum
iphone_review["Full Review"] = full_review_sum
iphone_review

Unnamed: 0,Ratings,Review Summary,Full Review
0,5,Simply awesome,Really satisfied with the Product I received.....
1,5,Brilliant,The Best Phone for the Money\n\nThe iPhone 11 ...
2,5,Fabulous!,This is my first iOS phone. I am very happy wi...
3,5,Perfect product!,Amazing phone with great cameras and better ba...
4,5,Worth every penny,Previously I was using one plus 3t it was a gr...
...,...,...,...
95,5,Super!,This is my first ever iPhone.\nAnd I truly don...
96,5,Fabulous!,Everything is perfect pictures come out so cle...
97,5,Just wow!,The ultimate performance\nCamera is superb\nTh...
98,5,Mind-blowing purchase,Excellent camera 📸 And Display touching very N...


### Q8: Scrape data for first 100 sneakers you find when you visit flipkart.com and search for “sneakers” in the search field.

**You have to scrape 4 attributes of each sneaker :**
    
**1. Brand**

**2. Product Description**

**3. Price**

**4. discount %**

In [194]:
# Connecting to the webdriver
driver = webdriver.Chrome('chromedriver.exe')

In [195]:
# Getting the webpage of mentioned url
url = 'https://www.flipkart.com/'
driver.get(url)

In [197]:
#Searching web elemnet for first 100 sneakers
search_sneakers = driver.find_element_by_xpath("//input[@class='_3704LK']")
search_sneakers

<selenium.webdriver.remote.webelement.WebElement (session="f8c62ab64cec0c682c40333b394cc6cc", element="b919d401-2588-4cd4-af72-d251a7474d2f")>

In [198]:
# Entering sneakers in the search field
search_sneakers.send_keys('sneakers')

In [201]:
# Clicking on search button
search_btn = driver.find_element_by_xpath("//button[@class='L0Z3Pu']")
search_btn.click()

In [202]:
#Fetching all the URLs
url = driver.find_elements_by_xpath("//nav[@class='yFHi8N']/a")

urls = []  #empty list
for i in url:
    urls.append(i.get_attribute('href'))
urls

['https://www.flipkart.com/search?q=sneakers&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=1',
 'https://www.flipkart.com/search?q=sneakers&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=2',
 'https://www.flipkart.com/search?q=sneakers&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=3',
 'https://www.flipkart.com/search?q=sneakers&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=4',
 'https://www.flipkart.com/search?q=sneakers&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=5',
 'https://www.flipkart.com/search?q=sneakers&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=6',
 'https://www.flipkart.com/search?q=sneakers&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=7',
 'https://www.flipkart.com/search?q=sneakers&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&p

In [203]:
# Scraping data for sneakers brand
sneakers_brand= []  #empty list
for i in urls:
    driver.get(i)
    Brand = driver.find_elements_by_xpath("//div[@class='_2WkVRV']")
    for i in Brand:
        sneakers_brand.append(i.text)
sneakers_brand =sneakers_brand[:100]
sneakers_brand

['DUCATI',
 'DUCATI',
 'Numenzo',
 'Chevit',
 'bluemaker',
 'Magnolia',
 'Chevit',
 'Labbin',
 'World Wear Footwear',
 'Red Rose',
 'Numenzo',
 'DUCATI',
 'Chevit',
 'ASTEROID',
 'BRUTON',
 'Strollin',
 'Shoes Bank',
 'India hub',
 'Chevit',
 'aadi',
 'PEHANOSA',
 'Robbie jones',
 'SCATCHITE',
 'DUCATI',
 'BRUTON',
 'Chevit',
 'believe',
 'ROCKFIELD',
 'luxury fashion',
 'RODDICK SHOES',
 'Nilatin',
 'DUCATI',
 'Creer',
 'ROCKFIELD',
 'World Wear Footwear',
 'RODDICK SHOES',
 'Nilatin',
 'Kraasa',
 'SPARX',
 'Robbie jones',
 'D-SNEAKERZ',
 'Arohi',
 'CALCADOS',
 'DUCATI',
 'T-ROCK',
 'SPARX',
 'India hub',
 'DUCATI',
 'BIRDE',
 'bluemaker',
 'World Wear Footwear',
 'RODDICK SHOES',
 'Ktiz',
 'PUMA',
 'PUMA',
 'Labbin',
 'Zsyto',
 'World Wear Footwear',
 'restinfoot',
 'DUCATI',
 'D-SNEAKERZ',
 'Edoeviv',
 'Zixer',
 'Strollin',
 'tigonis',
 'Xylus',
 'Nilatin',
 'aadi',
 'Chevit',
 'Magnolia',
 'Fzzirok',
 'BRUTON',
 'D-SNEAKERZ',
 'SPARX',
 'Robbie jones',
 'DUCATI',
 'Jokatoo',
 'Jack

In [204]:
len(sneakers_brand)

100

In [208]:
# Scraping data for sneakers product description
sneakers_desc= []  #empty list
for i in urls:
    driver.get(i)
    description = driver.find_elements_by_xpath("//a[@class='IRpwTa']")
    for i in description:
        sneakers_desc.append(i.text)
sneakers_desc =sneakers_desc[:100]
sneakers_desc

['Sneakers For Men',
 'Unique & Perfect Collection Combo Pack of 02 Shoes for ...',
 'casual for men (blue 06) Sneakers For Men',
 'Sneakers For Men',
 'Super Stylish & Trendy Combo Pack of 02 Pairs Sneakers ...',
 '5011-Latest Collection Stylish Casual Loafer Sneakers S...',
 'Sneakers For Men',
 'Sneakers For Men',
 '494 Perfect Sports Shoes for Running Training Hikking &...',
 "Original Luxury Branded Fashionable Men's Casual Walkin...",
 'Combo Pack Of 4 Casual Shoes Loafer Shoes Sneakers For ...',
 "White Sneaker For Men's/Boy's Sneakers For Men",
 'Fashionable casual sneakers shoes Sneakers For Men',
 'Speed Set of 5 Pairs Sneakers Outdoors Casuals for Men ...',
 'Sneakers For Men',
 'Sneakers For Men',
 'Sneakers Sneakers For Men',
 'Modern & Trendy Collection Combo Pack of 02 Shoes for M...',
 'Perfect & Affordable Combo Pack of 03 Pairs Casual Snea...',
 'Luxury Fashionable casual sneaker shoes Sneakers For Me...',
 'Sneakers for men(black_6) Sneakers For Men',
 'Fashion Outdo

In [209]:
len(sneakers_desc)

100

In [210]:
# Scraping data for sneakers price
sneakers_price= []  #empty list
for i in urls:
    driver.get(i)
    price = driver.find_elements_by_xpath("//div[@class='_30jeq3']")
    for i in price:
        sneakers_price.append(i.text)
sneakers_price =sneakers_price[:100]
sneakers_price

['₹1,289',
 '₹399',
 '₹378',
 '₹420',
 '₹399',
 '₹356',
 '₹536',
 '₹1,259',
 '₹240',
 '₹299',
 '₹426',
 '₹449',
 '₹254',
 '₹474',
 '₹399',
 '₹1,259',
 '₹331',
 '₹449',
 '₹679',
 '₹399',
 '₹424',
 '₹474',
 '₹398',
 '₹299',
 '₹499',
 '₹594',
 '₹399',
 '₹449',
 '₹399',
 '₹474',
 '₹407',
 '₹1,209',
 '₹398',
 '₹299',
 '₹240',
 '₹399',
 '₹416',
 '₹426',
 '₹635',
 '₹1,259',
 '₹348',
 '₹464',
 '₹748',
 '₹1,289',
 '₹378',
 '₹625',
 '₹449',
 '₹1,259',
 '₹624',
 '₹399',
 '₹429',
 '₹474',
 '₹240',
 '₹2,730',
 '₹1,350',
 '₹449',
 '₹555',
 '₹378',
 '₹341',
 '₹1,259',
 '₹240',
 '₹346',
 '₹474',
 '₹399',
 '₹359',
 '₹599',
 '₹407',
 '₹399',
 '₹348',
 '₹311',
 '₹360',
 '₹299',
 '₹384',
 '₹379',
 '₹506',
 '₹399',
 '₹806',
 '₹635',
 '₹378',
 '₹1,209',
 '₹449',
 '₹378',
 '₹379',
 '₹1,289',
 '₹279',
 '₹474',
 '₹635',
 '₹1,259',
 '₹474',
 '₹730',
 '₹951',
 '₹474',
 '₹4,999',
 '₹424',
 '₹379',
 '₹1,259',
 '₹378',
 '₹449',
 '₹449',
 '₹399']

In [211]:
len(sneakers_price)

100

In [212]:
# Scraping data for sneakers discount %
sneakers_discount= []  #empty list
for i in urls:
    driver.get(i)
    discount = driver.find_elements_by_xpath("//div[@class='_3Ay6Sb']/span")
    for i in discount:
        sneakers_discount.append(i.text)
sneakers_discount =sneakers_discount[:100]
sneakers_discount

['65% off',
 '65% off',
 '62% off',
 '71% off',
 '60% off',
 '64% off',
 '66% off',
 '55% off',
 '51% off',
 '70% off',
 '78% off',
 '65% off',
 '74% off',
 '76% off',
 '88% off',
 '60% off',
 '66% off',
 '85% off',
 '72% off',
 '85% off',
 '57% off',
 '52% off',
 '60% off',
 '68% off',
 '80% off',
 '77% off',
 '69% off',
 '60% off',
 '60% off',
 '52% off',
 '68% off',
 '65% off',
 '60% off',
 '70% off',
 '51% off',
 '52% off',
 '58% off',
 '67% off',
 '9% off',
 '62% off',
 '47% off',
 '53% off',
 '62% off',
 '65% off',
 '62% off',
 '16% off',
 '85% off',
 '65% off',
 '68% off',
 '60% off',
 '66% off',
 '52% off',
 '51% off',
 '57% off',
 '59% off',
 '55% off',
 '44% off',
 '70% off',
 '65% off',
 '65% off',
 '51% off',
 '65% off',
 '40% off',
 '60% off',
 '80% off',
 '62% off',
 '68% off',
 '85% off',
 '30% off',
 '68% off',
 '63% off',
 '88% off',
 '61% off',
 '62% off',
 '66% off',
 '68% off',
 '15% off',
 '36% off',
 '62% off',
 '65% off',
 '55% off',
 '70% off',
 '87% off',
 '65%

In [213]:
len(sneakers_discount)

100

In [215]:
# Creating DataFrame for the scraped data
sneakers = pd.DataFrame({})
sneakers["Brand"] = sneakers_brand
sneakers["Product Description"] = sneakers_desc
sneakers["Price"] = sneakers_price
sneakers["Discount %"] = sneakers_discount
sneakers

Unnamed: 0,Brand,Product Description,Price,Discount %
0,DUCATI,Sneakers For Men,"₹1,289",65% off
1,DUCATI,Unique & Perfect Collection Combo Pack of 02 S...,₹399,65% off
2,Numenzo,casual for men (blue 06) Sneakers For Men,₹378,62% off
3,Chevit,Sneakers For Men,₹420,71% off
4,bluemaker,Super Stylish & Trendy Combo Pack of 02 Pairs ...,₹399,60% off
...,...,...,...,...
95,DUCATI,Walking Shoes For Men (Black) Sneakers For Men,"₹1,259",62% off
96,SPARX,Puma Rebound LayUp SL Sneakers For Men,₹378,55% off
97,Creer,Sneakers For Men,₹449,55% off
98,Labbin,Sneakers For Men,₹449,65% off


### Q9: Go to the link - https://www.myntra.com/shoes. Set Price filter to “Rs. 6649 to Rs. 13099” , Color filter to “Black”, And then scrape First 100 shoes data you get. The data should include “Brand” of the shoes , Short Shoe description, price of the shoe


In [19]:
# Connecting to the webdriver
driver = webdriver.Chrome('chromedriver.exe')

In [20]:
# Getting the webpage of mentioned url
url = "https://www.myntra.com/shoes"
driver.get(url)

In [21]:
# Searching for filter element
search_filter = driver.find_element_by_xpath("//span[@class='header-title']")
search_filter.click()

In [222]:
# Setting Price filter to Rs.6649 to Rs.13099
price_filter = driver.find_element_by_xpath("/html/body/div[2]/div/div[1]/main/div[3]/div[1]/section/div/div[5]/ul/li[2]/label")
price_filter.click()

In [223]:
# Setting Color filter to “Black”
color_filter = driver.find_element_by_xpath("/html/body/div[2]/div/div[1]/main/div[3]/div[1]/section/div/div[6]/ul/li[1]/label")
color_filter.click()

In [233]:
#Fetching all the URLs
url = driver.find_elements_by_xpath("//li[@class='pagination-active'or @class='pagination-number']/a")

urls = []  #empty list
for i in url:
    urls.append(i.get_attribute('href'))
urls

['https://www.myntra.com/shoes?f=Color%3ABlack_36454f&plaEnabled=false&rf=Price%3A5342.0_10495.0_5342.0%20TO%2010495.0%2C5387.0_10525.0_5387.0%20TO%2010525.0',
 'https://www.myntra.com/shoes?f=Color%3ABlack_36454f&plaEnabled=false&rf=Price%3A5342.0_10495.0_5342.0%20TO%2010495.0%2C5387.0_10525.0_5387.0%20TO%2010525.0&p=2',
 'https://www.myntra.com/shoes?f=Color%3ABlack_36454f&plaEnabled=false&rf=Price%3A5342.0_10495.0_5342.0%20TO%2010495.0%2C5387.0_10525.0_5387.0%20TO%2010525.0&p=3',
 'https://www.myntra.com/shoes?f=Color%3ABlack_36454f&plaEnabled=false&rf=Price%3A5342.0_10495.0_5342.0%20TO%2010495.0%2C5387.0_10525.0_5387.0%20TO%2010525.0&p=4',
 'https://www.myntra.com/shoes?f=Color%3ABlack_36454f&plaEnabled=false&rf=Price%3A5342.0_10495.0_5342.0%20TO%2010495.0%2C5387.0_10525.0_5387.0%20TO%2010525.0&p=5',
 'https://www.myntra.com/shoes?f=Color%3ABlack_36454f&plaEnabled=false&rf=Price%3A5342.0_10495.0_5342.0%20TO%2010495.0%2C5387.0_10525.0_5387.0%20TO%2010525.0&p=6',
 'https://www.myntra

In [234]:
# Scraping data for first 100 shoes

# Scraping data for brand of the shoes
shoes_brand= []  #empty list
for i in urls:
    driver.get(i)
    brand = driver.find_elements_by_xpath("//h3[@class='product-brand']")
    for i in brand:
        shoes_brand.append(i.text)
shoes_brand =shoes_brand[:100]
shoes_brand

['Nike',
 'UNDER ARMOUR',
 'UNDER ARMOUR',
 'Puma',
 'UNDER ARMOUR',
 'Hush Puppies',
 'Nike',
 'ADIDAS',
 'ADIDAS',
 'UNDER ARMOUR',
 'Puma',
 'UNDER ARMOUR',
 'UNDER ARMOUR',
 'UNDER ARMOUR',
 'UNDER ARMOUR',
 'UNDER ARMOUR',
 'UNDER ARMOUR',
 'Puma',
 'UNDER ARMOUR',
 'Hush Puppies',
 'Skechers',
 'Puma',
 'Hush Puppies',
 'Geox',
 'Hush Puppies',
 'FORCLAZ By Decathlon',
 'Quechua By Decathlon',
 'PUMA Hoops',
 'Puma',
 'Geox',
 'Puma',
 'UNDER ARMOUR',
 'Bugatti',
 'Hush Puppies',
 'UNDER ARMOUR',
 'Quechua By Decathlon',
 'Hush Puppies',
 'Hush Puppies',
 'Geox',
 'Puma',
 'Nike',
 'PUMA Hoops',
 'Puma',
 'Puma',
 'Nike',
 'Puma',
 'Geox',
 'UNDER ARMOUR',
 'Hush Puppies',
 'Xtep',
 'Hush Puppies',
 'DAVINCHI',
 'Xtep',
 'Lacoste',
 'Jack & Jones',
 'Hush Puppies',
 'RARE RABBIT',
 'UNDER ARMOUR',
 'Ruosh',
 'FORCLAZ By Decathlon',
 'TARMAK By Decathlon',
 'PUMA Motorsport',
 'Ruosh',
 'Puma',
 'UNDER ARMOUR',
 'RARE RABBIT',
 'one8 Select BY VIRAT KOHLI',
 'Geox',
 'Geox',
 'Kal

In [235]:
len(shoes_brand)

100

In [236]:
# Scraping data for short description of shoes
shoes_desc= []  #empty list
for i in urls:
    driver.get(i)
    description = driver.find_elements_by_xpath("//h4[@class='product-product']")
    for i in description:
        shoes_desc.append(i.text)
shoes_desc =shoes_desc[:100]
shoes_desc

['Men ZOOM SPAN 3 Running Shoes',
 'Men Charged Bandit 6 Running',
 'Men HOVR Sonic STRT Tech Shoes',
 'Axelion Block Running Shoes',
 'Men Charged Vantage Running',
 'Men Solid Leather Formal Oxfords',
 'Men Printed Slip-On Sneakers',
 'Men Crater Remixa Sneakers',
 'Women Response SR Running',
 'Women SUPERNOVA Running Shoes',
 'Unisex Project Rock Recruit',
 'Men Cell Fraction Fade Running',
 'Men Charged Pursuit 2 SE Shoes',
 'Men Perforated Slip On Sneaker',
 'Men Charged Assert 8 Marble',
 'Men Charged Pursuit 2 SE',
 'Women Charged Vantage ClrShft',
 'Men Equalizer 4.0 Trail',
 'Men Charged Commit 3 Training',
 'GS SC 3Zero IV Basketball',
 'Women Deviate Nitro Running',
 'Men Charged Rogue 2.5 RFLCT',
 'Men Formal Derbys',
 'Men SOFTRIDE Rift Shoes',
 'Men Leather Slip-On Sneakers',
 'Men Solid Leather Formal Slip-Ons',
 'TREKKING 100 Boots',
 'Men Textured Leather Loafers',
 'Unisex Clyde Basketball Shoes',
 'Women Waterproof Hiking Shoes',
 'Men Leader VT Running Shoes',
 'Wo

In [237]:
len(shoes_desc)

100

In [241]:
# Scraping data for price of the shoes
shoes_price= []  #empty list
for i in urls:
    driver.get(i)
    price = driver.find_elements_by_xpath("//div[@class='product-price']/span[1]")
    for i in price:
        shoes_price.append(i.text)
shoes_price =shoes_price[:100]
shoes_price

['Rs. 5396Rs. 7195',
 'Rs. 8999',
 'Rs. 9999',
 'Rs. 5999',
 'Rs. 7999',
 'Rs. 5399Rs. 5999',
 'Rs. 9899Rs. 10999',
 'Rs. 5495',
 'Rs. 6079Rs. 7599',
 'Rs. 6999Rs. 9999',
 'Rs. 9999',
 'Rs. 6999',
 'Rs. 6999',
 'Rs. 8099Rs. 8999',
 'Rs. 6999',
 'Rs. 6999',
 'Rs. 7999',
 'Rs. 6499',
 'Rs. 7999',
 'Rs. 7499',
 'Rs. 9749Rs. 14999',
 'Rs. 7999',
 'Rs. 8999Rs. 9999',
 'Rs. 6499',
 'Rs. 6299Rs. 6999',
 'Rs. 6299Rs. 8999',
 'Rs. 6999',
 'Rs. 6990',
 'Rs. 8999',
 'Rs. 5499',
 'Rs. 5999',
 'Rs. 5499',
 'Rs. 6999',
 'Rs. 9999',
 'Rs. 5999',
 'Rs. 6299Rs. 8999',
 'Rs. 5599Rs. 6999',
 'Rs. 6999Rs. 9999',
 'Rs. 6299Rs. 8999',
 'Rs. 9999',
 'Rs. 5399Rs. 5999',
 'Rs. 6299Rs. 8999',
 'Rs. 5995',
 'Rs. 8449Rs. 12999',
 'Rs. 7149Rs. 10999',
 'Rs. 7195',
 'Rs. 6999',
 'Rs. 8121Rs. 12495',
 'Rs. 6499',
 'Rs. 8099Rs. 8999',
 'Rs. 6499',
 'Rs. 7999',
 'Rs. 8099Rs. 8999',
 'Rs. 5399Rs. 5999',
 'Rs. 5699',
 'Rs. 5990',
 'Rs. 5399Rs. 5999',
 'Rs. 5950Rs. 8500',
 'Rs. 5999',
 'Rs. 7499',
 'Rs. 8099Rs. 8999',
 '

In [243]:
len(shoes_price)

100

In [244]:
# Creating DataFrame for the scraped data
shoes = pd.DataFrame({})
shoes["Brand"] = shoes_brand
shoes["Short Description"] = shoes_desc
shoes["Price"] = shoes_price
shoes

Unnamed: 0,Brand,Short Description,Price
0,Nike,Men ZOOM SPAN 3 Running Shoes,Rs. 5396Rs. 7195
1,UNDER ARMOUR,Men Charged Bandit 6 Running,Rs. 8999
2,UNDER ARMOUR,Men HOVR Sonic STRT Tech Shoes,Rs. 9999
3,Puma,Axelion Block Running Shoes,Rs. 5999
4,UNDER ARMOUR,Men Charged Vantage Running,Rs. 7999
...,...,...,...
95,Clarks,Basketweave Block Heels,Rs. 8999
96,Quechua By Decathlon,Men Textured Leather Formal Derbys,Rs. 5803Rs. 8290
97,UNDER ARMOUR,Women Running Shoes,Rs. 6299Rs. 6999
98,UNDER ARMOUR,Men Solid Leather Formal Derbys,Rs. 5599Rs. 7999


### Q10: Go to webpage https://www.amazon.in/ 
**Scrape first 10 laptops data. You have to scrape**

**1. title**

**2. Ratings**

**3. Price**

In [82]:
# Connecting to the webdriver
driver = webdriver.Chrome('chromedriver.exe')

In [83]:
# Getting the webpage of mentioned url
url = " https://www.amazon.in/"
driver.get(url)

In [84]:
# Searching webelement for laptops
search_laptops = driver.find_element_by_xpath("/html/body/div[1]/header/div/div[1]/div[2]/div/form/div[2]/div[1]/input")
search_laptops

<selenium.webdriver.remote.webelement.WebElement (session="c7e99ce30280e93e321a7d549fa84876", element="027c91a4-6333-4cf1-a27c-2970b589f2a1")>

In [85]:
# Entering “Laptop” in the search field
search_laptops.send_keys("Laptop")

In [86]:
# Clicking on the search button
search_btn = driver.find_element_by_xpath("/html/body/div[1]/header/div/div[1]/div[2]/div/form/div[3]/div/span/input")
search_btn.click()

In [87]:
# Setting CPU Type filter to “Intel Core i7” and “Intel Core i9” 
cpu_filter = driver.find_element_by_xpath("//span[@class='a-size-base a-color-base a-text-bold']")
cpu_filter

<selenium.webdriver.remote.webelement.WebElement (session="c7e99ce30280e93e321a7d549fa84876", element="c02aecd3-45f1-4642-bae2-969df0935193")>

In [89]:
# Setting CPU Type filter to "Intel Core i7"
intel_i7 = driver.find_element_by_xpath("/html/body/div[1]/div[2]/div[1]/div/div[2]/div/div[3]/span/div[1]/span/div/div/div[6]/ul[4]/li[26]/span/a/span")
intel_i7.click()

In [90]:
# Setting CPU Type filter to "Intel Core i9"
intel_i9 = driver.find_element_by_xpath("/html/body/div[1]/div[2]/div[1]/div/div[2]/div/div[3]/span/div[1]/span/div/div/div[6]/ul[4]/li[28]/span/a/span")
intel_i9.click()

In [91]:
# Scraping data for laptop titles
titles = driver.find_elements_by_xpath("//h2[@class='a-size-mini a-spacing-none a-color-base s-line-clamp-2']")

laptop_titles = []  #empty list
for i in titles:
        laptop_titles.append(i.text)
laptop_titles =laptop_titles[:10]
laptop_titles

['MSI GF65 Thin, Intel i7-10750H, 15.6" FHD (39.6 cm) IPS-Level 144Hz Panel Laptop (16GB/512GB NVMe SSD/Windows 10 Home/Nvidia GTX1660 Ti 6GB GDDR6/Black/1.86Kg), 10SDR-1280IN',
 'ASUS TUF Dash F15 (2021), 15.6" (39.62 cms) FHD 144Hz, Intel Core i7-11370H 11th Gen, RTX 3050 Ti 4GB Graphics, Gaming Laptop (16GB/1TB SSD/Office 2019/Windows 10/Eclipse Gray/2 kg), FX516PE-HN088TS',
 'Mi Notebook Horizon Edition 14 Intel Core i7-10510U 10th Gen 14-inch (35.56 cms) Thin and Light Laptop(8GB/512GB SSD/Windows 10/Nvidia MX350 2GB Graphics/Grey/1.35Kg), XMA1904-AF+Webcam',
 'HP Pavilion (2021) Thin & Light 11th Gen Core i7 Laptop, 16 GB RAM, 1TB SSD, Iris Xe Graphics, 14" (35.56cms) FHD Screen, Windows 10, MS Office, Backlit Keyboard (14-dv0058TU)',
 'MSI GF65 Thin, Intel i7-10750H, 15.6" FHD (39.6 cm) IPS-Level 144Hz Panel Laptop (16GB/512GB NVMe SSD/Windows 10 Home/Nvidia GTX1660 Ti 6GB GDDR6/Black/1.86Kg), 10SDR-1280IN',
 'ASUS TUF Gaming F15 (2020), 15.6-inch (39.62 cms) FHD 144Hz, Intel Co

In [92]:
# Scraping data for laptop price
price = driver.find_elements_by_xpath("//span[@class='a-price-whole']")

laptop_price= []  #empty list
for i in price:
    laptop_price.append(i.text)
laptop_price =laptop_price[:10]
laptop_price

['81,990',
 '1,07,990',
 '59,490',
 '84,990',
 '81,990',
 '71,990',
 '34,990',
 '74,990',
 '82,990',
 '90,490']

In [93]:
#Fetching all the URLs
url = driver.find_elements_by_xpath("//a[@class='a-link-normal a-text-normal']")

urls = []  #empty list
for i in url[0:20]:
    urls.append(i.get_attribute('href'))

In [94]:
# Scraping data for laptop ratings
laptop_ratings= []  #empty list
for i in urls:
    driver.get(i)
    try:
        driver.find_element_by_xpath("//span[@id='acrCustomerReviewText']").click()
        ratings = driver.find_element_by_xpath("//span[@class='a-size-medium a-color-base']")
        laptop_ratings.append(ratings.text)
    except NoSuchElementException as e :
        laptop_ratings.append('-')
laptop_ratings =laptop_ratings[:10]
laptop_ratings

['3.7 out of 5',
 '4.5 out of 5',
 '4.4 out of 5',
 '4.3 out of 5',
 '3.7 out of 5',
 '4 out of 5',
 '3.8 out of 5',
 '4.6 out of 5',
 '4.4 out of 5',
 '4 out of 5']

In [95]:
# Checking the length of the scraped data
print(len(laptop_titles),len(laptop_ratings),len(laptop_price))

10 10 10


In [96]:
# Creating DataFrame for the scraped data
laptops = pd.DataFrame({})
laptops["Laptop Title"] = laptop_titles
laptops["Laptop Ratings"] = laptop_ratings
laptops["Laptop Price"] = laptop_price
laptops

Unnamed: 0,Laptop Title,Laptop Ratings,Laptop Price
0,"MSI GF65 Thin, Intel i7-10750H, 15.6"" FHD (39....",3.7 out of 5,81990
1,"ASUS TUF Dash F15 (2021), 15.6"" (39.62 cms) FH...",4.5 out of 5,107990
2,Mi Notebook Horizon Edition 14 Intel Core i7-1...,4.4 out of 5,59490
3,HP Pavilion (2021) Thin & Light 11th Gen Core ...,4.3 out of 5,84990
4,"MSI GF65 Thin, Intel i7-10750H, 15.6"" FHD (39....",3.7 out of 5,81990
5,"ASUS TUF Gaming F15 (2020), 15.6-inch (39.62 c...",4 out of 5,71990
6,Life Digital Laptop 15.6-inch (39.62 cms) (Int...,3.8 out of 5,34990
7,"MSI GF75 Thin, Intel i7-10750H, 17.3"" (43.9 cm...",4.6 out of 5,74990
8,Lenovo Legion 5 10th Gen Intel Core i7-10750H ...,4.4 out of 5,82990
9,Lenovo IdeaPad Flex 5 11th Gen Intel Core i7 1...,4 out of 5,90490
