# Web Scrapping using SELENIUM

In [1]:
#install selenium
!pip install selenium



# Q1: Write a python program to scrape data for “Data Analyst” Job position in “Bangalore” location. 
Youhave to scrape the job-title, job-location, company_name, experience_required. You have to scrape first 10
jobs data.
This task will be done in following steps:
1. First get the webpage https://www.naukri.com/
2. Enter “Data Analyst” in “Skill, Designations, Companies” field and enter “Bangalore” in “enter the
location” field.
3. Then click the search button.
4. Then scrape the data for the first 10 jobs results you get.
5. Finally create a dataframe of the scraped data

In [2]:
#import all required libraries
import selenium
from selenium import webdriver
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [3]:
#let's first connect to web driver
driver=webdriver.Chrome(r"C:\Users\ankus\Downloads\chromedriver_win32\chromedriver.exe") 

In [4]:
driver=webdriver.Chrome('chromedriver.exe')

In [5]:
url="https://www.naukri.com/"
driver.get(url)

In [6]:
#finding web element for job bar using id
search_job=driver.find_element_by_id("qsb-keyword-sugg")
search_job

<selenium.webdriver.remote.webelement.WebElement (session="fcabd763d79f9493ce9bf04fa6ee31b0", element="fb97b226-7129-44fd-ad8f-3c03198ca636")>

In [7]:
#write on search bar
search_job.send_keys("Data Analyst")

In [8]:
#finding web element for search location using absolute Xpath
search_locn=driver.find_element_by_xpath('/html/body/div[1]/div[3]/div[2]/section/div/form/div[2]/div/div/div/div[1]/div[2]/input')
search_locn

<selenium.webdriver.remote.webelement.WebElement (session="fcabd763d79f9493ce9bf04fa6ee31b0", element="e3103dc4-47d7-4a18-b7bb-c353e8b18311")>

In [9]:
#finding web element for job location bar
search_locn.send_keys("Bangalore")

In [10]:
#clicking using absolute xpath
search_btn=driver.find_element_by_xpath('/html/body/div[1]/div[3]/div[2]/section/div/form/div[3]/button')
search_btn

<selenium.webdriver.remote.webelement.WebElement (session="fcabd763d79f9493ce9bf04fa6ee31b0", element="d0e4cc1d-c740-46bc-af6b-5ae98fb01bf6")>

In [11]:
search_btn.click()

clicking filters by selecting check boxes using absolute xpath

# Extracting job titles

In [12]:
#let's extract all web elements having job titles
title_tags=driver.find_elements_by_xpath('//a[@class="title fw500 ellipsis"]')
len(title_tags)


20

In [14]:
#now the text of the job title is inside the web element extracted above
#so now we will run a for loop to extract the text

job_titles=[]
for i in title_tags:
    job_titles.append(i.text)
len(job_titles)
job_titles

['BUSINESS ANALYST -DATA SCIENCE-CONSUMER',
 'Lead - Data Analyst / Scientist',
 'Data Analyst',
 'Senior Data Analyst - Supporting Audits',
 'Data Analyst',
 'Data Analyst',
 'Senior Data Analyst - KPO',
 'Data Analyst',
 'Financial Data Analyst',
 'Tcs Hiring For Data Analyst / Engineers',
 'Business Data Analyst - Database Design/Mining',
 'Lead Data Analyst',
 'Data Analyst',
 'Financial Data Analyst',
 'Data Analyst',
 'Senior Data Analyst - KPO',
 'Senior Data Analyst',
 'Senior Data Analyst II',
 'Senior Data Analyst II',
 'Senior Data Analyst']

# Extracting company names

In [15]:
company_tags=driver.find_elements_by_xpath('//a[@class="subTitle ellipsis fleft"]')
len(company_tags)

20

In [16]:
company_names=[]
for i in company_tags:
    company_names.append(i.text)
len(company_names)
company_names


['BRIDGEi2i Analytics Solutions Private Limited',
 'Axim Technologies',
 'Jeeva Organic',
 'Visa',
 'Flipkart',
 'Novel Office',
 'Huquo Consulting Pvt. Ltd',
 'Snaphunt',
 "Moody's",
 'TCS',
 'AugmatrixGo',
 'Nike India',
 'Venuestring Events Management Pvt Ltd',
 "Moody's",
 'WEIWO Communication Pvt. Ltd.',
 'Huquo Consulting Pvt. Ltd',
 'Flipkart',
 'Flipkart',
 'Flipkart',
 'Glance']

# Extracting experience

In [17]:
exp_tags=driver.find_elements_by_xpath('//li[@class="fleft grey-text br2 placeHolderLi experience"]')
len(exp_tags)

20

In [18]:
experience=[]
for i in exp_tags:
    experience.append(i.text)
len(experience)
experience

['0-2 Yrs',
 '12-14 Yrs',
 '3-8 Yrs',
 '5-8 Yrs',
 '1-2 Yrs',
 '0-3 Yrs',
 '7-12 Yrs',
 '0-2 Yrs',
 '0-2 Yrs',
 '4-9 Yrs',
 '2-5 Yrs',
 '1-6 Yrs',
 '1-2 Yrs',
 '0-2 Yrs',
 '2-7 Yrs',
 '7-12 Yrs',
 '2-3 Yrs',
 '3-7 Yrs',
 '5-7 Yrs',
 '1-6 Yrs']

# Extracting location

In [19]:
locn_tags=driver.find_elements_by_xpath('//li[@class="fleft grey-text br2 placeHolderLi location"]')
len(locn_tags)

20

In [20]:
location=[]
for i in locn_tags:
    location.append(i.text)
len(location)
location

['Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bengaluru/Bangalore',
 'Bangalore/Bengaluru',
 'Gurgaon/Gurugram, Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Pune, Bangalore/Bengaluru, Delhi / NCR',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru, Delhi / NCR, Mumbai (All Areas)\n(WFH during Covid)',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru(Ulsoor)',
 'Gurgaon/Gurugram, Bangalore/Bengaluru',
 'Bengaluru/Bangalore',
 'Bengaluru/Bangalore',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru']

In [21]:
#create dataframe
data=pd.DataFrame()
data['Job']=job_titles
data['Company']=company_names
data['Experience']=experience
data['Location']=location
data.head(10)

Unnamed: 0,Job,Company,Experience,Location
0,BUSINESS ANALYST -DATA SCIENCE-CONSUMER,BRIDGEi2i Analytics Solutions Private Limited,0-2 Yrs,Bangalore/Bengaluru
1,Lead - Data Analyst / Scientist,Axim Technologies,12-14 Yrs,Bangalore/Bengaluru
2,Data Analyst,Jeeva Organic,3-8 Yrs,Bangalore/Bengaluru
3,Senior Data Analyst - Supporting Audits,Visa,5-8 Yrs,Bangalore/Bengaluru
4,Data Analyst,Flipkart,1-2 Yrs,Bengaluru/Bangalore
5,Data Analyst,Novel Office,0-3 Yrs,Bangalore/Bengaluru
6,Senior Data Analyst - KPO,Huquo Consulting Pvt. Ltd,7-12 Yrs,"Gurgaon/Gurugram, Bangalore/Bengaluru"
7,Data Analyst,Snaphunt,0-2 Yrs,Bangalore/Bengaluru
8,Financial Data Analyst,Moody's,0-2 Yrs,Bangalore/Bengaluru
9,Tcs Hiring For Data Analyst / Engineers,TCS,4-9 Yrs,"Pune, Bangalore/Bengaluru, Delhi / NCR"


# Q2: Write a python program to scrape data for “Data Scientist” Job position in “Bangalore” location.
Youhave to scrape the job-title, job-location, company_name. You have to scrape first 10 jobs data.
This task will be done in following steps:
1. First get the webpage https://www.naukri.com/
2. Enter “Data Scientist” in “Skill, Designations, Companies” field and enter “Bangalore” in “enter the
location” field.
3. Then click the search button.
4. Then scrape the data for the first 10 jobs results you get.
5. Finally create a dataframe of the scraped data.
Note: All of the above steps have to be done in code. No step is to be done manually.

In [22]:
#import all required libraries
import selenium
from selenium import webdriver
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [23]:
#let's first connect to web driver
driver=webdriver.Chrome(r"C:\Users\ankus\Downloads\chromedriver_win32\chromedriver.exe") 

In [24]:
url="https://www.naukri.com/"
driver.get(url)

In [25]:
#finding web element for job bar using id
search_job=driver.find_element_by_id("qsb-keyword-sugg")
search_job

<selenium.webdriver.remote.webelement.WebElement (session="f69f56f3b4cd1954f5d04a11fbc966da", element="b08cfab2-d746-4f0f-a885-cda24db91269")>

In [26]:
#write on search bar
search_job.send_keys("Data scientist")

In [27]:
#finding web element for search location using absolute Xpath
search_locn=driver.find_element_by_xpath('/html/body/div[1]/div[3]/div[2]/section/div/form/div[2]/div/div/div/div[1]/div[2]/input')
search_locn

<selenium.webdriver.remote.webelement.WebElement (session="f69f56f3b4cd1954f5d04a11fbc966da", element="72fd8316-15dc-482f-af3d-fa013c0c8029")>

In [28]:
#finding web element for job location bar
search_locn.send_keys("Bangalore")

In [29]:
#clicking using absolute xpath
search_btn=driver.find_element_by_xpath('/html/body/div[1]/div[3]/div[2]/section/div/form/div[3]/button')
search_btn

<selenium.webdriver.remote.webelement.WebElement (session="f69f56f3b4cd1954f5d04a11fbc966da", element="b04efb35-fbe3-4c18-bdc0-073ad43e7157")>

In [30]:
search_btn.click()

# Extracting job titles

In [31]:
#let's extract all web elements having job titles
title_tags=driver.find_elements_by_xpath('//a[@class="title fw500 ellipsis"]')
len(title_tags)


20

In [32]:
#now the text of the job title is inside the web element extracted above
#so now we will run a for loop to extract the text

job_titles=[]
for i in title_tags:
    job_titles.append(i.text)
len(job_titles)
job_titles

['Lead/Senior Data Scientist (NLP)',
 'Lead Data Scientist - Prescriptive Analytics/Predictive Modeling',
 'Lead Data Scientist - NLP/OpenCV',
 'Senior Data Scientist - Logistics',
 'Senior Data Scientist',
 'Lead Data Scientist',
 'Lead - Data Analyst / Scientist',
 'Data Scientist - Logistics',
 'Senior Data Scientist',
 'Senior Data Scientist',
 'Senior Data Scientist/Data Scientist Gurgaon Kolkata/Pune/Chennai/Delh',
 'Job opening with Wipro For Data Scientist',
 'Data Scientist I',
 'Spark ML Data Scientist',
 'Spark ML Data Scientist',
 'Azure ML Data Scientist',
 'Lead Data Scientist',
 'Senior Data Scientist',
 'Openings For Jr/mid/Sr level data Scientists',
 'Senior Risk Data Scientist']

# Extracting company names

In [33]:
company_tags=driver.find_elements_by_xpath('//a[@class="subTitle ellipsis fleft"]')
len(company_tags)

20

In [35]:
company_names=[]
for i in company_tags:
    company_names.append(i.text)
len(company_names)
company_names


['Samya.AI A FRACTAL Entity',
 'Codersbrain',
 'Codersbrain',
 'Gojek Tech',
 'Fractal Analytics',
 'First American',
 'Axim Technologies',
 'GO-JEK India',
 'Idexcel Technologies Private Limited',
 'Idexcel Technologies Private Limited',
 'Talent Leads HR Solutions Pvt Ltd',
 'Wipro',
 'Delhivery',
 'Wipro',
 'Wipro',
 'TCS',
 'PEOPLESTRONG TECHNOLOGIES PRIVATE LIMITED',
 'Greenizon Agritech Consultancy',
 'Pluto seven business solutions (p) limited',
 'GSK India']

# Extracting experience

In [36]:
exp_tags=driver.find_elements_by_xpath('//li[@class="fleft grey-text br2 placeHolderLi experience"]')
len(exp_tags)

20

In [37]:
experience=[]
for i in exp_tags:
    experience.append(i.text)
len(experience)
experience

['3-8 Yrs',
 '6-11 Yrs',
 '6-11 Yrs',
 '5-8 Yrs',
 '5-10 Yrs',
 '9-14 Yrs',
 '12-14 Yrs',
 '4-10 Yrs',
 '2-7 Yrs',
 '2-7 Yrs',
 '1-4 Yrs',
 '5-10 Yrs',
 '2-5 Yrs',
 '11-18 Yrs',
 '8-12 Yrs',
 '3-8 Yrs',
 '8-13 Yrs',
 '1-2 Yrs',
 '2-6 Yrs',
 '4-9 Yrs']

# Extracting location

In [38]:
locn_tags=driver.find_elements_by_xpath('//li[@class="fleft grey-text br2 placeHolderLi location"]')
len(locn_tags)

20

In [39]:
location=[]
for i in locn_tags:
    location.append(i.text)
len(location)
location

['Bangalore/Bengaluru\n(WFH during Covid)',
 'Hyderabad/Secunderabad, Bangalore/Bengaluru',
 'Hyderabad/Secunderabad, Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Gurgaon/Gurugram, Bangalore/Bengaluru, Mumbai (All Areas)',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Pune, Bangalore/Bengaluru',
 'Hyderabad/Secunderabad, Pune, Chennai, Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Hyderabad/Secunderabad, Pune, Bangalore/Bengaluru\n(WFH during Covid)',
 'Hyderabad/Secunderabad, Pune, Bangalore/Bengaluru',
 'Hyderabad/Secunderabad, Bangalore/Bengaluru',
 'Bangalore/Bengaluru(Electronic City)',
 'Bangalore/Bengaluru',
 'Kolkata, Hyderabad/Secunderabad, Pune, Ahmedabad, Chennai, Bangalore/Bengaluru, Delhi / NCR, Mumbai (All Areas)\n(WFH during Covid)',
 'Bangalore/Bengaluru']

In [40]:
#create dataframe
data=pd.DataFrame()
data['Job']=job_titles
data['Company']=company_names
data['Experience']=experience
data['Location']=location
data.head(10)

Unnamed: 0,Job,Company,Experience,Location
0,Lead/Senior Data Scientist (NLP),Samya.AI A FRACTAL Entity,3-8 Yrs,Bangalore/Bengaluru\n(WFH during Covid)
1,Lead Data Scientist - Prescriptive Analytics/P...,Codersbrain,6-11 Yrs,"Hyderabad/Secunderabad, Bangalore/Bengaluru"
2,Lead Data Scientist - NLP/OpenCV,Codersbrain,6-11 Yrs,"Hyderabad/Secunderabad, Bangalore/Bengaluru"
3,Senior Data Scientist - Logistics,Gojek Tech,5-8 Yrs,Bangalore/Bengaluru
4,Senior Data Scientist,Fractal Analytics,5-10 Yrs,"Gurgaon/Gurugram, Bangalore/Bengaluru, Mumbai ..."
5,Lead Data Scientist,First American,9-14 Yrs,Bangalore/Bengaluru
6,Lead - Data Analyst / Scientist,Axim Technologies,12-14 Yrs,Bangalore/Bengaluru
7,Data Scientist - Logistics,GO-JEK India,4-10 Yrs,Bangalore/Bengaluru
8,Senior Data Scientist,Idexcel Technologies Private Limited,2-7 Yrs,Bangalore/Bengaluru
9,Senior Data Scientist,Idexcel Technologies Private Limited,2-7 Yrs,Bangalore/Bengaluru


# Q3: In this question you have to scrape data using the filters available on the webpage
    
    You have to use the location and salary filter.
You have to scrape data for “Data Scientist” designation for first 10 job results.
You have to scrape the job-title, job-location, company name, experience required.
The location filter to be used is “Delhi/NCR”. The salary filter to be used is “3-6” lakhs
The task will be done as shown in the below steps:
1. first get the webpage https://www.naukri.com/
2. Enter “Data Scientist” in “Skill, Designations, and Companies” field.
3. Then click the search button.
4. Then apply the location filter and salary filter by checking the respective boxes
5. Then scrape the data for the first 10 jobs results you get.
6. Finally create a dataframe of the scraped data.
Note: All of the above steps have to be done in code. No step is to be done manually.

In [41]:
#import all required libraries
import selenium
from selenium import webdriver
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [42]:
#let's first connect to web driver
driver=webdriver.Chrome(r"C:\Users\ankus\Downloads\chromedriver_win32\chromedriver.exe") 

In [43]:
url="https://www.naukri.com/"
driver.get(url)

In [44]:
#finding web element for job bar using id
search_job=driver.find_element_by_id("qsb-keyword-sugg")
search_job

<selenium.webdriver.remote.webelement.WebElement (session="4e4a5801be73595626e49d88034fb128", element="1e45510f-c533-4b3a-8970-4b19d5a284f5")>

In [45]:

#write on search bar
search_job.send_keys("Data scientist")

In [46]:
#clicking using absolute xpath
search_btn=driver.find_element_by_xpath('/html/body/div[1]/div[3]/div[2]/section/div/form/div[3]/button')
search_btn

<selenium.webdriver.remote.webelement.WebElement (session="4e4a5801be73595626e49d88034fb128", element="73ff71d1-057e-4870-a5e1-d5682ded6bf1")>

In [47]:
search_btn.click()

In [50]:
location_check=driver.find_element_by_xpath('/html/body/div[1]/div[3]/div[2]/section[1]/div[2]/div[3]/div[2]/div[3]/label/i')
location_check

<selenium.webdriver.remote.webelement.WebElement (session="4e4a5801be73595626e49d88034fb128", element="bef4ff91-60c7-4e79-82fe-6f2f2fcdb637")>

In [51]:
location_check.click()

In [52]:
salary_check=driver.find_element_by_xpath('/html/body/div[1]/div[3]/div[2]/section[1]/div[2]/div[4]/div[2]/div[2]/label/i')
salary_check.click()

# Extracting Job Titles

In [53]:
#let's extract all web elements having job titles
title_tags=driver.find_elements_by_xpath('//a[@class="title fw500 ellipsis"]')
len(title_tags)


20

In [54]:
job_titles=[]
for i in title_tags:
    job_titles.append(i.text)
len(job_titles)
job_titles

['Data Scientist | Python | Machine Learning | Deep Learning- Fresher',
 'Senior Data Scientist I',
 'Openings For Jr/mid/Sr level data Scientists',
 'Urgent Hiring For Data Scientist',
 'Urgent Hiring For Data Scientist',
 'Data Scientist role',
 'Data Scientist role',
 'Data Scientist Internship',
 'Data Scientist',
 'Data Scientist',
 'Data Analyst / Data Scientist / Business Analytics / Fresher - MNC',
 'Data Scientist',
 'Senior Data Scientist',
 'Data Scientist',
 'Data Scientist',
 'Data Scientist',
 'Data Scientist',
 'Data Scientist',
 'Associate Data Scientist',
 'Associate Data Scientist']

# Extracting company

In [55]:
company_tags=driver.find_elements_by_xpath('//a[@class="subTitle ellipsis fleft"]')
len(company_tags)

20

In [56]:
company_names=[]
for i in company_tags:
    company_names.append(i.text)
len(company_names)
company_names


['Schlesinger Group',
 'Delhivery',
 'Pluto seven business solutions (p) limited',
 'Mount Talent Consulting Private Limited',
 'Mount Talent Consulting Private Limited',
 'Mount Talent Consulting Private Limited',
 'Mount Talent Consulting Private Limited',
 'iHackers Inc',
 'LG',
 'Teleperformance',
 'GABA Consultancy services',
 'Boston Consulting Group',
 'iNICU',
 'Sentieo',
 'BlackBuck',
 'Country Veggie',
 'Mahajan Imaging',
 'Mahajan Imaging',
 'Right Step Consulting',
 'Blow Trumpet Solutions']

# Extracting experience

In [57]:
exp_tags=driver.find_elements_by_xpath('//li[@class="fleft grey-text br2 placeHolderLi experience"]')
len(exp_tags)

20

In [58]:
experience=[]
for i in exp_tags:
    experience.append(i.text)
len(experience)
experience

['0-3 Yrs',
 '3-7 Yrs',
 '2-6 Yrs',
 '1-6 Yrs',
 '1-6 Yrs',
 '1-3 Yrs',
 '1-3 Yrs',
 '0-1 Yrs',
 '0-2 Yrs',
 '4-9 Yrs',
 '0-0 Yrs',
 '2-5 Yrs',
 '1-5 Yrs',
 '2-7 Yrs',
 '3-7 Yrs',
 '1-3 Yrs',
 '2-6 Yrs',
 '2-6 Yrs',
 '3-6 Yrs',
 '1-5 Yrs']

# Extracting job location

In [59]:
locn_tags=driver.find_elements_by_xpath('//li[@class="fleft grey-text br2 placeHolderLi location"]')
len(locn_tags)

20

In [60]:
location=[]
for i in locn_tags:
    location.append(i.text)
len(location)
location

['Noida, Gurgaon/Gurugram, Delhi / NCR',
 'Gurgaon/Gurugram',
 'Kolkata, Hyderabad/Secunderabad, Pune, Ahmedabad, Chennai, Bangalore/Bengaluru, Delhi / NCR, Mumbai (All Areas)\n(WFH during Covid)',
 'Gurgaon/Gurugram, Bangalore/Bengaluru',
 'Gurgaon/Gurugram, Bangalore/Bengaluru',
 'Noida, Hyderabad/Secunderabad, Pune, Gurgaon/Gurugram, Bangalore/Bengaluru, Delhi / NCR\n(WFH during Covid)',
 'Noida, Hyderabad/Secunderabad, Pune, Gurgaon/Gurugram, Bangalore/Bengaluru, Delhi / NCR\n(WFH during Covid)',
 'New Delhi',
 'Noida, New Delhi, Faridabad, Gurgaon/Gurugram, Delhi / NCR',
 'Gurgaon/Gurugram, Chennai',
 'Noida, New Delhi, Delhi / NCR',
 'New Delhi',
 'Delhi',
 'Delhi',
 'Gurgaon, Bengaluru',
 'Bharuch, Jaipur, Bhopal, Mumbai, Jhansi, Nagpur, Ghaziabad, Jaunpur, Kanpur, Delhi, Lucknow, Agra, Gurgaon, Rajkot, Bengaluru',
 'Delhi',
 'Delhi',
 'Noida',
 'Delhi']

In [61]:
#create dataframe
data=pd.DataFrame()
data['Job']=job_titles
data['Company']=company_names
data['Experience']=experience
data['Location']=location
data.head(10)

Unnamed: 0,Job,Company,Experience,Location
0,Data Scientist | Python | Machine Learning | D...,Schlesinger Group,0-3 Yrs,"Noida, Gurgaon/Gurugram, Delhi / NCR"
1,Senior Data Scientist I,Delhivery,3-7 Yrs,Gurgaon/Gurugram
2,Openings For Jr/mid/Sr level data Scientists,Pluto seven business solutions (p) limited,2-6 Yrs,"Kolkata, Hyderabad/Secunderabad, Pune, Ahmedab..."
3,Urgent Hiring For Data Scientist,Mount Talent Consulting Private Limited,1-6 Yrs,"Gurgaon/Gurugram, Bangalore/Bengaluru"
4,Urgent Hiring For Data Scientist,Mount Talent Consulting Private Limited,1-6 Yrs,"Gurgaon/Gurugram, Bangalore/Bengaluru"
5,Data Scientist role,Mount Talent Consulting Private Limited,1-3 Yrs,"Noida, Hyderabad/Secunderabad, Pune, Gurgaon/G..."
6,Data Scientist role,Mount Talent Consulting Private Limited,1-3 Yrs,"Noida, Hyderabad/Secunderabad, Pune, Gurgaon/G..."
7,Data Scientist Internship,iHackers Inc,0-1 Yrs,New Delhi
8,Data Scientist,LG,0-2 Yrs,"Noida, New Delhi, Faridabad, Gurgaon/Gurugram,..."
9,Data Scientist,Teleperformance,4-9 Yrs,"Gurgaon/Gurugram, Chennai"


# Q4: Scrape data of first 100 sunglasses listings on flipkart.com. You have to scrape four attributes:
1. Brand
2. Product Description
3. Price

To scrape the data you have to go through following steps:
1. Go to Flipkart webpage by url : https://www.flipkart.com/
2. Enter “sunglasses” in the search field where “search for products, brands andmore” is written andclick the search icon
3. After that you will reach to the page having a lot of sunglasses. From this pageyou can scrap therequired data as usual
4. After scraping data from the first page, go to the “Next” Button at the bottom ofthe page , thenclick on it.
5. Now scrape data from this page as usual
6. Repeat this until you get data for 100 sunglasses.


Note: That all of the above steps have to be done by coding only and not manually

In [1]:
#import all required libraries
import selenium
from selenium import webdriver
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [2]:
#let's first connect to web driver
driver=webdriver.Chrome(r"C:\Users\ankus\Downloads\chromedriver_win32\chromedriver.exe") 

In [3]:
url="https://www.flipkart.com/"
driver.get(url)

In [4]:
search_bar=driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div[1]/div[2]/div[2]/form/div/div/input')
search_bar

<selenium.webdriver.remote.webelement.WebElement (session="abdd0aec26075871c1a9b523d61c1ecf", element="c1248847-bbe1-4edd-85cd-d272efe3138a")>

In [5]:
#write on search bar
search_bar.send_keys("Sunglasses")

In [8]:
search_btn=driver.find_element_by_xpath('//button[@class="L0Z3Pu"]')
search_btn.click()

In [82]:
product_tag=driver.find_elements_by_xpath('//div[@class="_2WkVRV"]')
len(product_tag)

40

In [83]:
product=[]
for i in product_tag:
    product.append(i.text)
len(product)
#product

40

In [84]:
price_tag=driver.find_elements_by_xpath('//div[@class="_30jeq3"]')
len(price_tag)

40

In [85]:
price=[]
for i in price_tag:
    price.append(i.text)
len(price)
#price

40

In [86]:
discount_tag=driver.find_elements_by_xpath('//div[@class="_3Ay6Sb"]')
len(discount_tag)

40

In [87]:
discount=[]
for i in discount_tag:
    discount.append(i.text)
len(discount)
#discount

40

In [88]:
company_tag=driver.find_elements_by_xpath('//a[@class="IRpwTa"]')
len(company_tag)

40

In [89]:
company=[]
for i in company_tag:
    company.append(i.text)
len(company)
#company

40

In [90]:
data1=pd.DataFrame()
data1['Product']=product
data1['Price']=price
data1['Discount']=discount
data1['Type']=company
#data1

Unnamed: 0,Product,Price,Discount,Type
0,ROYAL SON,₹449,70% off,UV Protection Rectangular Sunglasses (Free Size)
1,VINCENT CHASE,₹999,60% off,"by Lenskart Polarized, UV Protection Aviator S..."
2,SRPM,₹188,85% off,UV Protection Wayfarer Sunglasses (56)
3,Elligator,₹248,90% off,UV Protection Round Sunglasses (54)
4,Fastrack,₹599,33% off,UV Protection Wayfarer Sunglasses (Free Size)
5,PIRASO,₹149,90% off,UV Protection Aviator Sunglasses (54)
6,kingsunglasses,₹188,81% off,UV Protection Round Sunglasses (54)
7,ROYAL SON,₹449,70% off,UV Protection Retro Square Sunglasses (58)
8,SHAAH COLLECTIONS,₹165,90% off,"UV Protection, Polarized, Mirrored Rectangular..."
9,ROZZETTA CRAFT,₹474,78% off,UV Protection Retro Square Sunglasses (Free Size)


In [20]:
next_page=driver.find_element_by_xpath('/html/body/div[1]/div/div[3]/div[1]/div[2]/div[12]/div/div/nav/a[11]/span')
next_page.click()

In [53]:
product_tag=driver.find_elements_by_xpath('//div[@class="_2WkVRV"]')
len(product_tag)

40

In [58]:
product=[]
for i in product_tag:
    product.append(i.text)
len(product)
product=product[:30]
#product

In [59]:
price_tag=driver.find_elements_by_xpath('//div[@class="_30jeq3"]')
len(price_tag)

40

In [60]:
price=[]
for i in price_tag:
    price.append(i.text)
len(price)
price=price[:30]
#price

In [61]:
discount_tag=driver.find_elements_by_xpath('//div[@class="_3Ay6Sb"]')
len(discount_tag)

40

In [62]:
discount=[]
for i in discount_tag:
    discount.append(i.text)
len(discount)
discount=discount[:30]
#discount

In [63]:
company_tag=driver.find_elements_by_xpath('//a[@class="IRpwTa"]')
len(company_tag)

40

In [64]:
company=[]
for i in company_tag:
    company.append(i.text)
len(company)
company=company[:30]
#company


In [66]:
data2=pd.DataFrame()
data2['Product']=product
data2['Price']=price
data2['Discount']=discount
data2['Type']=company
#data2

In [67]:
next_page=driver.find_element_by_xpath('/html/body/div[1]/div/div[3]/div[1]/div[2]/div[12]/div/div/nav/a[12]/span')
next_page.click()

In [68]:
product_tag=driver.find_elements_by_xpath('//div[@class="_2WkVRV"]')
len(product_tag)

40

In [69]:
product=[]
for i in product_tag:
    product.append(i.text)
len(product)
product=product[:30]
#product

In [70]:
price_tag=driver.find_elements_by_xpath('//div[@class="_30jeq3"]')
len(price_tag)

40

In [71]:
price=[]
for i in price_tag:
    price.append(i.text)
len(price)
price=price[:30]
#price

In [72]:
discount_tag=driver.find_elements_by_xpath('//div[@class="_3Ay6Sb"]')
len(discount_tag)

40

In [73]:
discount=[]
for i in discount_tag:
    discount.append(i.text)
len(discount)
discount=discount[:30]
#discount

In [74]:
company_tag=driver.find_elements_by_xpath('//a[@class="IRpwTa"]')
len(company_tag)

39

In [75]:
company=[]
for i in company_tag:
    company.append(i.text)
len(company)
company=company[:30]
#company


In [77]:
data3=pd.DataFrame()
data3['Product']=product
data3['Price']=price
data3['Discount']=discount
data3['Type']=company
#data3

In [91]:
pd.concat([data1,data2,data3],axis=0)

Unnamed: 0,Product,Price,Discount,Type
0,ROYAL SON,₹449,70% off,UV Protection Rectangular Sunglasses (Free Size)
1,VINCENT CHASE,₹999,60% off,"by Lenskart Polarized, UV Protection Aviator S..."
2,SRPM,₹188,85% off,UV Protection Wayfarer Sunglasses (56)
3,Elligator,₹248,90% off,UV Protection Round Sunglasses (54)
4,Fastrack,₹599,33% off,UV Protection Wayfarer Sunglasses (Free Size)
...,...,...,...,...
25,elegante,₹389,74% off,UV Protection Aviator Sunglasses (Free Size)
26,Singco,₹498,75% off,"by Lenskart Polarized, UV Protection Aviator S..."
27,VINCENT CHASE,₹999,60% off,Polarized Rectangular Sunglasses (65)
28,ROYAL SON,₹664,66% off,UV Protection Retro Square Sunglasses (Free Size)


# Q5: Scrape 100 reviews data from flipkart.com for iphone11 phone.
You have to go the link:
https://www.flipkart.com/apple-iphone-11-black-64-gb-includes-earpods-poweradapter/p/itm0f37c2240b217?pid=MOBFKCTSVZAXUHGR&lid=LSTMOBFKCTSVZAXUHGREPBFGI&marketplace.
 you have to scrape the attributes.These are:
1. Rating
2. Review summary
3. Full review
4. You have to scrape this data for first 100 reviews.
Note: All the steps required during scraping should be done through code only and not manually

In [26]:
#import libraries
import selenium
from selenium import webdriver
import pandas

import warnings
warnings.filterwarnings("ignore")


In [27]:
#let's first connect to web driver
driver=webdriver.Chrome(r"C:\Users\ankus\Downloads\chromedriver_win32\chromedriver.exe") 

In [28]:
url="https://www.flipkart.com/apple-iphone-11-black-64-gb-includes- earpods-poweradapter/p/itm0f37c2240b217?pid=MOBFKCTSVZAXUHGR&lid=LSTMOBFKCTSVZAXUHGREPBFGI&marketplace"
driver.get(url)

In [67]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings
len(Ratings)

data1=pd.DataFrame()
data1['Review_Summary']=review
data1['Full_Review']=full_review1
data1['Ratings']=Ratings
data1

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Brilliant,The Best Phone for the Money\n\nThe iPhone 11 ...,5
1,Simply awesome,Really satisfied with the Product I received.....,5
2,Perfect product!,Amazing phone with great cameras and better ba...,5
3,Best in the market!,Great iPhone very snappy experience as apple k...,5
4,Worth every penny,Previously I was using one plus 3t it was a gr...,5
5,Fabulous!,This is my first iOS phone. I am very happy wi...,5
6,Great product,Amazing Powerful and Durable Gadget.\n\nI’m am...,5
7,Good choice,So far it’s been an AMAZING experience coming ...,4
8,Highly recommended,What a camera .....just awesome ..you can feel...,5
9,Worth every penny,"i11 is worthy to buy, too much happy with the ...",5


In [69]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [70]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings
len(Ratings)

data2=pd.DataFrame()
data2['Review_Summary']=review
data2['Full_Review']=full_review1
data2['Ratings']=Ratings
data2

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Highly recommended,iphone 11 is a very good phone to buy only if ...,5
1,Perfect product!,It’s a must buy who is looking for an upgrade ...,5
2,Perfect product!,Value for money❤️❤️\nIts awesome mobile phone ...,5
3,Highly recommended,It's my first time to use iOS phone and I am l...,5
4,Perfect product!,Iphone is just awesome.. battery backup is ver...,5
5,Classy product,Totally in love with this ❤ the camera quality...,5
6,Worth every penny,Best budget Iphone till date ❤️ go for it guys...,5
7,Simply awesome,"Excellent camera, good performance, no lag. Th...",5
8,Worth every penny,It’s been almost a month since I have been usi...,5
9,Worth every penny,"Smooth like butter, camera like fantabulous, s...",5


In [78]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [73]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings
len(Ratings)

data3=pd.DataFrame()
data3['Review_Summary']=review
data3['Full_Review']=full_review1
data3['Ratings']=Ratings
data3

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Wonderful,This is my first ever I phone. Before this I w...,5
1,Terrific,Really worth of money. i just love it. It is t...,5
2,Nice product,Awesome Phone. Slightly high price but worth. ...,4
3,Perfect product!,"Battery backup is extraordinary, camera is dec...",5
4,Classy product,Superb Product !!!\nA big and worthy upgrade f...,5
5,Brilliant,"I have migrated from OP 7pro... and trust me, ...",5
6,Good choice,Looking so good 👍 😍 super 👌 stylish 😎 phone\nC...,4
7,Must buy!,It’s an amazing product from apple and the cam...,5
8,Wonderful,I just directly switch from iphone 6s to iphon...,5
9,Perfect product!,After 1 month use I found camera quality best ...,5


In [75]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [80]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review=review[:9]
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1=full_review1[:9]
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings=Ratings[:9]
len(Ratings)

data4=pd.DataFrame()
data4['Review_Summary']=review
data4['Full_Review']=full_review1
data4['Ratings']=Ratings
data4

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Terrific purchase,Upgraded from iphone 6 to 11 best phone for ip...,5
1,Simply awesome,It's very high performances devise i am like t...,5
2,Great product,Well you all know the specifications . One of ...,5
3,Terrific purchase,The built quality is not very premium.\nThe ba...,5
4,Mind-blowing purchase,This will help you more. See if you are planni...,5
5,Excellent,Amazing camera and feel super machine I am not...,5
6,Simply awesome,Really good actually this is my first apple pr...,5
7,Good quality product,it is very good at performance. Camera is best...,4
8,Very poor,Reviewing again after 5 months. At the beginni...,5


In [None]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [81]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings
len(Ratings)

data5=pd.DataFrame()
data5['Review_Summary']=review
data5['Full_Review']=full_review1
data5['Ratings']=Ratings
data5

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Perfect product!,It is just awesome mobile for this price from ...,5
1,Must buy!,Best Apple iPhone that i have bought at a very...,5
2,Fabulous!,I can say I'm damn impressed with iPhone 11. A...,5
3,Mind-blowing purchase,"Best Quality Product OF iPhone Series , Sound ...",5
4,Mind-blowing purchase,First thanks to Flipkart for this amazing deal...,5
5,Wonderful,Superfast delivery by Flipkart. Thanks.\n\n1. ...,5
6,Must buy!,I rate this product 5* as it has got amazing u...,5
7,Great product,Again back to apple iphone after a gap of 2-3 ...,5
8,Very Good,impressive super phone and best in class camer...,4
9,Classy product,Well while switching from android to iOS the f...,5


In [None]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [82]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings
len(Ratings)

data6=pd.DataFrame()
data6['Review_Summary']=review
data6['Full_Review']=full_review1
data6['Ratings']=Ratings
data6

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Perfect product!,"In love with this iphone great performance,cam...",5
1,Just wow!,Best in class. Battery backup is good especial...,5
2,Awesome,Very excited to have this phone. This phone ha...,5
3,Pretty good,I was using Iphone 6s and also Oneplus 6t. Bot...,4
4,Terrific purchase,Its Very awesome product working and good came...,5
5,Awesome,I am using this phone for 5 days and its one o...,5
6,Classy product,Best and amazing product.....phone looks so pr...,5
7,Brilliant,Excellent camera and display touching very nic...,5
8,Terrific,I got this beast today. And I must say the pic...,5
9,Perfect product!,Awesome purchase. Amazing phone with good batt...,5


In [None]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [84]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review=review[:9]
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1=full_review1[:9]
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings=Ratings[:9]
len(Ratings)

data7=pd.DataFrame()
data7['Review_Summary']=review
data7['Full_Review']=full_review1
data7['Ratings']=Ratings
data7

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Awesome,"Always love the apple products, upgraded from ...",5
1,Perfect product!,Value for money\n5 star rating\nExcellent came...,5
2,Very poor,"Please DONT buy from here,\n\nI purchased Appl...",5
3,Fabulous!,awesome phone to have. it has got many smart f...,5
4,Excellent,Its a fabulous mobile look awesome performance...,5
5,Must buy!,Dear friends... I want to share my experience ...,4
6,Value-for-money,Just got this iphone 11\nAnd it is most powerf...,5
7,Best in the market!,The best all rounder iphone. Flipkart is doing...,5
8,Brilliant,Amazing phone and on great deal I received wit...,5


In [85]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [87]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review=review[:9]
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1=full_review1[:9]
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings=Ratings[:9]
len(Ratings)


data8=pd.DataFrame()
data8['Review_Summary']=review
data8['Full_Review']=full_review1
data8['Ratings']=Ratings
data8

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Good quality product,I'm switching this phone to oppo reno 10x zoom...,4
1,Terrific,Simply Awesome\n\nI have upgraded from iPhone ...,5
2,Best in the market!,Damn this phone is a blast . Upgraded from and...,5
3,Perfect product!,Worth the money’ starting first from its perfo...,5
4,Awesome,I dreamt about this day from a long time.... G...,5
5,Value-for-money,I'm Really happy with the product\nDelivery wa...,4
6,Just wow!,Great iphone.\nI am writing this review after ...,5
7,Worthless,Phone is awesome no doubt. But Camera & Displa...,5
8,Super!,Did an upgrade from 6s plus to iphone 11.\nAo ...,5


In [None]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [88]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings
len(Ratings)

data9=pd.DataFrame()
data9['Review_Summary']=review
data9['Full_Review']=full_review1
data9['Ratings']=Ratings
data9

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Wonderful,Super and marvellous phone look very cute and ...,5
1,Nice product,If you are looking for a premium phone under 5...,4
2,Mind-blowing purchase,awesome Phone Smooth Touch Too good Sexyy look...,5
3,Classy product,Gifted my man on his 30th birthday 🎂 He loves ...,5
4,Terrific,"I bought iPhone 11 On March 2021, And I am Wri...",5
5,Worth every penny,Here is the thing\n\nThe only reason why you s...,5
6,Excellent,Just go for it.\nThis phone is really amazing....,5
7,Fabulous!,Everything is perfect pictures come out so cle...,5
8,Pretty good,I've used this phone for over a month now and ...,4
9,Highly recommended,Thanks Flipkart For this amazing deal! I had a...,5


In [89]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [91]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings
len(Ratings)

data10=pd.DataFrame()
data10['Review_Summary']=review
data10['Full_Review']=full_review1
data10['Ratings']=Ratings
data10

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Excellent,A perfect phone and a good battery super camer...,5
1,Worth every penny,Undoubtedly Iphone 11 is the most successful m...,5
2,Fabulous!,I purchased the iPhone 11 a month back. I must...,5
3,Brilliant,A wort full value for money decision it’s . Si...,5
4,Just wow!,The ultimate performance\nCamera is superb\nTh...,5
5,Terrific purchase,I use a Note10+ and have been using both iOS a...,5
6,Awesome,The phone is completely good\nAs far as camera...,5
7,Decent product,Everything u ll like it when u use this iPhone...,3
8,Mind-blowing purchase,Excellent camera 📸 And Display touching very N...,5
9,Wonderful,Nice value for money good and best price I pho...,5


In [None]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [93]:
review_tag=driver.find_elements_by_xpath('//p[@class="_2-N8zT"]')
len(review_tag)

review=[]
for i in review_tag:
    review.append(i.text)
review
len(review)

full_review=driver.find_elements_by_xpath('//div[@class="t-ZTKy"]')
len(full_review)

full_review1=[]
for i in full_review:
    full_review1.append(i.text)
full_review1
len(full_review1)

ratings_tag=driver.find_elements_by_xpath('//div[@class="_3LWZlK _1BLPMq"]')
len(ratings_tag)

Ratings=[]
for i in ratings_tag:
    Ratings.append(i.text)
Ratings
len(Ratings)

data11=pd.DataFrame()
data11['Review_Summary']=review
data11['Full_Review']=full_review1
data11['Ratings']=Ratings
data11

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Worth every penny,iPhone is delivered on time. Display is great ...,5
1,Wonderful,Nice value for money good and best price I pho...,5
2,Super!,This is my first ever iPhone.\nAnd I truly don...,5
3,Classy product,I got this phone for good price hence 5 star f...,5
4,Must buy!,Camera is excellent just lack of telephoto mod...,5
5,Wonderful,I genuinely liked it. One of the best mobile p...,4
6,Mind-blowing purchase,"Flipkart honoured on time delivery, I have use...",5
7,Good quality product,"Awesome camera, smooth and fast UI, display is...",4
8,Fabulous!,Good product nice experience with Flipkart but...,5
9,Worth every penny,Here is the thing\n\nThe only reason why you s...,5


In [98]:
data=pd.concat([data1,data2,data3,data4,data5,data6,data7,data8,data9,data10,data11],axis=0)
data.head(100)

Unnamed: 0,Review_Summary,Full_Review,Ratings
0,Brilliant,The Best Phone for the Money\n\nThe iPhone 11 ...,5
1,Simply awesome,Really satisfied with the Product I received.....,5
2,Perfect product!,Amazing phone with great cameras and better ba...,5
3,Best in the market!,Great iPhone very snappy experience as apple k...,5
4,Worth every penny,Previously I was using one plus 3t it was a gr...,5
...,...,...,...
8,Fabulous!,Good product nice experience with Flipkart but...,5
9,Worth every penny,Here is the thing\n\nThe only reason why you s...,5
0,Worth every penny,iPhone is delivered on time. Display is great ...,5
1,Wonderful,Nice value for money good and best price I pho...,5


# Q6: Scrape data for first 100 sneakers you find when you visit flipkart.com andsearch for “sneakers” in thesearch field.
You have to scrape 4 attributes of each sneaker:
1. Brand
2. Product Description
3. Price


In [83]:
#import all required libraries
import selenium
from selenium import webdriver
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [84]:
#let's first connect to web driver
driver=webdriver.Chrome(r"C:\Users\ankus\Downloads\chromedriver_win32\chromedriver.exe") 

In [85]:
url=" https://www.flipkart.com/"
driver.get(url)

In [86]:
search_bar=driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div[1]/div[2]/div[2]/form/div/div/input')
search_bar

<selenium.webdriver.remote.webelement.WebElement (session="7265ee601f556c513fe2f6479fd6e76b", element="f23c59ab-dcf7-4538-b034-46841f270c36")>

In [87]:
search_bar.send_keys("sneakers")

In [88]:
search_btn=driver.find_element_by_xpath('//button[@class="L0Z3Pu"]')
search_btn.click()

In [89]:
product_detail=driver.find_elements_by_xpath('//a[@class="IRpwTa"]')
len(product_detail)

32

In [90]:
Product_detail=[]
for i in product_detail:
    Product_detail.append(i.text)
len(Product_detail)

32

In [91]:
brand_tag=driver.find_elements_by_xpath('//div[@class="_2WkVRV"]')
len(brand_tag)

40

In [92]:
brand=[]
for i in brand_tag:
    brand.append(i.text)
len(brand)
brand=brand[:32]
len(brand)

32

In [93]:
price_tag=driver.find_elements_by_xpath('//div[@class="_30jeq3"]')
len(price_tag)

39

In [94]:
price=[]
for i in price_tag:
    price.append(i.text)
len(price)
price=price[:32]
len(price)

32

In [95]:
discount_tag=driver.find_elements_by_xpath('//div[@class="_3Ay6Sb"]')
len(discount_tag)

39

In [96]:
discount=[]
for i in discount_tag:
    discount.append(i.text)
len(discount)
discount=discount[:32]
len(discount)

32

In [97]:
data1=pd.DataFrame()
data1['Brand']=brand
data1['Price']=price
data1['Product_detail']=Product_detail
data1['Discount']=discount
#data1

In [98]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [99]:
product_detail=driver.find_elements_by_xpath('//a[@class="IRpwTa"]')
len(product_detail)

35

In [100]:
Product_detail=[]
for i in product_detail:
    Product_detail.append(i.text)
len(Product_detail)

35

In [101]:
price_tag=driver.find_elements_by_xpath('//div[@class="_30jeq3"]')
len(price_tag)

40

In [102]:
price=[]
for i in price_tag:
    price.append(i.text)
len(price)
price=price[:35]
len(price)

35

In [103]:
discount_tag=driver.find_elements_by_xpath('//div[@class="_3Ay6Sb"]')
len(discount_tag)

39

In [104]:
discount=[]
for i in discount_tag:
    discount.append(i.text)
len(discount)
discount=discount[:35]
len(discount)

35

In [105]:
brand_tag=driver.find_elements_by_xpath('//div[@class="_2WkVRV"]')
len(brand_tag)

40

In [106]:
brand=[]
for i in brand_tag:
    brand.append(i.text)
len(brand)
brand=brand[:35]
len(brand)

35

In [107]:
data2=pd.DataFrame()
data2['Brand']=brand
data2['Price']=price
data2['Product_detail']=Product_detail
data2['Discount']=discount
#data2

In [114]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [116]:
product_detail=driver.find_elements_by_xpath('//a[@class="IRpwTa"]')
len(product_detail)

27

In [117]:
Product_detail=[]
for i in product_detail:
    Product_detail.append(i.text)
len(Product_detail)

27

In [118]:
price_tag=driver.find_elements_by_xpath('//div[@class="_30jeq3"]')
len(price_tag)

40

In [119]:
price=[]
for i in price_tag:
    price.append(i.text)
len(price)
price=price[:27]
len(price)

27

In [120]:
discount_tag=driver.find_elements_by_xpath('//div[@class="_3Ay6Sb"]')
len(discount_tag)

40

In [121]:
discount=[]
for i in discount_tag:
    discount.append(i.text)
len(discount)
discount=discount[:27]
len(discount)

27

In [122]:
brand_tag=driver.find_elements_by_xpath('//div[@class="_2WkVRV"]')
len(brand_tag)

40

In [123]:
brand=[]
for i in brand_tag:
    brand.append(i.text)
len(brand)
brand=brand[:27]
len(brand)

27

In [127]:
data3=pd.DataFrame()
data3['Brand']=brand
data3['Price']=price
data3['Product_detail']=Product_detail
data3['Discount']=discount
#data3

In [126]:
next_btn=driver.find_element_by_xpath('//a[@class="_1LKTO3"]')
next_btn.click()

In [128]:
product_detail=driver.find_elements_by_xpath('//a[@class="IRpwTa"]')
len(product_detail)

27

In [129]:
Product_detail=[]
for i in product_detail:
    Product_detail.append(i.text)
len(Product_detail)

27

In [130]:
price_tag=driver.find_elements_by_xpath('//div[@class="_30jeq3"]')
len(price_tag)

40

In [131]:
price=[]
for i in price_tag:
    price.append(i.text)
len(price)
price=price[:27]
len(price)

27

In [132]:
discount_tag=driver.find_elements_by_xpath('//div[@class="_3Ay6Sb"]')
len(discount_tag)

38

In [133]:
discount=[]
for i in discount_tag:
    discount.append(i.text)
len(discount)
discount=discount[:27]
len(discount)

27

In [134]:
brand_tag=driver.find_elements_by_xpath('//div[@class="_2WkVRV"]')
len(brand_tag)

40

In [135]:
brand=[]
for i in brand_tag:
    brand.append(i.text)
len(brand)
brand=brand[:27]
len(brand)

27

In [136]:
data4=pd.DataFrame()
data4['Brand']=brand
data4['Price']=price
data4['Product_detail']=Product_detail
data4['Discount']=discount
#data4

In [140]:
data=pd.concat([data1,data2,data3,data4],axis=0)
data[:100]

Unnamed: 0,Brand,Price,Product_detail,Discount
0,Advick,₹920,Sneakers For Men,63% off
1,Advick,₹920,Sneakers For Men,63% off
2,Magnolia,₹398,Sneakers For Men,60% off
3,luxury fashion,₹449,Luxury Fashionable casual shoes Sneakers For Men,65% off
4,URBANBOX,₹220,Sneakers For Men,77% off
...,...,...,...,...
1,ESSENCE,₹449,Sneakers For Men,55% off
2,PUMA,"₹2,499",BMW MMS Speedcat M Sneakers For Men,50% off
3,Pro,₹999,Electron Street Speckle Sneakers For Men,56% off
4,PUMA,"₹3,959",Stylish Lace Up Sneakers For Men,42% off


# Q7: Go to the link - https://www.myntra.com/shoes
Set Price filter to “Rs. 7149 to Rs. 14099 ” , Color filter to “Black”,

And then scrape First 100 shoes data you get. The data should include “Brand” of the shoes , Short Shoe
description, price of the shoe 


In [14]:
#import all required libraries
import selenium
from selenium import webdriver
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [56]:
#let's first connect to web driver
driver=webdriver.Chrome(r"C:\Users\ankus\Downloads\chromedriver_win32\chromedriver.exe") 

In [57]:
url="https://www.myntra.com/shoes"
driver.get(url)

In [58]:
price_check=driver.find_element_by_xpath('/html/body/div[2]/div/div[1]/main/div[3]/div[1]/section/div/div[5]/ul/li[2]/label/div')
price_check.click()

In [59]:
color_check=driver.find_element_by_xpath('/html/body/div[2]/div/div[1]/main/div[3]/div[1]/section/div/div[6]/ul/li[1]/label/div')
color_check.click()

In [60]:
brand_tag=driver.find_elements_by_xpath('//h3[@class="product-brand"]')
len(brand_tag)

50

In [61]:
brand=[]
for i in brand_tag:
    brand.append(i.text)
brand
len(brand)

50

In [62]:
price_tag=driver.find_elements_by_xpath('//div[@class="product-price"]')
len(price_tag)

50

In [63]:
price=[]
for i in price_tag:
    price.append(i.text)
price
len(price)

50

In [64]:
detail_tag=driver.find_elements_by_xpath('//h4[@class="product-product"]')
len(detail_tag)

50

In [65]:
detail=[]
for i in detail_tag:
    detail.append(i.text)
detail
len(detail)

50

In [66]:
data1=pd.DataFrame()
data1['Brand']=brand
data1['Price']=price
data1['Detail']=detail
data1

Unnamed: 0,Brand,Price,Detail
0,Puma,Rs. 8449Rs. 12999(35% OFF),Men Magnify Nitro Running
1,Puma,Rs. 7149Rs. 12999(45% OFF),Men Running Shoes
2,Puma,Rs. 7499Rs. 9999(25% OFF),Men Electrify Nitro Running
3,Saint G,Rs. 9810Rs. 10900(10% OFF),Men Mid-Top Chelsea Boots
4,Puma,Rs. 8449Rs. 12999(35% OFF),Women Magnify Nitro Shoes
5,RARE RABBIT,Rs. 7999Rs. 9999(20% OFF),Men Leather Flat Boots
6,Cole Haan,Rs. 11999,Men GENERATION ZEROGRAND STITCHLITE
7,Calvin Klein,Rs. 7999,Men Solid Suede Sneakers
8,ASICS,Rs. 9999,Men Colourblocked PU Sneakers
9,Bugatti,Rs. 7999Rs. 9999(20% OFF),Men Running Shoes


In [67]:
next_btn=driver.find_element_by_xpath('/html/body/div[2]/div/div[1]/main/div[3]/div[2]/div/div[2]/section/div[2]/ul/li[12]/a')
next_btn.click()

In [78]:
brand_tag=driver.find_elements_by_xpath('//h3[@class="product-brand"]')
len(brand_tag)

50

In [79]:
brand=[]
for i in brand_tag:
    brand.append(i.text)
brand
len(brand)

50

In [80]:
price_tag=driver.find_elements_by_xpath('//div[@class="product-price"]')
len(price_tag)

50

In [81]:
price=[]
for i in price_tag:
    price.append(i.text)
price
len(price)

50

In [82]:
detail_tag=driver.find_elements_by_xpath('//h4[@class="product-product"]')
len(detail_tag)

50

In [83]:
detail=[]
for i in detail_tag:
    detail.append(i.text)
detail
len(detail)

50

In [85]:
data2=pd.DataFrame()
data2['Brand']=brand
data2['Price']=price
data2['Detail']=detail
#data2

In [86]:
pd.concat([data1,data2],axis=0)

Unnamed: 0,Brand,Price,Detail
0,Puma,Rs. 8449Rs. 12999(35% OFF),Men Magnify Nitro Running
1,Puma,Rs. 7149Rs. 12999(45% OFF),Men Running Shoes
2,Puma,Rs. 7499Rs. 9999(25% OFF),Men Electrify Nitro Running
3,Saint G,Rs. 9810Rs. 10900(10% OFF),Men Mid-Top Chelsea Boots
4,Puma,Rs. 8449Rs. 12999(35% OFF),Women Magnify Nitro Shoes
...,...,...,...
45,Geox,Rs. 12599Rs. 17999(30% OFF),Women Leather High-Top Flat Boots
46,Saint G,Rs. 10350Rs. 11500(10% OFF),Suede Block Heeled Boots
47,Saint G,Rs. 8910Rs. 9900(Rs. 990 OFF),Striped Suede Flatform Heeled Boots
48,Onitsuka Tiger,Rs. 8999,Women Perforations Sneakers


# Q8: Go to webpage https://www.amazon.in/Enter “Laptop” in the search field and then click the search icon.Then set CPU Type filter to “Intel Core i7” and “Intel Core i9” 

After setting the filters scrape first 10 laptops data. You have to scrape 3 attributesfor each laptop:
1. Title
2. Ratings
3. Price


In [43]:
#import all required libraries
import selenium
from selenium import webdriver
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [44]:
#let's first connect to web driver
driver=webdriver.Chrome(r"C:\Users\ankus\Downloads\chromedriver_win32\chromedriver.exe") 

In [45]:
url=" https://www.amazon.in/"
driver.get(url)

In [46]:
#finding web element for search bar using id
search_laptop=driver.find_element_by_id("twotabsearchtextbox")
search_laptop

<selenium.webdriver.remote.webelement.WebElement (session="93f477372eec5d561c5749d0739ee19d", element="aff426ba-3a67-43e1-b4d6-f442eec1c451")>

In [47]:
#write on search bar
search_laptop.send_keys("laptop")

In [48]:
#clicking on search icon using absolute xpath
search_btn=driver.find_element_by_xpath('/html/body/div[1]/header/div/div[1]/div[2]/div/form/div[3]/div/span/input')
search_btn.click()

clicking filters by  selecting check boxes using absolute xpath

In [50]:
#scraping laptop for intel i7 CPU type
CPU_check=driver.find_element_by_xpath('/html/body/div[1]/div[2]/div[1]/div[2]/div/div[3]/span/div[1]/span/div/div/div[6]/div/div/div[1]/div/span[1]/a[12]/div/span')
CPU_check.click()

# extracting titles

In [51]:
laptop_tags=driver.find_elements_by_xpath('//span[@class="a-size-medium a-color-base a-text-normal"]')
len(laptop_tags)

25

In [52]:
laptop=[]

for i in laptop_tags:
    laptop.append(i.text)
len(laptop)
laptop=laptop[:10]
laptop

['Acer Nitro 5 AN515-57 Gaming Laptop | Intel Core i7-11800H |NVIDIA GeForce RTX 3050 Ti Laptop Graphics |15.6" FHD 144Hz IPS Display |16GB DDR4 |256GB SSD+1TB HDD |Killer Wi-Fi 6 |RGB Backlit Keyboard',
 'Lenovo ThinkBook 13s Intel 11th Gen Core i7 13.3"(33.78 cm) WQXGA IPS 300 nits 100% sRGB Dolby Vision Thin and Light Laptop (16GB/512GB SSD/Windows 10/MS Office/FPR/Mineral Grey/1.26 Kg), 20V9A05FIH',
 'ASUS VivoBook 14 (2021), 14-inch (35.56 cms) FHD, Intel Core i7-1065G7 10th Gen, Thin and Light Laptop (16GB/512GB SSD/Integrated Graphics/Office 2021/Windows 11/Silver/1.6 Kg), X415JA-EK701WS',
 'ASUS TUF Gaming F15 (2021) 15.6-inch (39.62 cms) FHD 144Hz, Intel Core i7-11800H 11th Gen, RTX 3060 6GB Graphics, Gaming Laptop (16GB/1TB SSD/Windows 10/Gray/2.3 Kg), FX566HM-HN100T',
 'ASUS TUF Dash F15 (2021), 15.6-inch (39.62 cms) FHD 144Hz, Intel Core i7-11370H 11th Gen, RTX 3050 4GB Graphics Gaming Laptop (16GB RAM/512GB SSD/Windows 10/Gray/2 kg), FX516PC-HN063T',
 'HP Pavilion 13, 11th

# extracting ratings


In [53]:
urls=driver.find_elements_by_xpath('//a[@class="a-link-normal s-link-style a-text-normal"]')#collecting urls of all the laptop
UR=[]
urls[:10]

[<selenium.webdriver.remote.webelement.WebElement (session="93f477372eec5d561c5749d0739ee19d", element="c37d7d62-1137-4a33-a56d-4c2435c5ee30")>,
 <selenium.webdriver.remote.webelement.WebElement (session="93f477372eec5d561c5749d0739ee19d", element="bd253eda-96c1-4e33-8767-34f2fe6997fc")>,
 <selenium.webdriver.remote.webelement.WebElement (session="93f477372eec5d561c5749d0739ee19d", element="dd30e8ff-a7f3-4aa8-9f79-180c46a0756d")>,
 <selenium.webdriver.remote.webelement.WebElement (session="93f477372eec5d561c5749d0739ee19d", element="76643b3b-78c0-47a1-957b-88ce883716d4")>,
 <selenium.webdriver.remote.webelement.WebElement (session="93f477372eec5d561c5749d0739ee19d", element="c261f619-85f9-4f8d-a0f4-bd78239cebe7")>,
 <selenium.webdriver.remote.webelement.WebElement (session="93f477372eec5d561c5749d0739ee19d", element="04633b63-8f6b-488a-93b3-074111d70046")>,
 <selenium.webdriver.remote.webelement.WebElement (session="93f477372eec5d561c5749d0739ee19d", element="ec791e53-dea8-4f0c-8086-5a

In [54]:
Ratings = []

for i in urls[:10]:
    UR.append(i.get_attribute('href'))#getting the url of first 10 laptops
for url in UR:#loop for every laptop in the list
    driver.get(url)
    try:                  #exception handling for nosuchelementexception                                                    #click the rating link found
        rating=driver.find_element_by_xpath("//span[@class='a-size-base a-nowrap']//span")#locating the rating
        Ratings.append(rating.text)#appending the ratings in Ratings list
        
    except:
        Ratings.append("NO rating")#appending the No rating if no rating is there


In [55]:
Ratings

['4.6 out of 5',
 '4.3 out of 5',
 'NO rating',
 '4.7 out of 5',
 '4 out of 5',
 '4.6 out of 5',
 '4.6 out of 5',
 '4.2 out of 5',
 '4.6 out of 5',
 '3.9 out of 5']

# extracting price

In [56]:
price_tag=driver.find_elements_by_xpath('//span[@class="a-price-whole"]')
len(price_tag)

24

In [57]:
Price=[]
for i in price_tag:
    Price.append(i.text)
len(Price)
Price=Price[:10]
Price

['89,990',
 '79,990',
 '59,990',
 '1,14,990',
 '82,990',
 '86,990',
 '99,990',
 '57,990',
 '89,990',
 '89,745']

In [58]:
data=pd.DataFrame()
data['Laptop']=laptop
data['Price']=Price
data['RAatings']=Ratings
data

Unnamed: 0,Laptop,Price,RAatings
0,Acer Nitro 5 AN515-57 Gaming Laptop | Intel Co...,89990,4.6 out of 5
1,Lenovo ThinkBook 13s Intel 11th Gen Core i7 13...,79990,4.3 out of 5
2,"ASUS VivoBook 14 (2021), 14-inch (35.56 cms) F...",59990,NO rating
3,ASUS TUF Gaming F15 (2021) 15.6-inch (39.62 cm...,114990,4.7 out of 5
4,"ASUS TUF Dash F15 (2021), 15.6-inch (39.62 cms...",82990,4 out of 5
5,"HP Pavilion 13, 11th Gen Intel Core i7, 13.3-i...",86990,4.6 out of 5
6,LG Gram 17-inches Ultra-Light Intel Evo 11th G...,99990,4.6 out of 5
7,Mi Notebook Horizon Edition 14 Intel Core i7-1...,57990,4.2 out of 5
8,Acer Nitro 5 AN515-57 Gaming Laptop | Intel Co...,89990,4.6 out of 5
9,ASUS ZenBook 14 (2020) Intel Core i7-1165G7 11...,89745,3.9 out of 5


# Q9: Write a python program to scrape data for first 10 job results for Data Scientist Designation in Noidalocation. You have to scrape company name, No. of days ago when job was posted, Rating of the company.
This task will be done in following steps:
    
    1. First get the webpage https://www.ambitionbox.com/
2. Click on the Job option
3. After reaching to the next webpage, In place of “Search by Designations, Companies, Skills” enter
“Data Scientist” and click on search button
4. You will reach to the following web page click on location and in place of “Search location” enter
“Noida” and select location “Noida”.
5. Then scrape the data for the first 10 jobs results you get on the above shown page.
6. Finally create a dataframe of the scraped data.
Note: All the steps required during scraping should be done through code only and not manually

In [1]:
#import all required libraries
import selenium
from selenium import webdriver
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [2]:
#let's first connect to web driver
driver=webdriver.Chrome(r"C:\Users\ankus\Downloads\chromedriver_win32\chromedriver.exe") 

In [3]:
url=" https://www.ambitionbox.com/"
driver.get(url)

In [4]:
#finding web element for job using xpath
job_btn=driver.find_element_by_xpath('/html/body/div[1]/nav/nav/a[6]')
job_btn

<selenium.webdriver.remote.webelement.WebElement (session="46fd5d3cf8e74bc07e725d2710a0949d", element="440fd4a5-6ef1-48c2-ac4d-39827e67e9e0")>

In [5]:
job_btn.click()

In [6]:
#finding web element for search bar using xpath
search_job=driver.find_element_by_xpath('/html/body/div/div/div/div[2]/div[1]/div/div/div/div/span/input')
search_job

<selenium.webdriver.remote.webelement.WebElement (session="46fd5d3cf8e74bc07e725d2710a0949d", element="1505b2a1-8f5f-4026-ac8f-e37552ea687b")>

In [7]:
#write on search job
search_job.send_keys("Data Scientist")

In [8]:
#clicking on search icon using absolute xpath
search_btn=driver.find_element_by_xpath('/html/body/div/div/div/div[2]/div[1]/div/div/div/button/span')
search_btn.click()

In [12]:
location_btn=driver.find_element_by_xpath('/html/body/div/div/div/div[2]/div[2]/div[1]/div/div/div/div[2]/div[1]/p')
location_btn.click()

In [13]:
location_check=driver.find_element_by_xpath('/html/body/div/div/div/div[2]/div[2]/div[1]/div/div/div/div[2]/div[2]/div/div[3]/div[1]/div[5]/div/label')
location_check.click()

In [14]:
job_tags=driver.find_elements_by_xpath('//a[@class="title noclick"]')
len(job_tags)

10

In [15]:
jobs=[]
for i in job_tags:
    jobs.append(i.text)
len(jobs)
jobs

['Manager Data Scientist',
 'Vice President - Data Science',
 'Data Scientist',
 'CogniTensor - Data Scientist (2-5 yrs)',
 'CogniTensor - Data Scientist (2-5 yrs)',
 'Data Scientist - Data Science/Model Development (0-6 yrs)',
 'Manager - Data Scientist - Retail/BFSI (8-15 yrs)',
 'Data Scientist - Consulting Firm (8-15 yrs)',
 'RMS Risk Management - Data Scientist - CNN/Deep Learning (2-8 yrs)',
 'Data Science - Senior Data Scientist']

In [16]:
company_tag=driver.find_elements_by_xpath('//p[@class="company body-medium"]')
len(company_tag)

10

In [17]:
company_name=[]
for i in company_tag:
    company_name.append(i.text)
len(company_name)
company_name

['Ameriprise Financial',
 'Paytm',
 'Jubilant Foodworks Limited',
 'CHT Sapiense',
 'CHT Sapiense',
 'GI Group',
 'GI Group',
 'GI Group',
 'RMS Risk Management Solutions',
 'Paytm']

In [18]:
day_tag=driver.find_elements_by_xpath('//span[@class="body-small-l"]')
len(day_tag)

20

In [19]:
days=[]
for i in day_tag:
    days.append(i.text)
len(days)
days=days[0::2]
days

['2d ago',
 '8d ago',
 '25d ago',
 '9d ago',
 '10d ago',
 '21d ago',
 '21d ago',
 '21d ago',
 '3d ago',
 '28d ago']

In [20]:
#scrap ratings
urls=driver.find_elements_by_xpath('//span[@class="body-small"]')#collecting urls of all the laptop
UR=[]
urls[:10]

[<selenium.webdriver.remote.webelement.WebElement (session="46fd5d3cf8e74bc07e725d2710a0949d", element="3b158884-a7b1-4e05-b325-c8372e3374c9")>,
 <selenium.webdriver.remote.webelement.WebElement (session="46fd5d3cf8e74bc07e725d2710a0949d", element="cdd9a52b-cd91-42a0-90a1-16a32d94a0e7")>,
 <selenium.webdriver.remote.webelement.WebElement (session="46fd5d3cf8e74bc07e725d2710a0949d", element="a625e626-d9f5-45d4-9ff1-819a260e986a")>,
 <selenium.webdriver.remote.webelement.WebElement (session="46fd5d3cf8e74bc07e725d2710a0949d", element="a6bd2b99-b1b3-4baf-a785-6d1a39090e3c")>,
 <selenium.webdriver.remote.webelement.WebElement (session="46fd5d3cf8e74bc07e725d2710a0949d", element="e7349b99-3080-4789-807d-ec60ac4e324b")>,
 <selenium.webdriver.remote.webelement.WebElement (session="46fd5d3cf8e74bc07e725d2710a0949d", element="fc74a21c-025a-41db-80e9-b5498fc0935c")>,
 <selenium.webdriver.remote.webelement.WebElement (session="46fd5d3cf8e74bc07e725d2710a0949d", element="2460a300-50fd-43c2-8a7c-ba

In [24]:
Ratings = []

for i in urls[:10]:
    Ratings.append(i.text)#getting the url of first 10 laptops
Ratings=Ratings[:10]
Ratings

['4.0', '3.7', '3.9', '3.7', '3.7', '4.1', '4.1', '4.1', '3.5', '3.7']

In [25]:
data=pd.DataFrame()
data['Jobs']=jobs
data['Company_name']=company_name
data['Time']=days
data['Ratings']=Ratings
data

Unnamed: 0,Jobs,Company_name,Time,Ratings
0,Manager Data Scientist,Ameriprise Financial,2d ago,4.0
1,Vice President - Data Science,Paytm,8d ago,3.7
2,Data Scientist,Jubilant Foodworks Limited,25d ago,3.9
3,CogniTensor - Data Scientist (2-5 yrs),CHT Sapiense,9d ago,3.7
4,CogniTensor - Data Scientist (2-5 yrs),CHT Sapiense,10d ago,3.7
5,Data Scientist - Data Science/Model Developmen...,GI Group,21d ago,4.1
6,Manager - Data Scientist - Retail/BFSI (8-15 yrs),GI Group,21d ago,4.1
7,Data Scientist - Consulting Firm (8-15 yrs),GI Group,21d ago,4.1
8,RMS Risk Management - Data Scientist - CNN/Dee...,RMS Risk Management Solutions,3d ago,3.5
9,Data Science - Senior Data Scientist,Paytm,28d ago,3.7


# Q10: Write a python program to scrape the salary data for Data Scientist designation.
You have to scrape Company name, Number of salaries, Average salary, Minsalary, Max Salary.
The above task will be, done as shown in the below steps:
1. First get the webpage https://www.ambitionbox.com
2. Click on the salaries option
3. After reaching to the following webpage, In place of “Search Job Profile” enters “Data Scientist” andthen click on “Data Scientist”.
4. Scrape the data for the first 10 companies. Scrape the company name, total salary record, averagesalary, minimum salary, maximum salary, experience required.
5. Store the data in a dataframe.

Note: All the steps required during scraping should be done through code only and not manually



In [133]:
#import all required libraries
import selenium
from selenium import webdriver
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [134]:
#let's first connect to web driver
driver=webdriver.Chrome(r"C:\Users\ankus\Downloads\chromedriver_win32\chromedriver.exe") 

In [135]:
url=" https://www.ambitionbox.com/"
driver.get(url)

In [136]:
#finding web element for salary using xpath
salary_btn=driver.find_element_by_xpath('/html/body/div[1]/nav/nav/a[4]')
salary_btn.click()

In [140]:
#finding web element for search bar using xpath
search_job=driver.find_element_by_xpath('/html/body/div/div/div/main/section[1]/div[2]/div[1]/span/input')
search_job

<selenium.webdriver.remote.webelement.WebElement (session="d897f2278c624cd654285ea0fd3b290c", element="90c20ab6-848e-4d4e-8603-baf3633e0467")>

In [141]:
#write on search job
search_job.send_keys("Data Scientist")

In [142]:
search_btn=driver.find_element_by_xpath('/html/body/div/div/div/main/section[1]/div[2]/div[1]/i[1]')
search_btn.click()

In [143]:
company_tag=driver.find_elements_by_xpath('//div[@class="name"]')
len(company_tag)

10

In [149]:
company=[]
for i in company_tag:
    company.append(i.text.replace('\n'," "))
len(company)
#company

['Tekion based on 10 salaries',
 'Microsoft Corporation based on 224 salaries',
 'Goldman Sachs based on 11 salaries',
 'Flipkart based on 54 salaries',
 'Amazon based on 85 salaries',
 'Arcesium based on 37 salaries',
 'PayPal based on 12 salaries',
 'Servicenow Software Development India based on 27 salaries',
 'Walmart based on 79 salaries',
 'ServiceNow based on 16 salaries']

In [151]:
average_salary=driver.find_elements_by_xpath('//p[@class="averageCtc"]')
len(average_salary)

10

In [153]:
Average_Salary=[]
for i in average_salary:
    Average_Salary.append(i.text)
len(Average_Salary)
#Average_Salary

10

In [155]:
minimum_salary=driver.find_elements_by_xpath('//div[@class="value body-medium"]')
len(minimum_salary)

20

In [167]:
Minimum_Salary=[]
for i in minimum_salary:
    Minimum_Salary.append(i.text)
len(Minimum_Salary)
Minimum_Salary=Minimum_Salary[0::2]
Minimum_Salary

['₹ 14.0L',
 '₹ 12.0L',
 '₹ 16.0L',
 '₹ 7.0L',
 '₹ 8.0L',
 '₹ 12.0L',
 '₹ 12.0L',
 '₹ 12.0L',
 '₹ 10.0L',
 '₹ 11.2L']

In [163]:
maximum_salary=driver.find_elements_by_xpath('//div[@class="value body-medium"]')
len(maximum_salary)

20

In [168]:
Maximum_Salary=[]
for i in maximum_salary:
    Maximum_Salary.append(i.text)
len(Maximum_Salary)
Maximum_Salary=Maximum_Salary[1::2]
Maximum_Salary

['₹ 32.0L',
 '₹ 45.0L',
 '₹ 30.0L',
 '₹ 30.0L',
 '₹ 41.5L',
 '₹ 30.0L',
 '₹ 25.0L',
 '₹ 25.0L',
 '₹ 32.0L',
 '₹ 23.0L']

In [169]:
experience_tag=driver.find_elements_by_xpath('//div[@class="salaries sbold-list-header"]')
len(experience_tag)

10

In [171]:
experience=[]
for i in experience_tag:
    experience.append(i.text.replace('\n'," "))
len(experience)
experience

['Software Engineer  .  4 yrs exp',
 'Software Engineer  .  1-4 yrs exp',
 'Software Engineer  .  2 yrs exp',
 'Software Engineer  .  1-4 yrs exp',
 'Software Engineer  .  1-4 yrs exp',
 'Software Engineer  .  1 yr exp',
 'Software Engineer  .  1 yr exp',
 'Software Engineer  .  2-3 yrs exp',
 'Software Engineer  .  1-4 yrs exp',
 'Software Engineer  .  3 yrs exp']

In [172]:
data=pd.DataFrame()
data['Company']=company
data['Experience']=experience
data['Average_Salary']=Average_Salary
data['Minimun_Salary']=Minimun_Salary
data['Maximum_Salary']=Maximum_Salary
data

Unnamed: 0,Company,Experience,Average_Salary,Minimun_Salary,Maximum_Salary
0,Tekion based on 10 salaries,Software Engineer . 4 yrs exp,₹ 23.3L,₹ 14.0L,₹ 32.0L
1,Microsoft Corporation based on 224 salaries,Software Engineer . 1-4 yrs exp,₹ 22.4L,₹ 32.0L,₹ 45.0L
2,Goldman Sachs based on 11 salaries,Software Engineer . 2 yrs exp,₹ 21.9L,₹ 12.0L,₹ 30.0L
3,Flipkart based on 54 salaries,Software Engineer . 1-4 yrs exp,₹ 20.6L,₹ 45.0L,₹ 30.0L
4,Amazon based on 85 salaries,Software Engineer . 1-4 yrs exp,₹ 19.4L,₹ 16.0L,₹ 41.5L
5,Arcesium based on 37 salaries,Software Engineer . 1 yr exp,₹ 18.5L,₹ 30.0L,₹ 30.0L
6,PayPal based on 12 salaries,Software Engineer . 1 yr exp,₹ 18.4L,₹ 7.0L,₹ 25.0L
7,Servicenow Software Development India based on...,Software Engineer . 2-3 yrs exp,₹ 18.3L,₹ 30.0L,₹ 25.0L
8,Walmart based on 79 salaries,Software Engineer . 1-4 yrs exp,₹ 17.9L,₹ 8.0L,₹ 32.0L
9,ServiceNow based on 16 salaries,Software Engineer . 3 yrs exp,₹ 17.8L,₹ 41.5L,₹ 23.0L
