Skip to content

Commit 31de4f7

Browse files
committed
scraping implemented
1 parent cd99401 commit 31de4f7

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

Udemy Scraper/fetcher.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import requests
2+
from bs4 import BeautifulSoup
3+
from selenium import webdriver
4+
from selenium.webdriver.common.keys import Keys
5+
import time
6+
7+
# Get chrome driver path
# Raw string: the Windows backslashes must not be parsed as escape sequences
# ('\W' and '\c' are invalid escapes and trigger a warning in a normal string).
driver_path = r'C:\Webdrivers\chromedriver'
9+
10+
# Get input for course category to scrape
category = input("Enter course category:")

# Percent-encode the search term so that spaces, '&', '#', '+', etc. typed by
# the user cannot corrupt or truncate the query string.
from urllib.parse import quote_plus

url = 'https://www.udemy.com/courses/search/?src=ukw&q={}'.format(quote_plus(category))
14+
15+
# initiating the webdriver. Parameter includes the path of the webdriver.
driver = webdriver.Chrome(driver_path)
try:
    driver.get(url)

    # this is just to ensure that the page is loaded
    time.sleep(5)
    html = driver.page_source
except BaseException:
    # Loading failed or was interrupted (e.g. Ctrl+C during the sleep): shut
    # the browser session down here, because the script will not reach its
    # normal cleanup at the end, then let the original error propagate.
    driver.quit()
    raise
22+
23+
# Now apply bs4 to html variable
soup = BeautifulSoup(html, "html.parser")
job_divs = soup.find_all("div", {"class": "course-card--container--3w8Zm course-card--large--1BVxY"})


def _text_of(parent, tag, css_class, label=""):
    """Return the stripped text of the first matching child, or None if absent.

    Any occurrence of ``label`` (e.g. a "Current price" caption) is removed
    from the text.
    """
    node = parent.find(tag, {"class": css_class})
    if node is None:
        return None
    text = node.text.strip()
    if label:
        text = text.replace(label, "")
    return text


def _first_word(node):
    """Return the first whitespace-separated token of the node's text, or None."""
    words = node.text.split()
    return words[0] if words else None


# One dict per course card; fields that are missing on a card are stored as None
# instead of aborting the whole scrape with AttributeError/IndexError.
courses = []

for job_div in job_divs:
    title = _text_of(job_div, "div", "udlite-focus-visible-target udlite-heading-md course-card--course-title--2f7tE")
    description = _text_of(job_div, "p", "udlite-text-sm course-card--course-headline--yIrRk")
    instructor = _text_of(job_div, "div", "udlite-text-xs course-card--instructor-list--lIA4f")

    current_price = _text_of(
        job_div,
        "div",
        "price-text--price-part--Tu6MH course-card--discount-price--3TaBk udlite-heading-md",
        label="Current price₹",
    )

    original_price = _text_of(
        job_div,
        "div",
        "price-text--price-part--Tu6MH price-text--original-price--2e-F5 course-card--list-price--2AO6G udlite-text-sm",
        label="Original Price₹",
    )

    rating = _text_of(job_div, "span", "udlite-heading-sm star-rating--rating-number--3lVe8")

    # The hours and lectures figures are read from the first and second
    # "row" spans; query them once and take the leading token of each.
    rows = job_div.find_all("span", {"class": "course-card--row--1OMjg"})
    hours = _first_word(rows[0]) if len(rows) > 0 else None
    lectures = _first_word(rows[1]) if len(rows) > 1 else None

    courses.append({
        "title": title,
        "description": description,
        "instructor": instructor,
        "current_price": current_price,
        "original_price": original_price,
        "rating": rating,
        "hours": hours,
        "lectures": lectures,
    })
43+
44+
45+
# quit() ends the whole WebDriver session (browser windows and the driver
# process); close() would only close the current window.
driver.quit()  # shutting down the webdriver

0 commit comments

Comments
 (0)