Skip to content
This repository was archived by the owner on Dec 22, 2023. It is now read-only.

Commit 34372c0

Browse files
authored
Merge pull request #442 from Anshul275/Anshul275
DevFest India 2020 Schedule
2 parents b869962 + 06654e5 commit 34372c0

File tree

5 files changed

+94
-0
lines changed

5 files changed

+94
-0
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
<h1 align=center>DevFest India 2020 Schedule</h1>
2+
3+
Scrapes the complete event schedule of DevFest India 2020 and stores it in a CSV file.
4+
5+
## *Author Name*
6+
[Anshul Pandey](https://github.com/Anshul275)
7+
8+
## Pre-Requisites
9+
10+
Run the command `pip install -r requirements.txt`
11+
12+
### To install the chrome web-driver:
13+
`1.` Check the Chrome version you are currently using at `chrome://settings/help`
14+
`2.` Download the ChromeDriver release that matches your Chrome version from `https://chromedriver.chromium.org/downloads` and extract the zip file
15+
16+
`IMP` - Set `driver_path` in `devfest_schedule.py` to the full path of the extracted `chromedriver.exe` file
17+
18+
## To Run the File
19+
20+
For Windows - `python devfest_schedule.py`
21+
22+
For Ubuntu/Linux - `python3 devfest_schedule.py`
23+
24+
## Screenshots -
25+
26+
### Working Screenshot
27+
28+
![Screenshot](working.png)
29+
30+
### Generated DevFest Schedule csv file
31+
32+
![Screenshot](schedule.png)
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import csv
2+
from selenium import webdriver
3+
from bs4 import BeautifulSoup
4+
5+
# Method to scrape and store DevFest Schedule in csv file
6+
def devfest_schedule(
    driver_path='/Users/pc/Desktop/Rough/DevFest_India_2020_Schedule/chromedriver/chromedriver.exe',
    output_path="devfest_schedule.csv",
):
    """Scrape the DevFest India 2020 schedule and store it in a csv file.

    Loads the schedule page in headless Chrome, clicks the second and third
    links of the header tab bar (presumably so that every day's schedule is
    rendered into the page -- confirm against the live site), and parses the
    resulting page source. Each ``v-window-item`` div is treated as one day,
    numbered "October 16", "October 17", ... in document order, and every
    event found in it becomes one csv row.

    Args:
        driver_path: Full path of the chromedriver executable. Change the
            default to where your chrome driver is installed, or pass it in.
        output_path: Path of the csv file to write; an existing file is
            overwritten.

    Returns:
        None. The result is the csv file written to ``output_path``.
    """
    url = "https://devfestindia.com/schedule"

    # Running the driver in headless mode
    options = webdriver.ChromeOptions()
    options.add_argument("headless")

    # NOTE(review): `executable_path=` and `find_element_by_xpath` are the
    # Selenium 3 API and are not available in recent Selenium 4 releases --
    # confirm the version pinned in requirements.txt before upgrading.
    driver = webdriver.Chrome(executable_path=driver_path, options=options)
    try:
        # Requesting the desired webpage through selenium Chrome driver
        driver.get(url)
        select_page_2 = "/html/body/div/div/div[3]/main/div/div[1]/div/div/header/div[2]/div/div/div[2]/div/a[2]"
        select_page_3 = "/html/body/div/div/div[3]/main/div/div[1]/div/div/header/div[2]/div/div/div[2]/div/a[3]"
        driver.find_element_by_xpath(select_page_2).click()
        driver.find_element_by_xpath(select_page_3).click()

        # Storing the entire devfest schedule webpage in html variable
        html = driver.page_source
    finally:
        # Always shut the browser down, even when loading or clicking fails,
        # so no headless Chrome process is left behind.
        driver.quit()

    soup = BeautifulSoup(html, "lxml")
    day_wise_schedule = soup.find_all("div", attrs={"class": "v-window-item"})

    # newline="" is required by the csv module (otherwise Windows gets a blank
    # line after every row); utf-8 keeps non-ASCII names from failing to encode.
    with open(output_path, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file)

        # Initializing the first row with the column title
        writer.writerow(["Name of Event", "Date", "Timings", "Tag", "Author"])

        # The first day container is October 16; each following one is the next day.
        for day_of_month, schedule in enumerate(day_wise_schedule, start=16):
            event_date = "October " + str(day_of_month)
            events = schedule.find_all("div", attrs={"class": "row pa-0 my-0 align-center justify-center row-border-white"})
            for event in events:
                # Adding each event to csv file
                writer.writerow(_event_row(event, event_date))


def _event_row(event, event_date):
    """Build one csv row ``[name, date, timings, tag, authors]`` for *event*."""
    event_details = event.find("div", attrs={"class": "py-3 ma-1 fill-height"})
    event_timings = event.find("div", attrs={"class": "text-right my-0 py-0 col-md-2 col-3"}).find_all("p")

    event_name = event_details.find("p").text
    # The three <p> elements are joined as "<first>-<second> <third>", with
    # inner spaces stripped from the first and third.
    event_time = event_timings[0].text.replace(" ", "") + "-" + event_timings[1].text + " " + event_timings[2].text.replace(" ", "")

    # An event without a tag chip gets an empty tag instead of crashing on None.
    tag_chip = event_details.find("span", attrs={"class": "mt-2 mr-2 v-chip v-chip--label v-chip--no-color theme--light v-size--small"})
    event_tag = tag_chip.text if tag_chip is not None else ""

    # Outlined chips hold the author names; each name has its spaces removed
    # and is followed by a single space (trailing space kept as before).
    authors = event_details.find_all("span", attrs={"class": "mt-2 mr-2 v-chip v-chip--label v-chip--no-color v-chip--outlined theme--light v-size--small"})
    event_authors = "".join(author.text.replace(" ", "") + " " for author in authors)

    return [event_name, event_date, event_time, event_tag, event_authors]
58+
59+
if __name__ == "__main__":
    # Script entry point: run the scraper with its defaults, then confirm
    # on stdout that the csv file was produced.
    devfest_schedule()
    print("devfest_schedule.csv file has been generated")
106 Bytes
Binary file not shown.
83.7 KB
Loading
19.4 KB
Loading

0 commit comments

Comments
 (0)