import csv
from selenium import webdriver
from bs4 import BeautifulSoup


# Scrapes the DevFest India 2020 schedule and stores it in a csv file
def devfest_schedule():
    url = "https://devfestindia.com/schedule"

    # Run the Chrome driver in headless mode
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")

    # Change driver_path to where your chromedriver is installed
    driver_path = '/Users/pc/Desktop/Rough/DevFest_India_2020_Schedule/chromedriver/chromedriver.exe'
    driver = webdriver.Chrome(executable_path=driver_path, options=options)
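    # Note: this script uses the Selenium 3 API (executable_path,
    # find_element_by_xpath); Selenium 4 replaces these with Service(...)
    # and find_element(By.XPATH, ...).
    # Assumption: the schedule page is rendered client-side, so give elements
    # up to 10 seconds to appear before the element lookups below give up
    driver.implicitly_wait(10)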

    # Request the schedule page through the Selenium Chrome driver
    driver.get(url)
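    # The schedule is a tabbed single-page app: click the remaining day tabs so
    # that every day's panel is rendered into the DOM before page_source is read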
    select_page_2 = "/html/body/div/div/div[3]/main/div/div[1]/div/div/header/div[2]/div/div/div[2]/div/a[2]"
    select_page_3 = "/html/body/div/div/div[3]/main/div/div[1]/div/div/header/div[2]/div/div/div[2]/div/a[3]"
    driver.find_element_by_xpath(select_page_2).click()
    driver.find_element_by_xpath(select_page_3).click()

    # Store the entire devfest schedule webpage in the html variable
    html = driver.page_source
    driver.quit()

    soup = BeautifulSoup(html, "lxml")

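    # Each "v-window-item" div holds the rendered schedule panel for one day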
    day_wise_schedule = soup.find_all("div", attrs={"class": "v-window-item"})

    # newline="" prevents the csv module from writing blank rows on Windows
    with open("devfest_schedule.csv", "w", newline="") as csv_file:
        writer = csv.writer(csv_file)

        # Initialize the first row with the column titles
        writer.writerow(["Name of Event", "Date", "Timings", "Tag", "Author"])

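        # The first day tab corresponds to 16 October 2020; the date label is
        # incremented after each day's panel below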
        starting_date = 16
        for schedule in day_wise_schedule:
            events = schedule.find_all("div", attrs={"class": "row pa-0 my-0 align-center justify-center row-border-white"})
            for event in events:
                event_details = event.find("div", attrs={"class": "py-3 ma-1 fill-height"})
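                # Assumption: the timings column holds three <p> tags in order:
                # start time, end time, and the AM/PM suffix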
                event_timings = event.find("div", attrs={"class": "text-right my-0 py-0 col-md-2 col-3"}).find_all("p")

                event_name = event_details.find("p").text
                event_date = "October " + str(starting_date)
                event_time = event_timings[0].text.replace(" ", "") + "-" + event_timings[1].text + " " + event_timings[2].text.replace(" ", "")
                event_tag = event_details.find("span", attrs={"class": "mt-2 mr-2 v-chip v-chip--label v-chip--no-color theme--light v-size--small"}).text
                authors = event_details.find_all("span", attrs={"class": "mt-2 mr-2 v-chip v-chip--label v-chip--no-color v-chip--outlined theme--light v-size--small"})
                event_authors = ""
                for author in authors:
                    event_authors = event_authors + author.text.replace(" ", "") + " "

                # Add each event to the csv file
                writer.writerow([event_name, event_date, event_time, event_tag, event_authors])

            starting_date = starting_date + 1


if __name__ == "__main__":
    # Scrape the DevFest India 2020 schedule and store it in a csv file
    devfest_schedule()
    print("devfest_schedule.csv file has been generated")