This repository has been archived by the owner on Dec 22, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 265
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #44 from tauseefmohammed2/COVID-Scraper
Web Scraper to Extract COVID-19 Information as per (Issue #31)
- Loading branch information
Showing
3 changed files
with
95 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# Web Scraping COVID-19 Data for Top 10 Countries Affected (Issue #31)
# https://github.com/Python-World/Python_and_the_Web/issues/31
# Contributed by @tauseefmohammed2 : https://github.com/tauseefmohammed2

# Requirements :
# Selenium (Web Scraping Python Library. Install : pip install selenium)
# ChromeDriver (Used for Automated Navigation to URLs, which are Provided by Selenium as Input. Download : https://chromedriver.chromium.org/downloads)
# Pandas (Data Manipulation Library. Install : pip install pandas)

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas
import datetime, time, os

td = datetime.date.today()
wait_imp = 10  # implicit wait (seconds) Selenium uses while locating elements

CO = webdriver.ChromeOptions()
CO.add_experimental_option('useAutomationExtension', False)
CO.add_argument('--ignore-certificate-errors')
CO.add_argument('--headless')  # run Chrome without opening a browser window

# Creating WebDriver Object.
# The chromedriver location can be overridden through the CHROMEDRIVER
# environment variable so the script is not tied to one machine's download
# location; the original hard-coded Windows path remains the default.
chromedriver_path = os.environ.get(
    "CHROMEDRIVER",
    r'C:\Users\TEMP\Downloads\chromedriver_win32 (1)\chromedriver.exe',
)
wd = webdriver.Chrome(chromedriver_path, options=CO)

print("Date:", td.strftime("%b-%d-%Y"))
print("--------------------------------------------------------------------------------------------")
print(" COVID-19 Statistics From Around the World (Top 10 Countries) ")
print("--------------------------------------------------------------------------------------------")

# Creating Empty Lists to Store Information which will be Retrieved
country_list = []
cases_list = []
deaths_list = []
continent_list = []

try:
    # Using get() method to Open a URL (WHO) for the worldwide totals
    wd.get("https://www.who.int/emergencies/diseases/novel-coronavirus-2019")
    wd.implicitly_wait(wait_imp)
    w_total = wd.find_element_by_id("confirmedCases")
    w_death = wd.find_element_by_id("confirmedDeaths")
    # NOTE(review): the original also fetched "involvedCountries" into an
    # unused variable; that dead lookup has been dropped.
    print("WorldWide")
    print("Total Cases : ", w_total.text)
    print("Total Deaths : ", w_death.text)
    print("-------------------------------------------------------")

    # Using get() method to Open a URL (Worldometers) for the per-country table
    wd.get("https://www.worldometers.info/coronavirus/countries-where-coronavirus-has-spread/")

    table = wd.find_element_by_id("table3")
    # Row 0 of the table is the header, so data rows start at count == 1;
    # enumerate() replaces the original hand-maintained counter.
    for count, row in enumerate(table.find_elements_by_xpath(".//tr")[1:], start=1):
        # Loop variable is named `cell` (not `td`) so it no longer shadows
        # the module-level date object `td`.
        cells = [cell.text for cell in row.find_elements_by_xpath(".//td")]
        if len(cells) < 4:
            continue  # skip spacer/malformed rows instead of raising IndexError
        country_list.append(cells[0])
        cases_list.append(cells[1])
        deaths_list.append(cells[2])
        continent_list.append(cells[3])
        if count <= 10:  # only the 10 worst-affected countries are printed
            print("Country : ", cells[0])
            print("Total Cases : ", cells[1])
            print("Total Deaths : ", cells[2])
            print("-------------------------------------------------------")
finally:
    # Closing Chrome After Extraction of Data — runs even if scraping fails,
    # so a headless Chrome process is never leaked.
    wd.quit()

# Creating a DataFrame (2D-Tabular Data Structure) using the Information Collected
df = pandas.DataFrame(data={"Country": country_list, "Total Cases": cases_list, "Total Deaths": deaths_list, "Continent": continent_list})
# Using to_csv() Function which Dumps the Data from the DataFrame to a CSV File
df.to_csv("./data.csv", sep=',', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# COVID-19 Information Scraper | ||
<!--Remove the below lines and add yours --> | ||
Python Web Scraper which Extracts Information Related to COVID-19 (Number of Cases, Number of Deaths) for Top 10 Affected Countries | ||
|
||
### Prerequisites | ||
<!--Remove the below lines and add yours --> | ||
Selenium (Web Scraping Python Library) | ||
ChromeDriver (Used for Automated Navigation to URLs, which are Provided by Selenium as Input. Download : https://chromedriver.chromium.org/downloads) | ||
Pandas (Data Manipulation Library) | ||
|
||
### How to run the script | ||
<!--Remove the below lines and add yours --> | ||
It is a single python file. Simply run COVIDWebScraper.py | ||
|
||
## *Author Name* | ||
<!--Remove the below lines and add yours --> | ||
@tauseefmohammed2 | ||
https://github.com/tauseefmohammed2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
selenium==3.141.0 | ||
pandas==1.0.1 |