This repository has been archived by the owner on Dec 22, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 265
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #44 from tauseefmohammed2/COVID-Scraper
Web Scraper to Extract COVID-19 Information as per (Issue #31)
- Loading branch information
Showing
3 changed files
with
95 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# Web Scraping COVID-19 Data for Top 10 Countries Affected (Issue #31)
# https://github.com/Python-World/Python_and_the_Web/issues/31
# Contributed by @tauseefmohammed2 : https://github.com/tauseefmohammed2

# Requirements :
# Selenium (Web Scraping Python Library. Install : pip install selenium)
# ChromeDriver (Used for Automated Navigation to URLs, which are Provided by Selenium as Input. Download : https://chromedriver.chromium.org/downloads)
# Pandas (Data Manipulation Library. Install : pip install pandas)

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas
import datetime, time, os

td = datetime.date.today()
wait_imp = 10  # implicit wait (seconds) Selenium uses while locating elements

CO = webdriver.ChromeOptions()
CO.add_experimental_option('useAutomationExtension', False)
CO.add_argument('--ignore-certificate-errors')
CO.add_argument('--headless')  # run Chrome without opening a browser window

# Creating WebDriver Object.
# The chromedriver location can be overridden through the CHROMEDRIVER
# environment variable so the script is not tied to one machine's download
# location; the original hard-coded Windows path remains the default.
chromedriver_path = os.environ.get(
    "CHROMEDRIVER",
    r'C:\Users\TEMP\Downloads\chromedriver_win32 (1)\chromedriver.exe',
)
wd = webdriver.Chrome(chromedriver_path, options=CO)

print("Date:", td.strftime("%b-%d-%Y"))
print("--------------------------------------------------------------------------------------------")
print(" COVID-19 Statistics From Around the World (Top 10 Countries) ")
print("--------------------------------------------------------------------------------------------")

# Creating Empty Lists to Store Information which will be Retrieved
country_list = []
cases_list = []
deaths_list = []
continent_list = []

try:
    # Using get() method to Open a URL (WHO) for the worldwide totals
    wd.get("https://www.who.int/emergencies/diseases/novel-coronavirus-2019")
    wd.implicitly_wait(wait_imp)
    w_total = wd.find_element_by_id("confirmedCases")
    w_death = wd.find_element_by_id("confirmedDeaths")
    # NOTE(review): the original also fetched "involvedCountries" into an
    # unused variable; that dead lookup has been dropped.
    print("WorldWide")
    print("Total Cases : ", w_total.text)
    print("Total Deaths : ", w_death.text)
    print("-------------------------------------------------------")

    # Using get() method to Open a URL (Worldometers) for the per-country table
    wd.get("https://www.worldometers.info/coronavirus/countries-where-coronavirus-has-spread/")

    table = wd.find_element_by_id("table3")
    # Row 0 of the table is the header, so data rows start at count == 1;
    # enumerate() replaces the original hand-maintained counter.
    for count, row in enumerate(table.find_elements_by_xpath(".//tr")[1:], start=1):
        # Loop variable is named `cell` (not `td`) so it no longer shadows
        # the module-level date object `td`.
        cells = [cell.text for cell in row.find_elements_by_xpath(".//td")]
        if len(cells) < 4:
            continue  # skip spacer/malformed rows instead of raising IndexError
        country_list.append(cells[0])
        cases_list.append(cells[1])
        deaths_list.append(cells[2])
        continent_list.append(cells[3])
        if count <= 10:  # only the 10 worst-affected countries are printed
            print("Country : ", cells[0])
            print("Total Cases : ", cells[1])
            print("Total Deaths : ", cells[2])
            print("-------------------------------------------------------")
finally:
    # Closing Chrome After Extraction of Data — runs even if scraping fails,
    # so a headless Chrome process is never leaked.
    wd.quit()

# Creating a DataFrame (2D-Tabular Data Structure) using the Information Collected
df = pandas.DataFrame(data={"Country": country_list, "Total Cases": cases_list, "Total Deaths": deaths_list, "Continent": continent_list})
# Using to_csv() Function which Dumps the Data from the DataFrame to a CSV File
df.to_csv("./data.csv", sep=',', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# COVID-19 Information Scraper | ||
<!--Remove the below lines and add yours --> | ||
Python Web Scraper which Extracts Information Related to COVID-19 (Number of Cases, Number of Deaths) for Top 10 Affected Countries | ||
|
||
### Prerequisites | ||
<!--Remove the below lines and add yours --> | ||
Selenium (Web Scraping Python Library) | ||
ChromeDriver (Used for Automated Navigation to URLs, which are Provided by Selenium as Input. Download : https://chromedriver.chromium.org/downloads) | ||
Pandas (Data Manipulation Library) | ||
|
||
### How to run the script | ||
<!--Remove the below lines and add yours --> | ||
It is a single python file. Simply run COVIDWebScraper.py | ||
|
||
## *Author Name* | ||
<!--Remove the below lines and add yours --> | ||
@tauseefmohammed2 | ||
https://github.com/tauseefmohammed2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
selenium==3.141.0 | ||
pandas==1.0.1 |