## Web scraping data about CoronaVirus cases

### Import required libraries

In [1]:
from time import sleep

import lxml.html as lh
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

### Getting the page

In [5]:
# Download the Worldometers coronavirus page and parse it with BeautifulSoup.
url = 'https://www.worldometers.info/coronavirus/'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Raw bytes of the page; reused by the lxml-based table parsing further down.
page = response.content
soup = BeautifulSoup(page, 'html.parser')

# Show the first 25 characters of the page title as a quick sanity check.
title = soup.title.text
print(title[:25])

Coronavirus Update (Live)


### Taking the total number of cases from the page

In [6]:
# Take the first two <div class="col-md-6"> panels of the page; the first is
# treated as the active-cases panel and the second as the closed-cases panel.
panels = soup.find_all('div', class_='col-md-6', limit=2)

# Each panel's headline figure is the text of its "number-table-main" <div>.
totActive = panels[0].find('div', class_='number-table-main').text
totClosed = panels[1].find('div', class_='number-table-main').text

print(f"Total active Cases = {totActive}\nTotal closed Cases = {totClosed}")

Total active Cases = 1,062,464
Total closed Cases = 393,491


### Scraping the table into a DataFrame

In [7]:
# Parse the downloaded HTML and collect every <tr> element in the document.
doc = lh.fromstring(page)
tr_elements = doc.xpath('//tr')
if not tr_elements:
    raise ValueError("No <tr> elements found in the downloaded page")

# The first <tr> is treated as the header row: build one (name, values) pair
# per header cell; the value lists are filled column by column below.
col = [(header_cell.text_content(), []) for header_cell in tr_elements[0]]
n_cols = len(col)

# Data starts on the second row. Rows are read until one does not have the
# same number of cells as the header, which is taken to mean the row is not
# part of the table.
for row in tr_elements[1:]:
    if len(row) != n_cols:
        break

    for idx, cell in enumerate(row.iterchildren()):
        data = cell.text_content()
        # Leave the first column as text; for every other column try an
        # integer conversion. Text that int() rejects (for example values with
        # thousands separators such as "1,234", or empty cells) stays a string.
        if idx > 0:
            try:
                data = int(data)
            except ValueError:
                pass
        col[idx][1].append(data)

# Build the frame from {column name: values}.
table_data = {name: values for (name, values) in col}
df1 = pd.DataFrame(table_data)

# Keep the columns at positions 0, 1, 3, 6 and 7 (in the recorded run these
# are Country,Other / TotalCases / TotalDeaths / ActiveCases / Serious,Critical).
# .copy() makes df1 an independent frame so later in-place assignments are safe.
df1 = df1.iloc[:, [0, 1, 3, 6, 7]].copy()

print(df1.shape)
print()
print(df1.head(5))

(427, 5)

  Country,Other TotalCases TotalDeaths ActiveCases Serious,Critical
0         World  1,455,955      83,666   1,062,464           48,074
1           USA    404,156     12,988      369,353            9,220
2         Spain    146,690     14,555       84,114            7,069
3         Italy    135,586     17,127       94,067            3,792
4       Germany    109,329      2,096       71,152            4,895


### Cleaning numeric data 

In [8]:
# Remove the thousands separators from the string cells of the first 300 rows.
# NOTE(review): the bound 300 is hard-coded; presumably it just needs to cover
# the rows used below - confirm.
df1.iloc[0:300, :] = df1.iloc[0:300, :].replace({',': ''}, regex=True)

# Keep the first 201 rows and make TotalCases a real integer column so it can
# be sorted numerically.
# NOTE(review): 201 is hard-coded; presumably the row count of the first table
# on the page at the time of writing - confirm before re-running.
df2 = df1.iloc[0:201].astype({'TotalCases': 'int64'})

### Sort the values by Total Cases

In [9]:
# Order the rows from the largest to the smallest TotalCases value, then
# preview the top of the sorted frame.
sortedDF = df2.sort_values(by='TotalCases', ascending=False)
sortedDF.head()

Unnamed: 0,"Country,Other",TotalCases,TotalDeaths,ActiveCases,"Serious,Critical"
0,World,1455955,83666,1062464,48074
1,USA,404156,12988,369353,9220
2,Spain,146690,14555,84114,7069
3,Italy,135586,17127,94067,3792
4,Germany,109329,2096,71152,4895


### Format the message to be sent

In [33]:
# Build the text of the update: a fixed header with the two page-level totals,
# followed by one block per row for the first five rows of sortedDF.
divider = "\n------------------------"

# Labels for columns 1..4 of sortedDF, in column order.
field_labels = (
    "Total cases : ",
    "Total deaths: ",
    "Active cases: ",
    "Critical cases: ",
)

parts = [
    "COVID-19 CORONAVIRUS PANDEMIC UPDATE",
    "\nSource: https://www.worldometers.info/coronavirus/",
    "\n \nTotal Active Cases= ",
    str(totActive),
    "\n \nTotal Closed Cases= ",
    str(totClosed),
    "\n \n \nCountry Wise: Top 5",
    divider,
]

for rank in range(5):
    # Column 0 holds the country name; columns 1..4 hold the figures.
    parts.append("\nCountry: " + str(sortedDF.iloc[rank, 0]))
    for offset, label in enumerate(field_labels, start=1):
        parts.append("\n" + label + str(sortedDF.iloc[rank, offset]))
    parts.append(divider)

msg = "".join(parts)

# Preview only the first 320 characters of the message.
print(msg[:320])

COVID-19 CORONAVIRUS PANDEMIC UPDATE
Source: https://www.worldometers.info/coronavirus/
 
Total Active Cases= 1,062,464
 
Total Closed Cases= 393,491
 
 
Country Wise: Top 5
------------------------
Country: World
Total cases : 1455955
Total deaths: 83666
Active cases: 1062464
Critical cases: 48074
--------------------


In [37]:
###### Code to send "msg" in Whatsapp using Selenium WebDriver ########

# Start a Chrome session and open WhatsApp Web.
driver = webdriver.Chrome()
url = 'https://web.whatsapp.com/'
driver.get(url)

# Pause until the user confirms that the QR code has been scanned.
input('Enter after Scanning QR')

# Open the chat whose <span> title equals `name`.
# find_element(By..., ...) is used because the find_element_by_* helpers were
# removed in Selenium 4.3; this form also works on older Selenium releases.
name = 'Saved'
user = driver.find_element(By.XPATH, '//span[@title = "{}"]'.format(name))
user.click()

# Type the message into the input box and click the send button.
# NOTE(review): '_1Plpp' and '_35EW6' are hard-coded class names taken from
# the page at the time of writing; they look auto-generated and may no longer
# match - confirm against the current WhatsApp Web markup.
msg_box = driver.find_element(By.CLASS_NAME, '_1Plpp')
msg_box.send_keys(msg)
button = driver.find_element(By.CLASS_NAME, '_35EW6')
button.click()

print("------Message SENT------")

Enter after Scanning QR
------Message SENT------
