In [1]:
#import libraries

import requests
from bs4 import BeautifulSoup
import glob
from datetime import datetime
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import calmap
%matplotlib inline

In [2]:
# Ministry of Health and Family Welfare URL
URL = 'https://www.mohfw.gov.in'

# A timeout keeps the notebook from hanging forever on an unresponsive server,
# and raise_for_status() stops the run on an HTTP error page instead of
# parsing that page as if it were the dashboard.
page = requests.get(URL, timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.content, "html.parser")

# The table of interest is taken to be the LAST <thead>/<tbody> pair on the page.
all_theads = soup.find_all('thead')
all_tbodies = soup.find_all('tbody')
if not all_theads or not all_tbodies:
    # Same exception type the bare [-1] lookup would raise, but with a message
    # that says what actually went wrong.
    raise IndexError('No <thead>/<tbody> table found at ' + URL)

html_thead = all_theads[-1]
html_th = html_thead.find_all('tr')

html_tbody = all_tbodies[-1]
html_text = html_tbody.find_all('tr')

# One list of header-cell texts per header row.
headings = [[cell.text for cell in tr.find_all(['th'])] for tr in html_th]

print(headings)

# One list of data-cell texts per body row (all values are still strings here).
content = [[cell.text for cell in tr.find_all(['td'])] for tr in html_text]

print(content)

[['S. No.', 'Name of State / UT', 'Total Confirmed cases (Including 77 foreign Nationals) ', 'Cured/Discharged/Migrated', 'Death']]
[['1', 'Andaman and Nicobar Islands', '18', '11', '0'], ['2', 'Andhra Pradesh', '813', '120', '24'], ['3', 'Arunachal Pradesh', '1', '1', '0'], ['4', 'Assam', '35', '19', '1'], ['5', 'Bihar', '143', '46', '2'], ['6', 'Chandigarh', '27', '14', '0'], ['7', 'Chhattisgarh', '36', '26', '0'], ['8', 'Delhi', '2248', '724', '48'], ['9', 'Goa', '7', '7', '0'], ['10', 'Gujarat', '2407', '179', '103'], ['11', 'Haryana', '262', '140', '3'], ['12', 'Himachal Pradesh', '40', '18', '1'], ['13', 'Jammu and Kashmir', '407', '92', '5'], ['14', 'Jharkhand', '49', '8', '3'], ['15', 'Karnataka', '427', '131', '17'], ['16', 'Kerala', '438', '323', '3'], ['17', 'Ladakh', '18', '14', '0'], ['18', 'Madhya Pradesh', '1592', '148', '80'], ['19', 'Maharashtra', '5652', '789', '269'], ['20', 'Manipur', '2', '2', '0'], ['21', 'Meghalaya', '12', '0', '1'], ['22', 'Mizoram', '1', '0', '

In [3]:
# Build the per-state frame from the scraped rows, using the first header row
# as column names. The last three scraped rows are excluded
# (presumably totals / footnote rows rather than states -- TODO confirm
# against the live page, since this count is hard-coded).
column_names = headings[0]
state_rows = content[:-3]
data = pd.DataFrame(data=state_rows, columns=column_names)

In [4]:
# Preview the first rows of the freshly built frame.
data.head()

Unnamed: 0,S. No.,Name of State / UT,Total Confirmed cases (Including 77 foreign Nationals),Cured/Discharged/Migrated,Death
0,1,Andaman and Nicobar Islands,18,11,0
1,2,Andhra Pradesh,813,120,24
2,3,Arunachal Pradesh,1,1,0
3,4,Assam,35,19,1
4,5,Bihar,143,46,2


In [5]:
# Remove the serial-number column; the frame's own index already numbers the rows.
data = data.drop(columns=['S. No.'])

In [6]:
# Preview the frame after dropping the 'S. No.' column.
data.head()

Unnamed: 0,Name of State / UT,Total Confirmed cases (Including 77 foreign Nationals),Cured/Discharged/Migrated,Death
0,Andaman and Nicobar Islands,18,11,0
1,Andhra Pradesh,813,120,24
2,Arunachal Pradesh,1,1,0
3,Assam,35,19,1
4,Bihar,143,46,2


In [7]:
# Rename the scraped headers to short, stable column names.
data = data.rename(columns={'Name of State / UT': 'States'})

# The site embeds a changing number in this header, e.g.
# 'Total Confirmed cases (Including 77 foreign Nationals) '.
# A raw string avoids invalid-escape warnings for '\(' and '\)'; '\d+' accepts a
# count of any width; '\s*' tolerates the trailing space being present or absent.
data = data.rename(columns=lambda x: re.sub(
    r'Total Confirmed cases \(Including \d+ foreign Nationals\)\s*',
    'Total Cases',
    x))

data = data.rename(columns={'Cured/Discharged/Migrated': 'Recovered'})

# NOTE(review): the 'Death' column is intentionally left under its scraped name.
# The previous rename here mapped 'Deaths' to 'Deaths' and therefore did nothing;
# the already-saved daily snapshots use the header 'Death', so renaming it now
# would split the column when the snapshots are concatenated.

In [8]:
# Preview the frame with the renamed columns.
data.head()

Unnamed: 0,States,Total Cases,Recovered,Death
0,Andaman and Nicobar Islands,18,11,0
1,Andhra Pradesh,813,120,24
2,Arunachal Pradesh,1,1,0
3,Assam,35,19,1
4,Bihar,143,46,2


In [9]:
# Stamp every row with the date of this run.
# `now` is kept as a notebook-level name: the snapshot file name is built from it later.
now = datetime.now()

# Format to month/day/year text first (this discards the time of day),
# then parse that text back so the column holds datetimes rather than strings.
today_text = now.strftime("%m/%d/%Y")
data['Date'] = today_text
data['Date'] = pd.to_datetime(data['Date'], format='%m/%d/%Y')

data.head()

Unnamed: 0,States,Total Cases,Recovered,Death,Date
0,Andaman and Nicobar Islands,18,11,0,2020-04-23
1,Andhra Pradesh,813,120,24,2020-04-23
2,Arunachal Pradesh,1,1,0,2020-04-23
3,Assam,35,19,1,2020-04-23
4,Bihar,143,46,2,2020-04-23


In [10]:
# Add latitude and longitude columns by looking each state name up in the
# two hand-maintained dictionaries below.
# Keys must match the values in data['States'] exactly; a state that is not
# listed here gets NaN from Series.map.
# NOTE(review): the spelling 'Telengana' presumably mirrors the scraped site -- confirm.
lat = {'Delhi':28.7041, 'Haryana':29.0588, 'Kerala':10.8505, 'Rajasthan':27.0238,
       'Telengana':18.1124, 'Uttar Pradesh':26.8467, 'Ladakh':34.2996, 'Tamil Nadu':11.1271,
       'Andhra Pradesh':15.9129, 'Odisha':20.9517, 'Uttarakhand':30.0668, 'West Bengal':22.9868, 
       'Puducherry': 11.9416, 'Chandigarh': 30.7333, 'Chhattisgarh':21.2787, 
       'Jammu and Kashmir':33.7782, 'Punjab':31.1471, 'Karnataka':15.3173, 
       'Maharashtra':19.7515, 'Gujarat': 22.2587, 
       'Himachal Pradesh': 31.1048, 'Madhya Pradesh': 22.9734, 'Bihar': 25.0961, 'Manipur':24.6637, 
       'Mizoram':23.1645, 'Goa': 15.2993, 'Andaman and Nicobar Islands': 11.7401, 'Assam' : 26.2006, 
       'Jharkhand': 23.6102, 'Arunachal Pradesh': 28.2180, 'Tripura': 23.9408, 'Nagaland': 26.1584, 
       'Meghalaya' : 25.4670}

# NOTE(review): several longitudes below are identical across different states
# (Maharashtra/Karnataka 75.7139, Uttarakhand/Telengana 79.0193,
# Madhya Pradesh/Tamil Nadu 78.6569, Assam/Mizoram 92.9376) -- this looks like
# copy-paste; verify these values before using them for mapping.
long = {'Delhi':77.1025, 'Haryana':76.0856, 'Kerala':76.2711, 'Rajasthan':74.2179,
        'Telengana':79.0193, 'Uttar Pradesh':80.9462, 'Ladakh':78.2932, 'Tamil Nadu':78.6569,
        'Andhra Pradesh':79.7400, 'Odisha':85.0985, 'Uttarakhand':79.0193, 'West Bengal':87.8550, 
        'Puducherry': 79.8083, 'Chandigarh': 76.7794, 'Chhattisgarh':81.8661,
        'Jammu and Kashmir':76.5762, 'Punjab':75.3412, 'Karnataka':75.7139, 
        'Maharashtra':75.7139, 'Gujarat': 71.1924, 
        'Himachal Pradesh': 77.1734, 'Madhya Pradesh': 78.6569, 'Bihar': 85.3131, 'Manipur':93.9063, 
        'Mizoram':92.9376, 'Goa': 74.1240, 'Andaman and Nicobar Islands': 92.6586, 'Assam' : 92.9376, 
        'Jharkhand': 85.2799, 'Arunachal Pradesh': 94.7278, 'Tripura': 91.9882, 'Nagaland': 94.5624,
        'Meghalaya' : 91.3662}

# Look up each row's state name in the dictionaries above.
data['Latitude'] = data['States'].map(lat)
data['Longitude'] = data['States'].map(long)

In [11]:
# Preview the frame with the Latitude / Longitude columns added.
data.head()

Unnamed: 0,States,Total Cases,Recovered,Death,Date,Latitude,Longitude
0,Andaman and Nicobar Islands,18,11,0,2020-04-23,11.7401,92.6586
1,Andhra Pradesh,813,120,24,2020-04-23,15.9129,79.74
2,Arunachal Pradesh,1,1,0,2020-04-23,28.218,94.7278
3,Assam,35,19,1,2020-04-23,26.2006,92.9376
4,Bihar,143,46,2,2020-04-23,25.0961,85.3131


In [12]:
# Folder that accumulates one CSV snapshot per run date.
my_folder = './.daily_update/'

# exist_ok=True makes this cell safe to re-run and removes the gap between
# checking for the folder and creating it.
os.makedirs(my_folder, exist_ok=True)

# Snapshot file is named after the run date (`now` is set in the cell that
# adds the Date column), e.g. './.daily_update/2020_04_23.csv'.
file_name = my_folder + now.strftime("%Y_%m_%d") + '.csv'

In [13]:
# Save today's snapshot without the index column.
# The file name contains only the date, so a second run on the same day
# writes to the same path.
data.to_csv(file_name, index=False)

In [14]:
# Find every CSV snapshot in the daily folder and read each into its own frame.
csv_files = glob.glob(my_folder + '*.csv')
all_data = [pd.read_csv(csv_path) for csv_path in csv_files]
    

In [15]:
# Stack all daily snapshots into a single frame.
final_data = pd.concat(all_data, ignore_index=True)

# Parse the dates first, then sort once. The earlier version also sorted on the
# unparsed date strings before this step; that pass was redundant because the
# frame is fully re-sorted by (Date, States) below.
final_data['Date'] = pd.to_datetime(final_data['Date'])
final_data = final_data.sort_values(['Date', 'States']).reset_index(drop=True)


In [16]:
# Display the combined history, ordered by Date and then States.
final_data

Unnamed: 0,States,Total Cases,Recovered,Death,Date,Latitude,Longitude
0,Andaman and Nicobar Islands,16,11,0.0,2020-04-21,11.7401,92.6586
1,Andhra Pradesh,757,96,22.0,2020-04-21,15.9129,79.7400
2,Arunachal Pradesh,1,1,0.0,2020-04-21,28.2180,94.7278
3,Assam,35,19,1.0,2020-04-21,26.2006,92.9376
4,Bihar,114,42,2.0,2020-04-21,25.0961,85.3131
5,Chandigarh,26,13,0.0,2020-04-21,30.7333,76.7794
6,Chhattisgarh,36,25,0.0,2020-04-21,21.2787,81.8661
7,Delhi,2081,431,47.0,2020-04-21,28.7041,77.1025
8,Goa,7,7,0.0,2020-04-21,15.2993,74.1240
9,Gujarat,2066,131,77.0,2020-04-21,22.2587,71.1924


In [17]:
# Write the combined history to COVID19INDIA.csv in the working directory,
# without the index column.
final_data.to_csv('COVID19INDIA.csv', index=False)