### Import packages

In [230]:
import json
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

pd.options.mode.chained_assignment = None  # default='warn'
np.set_printoptions(suppress=True)
pd.set_option('display.float_format', '{:.2f}'.format)

### Import Statewise Covid19 data from api

In [231]:
#import data for statewise total cases till date 

#https://stackoverflow.com/questions/67642291/i-am-not-getting-any-output-in-web-scraping-using-beautiful-soup-and-python-and
#Current day numbers across districts and states.

import requests
import pandas as pd

response = requests.get("https://api.covid19india.org/v4/min/data.min.json")
if response.status_code == 200:
    current_day_numbers = response.json()
    DATE = []
    STATE = []
    POPULATION = []
    TOTAL_CASES = []
    DECEASED = []
    DISCHAGED = []
    TESTED = []
    VACCINATED_DOSE1 = []
    VACCINATED_DOSE2 = []

    for state in current_day_numbers.keys():
        DATE.append(current_day_numbers[state]["meta"]["date"])
        STATE.append(state)
        POPULATION.append(current_day_numbers[state]["meta"]["population"])
        TOTAL_CASES.append(current_day_numbers[state]["total"]["confirmed"])
        DECEASED.append(current_day_numbers[state]["total"]["deceased"])
        DISCHAGED.append(current_day_numbers[state]["total"]["recovered"])
        TESTED.append(current_day_numbers[state]["total"]["tested"])
        VACCINATED_DOSE1.append(current_day_numbers[state]["total"]["vaccinated1"])
        VACCINATED_DOSE2.append(current_day_numbers[state]["total"]["vaccinated2"])

    Covid19_data = pd.DataFrame(
        {
            "DATE": DATE,
            "STATE/UT": STATE,
            "POPULATION":POPULATION,
            "TOTAL_CASES": TOTAL_CASES,
            "DECEASED": DECEASED,
            "RECOVERED": RECOVERED,
            "TESTED": TESTED,
            "VACCINATED_DOSE1": VACCINATED_DOSE1,
            "VACCINATED_DOSE2": VACCINATED_DOSE2,
        }
    )
    #print(data.head())

else:
    print("Error while calling API: {}".format(response.status_code, response.reason))

### Data Check and manipulation

In [232]:
#Print a sample data
Covid19_data.head()

Unnamed: 0,DATE,STATE/UT,POPULATION,TOTAL_CASES,DECEASED,RECOVERED,TESTED,VACCINATED_DOSE1,VACCINATED_DOSE2
0,2021-07-25,AN,397000,7525,129,7382,431486,192718,86477
1,2021-07-25,AP,52221000,1954765,13256,1919354,24134961,14910383,4655273
2,2021-07-25,AR,1504000,45703,213,41025,898083,653436,159286
3,2021-07-25,AS,34293000,557437,5114,536597,17774829,7958759,1711690
4,2021-07-25,BR,119520000,724390,9637,714223,36305828,18654946,3403520


In [233]:
#Note:
#population: This gives the population of the state (based on NCP projections)

#There is some issue with the population as the sum crosses 2 billion which is not correct
Total_population = Covid19_data['POPULATION'].sum()

print(Total_population) # Coming to 2 billion

2665798000


In [234]:
#Print the structure
Covid19_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   DATE              37 non-null     object
 1   STATE/UT          37 non-null     object
 2   POPULATION        37 non-null     int64 
 3   TOTAL_CASES       37 non-null     int64 
 4   DECEASED          37 non-null     int64 
 5   RECOVERED         37 non-null     int64 
 6   TESTED            37 non-null     int64 
 7   VACCINATED_DOSE1  37 non-null     int64 
 8   VACCINATED_DOSE2  37 non-null     int64 
dtypes: int64(7), object(2)
memory usage: 2.7+ KB


In [235]:
#Changing DATE column format
Covid19_data['DATE']= pd.to_datetime(Covid19_data['DATE'], format = '%Y-%m-%d')

In [236]:
#Change state names from abbrevations to full name
dict_states={'WB': 'West Bengal', 'DL': 'Delhi', 'KL': 'Kerala', 'PB': 'Punjab', 'AP':'Andhra Pradesh', 
             'TN': 'Tamil Nadu', 'KA': 'Karnataka', 'JK': 'Jammu and Kashmir', 'UP': 'Uttar Pradesh', 'MP': 'Madhya Pradesh', 
             'MH': 'Maharashtra', 'BR': 'Bihar', 'HR': 'Haryana', 'OR': 'Orissa', 'RJ': 'Rajasthan', 'GJ': 'Gujarat', 
             'HP': 'Himachal Pradesh', 'AS': 'Assam', 'TG': 'Telangana', 'JH': 'Jharkhand', 'ML': 'Meghalaya', 
             'UT': 'Uttarakhand', 'CH': 'Chandigarh', 'CT': 'Chhattisgarh', 'LA': 'Lakshadweep', 'TR': 'Tripura', 
             'PY': 'Pondicherry', 'GA': 'Goa', 'AR': 'Arunachal Pradesh', 'DN': 'Dadra and Nagar Haveli', 
             'NL': 'Nagaland', 'SK':'Sikkim', 'AN': 'Andaman and Nicobar Islands', 'MN': 'Manipur', 'MZ': 'Mizoram', 
             'LD': 'Lakshadweep'}

Covid19_data = Covid19_data.replace({'STATE/UT':dict_states})

In [237]:
#Print a sample data
Covid19_data.head()

Unnamed: 0,DATE,STATE/UT,POPULATION,TOTAL_CASES,DECEASED,RECOVERED,TESTED,VACCINATED_DOSE1,VACCINATED_DOSE2
0,2021-07-25,Andaman and Nicobar Islands,397000,7525,129,7382,431486,192718,86477
1,2021-07-25,Andhra Pradesh,52221000,1954765,13256,1919354,24134961,14910383,4655273
2,2021-07-25,Arunachal Pradesh,1504000,45703,213,41025,898083,653436,159286
3,2021-07-25,Assam,34293000,557437,5114,536597,17774829,7958759,1711690
4,2021-07-25,Bihar,119520000,724390,9637,714223,36305828,18654946,3403520


In [238]:
#Adding column for Total Active cases
Covid19_data['ACTIVE'] = Covid19_data['TOTAL_CASES'] - Covid19_data['DECEASED'] - Covid19_data['RECOVERED']

In [239]:
#Adding column for Total Vaccinated
Covid19_data['TOTAL_VACCINATION'] = Covid19_data['VACCINATED_DOSE1'] + Covid19_data['VACCINATED_DOSE2']

In [240]:
#Add a row with Total values
Covid19_data.loc['Total']= Covid19_data.sum(numeric_only=True)
print(Covid19_data.loc['Total',:])

DATE                          NaT
STATE/UT                      NaN
POPULATION          2665798000.00
TOTAL_CASES           62814190.00
DECEASED                841862.00
RECOVERED             61080136.00
TESTED               948025394.00
VACCINATED_DOSE1     679031966.00
VACCINATED_DOSE2     183984865.00
ACTIVE                  892192.00
TOTAL_VACCINATION    863016831.00
Name: Total, dtype: object


### Create two dataset
    1) Statewise vaccine dataset
    2) Statewise staus dataset

In [241]:
#Statewise Vaccination data
Covid19_vacination_data = Covid19_data[['STATE/UT', 'POPULATION','TOTAL_VACCINATION', 'VACCINATED_DOSE1', 'VACCINATED_DOSE2']]

In [242]:
Covid19_vacination_data.head()

Unnamed: 0,STATE/UT,POPULATION,TOTAL_VACCINATION,VACCINATED_DOSE1,VACCINATED_DOSE2
0,Andaman and Nicobar Islands,397000.0,279195.0,192718.0,86477.0
1,Andhra Pradesh,52221000.0,19565656.0,14910383.0,4655273.0
2,Arunachal Pradesh,1504000.0,812722.0,653436.0,159286.0
3,Assam,34293000.0,9670449.0,7958759.0,1711690.0
4,Bihar,119520000.0,22058466.0,18654946.0,3403520.0


In [243]:
#Creating percentage columns
Covid19_vacination_data['PERC_VACCINATED'] = (Covid19_vacination_data['TOTAL_VACCINATION']/Covid19_vacination_data['POPULATION'])*100


Covid19_vacination_data['PERC_VACCINATED_DOSE1'] = (Covid19_vacination_data['VACCINATED_DOSE1']/Covid19_vacination_data['POPULATION'])*100


Covid19_vacination_data['PERC_VACCINATED_DOSE2'] = (Covid19_vacination_data['VACCINATED_DOSE2']/Covid19_vacination_data['POPULATION'])*100


In [244]:
Covid19_vacination_data.head()

Unnamed: 0,STATE/UT,POPULATION,TOTAL_VACCINATION,VACCINATED_DOSE1,VACCINATED_DOSE2,PERC_VACCINATED,PERC_VACCINATED_DOSE1,PERC_VACCINATED_DOSE2
0,Andaman and Nicobar Islands,397000.0,279195.0,192718.0,86477.0,70.33,48.54,21.78
1,Andhra Pradesh,52221000.0,19565656.0,14910383.0,4655273.0,37.47,28.55,8.91
2,Arunachal Pradesh,1504000.0,812722.0,653436.0,159286.0,54.04,43.45,10.59
3,Assam,34293000.0,9670449.0,7958759.0,1711690.0,28.2,23.21,4.99
4,Bihar,119520000.0,22058466.0,18654946.0,3403520.0,18.46,15.61,2.85


In [245]:
#Add a row with Total values
print(Covid19_vaacination_data.loc['Total',:])

STATE/UT                          NaN
POPULATION              2665798000.00
TOTAL_VACCINATION        863016831.00
VACCINATED_DOSE1         679031966.00
VACCINATED_DOSE2         183984865.00
PERC_VACCINATED                 32.37
PERC_VACCINATED_DOSE1           25.47
PERC_VACCINATED_DOSE2            6.90
Name: Total, dtype: object


In [246]:
Covid19_data.columns

Index(['DATE', 'STATE/UT', 'POPULATION', 'TOTAL_CASES', 'DECEASED',
       'RECOVERED', 'TESTED', 'VACCINATED_DOSE1', 'VACCINATED_DOSE2', 'ACTIVE',
       'TOTAL_VACCINATION'],
      dtype='object')

In [247]:
#Statewise Status data
Covid19_status_data = Covid19_data[['STATE/UT', 'POPULATION','TOTAL_CASES', 'ACTIVE', 'RECOVERED', 'DECEASED']]

In [248]:
#Creating percentage columns
Covid19_status_data['ACTIVE_RATIO'] = (Covid19_status_data['ACTIVE']/Covid19_status_data['TOTAL_CASES'])*100


Covid19_status_data['RECOVERED_RATIO'] = (Covid19_status_data['RECOVERED']/Covid19_status_data['TOTAL_CASES'])*100


Covid19_status_data['DECEASED_RATIO'] = (Covid19_status_data['DECEASED']/Covid19_status_data['TOTAL_CASES'])*100


In [249]:
Covid19_status_data.head()

Unnamed: 0,STATE/UT,POPULATION,TOTAL_CASES,ACTIVE,RECOVERED,DECEASED,ACTIVE_RATIO,RECOVERED_RATIO,DECEASED_RATIO
0,Andaman and Nicobar Islands,397000.0,7525.0,14.0,7382.0,129.0,0.19,98.1,1.71
1,Andhra Pradesh,52221000.0,1954765.0,22155.0,1919354.0,13256.0,1.13,98.19,0.68
2,Arunachal Pradesh,1504000.0,45703.0,4465.0,41025.0,213.0,9.77,89.76,0.47
3,Assam,34293000.0,557437.0,15726.0,536597.0,5114.0,2.82,96.26,0.92
4,Bihar,119520000.0,724390.0,530.0,714223.0,9637.0,0.07,98.6,1.33


In [250]:
#Add a row with Total values
print(Covid19_status_data.loc['Total',:])

STATE/UT                    NaN
POPULATION        2665798000.00
TOTAL_CASES         62814190.00
ACTIVE                892192.00
RECOVERED           61080136.00
DECEASED              841862.00
ACTIVE_RATIO               1.42
RECOVERED_RATIO           97.24
DECEASED_RATIO             1.34
Name: Total, dtype: object
