In [1]:
#IMPORTING REQUIRED MODULES

import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import math
import numpy as np

# Load the district-wise vaccination data from the working directory.
vaccine_csv = r'cowin_vaccine_data_districtwise.csv'
df = pd.read_csv(vaccine_csv)

In [2]:
# Load the census data from the working directory.
census_csv = r'DDW_PCA0000_2011_Indiastatedist.csv'
censustotal = pd.read_csv(census_csv)

# Keep six columns, selected by position, and drop everything else.
# NOTE(review): presumably these are Level, Name, TRU, <one more>, TOT_M/TOT_F
# etc. as referenced by label further down -- verify against the file header.
keep_positions = [6, 7, 8, 10, 11, 12]
censustotal = censustotal.iloc[:, keep_positions]

In [3]:
# Lookup table: census state name (upper case) -> state id.
# 'DAMAN & DIU' and 'DADRA & NAGAR HAVELI' deliberately share the id 'DN'.
statechanges = {
    'JAMMU & KASHMIR': 'JK',
    'HIMACHAL PRADESH': 'HP',
    'PUNJAB': 'PB',
    'CHANDIGARH': 'CH',
    'UTTARAKHAND': 'UT',
    'HARYANA': 'HR',
    'NCT OF DELHI': 'DL',
    'RAJASTHAN': 'RJ',
    'UTTAR PRADESH': 'UP',
    'BIHAR': 'BR',
    'SIKKIM': 'SK',
    'ARUNACHAL PRADESH': 'AR',
    'NAGALAND': 'NL',
    'MANIPUR': 'MN',
    'MIZORAM': 'MZ',
    'TRIPURA': 'TR',
    'MEGHALAYA': 'ML',
    'ASSAM': 'AS',
    'WEST BENGAL': 'WB',
    'JHARKHAND': 'JH',
    'ODISHA': 'OR',
    'CHHATTISGARH': 'CT',
    'MADHYA PRADESH': 'MP',
    'GUJARAT': 'GJ',
    'DAMAN & DIU': 'DN',
    'DADRA & NAGAR HAVELI': 'DN',
    'MAHARASHTRA': 'MH',
    'ANDHRA PRADESH': 'AP',
    'KARNATAKA': 'KA',
    'GOA': 'GA',
    'LAKSHADWEEP': 'LD',
    'KERALA': 'KL',
    'TAMIL NADU': 'TN',
    'PUDUCHERRY': 'PY',
    'ANDAMAN & NICOBAR ISLANDS': 'AN',
}

# Replace state names in the second census column with their state ids.
# A single vectorised replace does the work of the former nested Python loop
# (rows x dictionary keys, one .iloc write per match). Values that are not
# keys of `statechanges` are left untouched, exactly as before.
name_col = censustotal.columns[1]
censustotal[name_col] = censustotal[name_col].replace(statechanges)

In [4]:
# Keep only the rows whose 'TRU' value is 'Total', then narrow those down to
# the state-level rows.
censustotal = censustotal[censustotal['TRU'] == 'Total']
census = censustotal[censustotal['Level'] == 'STATE']

# The census predates the merger of 'DAMAN & DIU' and 'DADRA & NAGAR HAVELI';
# both were mapped to the id 'DN' above. Drop those rows and add one combined
# 'DN' row instead.
# NOTE(review): the hard-coded totals are presumably the sums of the two
# dropped rows -- verify against the census file.
dn_combined = pd.DataFrame([{
    'Level': 'STATE',
    'Name': 'DN',
    'TRU': 'Total',
    'TOT_P': 586956,
    'TOT_M': 344061,
    'TOT_F': 242895,
}])

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way
# to add rows and also works on older pandas versions.
census = pd.concat([census[census['Name'] != 'DN'], dn_combined], ignore_index=True)

In [5]:
# The census predates the split of Telangana from Andhra Pradesh and of Ladakh
# from Jammu & Kashmir, so the vaccination data is folded back to match it:
# state id 'TG' becomes 'AP' and 'LA' becomes 'JK'.
# One vectorised replace on the second column does the work of the former
# row-by-row .iloc loop; all other values are left unchanged.
state_col = df.columns[1]
df[state_col] = df[state_col].replace({'LA': 'JK', 'TG': 'AP'})

In [6]:
# Positions of the vaccination columns to keep: the state id plus two dose-1
# columns. Per the original note these are the dose-1 figures for the
# last-but-one week and the last week, used to derive the weekly rate.
# NOTE(review): 2039 and 2109 are tied to one specific CSV layout -- confirm
# them whenever the input file is refreshed.
need = [1, 2039, 2109]

# Vaccination data restricted to those columns.
dfreq = df.iloc[:, need]

# Census data restricted to columns 1 and 3 (used later as the merge key
# 'Name' and as the population figure).
censusreq = census.iloc[:, [1, 3]]

In [7]:
# Drop the first row: it holds metadata rather than values.
dfreq = dfreq.iloc[1:]

# Give the three remaining columns short, stable names.
dfreq.columns = ['State_Code', '1dose1', '2dose1']

In [8]:
# Convert both dose columns to float in one vectorised step. The former loop
# converted cell by cell through .iloc, which is slow and leaves the columns
# with object dtype; astype(float) applies the same float() conversion to
# every value and yields proper float64 columns.
dose_cols = ['1dose1', '2dose1']
dfreq[dose_cols] = dfreq[dose_cols].astype(float)

In [9]:
# Group the vaccination rows by state id.
# The key is passed as a plain label, not a one-element list: on pandas >= 2.0
# iterating a groupby built from a length-1 list yields 1-tuples such as
# ('AP',) as group keys, which would not match the plain state ids in the
# census data when the two frames are merged.
gdf = dfreq.groupby('State_Code')

In [10]:
# Build one record per state: the state id and the sum of each dose column
# over that state's rows.
# Records are collected in a list and turned into a DataFrame once; the former
# row-by-row DataFrame.append was quadratic and no longer exists in
# pandas >= 2.0.
dose_totals = []
for group_key, group_rows in gdf:
    # Depending on the pandas version and on how the groupby was created, the
    # key is either the bare state id or a 1-tuple wrapping it; unwrap so that
    # 'stateid' always holds the plain id.
    state_id = group_key[0] if isinstance(group_key, tuple) else group_key
    dose_totals.append({
        'stateid': state_id,
        '1dose1': sum(group_rows['1dose1']),
        '2dose1': sum(group_rows['2dose1']),
    })

# Passing `columns` keeps the expected layout even when there are no groups.
finaldfreq = pd.DataFrame(dose_totals, columns=['stateid', '1dose1', '2dose1'])

In [11]:
# Join census and vaccination totals on the state id (census 'Name' against
# vaccination 'stateid'). The default inner join keeps only states present in
# both frames.
merged = censusreq.merge(finaldfreq, left_on='Name', right_on='stateid')

In [12]:
# Daily vaccination rate per state: the difference between the two dose-1
# columns (positions 4 and 3 of `merged`) spread over 7 days, rounded up.
rov = [
    math.ceil((float(later) - float(earlier)) / 7)
    for earlier, later in zip(merged.iloc[:, 3], merged.iloc[:, 4])
]

In [13]:
# Population still to be vaccinated per state: census population (position 1)
# minus the latest dose-1 figure (position 4).
# The result is clamped at 0; a negative value can occur because the census
# data is old.
pl = [
    max(int(population) - int(vaccinated), 0)
    for population, vaccinated in zip(merged.iloc[:, 1], merged.iloc[:, 4])
]

In [14]:
# Attach the two lists as new columns. The order matters: later cells read
# them by position as the last two columns of `merged`.
merged = merged.assign(populationleft=pl, rateofvaccination=rov)

In [15]:
# Days each state needs to vaccinate its remaining population:
# population left (second-to-last column) divided by the daily rate
# (last column), rounded up.
# NOTE(review): a rate of 0 is not handled here and makes this step fail;
# a negative rate produces a negative day count.
days = [
    math.ceil(merged.iloc[row, -2] / merged.iloc[row, -1])
    for row in range(len(merged))
]

In [16]:

# Reference date to which the per-state day counts are added.
startdate = np.datetime64('2021-08-14')

In [17]:
# Turn each day count into a calendar date by offsetting it from `startdate`.
dates = [startdate + np.timedelta64(day_count, 'D') for day_count in days]

In [18]:
# Attach the projected completion dates as the 'date' column.
merged = merged.assign(date=dates)

In [19]:
# Keep only the output columns. They are selected by label, not by the
# positions 2, 5, 6 and 7 they occupy after the preceding steps, so the
# selection does not depend on column order and the cell can be re-run
# without failing.
output_columns = ['stateid', 'populationleft', 'rateofvaccination', 'date']
merged = merged[output_columns]

In [20]:
# Order the rows by state id (ascending).
merged = merged.sort_values('stateid')

In [21]:
# Write the result to a CSV file in the working directory, without the index.
output_csv = 'complete-vaccination.csv'
merged.to_csv(output_csv, index=False)