<a href="https://colab.research.google.com/github/Akashkunwar/dscodes/blob/master/cases_vaccination.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
## Retreving Data
# Assigning Github URLs
cases_url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
deaths_url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
recovered_url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"

# Reading URLs through Pandas liberary
total_cases = pd.read_csv(cases_url)
total_deaths = pd.read_csv(deaths_url)
total_recovered = pd.read_csv(recovered_url)

## Cleaning Data
# Extreacting and storing column dates in dates
dates = total_cases.columns[4:]

# Reshaping total cases column from wide shape to df shape
total_cases_df = total_cases.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Confirmed'
)
# Reshaping total deaths column from wide shape to df shape
total_deaths_df = total_deaths.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Deaths'
)
# Reshaping total recovered column from wide shape to df shape
total_recovered_df = total_recovered.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Recovered'
)

# Merging total_cases_df and total_deaths_df
covid_df = total_cases_df.merge(
  right=total_deaths_df, 
  how='left',
  on=['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
)
# Merging covid_df and total_recovered_df
covid_df = covid_df.merge(
  right=total_recovered_df, 
  how='left',
  on=['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
)

# Converting Date to proper datetime format
covid_df["Date"] = pd.to_datetime(covid_df["Date"])

# Inserting converted date to full table
covid_df = covid_df.sort_values(['Country/Region', 'Date'])
#covid_df = covid_df.sort_values(['Country/Region', 'Date'], ascending=[True, True])
# Reseting Index
# covid_df = covid_df.reset_index()

# Copying covid_df to covid_df
#covid_df = covid_df.copy()

# Renaming column names
covid_df = covid_df.rename(columns={'Country/Region': 'Country',
                                    'Province/State':'Province',
                                    'Confirmed': 'Total cases',
                                    'Deaths': 'Total deaths',
                                    'Recovered':'Total recovered'})

# Filling NaN value of provinve with country name and storing it in coiuntry column
covid_df["Province"] = covid_df["Province"].fillna(covid_df["Country"])
covid_df["Country"] = covid_df["Province"]

# Adding New cases, New deaths, New recovered to covid_df
covid_df["New cases"] = covid_df.groupby('Country')['Total cases'].diff().fillna(0)
covid_df["New deaths"] = covid_df.groupby('Country')['Total deaths'].diff().fillna(0)
covid_df["New recovered"] = covid_df.groupby('Country')['Total recovered'].diff().fillna(0)

# Converting float value to int in covid_df columns
covid_df["Total recovered"] = covid_df["Total recovered"].fillna(0)
covid_df["Total recovered"] = covid_df["Total recovered"].astype(int).fillna(0)
covid_df["New cases"] = covid_df["New cases"].astype(int)
covid_df["New deaths"] = covid_df["New deaths"].astype(int)
covid_df["New recovered"] = covid_df["New recovered"].astype(int)

# Arranging Columns
covid_df = covid_df[['Date', 'Province', 'Country','Lat','Long',
 'Total cases', 'Total deaths','Total recovered', 'New cases','New deaths','New recovered']]

# Removind unnecessary columns
covid_df = covid_df.drop(['Province', 'Lat', 'Long'],axis=1)

# Reseting jumbled index in order
covid_df = covid_df.reset_index(drop=True)

covid_df

Unnamed: 0,Date,Country,Total cases,Total deaths,Total recovered,New cases,New deaths,New recovered
0,2020-01-22,Afghanistan,0,0,0,0,0,0
1,2020-01-23,Afghanistan,0,0,0,0,0,0
2,2020-01-24,Afghanistan,0,0,0,0,0,0
3,2020-01-25,Afghanistan,0,0,0,0,0,0
4,2020-01-26,Afghanistan,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
228899,2022-04-02,Zimbabwe,246481,5446,0,67,2,0
228900,2022-04-03,Zimbabwe,246525,5446,0,44,0,0
228901,2022-04-04,Zimbabwe,246612,5451,0,87,5,0
228902,2022-04-05,Zimbabwe,246744,5451,0,132,0,0


In [18]:
print(covid_df.dtypes)
print(vaccine_df.dtypes)

Date               datetime64[ns]
Country                    object
Total cases                 int64
Total deaths                int64
Total recovered             int64
New cases                   int64
New deaths                  int64
New recovered               int64
dtype: object
Date                       datetime64[ns]
Country                            object
Fully vaccinated                    int64
Total vaccinations                  int64
Total boosters                      int64
Daily vaccinations                  int64
Daily people vaccinated             int64
dtype: object


In [4]:
# total_caseshttps://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv

In [19]:
vaccine_data = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv')
vaccine_data['date']=pd.to_datetime(vaccine_data.date)
#Date format change
vaccine_data['date'] = pd.to_datetime(vaccine_data['date'],format='%y-%m-%d').dt.date
#Dropping uncecessary data
vaccine_data.drop(['total_vaccinations_per_hundred',
                   'iso_code',
                   'people_vaccinated_per_hundred',
                   'people_fully_vaccinated_per_hundred',
                   'daily_vaccinations_per_million',
                   'daily_vaccinations_raw',
                   'total_vaccinations',
                   'people_vaccinated',
                   'daily_people_vaccinated_per_hundred',
                   'total_boosters_per_hundred'], inplace=True, axis=1)

#Rename vaccines to vaccine_name
vaccine_data.rename(columns = {'vaccines':'vaccine_name'}, inplace = True) 

#Adding total vaccination column
Total_vaccinations = vaccine_data.groupby('location')['daily_vaccinations'].cumsum()
vaccine_data.insert(5,'Total_vaccinations',Total_vaccinations)

#Renaming columns
vaccine_df = vaccine_data.rename(columns={'location': 'Country',
                                    'date':'Date',
                                    'people_fully_vaccinated': 'Fully vaccinated',
                                    'total_boosters': 'Total boosters',
                                    'Total_vaccinations':'Total vaccinations',
                                    'daily_vaccinations': 'Daily vaccinations',
                                    'daily_people_vaccinated': 'Daily people vaccinated'})
#Arranging columns
vaccine_df = vaccine_df[['Date',
                         'Country',
                         "Fully vaccinated",
                         'Total vaccinations',
                         'Total boosters',
                         'Daily vaccinations',
                         'Daily people vaccinated']]

# Converting Date to proper datetime format
vaccine_df["Date"] = pd.to_datetime(vaccine_df["Date"])
vaccine_df = vaccine_df.fillna(0)
vaccine_df = vaccine_df.astype({"Fully vaccinated":'int',
                                'Total vaccinations':'int',
                                'Total boosters':'int',
                                'Daily vaccinations':'int',
                                'Daily people vaccinated':'int'})
vaccine_df

Unnamed: 0,Date,Country,Fully vaccinated,Total vaccinations,Total boosters,Daily vaccinations,Daily people vaccinated
0,2021-02-22,Afghanistan,0,0,0,0,0
1,2021-02-23,Afghanistan,0,1367,0,1367,1367
2,2021-02-24,Afghanistan,0,2734,0,1367,1367
3,2021-02-25,Afghanistan,0,4101,0,1367,1367
4,2021-02-26,Afghanistan,0,5468,0,1367,1367
...,...,...,...,...,...,...,...
93704,2022-04-01,Zimbabwe,3543725,9096280,564632,112793,79751
93705,2022-04-02,Zimbabwe,3553083,9211822,591761,115542,81348
93706,2022-04-03,Zimbabwe,3561400,9333169,615286,121347,85662
93707,2022-04-04,Zimbabwe,3570255,9452575,637510,119406,83858


In [22]:
vaccine_df[vaccine_df.Country=='India']

Unnamed: 0,Date,Country,Fully vaccinated,Total vaccinations,Total boosters,Daily vaccinations,Daily people vaccinated
38346,2021-01-15,India,0,0,0,0,0
38347,2021-01-16,India,0,191181,0,191181,191181
38348,2021-01-17,India,0,303331,0,112150,112150
38349,2021-01-18,India,0,454681,0,151350,151350
38350,2021-01-19,India,0,623390,0,168709,168709
...,...,...,...,...,...,...,...
38788,2022-04-02,India,0,1838646862,0,2036805,1048276
38789,2022-04-03,India,0,1840707454,0,2060592,1048678
38790,2022-04-04,India,833703203,1842701858,22260364,1994404,997999
38791,2022-04-05,India,834507741,1844730256,22421880,2028398,990240


In [21]:
vaccine_data.vaccine_name

AttributeError: ignored