<a href="https://colab.research.google.com/github/Akashkunwar/dscodes/blob/master/cases_vaccination.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
###COVID DATA
## Retreving Data
# Assigning Github URLs
cases_url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
deaths_url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
# recovered_url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"

# Reading URLs through Pandas liberary
total_cases = pd.read_csv(cases_url)
total_deaths = pd.read_csv(deaths_url)
# total_recovered = pd.read_csv(recovered_url)

## Cleaning Data
# Extreacting and storing column dates in dates
dates = total_cases.columns[4:]

# Reshaping total cases column from wide shape to df shape
total_cases_df = total_cases.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Confirmed'
    )

# Reshaping total deaths column from wide shape to df shape
total_deaths_df = total_deaths.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Deaths'
    )

# # Reshaping total recovered column from wide shape to df shape
# total_recovered_df = total_recovered.melt(
#     id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
#     value_vars=dates, 
#     var_name='Date', 
#     value_name='Recovered'
# )

# Merging total_cases_df and total_deaths_df
covid_df = total_cases_df.merge(
  right=total_deaths_df, 
  how='left',
  on=['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
)

# # Merging covid_df and total_recovered_df
# covid_df = covid_df.merge(
#   right=total_recovered_df, 
#   how='left',
#   on=['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
# )

# Converting Date to proper datetime format
covid_df["Date"] = pd.to_datetime(covid_df["Date"])

# Inserting converted date to full table
covid_df = covid_df.sort_values(['Country/Region', 'Date'])
#covid_df = covid_df.sort_values(['Country/Region', 'Date'], ascending=[True, True])
# Reseting Index
# covid_df = covid_df.reset_index()

# Copying covid_df to covid_df
#covid_df = covid_df.copy()

# Renaming column names
covid_df = covid_df.rename(columns={'Country/Region': 'country',
                                    'Province/State':'province',
                                    'Confirmed': 'total_cases',
                                    'Deaths': 'total_deaths',
                                    'Date':'date'})

# Filling NaN value of provinve with country name and storing it in coiuntry column
covid_df["province"] = covid_df["province"].fillna(covid_df["country"])
covid_df["country"] = covid_df["province"]

# Adding New cases, New deaths, New recovered to covid_df
covid_df["new_cases"] = covid_df.groupby('country')['total_cases'].diff().fillna(0)
covid_df["new_deaths"] = covid_df.groupby('country')['total_deaths'].diff().fillna(0)
# covid_df["New recovered"] = covid_df.groupby('country')['Total recovered'].diff().fillna(0)

# Converting float value to int in covid_df columns
# covid_df["Total recovered"] = covid_df["Total recovered"].fillna(0)
# covid_df["Total recovered"] = covid_df["Total recovered"].astype(int).fillna(0)
covid_df["new_cases"] = covid_df["new_cases"].astype(int)
covid_df["new_deaths"] = covid_df["new_deaths"].astype(int)
# covid_df["New recovered"] = covid_df["New recovered"].astype(int)

# Arranging Columns
covid_df = covid_df[['date', 'province', 'country','Lat','Long',
 'total_cases', 'total_deaths', 'new_cases','new_deaths']]

# Removind unnecessary columns
covid_df = covid_df.drop(['province', 'Lat', 'Long'],axis=1)

covid_df.country = covid_df.country.str.lower().str.strip()

# Reseting jumbled index in order
covid_df = covid_df.reset_index(drop=True)

In [3]:
covid_df

Unnamed: 0,date,country,total_cases,total_deaths,new_cases,new_deaths
0,2020-01-22,afghanistan,0,0,0,0
1,2020-01-23,afghanistan,0,0,0,0
2,2020-01-24,afghanistan,0,0,0,0
3,2020-01-25,afghanistan,0,0,0,0
4,2020-01-26,afghanistan,0,0,0,0
...,...,...,...,...,...,...
255355,2022-07-01,zimbabwe,255586,5555,0,0
255356,2022-07-02,zimbabwe,255586,5555,0,0
255357,2022-07-03,zimbabwe,255586,5555,0,0
255358,2022-07-04,zimbabwe,255726,5558,140,3


In [4]:
###VACCINE DATA
## Retreving Data
# Assigning Github URLs
vaccine_data = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv')
vaccine_data['date']=pd.to_datetime(vaccine_data.date)

#Date format change
vaccine_data['date'] = pd.to_datetime(vaccine_data['date'],format='%y-%m-%d').dt.date

#Dropping uncecessary data
vaccine_data.drop(['total_vaccinations_per_hundred',
                   'iso_code',
                   'people_vaccinated_per_hundred',
                   'people_fully_vaccinated_per_hundred',
                   'daily_vaccinations_per_million',
                   'daily_vaccinations_raw',
                   'total_vaccinations',
                   'people_vaccinated',
                   'daily_people_vaccinated_per_hundred',
                   'total_boosters_per_hundred'], inplace=True, axis=1)

#Rename vaccines to vaccine_name
# vaccine_data.rename(columns = {'vaccines':'vaccine_name'}, inplace = True) 

#Adding total vaccination column
Total_vaccinations = vaccine_data.groupby('location')['daily_vaccinations'].cumsum()
vaccine_data.insert(5,'Total_vaccinations',Total_vaccinations)

#Renaming columns
vaccine_df = vaccine_data.rename(columns={'location': 'country',
                                    # 'date':'Date',
                                    'people_fully_vaccinated': 'fully_vaccinated',
                                    # 'total_boosters': 'Total boosters',
                                    'Total_vaccinations':'total_vaccinations',
                                    # 'daily_vaccinations': 'Daily vaccinations',
                                    # 'daily_people_vaccinated': 'Daily people vaccinated'})
                                    })
#Arranging columns
vaccine_df = vaccine_df[['date',
                         'country',
                         "fully_vaccinated",
                         'total_vaccinations',
                         'total_boosters',
                         'daily_vaccinations',
                         'daily_people_vaccinated']]

# Converting date to proper datetime format
vaccine_df["date"] = pd.to_datetime(vaccine_df["date"])
vaccine_df = vaccine_df.fillna(0)
vaccine_df = vaccine_df.astype({"fully_vaccinated":'int',
                                'total_vaccinations':'int',
                                'total_boosters':'int',
                                'daily_vaccinations':'int',
                                'daily_people_vaccinated':'int'})

vaccine_df.country = vaccine_df.country.str.lower().str.strip()

#Removing unnecessary rows
# vaccine_df = vaccine_df.set_index("Country").drop("World")
# vaccine_df.reset_index(inplace=True)

In [5]:
vaccine_df

Unnamed: 0,date,country,fully_vaccinated,total_vaccinations,total_boosters,daily_vaccinations,daily_people_vaccinated
0,2021-02-22,afghanistan,0,0,0,0,0
1,2021-02-23,afghanistan,0,1367,0,1367,1367
2,2021-02-24,afghanistan,0,2734,0,1367,1367
3,2021-02-25,afghanistan,0,4101,0,1367,1367
4,2021-02-26,afghanistan,0,5468,0,1367,1367
...,...,...,...,...,...,...,...
112479,2022-06-29,zimbabwe,4605821,11945787,1053602,5295,1362
112480,2022-06-30,zimbabwe,0,11951103,0,5316,1483
112481,2022-07-01,zimbabwe,4611113,11956461,1057730,5358,1633
112482,2022-07-02,zimbabwe,4614738,11962651,1060087,6190,2102


In [6]:
a = pd.merge(covid_df, vaccine_df, how="outer", on=["date", "country"])

In [7]:
a[a.country=='India']

Unnamed: 0,date,country,total_cases,total_deaths,new_cases,new_deaths,fully_vaccinated,total_vaccinations,total_boosters,daily_vaccinations,daily_people_vaccinated


In [9]:
cov = list(set(covid_df.country) - set(vaccine_df.country))
vac = list(set(vaccine_df.country) - set(covid_df.country))
uni = set(covid_df.country).union(set(vaccine_df.country))
inter = set(covid_df.country).intersection(set(vaccine_df.country))

print('cov :',len(cov))
print('vac :',len(vac))
print('uni :',len(uni))
print('inter :',len(inter))

cov : 90
vac : 41
uni : 325
inter : 194


In [30]:
cov = list(set(covid_df.country) - set(vaccine_df.country))
vac = list(set(vaccine_df.country) - set(covid_df.country))
uni = set(covid_df.country).union(set(vaccine_df.country))
inter = set(covid_df.country).intersection(set(vaccine_df.country))

print('cov :',len(cov))
print('vac :',len(vac))
print('uni :',len(uni))
print('inter :',len(inter))

cov : 14
vac : 28
uni : 249
inter : 207


In [None]:
covid_df.country = covid_df.country.replace(['India'],'in')
df['column name'] = df['column name'].replace(['old value'],'new value')
df['column name'] = df['column name'].replace(['1st old value','2nd old value',...],'new value')
df['column name'] = df['column name'].replace(['1st old value','2nd old value',...],['1st new value','2nd new value',...])

In [42]:
covid_df.country = covid_df.country.replace(['congo (brazzaville)',
                                             'congo (kinshasa)',
                                             'falkland islands (malvinas)',
                                             'korea, north',
                                             'korea, south',
                                             'taiwan*',
                                             'us',
                                             'timor-leste',
                                             'summer olympics 2020',
                                             'burma',
                                             'west bank and gaza',
                                             'bonaire, sint eustatius and saba',
                                             'faroe islands'],
                                            ['democratic republic of congo',
                                             'congo',
                                             'falkland islands',
                                             'north korea',
                                             'south korea',
                                             'taiwan',
                                             'united states',
                                             'timor',
                                             'japan',
                                             'myanmar',
                                             'palestine',
                                             'bonaire sint eustatius and saba',
                                             'denmark'])


covid_df.country = covid_df.country.replace(['australian capital territory',
                                             'new south wales',
                                             'western australia',
                                             'tasmania',
                                             'south australia',
                                             'queensland',
                                             'northern territory',
                                             'victoria'],'australia')

covid_df.country = covid_df.country.replace(['anhui', 'beijing', 'chongqing', 
                                             'fujian', 'gansu', 'guangdong',
                                             'guangxi', 'guizhou', 'hainan', 
                                             'hebei', 'heilongjiang', 'henan',
                                             'hubei', 'hunan', 'inner mongolia', 
                                             'jiangsu','jiangxi', 'jilin', 
                                             'liaoning', 'macau', 'ningxia', 
                                             'qinghai','shaanxi', 'shandong', 
                                             'shanghai', 'shanxi', 'sichuan', 
                                             'tianjin','tibet', 'unknown', 
                                             'xinjiang', 'yunnan', 'zhejiang'],
                                            'china')

covid_df.country = covid_df.country.replace(['alberta','british columbia',
                                             'diamond princess','yukon',
                                             'saskatchewan','quebec',
                                             'prince edward island',
                                             'northwest territories',
                                             'nova scotia','nunavut',
                                             'ontario','new brunswick',
                                             'newfoundland and labrador',
                                             'manitoba',
                                             'repatriated travellers',
                                             'grand princess'],'canada')

covid_df.country = covid_df.country.replace(['sint maarten',
                                             'saint pierre and miquelon',
                                             'saint barthelemy','martinique',
                                             'mayotte','french guiana',
                                             'guadeloupe','reunion',
                                             'st martin'],'france overseas')

covid_df.country = covid_df.country.replace(['channel islands',
                                             'saint helena, ascension and tristan da cunha'],
                                            'united kingdom')

In [47]:
vaccine_df.country = vaccine_df.country.replace(['wales','england','scotland',
                                                 'northern ireland',],
                                                'united kingdom')

In [62]:
vaccine_df.country = vaccine_df.country.replace(['macao','nauru','tokelau','sint maarten (dutch part)'],
                                            ['china','oceania','new zealand','netherlands'])

In [63]:
vaccine_df[vaccine_df.country=='sint maarten (dutch part)'].tail()

Unnamed: 0,date,country,fully_vaccinated,total_vaccinations,total_boosters,daily_vaccinations,daily_people_vaccinated
90939,2022-06-20,sint maarten (dutch part),0,39456,0,14,4
90940,2022-06-21,sint maarten (dutch part),0,39469,0,13,4
90941,2022-06-22,sint maarten (dutch part),0,39482,0,13,4
90942,2022-06-23,sint maarten (dutch part),0,39494,0,12,5
90943,2022-06-24,sint maarten (dutch part),26350,39505,8845,11,5


In [61]:
vaccine_df[vaccine_df.country=='netherlands'].tail()

Unnamed: 0,date,country,fully_vaccinated,total_vaccinations,total_boosters,daily_vaccinations,daily_people_vaccinated
71110,2022-06-07,netherlands,0,33297023,0,1982,222
71111,2022-06-08,netherlands,0,33298952,0,1929,215
71112,2022-06-09,netherlands,0,33300828,0,1876,208
71113,2022-06-10,netherlands,0,33302650,0,1822,201
71114,2022-06-11,netherlands,11980109,33304419,9295105,1769,194


In [None]:
#- 'china',
  # 'macao',

# 'oceania',
  # 'nauru',

# New Zealand
  # 'tokelau',

# Neatherland
  # 'sint maarten (dutch part)',

In [24]:
covid_df[covid_df.country=='australia'].tail()

Unnamed: 0,date,country,total_cases,total_deaths,new_cases,new_deaths
15227,2022-07-05,australia,1322636,1285,5891,4
15228,2022-07-05,australia,610745,545,4107,5
15229,2022-07-05,australia,195813,97,1673,1
15230,2022-07-05,australia,2156482,4032,9959,23
15231,2022-07-05,australia,933854,400,6255,2


In [43]:
covid_df[covid_df.country=='saint helena, ascension and tristan da cunha'].tail()

Unnamed: 0,date,country,total_cases,total_deaths,new_cases,new_deaths


In [None]:
set(cov)

## Rename

#(Done) 'falkland islands (malvinas)' --- 'falkland islands',
#(Done) 'congo (brazzaville)' --- 'democratic republic of congo',
#(Done) 'korea, north' --- 'north korea'
#(Done) 'korea, south' --- 'south korea' 
#(Done) 'congo (kinshasa)'    --- 'congo',
#(Done) 'taiwan*'.     --- 'taiwan',
#(Done) 'timor-leste'  --- 'timor'
#(Done) 'summer olympics 2020' --- 'japan'
#(Done) 'us'.          --- 'united states',
#(Done) 'burma'      ---  'myanmar',
#(Done) 'west bank and gaza'   --- 'palestine',
#(Done) 'bonaire, sint eustatius and saba' --- 'bonaire sint eustatius and saba',
#(Done) 'faroe islands' --- 'denmark'

### Australia
# 'australian capital territory',
# 'new south wales',
# 'western australia'
# 'tasmania',
# 'south australia',
# 'queensland',
# 'northern territory',
# 'victoria',

#`- 'Australian Capital Territory', 'New South Wales',
#`-       'Northern Territory', 'Queensland', 'South Australia', 'Tasmania',
#`-        'Victoria', 'Western Australia'


### China
# 'beijing',
# 'unknown',
# 'macau'
# 'xinjiang'
# 'zhejiang'
# 'fujian',
# 'gansu',
# 'anhui',
# 'chongqing',
# 'yunnan',
# 'winter olympics 2022',
# 'tianjin',
# 'shaanxi',
# 'shandong',
# 'shanghai',
# 'shanxi',
# 'sichuan',
# 'qinghai',
# 'ningxia',
# 'hubei',
# 'hunan',
# 'inner mongolia',
# 'jiangsu',
# 'jiangxi',
# 'jilin',
# 'liaoning',
# 'guangdong',
# 'guangxi',
# 'guizhou',
# 'hainan',
# 'hebei',
# 'heilongjiang',
# 'henan',
# 'tibet',
# 'anhui', 'beijing', 'chongqing','fujian', 'gansu', 'guangdong','guangxi', 'guizhou', 'hainan', 'hebei', 'heilongjiang', 'henan','hubei', 'hunan', 
# 'inner mongolia', 'jiangsu','jiangxi', 'jilin', 'liaoning', 'macau', 'ningxia', 'qinghai','shaanxi', 'shandong', 'shanghai', 'shanxi', 'sichuan', 
# 'tianjin','tibet', 'unknown', 'xinjiang', 'yunnan', 'zhejiang'
 
 
#  'Anhui', 'Beijing', 'Chongqing', 'Fujian', 'Gansu', 'Guangdong',
#         'Guangxi', 'Guizhou', 'Hainan', 'Hebei', 'Heilongjiang', 'Henan',
#         'Hong Kong', 'Hubei', 'Hunan', 'Inner Mongolia', 'Jiangsu',
#         'Jiangxi', 'Jilin', 'Liaoning', 'Macau', 'Ningxia', 'Qinghai',
#         'Shaanxi', 'Shandong', 'Shanghai', 'Shanxi', 'Sichuan', 'Tianjin',
#         'Tibet', 'Unknown', 'Xinjiang', 'Yunnan', 'Zhejiang'


### Canada
# 'alberta',
# 'british columbia',
# 'diamond princess',
# 'yukon',
# 'saskatchewan',
# 'quebec',
# 'prince edward island',
# 'northwest territories',
# 'nova scotia',
# 'nunavut',
# 'ontario',
# 'new brunswick',
# 'newfoundland and labrador',
# 'manitoba',
# 'repatriated travellers',
# 'grand princess',

#`- 'Alberta', 'British Columbia', 'Diamond Princess',
#`-        'Grand Princess', 'Manitoba', 'New Brunswick',
#`-        'Newfoundland and Labrador', 'Northwest Territories',
#`-        'Nova Scotia', 'Nunavut', 'Ontario', 'Prince Edward Island',
#`-        'Quebec', 'Repatriated Travellers', 'Saskatchewan', 'Yukon'


### France overseas
# 'sint maarten'
# 'saint pierre and miquelon',
# 'saint barthelemy',
# 'martinique',
# 'mayotte',
# 'french guiana',
# 'guadeloupe',
# 'reunion',
# 'st martin',

## UK
# 'channel islands',
# 'saint helena, ascension and tristan da cunha'



# 'eritrea',
# 'antarctica',
# 'cabo verde',
# 'holy see',
# 'marshall islands',
# 'micronesia',
# 'ms zaandam',
# 'palau',

# 'French Polynesia', 'New Caledonia', 'Wallis and Futuna',

In [None]:
set(vac)

## Remove
# 'africa',
# 'asia',
# 'europe',
# 'european union',
# 'north america',
# 'south america',
# 'high income',
# 'low income',
# 'lower middle income',
# 'upper middle income',
# 'world'

### 'united kingdom'
#(Done) 'wales'
#(Done) 'england',
#(Done) 'scotland',
#(Done) 'northern ireland',

#(Done)- 'australia',
#(Done)- 'canada',

#- 'china',
  # 'macao',

# 'oceania',
  # 'nauru',

# New Zealand
  # 'tokelau',

# Neatherland
  # 'sint maarten (dutch part)',

# 'French Guiana', 'French Polynesia', 'Guadeloupe', 'Martinique',
#        'Mayotte', 'New Caledonia', 'Reunion', 'Saint Barthelemy',
#        'Saint Pierre and Miquelon', 'St Martin', 'Wallis and Futuna'
       
# 'niue',
# 'northern cyprus',
# 'cape verde',
# 'faeroe islands',
# 'pitcairn',
# 'saint helena',
# 'turkmenistan',
# 'tuvalu',

In [None]:
inter

{'afghanistan',
 'albania',
 'algeria',
 'andorra',
 'angola',
 'anguilla',
 'antigua and barbuda',
 'argentina',
 'armenia',
 'aruba',
 'austria',
 'azerbaijan',
 'bahamas',
 'bahrain',
 'bangladesh',
 'barbados',
 'belarus',
 'belgium',
 'belize',
 'benin',
 'bermuda',
 'bhutan',
 'bolivia',
 'bosnia and herzegovina',
 'botswana',
 'brazil',
 'british virgin islands',
 'brunei',
 'bulgaria',
 'burkina faso',
 'burundi',
 'cambodia',
 'cameroon',
 'cayman islands',
 'central african republic',
 'chad',
 'chile',
 'colombia',
 'comoros',
 'cook islands',
 'costa rica',
 "cote d'ivoire",
 'croatia',
 'cuba',
 'curacao',
 'cyprus',
 'czechia',
 'denmark',
 'djibouti',
 'dominica',
 'dominican republic',
 'ecuador',
 'egypt',
 'el salvador',
 'equatorial guinea',
 'estonia',
 'eswatini',
 'ethiopia',
 'fiji',
 'finland',
 'france',
 'french polynesia',
 'gabon',
 'gambia',
 'georgia',
 'germany',
 'ghana',
 'gibraltar',
 'greece',
 'greenland',
 'grenada',
 'guatemala',
 'guernsey',
 'gui

In [None]:
uni

{'afghanistan',
 'africa',
 'albania',
 'alberta',
 'algeria',
 'andorra',
 'angola',
 'anguilla',
 'anhui',
 'antarctica',
 'antigua and barbuda',
 'argentina',
 'armenia',
 'aruba',
 'asia',
 'australia',
 'australian capital territory',
 'austria',
 'azerbaijan',
 'bahamas',
 'bahrain',
 'bangladesh',
 'barbados',
 'beijing',
 'belarus',
 'belgium',
 'belize',
 'benin',
 'bermuda',
 'bhutan',
 'bolivia',
 'bonaire sint eustatius and saba',
 'bonaire, sint eustatius and saba',
 'bosnia and herzegovina',
 'botswana',
 'brazil',
 'british columbia',
 'british virgin islands',
 'brunei',
 'bulgaria',
 'burkina faso',
 'burma',
 'burundi',
 'cabo verde',
 'cambodia',
 'cameroon',
 'canada',
 'cape verde',
 'cayman islands',
 'central african republic',
 'chad',
 'channel islands',
 'chile',
 'china',
 'chongqing',
 'colombia',
 'comoros',
 'congo',
 'congo (brazzaville)',
 'congo (kinshasa)',
 'cook islands',
 'costa rica',
 "cote d'ivoire",
 'croatia',
 'cuba',
 'curacao',
 'cyprus',
 '

In [None]:
vaccine_df[vaccine_df.Country=='France']

In [None]:
# merged = covid_df.combine_first(vaccine_df)
# merged = merged.fillna(0)
# merged = merged.astype({"Fully vaccinated":'int',
#                                 'Total vaccinations':'int',
#                                 'Total boosters':'int',
#                                 'Daily vaccinations':'int',
#                                 'Daily people vaccinated':'int'})
# merged = merged[['Date',
#                  'Country',
#                  'New cases',
#                  'New deaths',
#                  'New recovered',
#                  'Total boosters',
#                  'Total cases',
#                  'Total deaths',
#                  'Total recovered',
#                  'Daily people vaccinated',
#                  'Daily vaccinations',
#                  'Fully vaccinated',
#                  'Total vaccinations']]
# merged
covid_df.combine_first(vaccine_df)

Unnamed: 0,Country,Daily people vaccinated,Daily vaccinations,Date,Fully vaccinated,New cases,New deaths,Total boosters,Total cases,Total deaths,Total vaccinations
0,Afghanistan,0.0,0.0,2020-01-22,0.0,0,0,0.0,0,0,0.0
1,Afghanistan,1367.0,1367.0,2020-01-23,0.0,0,0,0.0,0,0,1367.0
2,Afghanistan,1367.0,1367.0,2020-01-24,0.0,0,0,0.0,0,0,2734.0
3,Afghanistan,1367.0,1367.0,2020-01-25,0.0,0,0,0.0,0,0,4101.0
4,Afghanistan,1367.0,1367.0,2020-01-26,0.0,0,0,0.0,0,0,5468.0
...,...,...,...,...,...,...,...,...,...,...,...
254785,Zimbabwe,,,2022-06-29,,137,4,,255520,5553,
254786,Zimbabwe,,,2022-06-30,,66,2,,255586,5555,
254787,Zimbabwe,,,2022-07-01,,0,0,,255586,5555,
254788,Zimbabwe,,,2022-07-02,,0,0,,255586,5555,


In [None]:
pd.merge(covid_df, vaccine_df, how="outer", on=["Date", "Country"])

Unnamed: 0,Date,Country,Total cases,Total deaths,New cases,New deaths,Fully vaccinated,Total vaccinations,Total boosters,Daily vaccinations,Daily people vaccinated
0,2020-01-22,Afghanistan,0.0,0.0,0.0,0.0,,,,,
1,2020-01-23,Afghanistan,0.0,0.0,0.0,0.0,,,,,
2,2020-01-24,Afghanistan,0.0,0.0,0.0,0.0,,,,,
3,2020-01-25,Afghanistan,0.0,0.0,0.0,0.0,,,,,
4,2020-01-26,Afghanistan,0.0,0.0,0.0,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
272925,2022-06-29,World,,,,,4.799123e+09,0.0,2.123141e+09,0.0,0.0
272926,2022-06-30,World,,,,,4.800716e+09,0.0,2.125459e+09,0.0,0.0
272927,2022-07-01,World,,,,,4.801855e+09,0.0,2.127183e+09,0.0,0.0
272928,2022-07-02,World,,,,,4.802835e+09,0.0,2.129358e+09,0.0,0.0


In [None]:
a = pd.merge(covid_df, vaccine_df, how="outer", on=["Date", "Country"])

Unnamed: 0,Date,Country,Total cases,Total deaths,New cases,New deaths,Fully vaccinated,Total vaccinations,Total boosters,Daily vaccinations,Daily people vaccinated
0,2020-01-22,Afghanistan,0.0,0.0,0.0,0.0,,,,,
1,2020-01-23,Afghanistan,0.0,0.0,0.0,0.0,,,,,
2,2020-01-24,Afghanistan,0.0,0.0,0.0,0.0,,,,,
3,2020-01-25,Afghanistan,0.0,0.0,0.0,0.0,,,,,
4,2020-01-26,Afghanistan,0.0,0.0,0.0,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
272925,2022-06-29,World,,,,,4.799123e+09,0.0,2.123141e+09,0.0,0.0
272926,2022-06-30,World,,,,,4.800716e+09,0.0,2.125459e+09,0.0,0.0
272927,2022-07-01,World,,,,,4.801855e+09,0.0,2.127183e+09,0.0,0.0
272928,2022-07-02,World,,,,,4.802835e+09,0.0,2.129358e+09,0.0,0.0


In [None]:
a[a.Country=='India']

Unnamed: 0,Date,Country,Total cases,Total deaths,New cases,New deaths,Fully vaccinated,Total vaccinations,Total boosters,Daily vaccinations,Daily people vaccinated
132312,2020-01-22,India,0.0,0.0,0.0,0.0,,,,,
132313,2020-01-23,India,0.0,0.0,0.0,0.0,,,,,
132314,2020-01-24,India,0.0,0.0,0.0,0.0,,,,,
132315,2020-01-25,India,0.0,0.0,0.0,0.0,,,,,
132316,2020-01-26,India,0.0,0.0,0.0,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
133201,2022-06-29,India,43452164.0,525116.0,18819.0,39.0,913642889.0,1.970128e+09,43868135.0,1432271.0,177132.0
133202,2022-06-30,India,43469234.0,525139.0,17070.0,23.0,914479703.0,1.971543e+09,44348776.0,1414474.0,178313.0
133203,2022-07-01,India,43486326.0,525168.0,17092.0,29.0,915190727.0,1.972913e+09,44697360.0,1369941.0,175455.0
133204,2022-07-02,India,43502429.0,525199.0,16103.0,31.0,915792386.0,1.974191e+09,45002940.0,1278094.0,165575.0


In [None]:
vaccine_df[vaccine_df.Country=='India']

Unnamed: 0,Date,Country,Fully vaccinated,Total vaccinations,Total boosters,Daily vaccinations,Daily people vaccinated
45795,2021-01-15,India,0,0,0,0,0
45796,2021-01-16,India,0,191181,0,191181,191181
45797,2021-01-17,India,0,303331,0,112150,112150
45798,2021-01-18,India,0,454681,0,151350,151350
45799,2021-01-19,India,0,623390,0,168709,168709
...,...,...,...,...,...,...,...
46325,2022-06-29,India,913642889,1970128269,43868135,1432271,177132
46326,2022-06-30,India,914479703,1971542743,44348776,1414474,178313
46327,2022-07-01,India,915190727,1972912684,44697360,1369941,175455
46328,2022-07-02,India,915792386,1974190778,45002940,1278094,165575


In [None]:
covid_df[covid_df.Country=='India']

Unnamed: 0,Date,Country,Total cases,Total deaths,New cases,New deaths
132312,2020-01-22,India,0,0,0,0
132313,2020-01-23,India,0,0,0,0
132314,2020-01-24,India,0,0,0,0
132315,2020-01-25,India,0,0,0,0
132316,2020-01-26,India,0,0,0,0
...,...,...,...,...,...,...
133201,2022-06-29,India,43452164,525116,18819,39
133202,2022-06-30,India,43469234,525139,17070,23
133203,2022-07-01,India,43486326,525168,17092,29
133204,2022-07-02,India,43502429,525199,16103,31


In [None]:
pd.concat([covid_df, vaccine_df], axis=0, ignore_index=True)

Unnamed: 0,Date,Country,Total cases,Total deaths,New cases,New deaths,Fully vaccinated,Total vaccinations,Total boosters,Daily vaccinations,Daily people vaccinated
0,2020-01-22,Afghanistan,0.0,0.0,0.0,0.0,,,,,
1,2020-01-23,Afghanistan,0.0,0.0,0.0,0.0,,,,,
2,2020-01-24,Afghanistan,0.0,0.0,0.0,0.0,,,,,
3,2020-01-25,Afghanistan,0.0,0.0,0.0,0.0,,,,,
4,2020-01-26,Afghanistan,0.0,0.0,0.0,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
366697,2022-06-22,Zimbabwe,,,,,4588340.0,11898709.0,1043552.0,7816.0,2587.0
366698,2022-06-23,Zimbabwe,,,,,4616555.0,11915330.0,1064036.0,16621.0,5081.0
366699,2022-06-24,Zimbabwe,,,,,4620396.0,11931309.0,1065656.0,15979.0,4958.0
366700,2022-06-25,Zimbabwe,,,,,4622428.0,11947500.0,1067073.0,16191.0,5001.0


In [None]:
pd.merge(covid_df, vaccine_df, how="outer", on=['Date','Country'])

Unnamed: 0,Date,Country,Total cases,Total deaths,New cases,New deaths,Fully vaccinated,Total vaccinations,Total boosters,Daily vaccinations,Daily people vaccinated
0,2020-01-22,Afghanistan,0.0,0.0,0.0,0.0,,,,,
1,2020-01-23,Afghanistan,0.0,0.0,0.0,0.0,,,,,
2,2020-01-24,Afghanistan,0.0,0.0,0.0,0.0,,,,,
3,2020-01-25,Afghanistan,0.0,0.0,0.0,0.0,,,,,
4,2020-01-26,Afghanistan,0.0,0.0,0.0,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
272925,2022-06-29,World,,,,,4.799123e+09,0.0,2.123141e+09,0.0,0.0
272926,2022-06-30,World,,,,,4.800716e+09,0.0,2.125459e+09,0.0,0.0
272927,2022-07-01,World,,,,,4.801855e+09,0.0,2.127183e+09,0.0,0.0
272928,2022-07-02,World,,,,,4.802835e+09,0.0,2.129358e+09,0.0,0.0


In [None]:
vaccine_df

Unnamed: 0,Date,Country,Fully vaccinated,Total vaccinations,Total boosters,Daily vaccinations,Daily people vaccinated
0,2021-02-22,Afghanistan,0,0,0,0,0
1,2021-02-23,Afghanistan,0,1367,0,1367,1367
2,2021-02-24,Afghanistan,0,2734,0,1367,1367
3,2021-02-25,Afghanistan,0,4101,0,1367,1367
4,2021-02-26,Afghanistan,0,5468,0,1367,1367
...,...,...,...,...,...,...,...
111907,2022-06-22,Zimbabwe,4588340,11898709,1043552,7816,2587
111908,2022-06-23,Zimbabwe,4616555,11915330,1064036,16621,5081
111909,2022-06-24,Zimbabwe,4620396,11931309,1065656,15979,4958
111910,2022-06-25,Zimbabwe,4622428,11947500,1067073,16191,5001


In [None]:
len(set(list(covid_df.Country.unique())) ^ set(list(vaccine_df.Country.unique())))

131

In [None]:
len(set(list(covid_df.Country.unique()))) - len(set(list(vaccine_df.Country.unique())))

49

In [None]:
set(list(vaccine_df.Country.unique())) - set(list(covid_df.Country.unique()))
# 'United States'
# 'South Korea'
# 'Australia'
# 'China'
# 'Myanmar'
# 'Canada'
# 'Democratic Republic of Congo','Congo'
# 'Falkland Islands'
# 'Taiwan'
# 'Saint Helena'

{'Africa',
 'Asia',
 'Australia',
 'Bonaire Sint Eustatius and Saba',
 'Canada',
 'Cape Verde',
 'China',
 'Congo',
 'Democratic Republic of Congo',
 'England',
 'Europe',
 'European Union',
 'Faeroe Islands',
 'Falkland Islands',
 'High income',
 'Low income',
 'Lower middle income',
 'Macao',
 'Myanmar',
 'Nauru',
 'Niue',
 'North America',
 'Northern Cyprus',
 'Northern Ireland',
 'Oceania',
 'Palestine',
 'Pitcairn',
 'Saint Helena',
 'Scotland',
 'Sint Maarten (Dutch part)',
 'South America',
 'South Korea',
 'Taiwan',
 'Timor',
 'Tokelau',
 'Turkmenistan',
 'Tuvalu',
 'United States',
 'Upper middle income',
 'Wales',
 'World'}

In [None]:
set(list(covid_df.Country.unique())) - set(list(vaccine_df.Country.unique()))
# 'Korea, South'
# 'US'
# 'Australian Capital Territory','Western Australia',
# 'Congo (Brazzaville)','Congo (Kinshasa)
# 'Falkland Islands (Malvinas)'
# 'Taiwan*'
# 'Saint Helena, Ascension and Tristan da Cunha',

{'Alberta',
 'Anhui',
 'Antarctica',
 'Australian Capital Territory',
 'Beijing',
 'Bonaire, Sint Eustatius and Saba',
 'British Columbia',
 'Burma',
 'Cabo Verde',
 'Channel Islands',
 'Chongqing',
 'Congo (Brazzaville)',
 'Congo (Kinshasa)',
 'Diamond Princess',
 'Eritrea',
 'Falkland Islands (Malvinas)',
 'Faroe Islands',
 'French Guiana',
 'Fujian',
 'Gansu',
 'Grand Princess',
 'Guadeloupe',
 'Guangdong',
 'Guangxi',
 'Guizhou',
 'Hainan',
 'Hebei',
 'Heilongjiang',
 'Henan',
 'Holy See',
 'Hubei',
 'Hunan',
 'Inner Mongolia',
 'Jiangsu',
 'Jiangxi',
 'Jilin',
 'Korea, North',
 'Korea, South',
 'Liaoning',
 'MS Zaandam',
 'Macau',
 'Manitoba',
 'Marshall Islands',
 'Martinique',
 'Mayotte',
 'Micronesia',
 'New Brunswick',
 'New South Wales',
 'Newfoundland and Labrador',
 'Ningxia',
 'Northern Territory',
 'Northwest Territories',
 'Nova Scotia',
 'Nunavut',
 'Ontario',
 'Palau',
 'Prince Edward Island',
 'Qinghai',
 'Quebec',
 'Queensland',
 'Repatriated Travellers',
 'Reunion',

In [None]:
list(covid_df.Country.unique())
list(vaccine_df.Country.unique())
len(list(set(list(covid_df.Country.unique())).intersection(list(vaccine_df.Country.unique()))))

194

In [None]:
set(list(covid_df.Country.unique())) ^ set(list(vaccine_df.Country.unique()))

{'Africa',
 'Alberta',
 'Anhui',
 'Antarctica',
 'Asia',
 'Australia',
 'Australian Capital Territory',
 'Beijing',
 'Bonaire Sint Eustatius and Saba',
 'Bonaire, Sint Eustatius and Saba',
 'British Columbia',
 'Burma',
 'Cabo Verde',
 'Canada',
 'Cape Verde',
 'Channel Islands',
 'China',
 'Chongqing',
 'Congo',
 'Congo (Brazzaville)',
 'Congo (Kinshasa)',
 'Democratic Republic of Congo',
 'Diamond Princess',
 'England',
 'Eritrea',
 'Europe',
 'European Union',
 'Faeroe Islands',
 'Falkland Islands',
 'Falkland Islands (Malvinas)',
 'Faroe Islands',
 'French Guiana',
 'Fujian',
 'Gansu',
 'Grand Princess',
 'Guadeloupe',
 'Guangdong',
 'Guangxi',
 'Guizhou',
 'Hainan',
 'Hebei',
 'Heilongjiang',
 'Henan',
 'High income',
 'Holy See',
 'Hubei',
 'Hunan',
 'Inner Mongolia',
 'Jiangsu',
 'Jiangxi',
 'Jilin',
 'Korea, North',
 'Korea, South',
 'Liaoning',
 'Low income',
 'Lower middle income',
 'MS Zaandam',
 'Macao',
 'Macau',
 'Manitoba',
 'Marshall Islands',
 'Martinique',
 'Mayotte',