In [90]:
# Check if libraries are installed
!pip show numpy
!pip show pandas

Name: numpy
Version: 1.21.6
Summary: NumPy is the fundamental package for array computing with Python.
Home-page: https://www.numpy.org
Author: Travis E. Oliphant et al.
Author-email: 
License: BSD
Location: /usr/local/lib/python3.8/dist-packages
Requires: 
Required-by: aeppl, aesara, albumentations, altair, arviz, astropy, atari-py, autograd, blis, bokeh, cftime, cmdstanpy, cufflinks, cvxpy, daft, datascience, db-dtypes, ecos, fa2, fastdtw, fix-yahoo-finance, folium, gensim, gym, h5py, holoviews, httpstan, hyperopt, imageio, imbalanced-learn, imgaug, jax, jaxlib, jpeg4py, kapre, Keras-Preprocessing, librosa, lightgbm, matplotlib, matplotlib-venn, missingno, mizani, mlxtend, moviepy, netCDF4, nibabel, numba, numexpr, opencv-contrib-python, opencv-python, opencv-python-headless, opt-einsum, osqp, pandas, pandas-gbq, patsy, plotnine, prophet, pyarrow, pycocotools, pyemd, pyerfa, pymc, pystan, python-louvain, PyWavelets, qdldl, qudida, resampy, scikit-image, scikit-learn, scipy, scs, seab

In [91]:
# Import libraries
import numpy as np
import pandas as pd

In [92]:
# Fetch file and cast date to type datetime64 (as requested)
country_vaccinations = pd.read_csv('/content/sample_data/country_vaccinations.csv')
country_vaccinations['date'] = pd.to_datetime(country_vaccinations['date'])

In [93]:
# Check data types
country_vaccinations.dtypes

country                                        object
iso_code                                       object
date                                   datetime64[ns]
total_vaccinations                            float64
people_vaccinated                             float64
people_fully_vaccinated                       float64
daily_vaccinations_raw                        float64
daily_vaccinations                            float64
total_vaccinations_per_hundred                float64
people_vaccinated_per_hundred                 float64
people_fully_vaccinated_per_hundred           float64
daily_vaccinations_per_million                float64
vaccines                                       object
source_name                                    object
source_website                                 object
dtype: object

In [94]:
# Check data information
country_vaccinations.info

<bound method DataFrame.info of            country iso_code       date  total_vaccinations  people_vaccinated  \
0      Afghanistan      AFG 2021-02-22                 0.0                0.0   
1      Afghanistan      AFG 2021-02-23                 NaN                NaN   
2      Afghanistan      AFG 2021-02-24                 NaN                NaN   
3      Afghanistan      AFG 2021-02-25                 NaN                NaN   
4      Afghanistan      AFG 2021-02-26                 NaN                NaN   
...            ...      ...        ...                 ...                ...   
86507     Zimbabwe      ZWE 2022-03-25           8691642.0          4814582.0   
86508     Zimbabwe      ZWE 2022-03-26           8791728.0          4886242.0   
86509     Zimbabwe      ZWE 2022-03-27           8845039.0          4918147.0   
86510     Zimbabwe      ZWE 2022-03-28           8934360.0          4975433.0   
86511     Zimbabwe      ZWE 2022-03-29           9039729.0          5053114.0

In [95]:
# This represents the total vaccinations per day of different groups of companies
# It is impossible to calculate per individual company with the current data given that the number of vaccines per company is not specified (only per group)
print('Total vaccines per group of companies')
country_vaccinations[['vaccines', 'daily_vaccinations']].groupby('vaccines').sum()

Total vaccines per group of companies


Unnamed: 0_level_0,daily_vaccinations
vaccines,Unnamed: 1_level_1
"Abdala, Johnson&Johnson, Oxford/AstraZeneca, Pfizer/BioNTech, Soberana02, Sputnik Light, Sputnik V",9616160.0
"Abdala, Moderna, Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sputnik V",201816053.0
"Abdala, Sinopharm/Beijing, Sinovac, Soberana02, Sputnik Light, Sputnik V",37861146.0
"Abdala, Soberana Plus, Soberana02",33802957.0
"COVIran Barekat, Covaxin, FAKHRAVAC, Oxford/AstraZeneca, Razi Cov Pars, Sinopharm/Beijing, Soberana02, SpikoGen, Sputnik V",146357015.0
...,...
"Pfizer/BioNTech, Sinovac, Turkovac",147484328.0
"Pfizer/BioNTech, Sputnik V",68643.0
"QazVac, Sinopharm/Beijing, Sputnik V",20891256.0
Sinopharm/Beijing,893824.0


In [96]:
print('Total vaccines applied in the world')
int(country_vaccinations['daily_vaccinations'].sum())

Total vaccines applied in the world


11320239871

In [97]:
print('Average vaccines per country')
country_vaccinations['daily_vaccinations'].sum()/country_vaccinations['country'].unique().size

Average vaccines per country


50763407.49327354

In [98]:
print('Vaccines applied on 29/01/21')
date = pd.to_datetime('29/01/2021')
int(country_vaccinations.loc[country_vaccinations['date']==date, ['daily_vaccinations']].sum())

Vaccines applied on 29/01/21


4884052

In [99]:
print('Differences between daily_vaccinations and daily_vaccinations_raw')

conDiferencias = country_vaccinations
conDiferencias['diferencias'] = conDiferencias['daily_vaccinations']-conDiferencias['daily_vaccinations_raw']
conDiferencias[['country', 'date', 'daily_vaccinations', 'daily_vaccinations_raw', 'diferencias']]

Differences between daily_vaccinations and daily_vaccinations_raw


Unnamed: 0,country,date,daily_vaccinations,daily_vaccinations_raw,diferencias
0,Afghanistan,2021-02-22,,,
1,Afghanistan,2021-02-23,1367.0,,
2,Afghanistan,2021-02-24,1367.0,,
3,Afghanistan,2021-02-25,1367.0,,
4,Afghanistan,2021-02-26,1367.0,,
...,...,...,...,...,...
86507,Zimbabwe,2022-03-25,69579.0,139213.0,-69634.0
86508,Zimbabwe,2022-03-26,83429.0,100086.0,-16657.0
86509,Zimbabwe,2022-03-27,90629.0,53311.0,37318.0
86510,Zimbabwe,2022-03-28,100614.0,89321.0,11293.0


In [100]:
print('Days between oldest and earliest entry')
country_vaccinations['date'].max() - country_vaccinations['date'].min()

Days between oldest and earliest entry


Timedelta('482 days 00:00:00')

In [101]:
print('Different vaccines used each day separated by /')
conCantidad = country_vaccinations
conCantidad['conVac'] = conCantidad['vaccines'].str.split('/').str.len()
conCantidad[['country', 'date', 'vaccines', 'conVac']]

Different vaccines used each day separated by /


Unnamed: 0,country,date,vaccines,conVac
0,Afghanistan,2021-02-22,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",4
1,Afghanistan,2021-02-23,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",4
2,Afghanistan,2021-02-24,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",4
3,Afghanistan,2021-02-25,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",4
4,Afghanistan,2021-02-26,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",4
...,...,...,...,...
86507,Zimbabwe,2022-03-25,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",3
86508,Zimbabwe,2022-03-26,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",3
86509,Zimbabwe,2022-03-27,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",3
86510,Zimbabwe,2022-03-28,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",3


In [102]:
print('All entries before 20/12/2020')
date = pd.to_datetime('20/12/2020')
antes20 = country_vaccinations.loc[country_vaccinations['date']<date]
antes20

All entries before 20/12/2020


Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website,diferencias,conVac
13403,Canada,CAN,2020-12-14,5.0,5.0,,,,0.00,0.00,,,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ...",Official data from provinces via covid19tracke...,https://covid19tracker.ca/vaccinationtracker.html,,3
13404,Canada,CAN,2020-12-15,727.0,727.0,,722.0,722.0,0.00,0.00,,19.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ...",Official data from provinces via covid19tracke...,https://covid19tracker.ca/vaccinationtracker.html,0.0,3
13405,Canada,CAN,2020-12-16,3025.0,3025.0,,2298.0,1510.0,0.01,0.01,,40.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ...",Official data from provinces via covid19tracke...,https://covid19tracker.ca/vaccinationtracker.html,-788.0,3
13406,Canada,CAN,2020-12-17,7279.0,7279.0,,4254.0,2425.0,0.02,0.02,,64.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ...",Official data from provinces via covid19tracke...,https://covid19tracker.ca/vaccinationtracker.html,-1829.0,3
13407,Canada,CAN,2020-12-18,11296.0,11296.0,,4017.0,2823.0,0.03,0.03,,74.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ...",Official data from provinces via covid19tracke...,https://covid19tracker.ca/vaccinationtracker.html,-1194.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82362,United States,USA,2020-12-15,84638.0,76984.0,6281.0,49771.0,27175.0,0.03,0.02,0.0,82.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://data.cdc.gov/Vaccinations/COVID-19-Vac...,-22596.0,2
82363,United States,USA,2020-12-16,244549.0,231496.0,6757.0,159911.0,71420.0,0.07,0.07,0.0,215.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://data.cdc.gov/Vaccinations/COVID-19-Vac...,-88491.0,2
82364,United States,USA,2020-12-17,517161.0,496980.0,7507.0,272612.0,121718.0,0.16,0.15,0.0,367.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://data.cdc.gov/Vaccinations/COVID-19-Vac...,-150894.0,2
82365,United States,USA,2020-12-18,933551.0,904656.0,8718.0,416390.0,180653.0,0.28,0.27,0.0,544.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://data.cdc.gov/Vaccinations/COVID-19-Vac...,-235737.0,2


In [103]:
print("All entries where the pfizer vaccine was used")
pfizer = country_vaccinations[country_vaccinations['vaccines'].str.contains('Pfizer', case=False)]
pfizer

All entries where the pfizer vaccine was used


Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website,diferencias,conVac
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.00,0.00,,,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/,,4
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/,,4
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/,,4
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/,,4
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/,,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85069,Wales,OWID_WLS,2022-03-24,6921195.0,2526832.0,2397677.0,1756.0,1705.0,218.33,79.71,75.64,538.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/vaccin...,-51.0,3
85070,Wales,OWID_WLS,2022-03-25,6923298.0,2527100.0,2398097.0,2103.0,1667.0,218.40,79.72,75.65,526.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/vaccin...,-436.0,3
85071,Wales,OWID_WLS,2022-03-26,6923706.0,2527184.0,2398322.0,408.0,1519.0,218.41,79.72,75.66,479.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/vaccin...,1111.0,3
85072,Wales,OWID_WLS,2022-03-27,6925183.0,2528051.0,2398676.0,1477.0,1505.0,218.46,79.75,75.67,475.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/vaccin...,28.0,3


In [104]:
# Save to excel the generated dataframes
with pd.ExcelWriter("/content/resultadosReto.xlsx") as writer:
  conDiferencias.to_excel(writer, sheet_name="conDiferencias")
  conCantidad.to_excel(writer, sheet_name="conCantidad")
  antes20.to_excel(writer, sheet_name="antes20")
  pfizer.to_excel(writer, sheet_name="pfizer")