# Covid project (Triade.io)
Responsible: Luís Fernando Matias de Farias (luis.farias@triade.io)

From the 31 December 2019 to the 21 March 2020, WHO collected the numbers of confirmed COVID-19 cases and deaths through official communications under the International Health Regulations (IHR, 2005), complemented by monitoring the official ministries of health websites and social media accounts. Since 22 March 2020, global data are compiled through WHO region-specific dashboards (see links below), and/or aggregate count data reported to WHO headquarters daily.

[Data source link](https://covid19.who.int/data)

## Import libraries and data

In [1]:
import pandas as pd

In [2]:
gb_data = pd.read_csv("../data/global-data.csv", parse_dates=['Date_reported'])
gb_tb_data = pd.read_csv("../data/global-table-data.csv")

vaccina = pd.read_csv("../data/vaccination-data.csv", parse_dates=['DATE_UPDATED'])
meta_vaccina = pd.read_csv("../data/vaccination-metadata.csv")

population = pd.read_csv("../data/Population-Estimates/Population-EstimatesData.csv", 
                         index_col=False,
                         parse_dates=['2018', '2019', '2020', '2021', '2022']
                        )

## Name of the columns

In [23]:
gb_data.columns

Index(['Date_reported', 'Country_code', 'Country', 'WHO_region', 'New_cases',
       'Cumulative_cases', 'New_deaths', 'Cumulative_deaths'],
      dtype='object')

In [24]:
gb_tb_data.columns

Index(['Name', 'WHO Region', 'Cases - cumulative total',
       'Cases - cumulative total per 100000 population',
       'Cases - newly reported in last 7 days',
       'Cases - newly reported in last 7 days per 100000 population',
       'Cases - newly reported in last 24 hours', 'Deaths - cumulative total',
       'Deaths - cumulative total per 100000 population',
       'Deaths - newly reported in last 7 days',
       'Deaths - newly reported in last 7 days per 100000 population',
       'Deaths - newly reported in last 24 hours'],
      dtype='object')

In [25]:
vaccina.columns

Index(['COUNTRY', 'ISO3', 'WHO_REGION', 'DATA_SOURCE', 'DATE_UPDATED',
       'TOTAL_VACCINATIONS', 'PERSONS_VACCINATED_1PLUS_DOSE',
       'TOTAL_VACCINATIONS_PER100', 'PERSONS_VACCINATED_1PLUS_DOSE_PER100',
       'PERSONS_FULLY_VACCINATED', 'PERSONS_FULLY_VACCINATED_PER100',
       'VACCINES_USED', 'FIRST_VACCINE_DATE', 'NUMBER_VACCINES_TYPES_USED',
       'PERSONS_BOOSTER_ADD_DOSE', 'PERSONS_BOOSTER_ADD_DOSE_PER100'],
      dtype='object')

In [26]:
meta_vaccina.columns

Index(['ISO3', 'VACCINE_NAME', 'PRODUCT_NAME', 'COMPANY_NAME',
       'AUTHORIZATION_DATE', 'START_DATE', 'END_DATE', 'COMMENT',
       'DATA_SOURCE'],
      dtype='object')

In [121]:
population.columns

Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
       '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
       '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022',
       '2023', '2024', '2025', '2026', '2027', '2028', '2029', '2030', '2031',
       '2032', '2033', '2034', '2035', '2036', '2037', '2038', '2039', '2040',
       '2041', '2042', '2043', '2044', '2045', '2046', '2047', '2048', '2049',
       '2050', 'Unnamed: 95'],
      dtype='object')

In [5]:
gb_data.head()

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-03,AF,Afghanistan,EMRO,0,0,0,0
1,2020-01-04,AF,Afghanistan,EMRO,0,0,0,0
2,2020-01-05,AF,Afghanistan,EMRO,0,0,0,0
3,2020-01-06,AF,Afghanistan,EMRO,0,0,0,0
4,2020-01-07,AF,Afghanistan,EMRO,0,0,0,0


In [3]:
vaccina.shape

(228, 16)

In [3]:
vaccina.head()

Unnamed: 0,COUNTRY,ISO3,WHO_REGION,DATA_SOURCE,DATE_UPDATED,TOTAL_VACCINATIONS,PERSONS_VACCINATED_1PLUS_DOSE,TOTAL_VACCINATIONS_PER100,PERSONS_VACCINATED_1PLUS_DOSE_PER100,PERSONS_FULLY_VACCINATED,PERSONS_FULLY_VACCINATED_PER100,VACCINES_USED,FIRST_VACCINE_DATE,NUMBER_VACCINES_TYPES_USED,PERSONS_BOOSTER_ADD_DOSE,PERSONS_BOOSTER_ADD_DOSE_PER100
0,Afghanistan,AFG,EMRO,REPORTING,2022-04-04,5872684.0,5188057.0,15.086,13.327,4532577.0,11.643,"Beijing CNBG - BBIBP-CorV,Janssen - Ad26.COV 2...",2021-02-22,4.0,,
1,Albania,ALB,EURO,REPORTING,2022-04-03,2801184.0,1303431.0,97.3,45.799,1226522.0,43.097,"AstraZeneca - Vaxzevria,Gamaleya - Gam-Covid-V...",2021-01-13,5.0,271231.0,9.53
2,Algeria,DZA,AFRO,REPORTING,2022-03-09,13704895.0,7461932.0,31.253,17.017,6110712.0,13.935,"Beijing CNBG - BBIBP-CorV,Gamaleya - Gam-Covid...",2021-01-30,4.0,490676.0,1.119
3,American Samoa,ASM,WPRO,REPORTING,2022-03-17,97346.0,43637.0,176.361,79.057,39137.0,70.904,"Janssen - Ad26.COV 2-S,Moderna - Spikevax,Pfiz...",2020-12-21,3.0,15135.0,27.42
4,Andorra,AND,EURO,REPORTING,2022-03-27,152187.0,57835.0,197.0,75.922,53389.0,70.085,"AstraZeneca - Vaxzevria,Moderna - Spikevax,Pfi...",2021-01-20,3.0,40963.0,53.773


In [5]:
meta_vaccina.head()

Unnamed: 0,ISO3,VACCINE_NAME,PRODUCT_NAME,COMPANY_NAME,AUTHORIZATION_DATE,START_DATE,END_DATE,COMMENT,DATA_SOURCE
0,JEY,Moderna - mRNA-1273,mRNA-1273,Moderna,,,,,OWID
1,JEY,AstraZeneca - AZD1222,AZD1222,AstraZeneca,,,,,OWID
2,JEY,Pfizer BioNTech - Comirnaty,Comirnaty,Pfizer BioNTech,,,,,OWID
3,GGY,Moderna - mRNA-1273,mRNA-1273,Moderna,,,,,OWID
4,GGY,AstraZeneca - AZD1222,AZD1222,AstraZeneca,,,,,OWID


## Check the requirements

### Attributes

#### Contry
In the dataset are used ISO2 and ISO3, be careful

In [31]:
gb_data.shape

(195999, 8)

In [6]:
gb_data[gb_data['Country'] == 'Afghanistan']

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-03,AF,Afghanistan,EMRO,0,0,0,0
1,2020-01-04,AF,Afghanistan,EMRO,0,0,0,0
2,2020-01-05,AF,Afghanistan,EMRO,0,0,0,0
3,2020-01-06,AF,Afghanistan,EMRO,0,0,0,0
4,2020-01-07,AF,Afghanistan,EMRO,0,0,0,0
...,...,...,...,...,...,...,...,...
822,2022-04-04,AF,Afghanistan,EMRO,22,177827,0,7671
823,2022-04-05,AF,Afghanistan,EMRO,70,177897,0,7671
824,2022-04-06,AF,Afghanistan,EMRO,35,177932,0,7671
825,2022-04-07,AF,Afghanistan,EMRO,42,177974,0,7671


In [13]:
gb_data['Country'].value_counts()

Afghanistan        827
Paraguay           827
Nigeria            827
Niue               827
North Macedonia    827
                  ... 
Grenada            827
Guadeloupe         827
Guam               827
Guatemala          827
Zimbabwe           827
Name: Country, Length: 237, dtype: int64

In [29]:
gb_data.isnull().sum()

Date_reported          0
Country_code         827
Country                0
WHO_region             0
New_cases              0
Cumulative_cases       0
New_deaths             0
Cumulative_deaths      0
dtype: int64

In [37]:
country_without_code = gb_data.loc[(gb_data['Country_code'].isnull() == True)]
country_without_code['Country'].value_counts()

Namibia    827
Name: Country, dtype: int64

In [45]:
vaccina['COUNTRY'].value_counts()

Afghanistan    1
Netherlands    1
New Zealand    1
Nicaragua      1
Niger          1
              ..
Gibraltar      1
Greece         1
Greenland      1
Grenada        1
Zimbabwe       1
Name: COUNTRY, Length: 228, dtype: int64

#### Region

In [53]:
gb_tb_data.head(2)

Unnamed: 0,Name,WHO Region,Cases - cumulative total,Cases - cumulative total per 100000 population,Cases - newly reported in last 7 days,Cases - newly reported in last 7 days per 100000 population,Cases - newly reported in last 24 hours,Deaths - cumulative total,Deaths - cumulative total per 100000 population,Deaths - newly reported in last 7 days,Deaths - newly reported in last 7 days per 100000 population,Deaths - newly reported in last 24 hours
Global,,494587638,6345.308578,7708594,98.897352,1138093,6170283,79.161602,23201,0.297657,3905,
United States of America,Americas,79544396,24031.347,201598,60.905,43389,976516,295.018,3480,1.051,976,


In [54]:
gb_data['WHO_region'].value_counts()

EURO     51274
AMRO     46312
AFRO     41350
WPRO     28945
EMRO     18194
SEARO     9097
Other      827
Name: WHO_region, dtype: int64

#### Date

In [68]:
gb_data.dtypes

Date_reported        datetime64[ns]
Country_code                 object
Country                      object
WHO_region                   object
New_cases                     int64
Cumulative_cases              int64
New_deaths                    int64
Cumulative_deaths             int64
dtype: object

In [78]:
gb_data.sort_values(by="Date_reported").head(1)

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-03,AF,Afghanistan,EMRO,0,0,0,0


In [79]:
gb_data.sort_values(by="Date_reported").tail(1)

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
195998,2022-04-08,ZW,Zimbabwe,AFRO,67,246870,2,5455


#### Vaccine and Company

In [None]:
vaccina.shape

(228, 16)

In [None]:
meta_vaccina.shape

(887, 9)

In [100]:
meta_vaccina['VACCINE_NAME'].value_counts()

Pfizer BioNTech - Comirnaty    167
AstraZeneca - Vaxzevria        129
Janssen - Ad26.COV 2-S         113
Moderna - Spikevax             102
Beijing CNBG - BBIBP-CorV       95
SII - Covishield                91
Gamaleya - Gam-Covid-Vac        61
Sinovac - CoronaVac             53
Bharat - Covaxin                12
Gamaleya - Sputnik-Light         9
AstraZeneca - AZD1222            8
CanSino - Convidecia             7
Novavax-NUVAXOVID                6
Moderna - mRNA-1273              5
CIGB - CIGB-66                   4
Finlay - Soberana-02             4
Wuhan CNBG - Inactivated         3
Anhui ZL - Zifivax               3
Julphar - Hayat-Vax              3
RIBSP - QazVac                   3
Finlay - Soberana Plus           2
SRCVB - EpiVacCorona             2
Zydus - ZyCov-D                  1
IMB - Covidful                   1
Shenzhen - LV-SMENP-DC           1
Turkovac                         1
Shifa - COVIran Barakat          1
Name: VACCINE_NAME, dtype: int64

In [97]:
meta_vaccina['PRODUCT_NAME'].value_counts()

Comirnaty                         167
Vaxzevria                         129
Ad26.COV 2-S                      113
Spikevax                          102
BBIBP-CorV                         95
Covishield                         91
Gam-Covid-Vac                      61
Coronavac                          53
Covaxin                            12
Sputnik-Light                       9
AZD1222                             8
Convidecia                          7
NUVAXOVID                           6
mRNA-1273                           5
CIGB-66                             4
Soberana-02                         4
Inactivated SARS-CoV-2 vaccine      3
QazVac                              3
Zifivax                             3
Hayat-Vax                           3
Soberana Plus                       2
EpiVacCorona                        2
ZyCov-D                             1
Covidful                            1
LV-SMENP-DC                         1
COVIran Barakat                     1
Name: PRODUC

In [101]:
meta_vaccina['COMPANY_NAME'].value_counts()

Pfizer BioNTech                                      167
AstraZeneca                                          137
Janssen Pharmaceuticals                              113
Moderna                                              107
Beijing Bio-Institute Biological Products (CNBG)      95
Serum Institute of India                              91
Gamaleya Research Institute                           70
Sinovac                                               53
Bharat Biotech                                        12
CanSino Biologicals                                    7
Instituto Finlay de Vacunas                            6
Novavax                                                6
Center for Genetic Engineering and Biotechnology       4
Anhui Zhifei Longcom Biopharmaceutical                 3
Wuhan Institute of Biological Products (CNBG)          3
Research Institute for Biological Safety Problems      3
State Research Center of Virology & Biotechnology      2
Shenzhen GenoImmune Medical Ins

In [98]:
meta_vaccina.isnull().sum()

ISO3                    0
VACCINE_NAME            0
PRODUCT_NAME            1
COMPANY_NAME            5
AUTHORIZATION_DATE    497
START_DATE            263
END_DATE              887
COMMENT               887
DATA_SOURCE             0
dtype: int64

In [99]:
meta_vaccina.loc[(meta_vaccina['PRODUCT_NAME'].isnull() == True)]

Unnamed: 0,ISO3,VACCINE_NAME,PRODUCT_NAME,COMPANY_NAME,AUTHORIZATION_DATE,START_DATE,END_DATE,COMMENT,DATA_SOURCE
18,TUR,Turkovac,,,,,,,OWID


In [10]:
meta_vaccina.loc[(meta_vaccina['VACCINE_NAME'] == 'Beijing CNBG - BBIBP-CorV')]

Unnamed: 0,ISO3,VACCINE_NAME,PRODUCT_NAME,COMPANY_NAME,AUTHORIZATION_DATE,START_DATE,END_DATE,COMMENT,DATA_SOURCE
98,DZA,Beijing CNBG - BBIBP-CorV,BBIBP-CorV,Beijing Bio-Institute Biological Products (CNBG),2021-02-10,,,,REPORTING
99,TKM,Beijing CNBG - BBIBP-CorV,BBIBP-CorV,Beijing Bio-Institute Biological Products (CNBG),,2021-02-24,,,REPORTING
100,CHN,Beijing CNBG - BBIBP-CorV,BBIBP-CorV,Beijing Bio-Institute Biological Products (CNBG),2020-07-22,2020-07-22,,,REPORTING
101,AFG,Beijing CNBG - BBIBP-CorV,BBIBP-CorV,Beijing Bio-Institute Biological Products (CNBG),,,,,REPORTING
102,SLV,Beijing CNBG - BBIBP-CorV,BBIBP-CorV,Beijing Bio-Institute Biological Products (CNBG),2021-06-03,2021-06-22,,,REPORTING
...,...,...,...,...,...,...,...,...,...
188,KEN,Beijing CNBG - BBIBP-CorV,BBIBP-CorV,Beijing Bio-Institute Biological Products (CNBG),,,,,REPORTING
189,LSO,Beijing CNBG - BBIBP-CorV,BBIBP-CorV,Beijing Bio-Institute Biological Products (CNBG),,,,,REPORTING
190,MDG,Beijing CNBG - BBIBP-CorV,BBIBP-CorV,Beijing Bio-Institute Biological Products (CNBG),2021-08-05,,,,REPORTING
191,PRT,Beijing CNBG - BBIBP-CorV,BBIBP-CorV,Beijing Bio-Institute Biological Products (CNBG),,2020-12-23,,,REPORTING


In [102]:
vaccina.head()

Unnamed: 0,COUNTRY,ISO3,WHO_REGION,DATA_SOURCE,DATE_UPDATED,TOTAL_VACCINATIONS,PERSONS_VACCINATED_1PLUS_DOSE,TOTAL_VACCINATIONS_PER100,PERSONS_VACCINATED_1PLUS_DOSE_PER100,PERSONS_FULLY_VACCINATED,PERSONS_FULLY_VACCINATED_PER100,VACCINES_USED,FIRST_VACCINE_DATE,NUMBER_VACCINES_TYPES_USED,PERSONS_BOOSTER_ADD_DOSE,PERSONS_BOOSTER_ADD_DOSE_PER100
0,Afghanistan,AFG,EMRO,REPORTING,2022-04-04,5872684.0,5188057.0,15.086,13.327,4532577.0,11.643,"Beijing CNBG - BBIBP-CorV,Janssen - Ad26.COV 2...",2021-02-22,4.0,,
1,Albania,ALB,EURO,REPORTING,2022-04-03,2801184.0,1303431.0,97.3,45.799,1226522.0,43.097,"AstraZeneca - Vaxzevria,Gamaleya - Gam-Covid-V...",2021-01-13,5.0,271231.0,9.53
2,Algeria,DZA,AFRO,REPORTING,2022-03-09,13704895.0,7461932.0,31.253,17.017,6110712.0,13.935,"Beijing CNBG - BBIBP-CorV,Gamaleya - Gam-Covid...",2021-01-30,4.0,490676.0,1.119
3,American Samoa,ASM,WPRO,REPORTING,2022-03-17,97346.0,43637.0,176.361,79.057,39137.0,70.904,"Janssen - Ad26.COV 2-S,Moderna - Spikevax,Pfiz...",2020-12-21,3.0,15135.0,27.42
4,Andorra,AND,EURO,REPORTING,2022-03-27,152187.0,57835.0,197.0,75.922,53389.0,70.085,"AstraZeneca - Vaxzevria,Moderna - Spikevax,Pfi...",2021-01-20,3.0,40963.0,53.773


### Measures

#### Total Cases and Total Deaths

In [106]:
gb_data

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-03,AF,Afghanistan,EMRO,0,0,0,0
1,2020-01-04,AF,Afghanistan,EMRO,0,0,0,0
2,2020-01-05,AF,Afghanistan,EMRO,0,0,0,0
3,2020-01-06,AF,Afghanistan,EMRO,0,0,0,0
4,2020-01-07,AF,Afghanistan,EMRO,0,0,0,0
...,...,...,...,...,...,...,...,...
195994,2022-04-04,ZW,Zimbabwe,AFRO,44,246525,0,5446
195995,2022-04-05,ZW,Zimbabwe,AFRO,87,246612,5,5451
195996,2022-04-06,ZW,Zimbabwe,AFRO,132,246744,0,5451
195997,2022-04-07,ZW,Zimbabwe,AFRO,59,246803,2,5453


#### Vaccinated

In [108]:
vaccina.columns

Index(['COUNTRY', 'ISO3', 'WHO_REGION', 'DATA_SOURCE', 'DATE_UPDATED',
       'TOTAL_VACCINATIONS', 'PERSONS_VACCINATED_1PLUS_DOSE',
       'TOTAL_VACCINATIONS_PER100', 'PERSONS_VACCINATED_1PLUS_DOSE_PER100',
       'PERSONS_FULLY_VACCINATED', 'PERSONS_FULLY_VACCINATED_PER100',
       'VACCINES_USED', 'FIRST_VACCINE_DATE', 'NUMBER_VACCINES_TYPES_USED',
       'PERSONS_BOOSTER_ADD_DOSE', 'PERSONS_BOOSTER_ADD_DOSE_PER100'],
      dtype='object')

In [109]:
meta_vaccina.columns

Index(['ISO3', 'VACCINE_NAME', 'PRODUCT_NAME', 'COMPANY_NAME',
       'AUTHORIZATION_DATE', 'START_DATE', 'END_DATE', 'COMMENT',
       'DATA_SOURCE'],
      dtype='object')

In [107]:
vaccina

Unnamed: 0,COUNTRY,ISO3,WHO_REGION,DATA_SOURCE,DATE_UPDATED,TOTAL_VACCINATIONS,PERSONS_VACCINATED_1PLUS_DOSE,TOTAL_VACCINATIONS_PER100,PERSONS_VACCINATED_1PLUS_DOSE_PER100,PERSONS_FULLY_VACCINATED,PERSONS_FULLY_VACCINATED_PER100,VACCINES_USED,FIRST_VACCINE_DATE,NUMBER_VACCINES_TYPES_USED,PERSONS_BOOSTER_ADD_DOSE,PERSONS_BOOSTER_ADD_DOSE_PER100
0,Afghanistan,AFG,EMRO,REPORTING,2022-04-04,5872684.0,5188057.0,15.086,13.327,4532577.0,11.643,"Beijing CNBG - BBIBP-CorV,Janssen - Ad26.COV 2...",2021-02-22,4.0,,
1,Albania,ALB,EURO,REPORTING,2022-04-03,2801184.0,1303431.0,97.300,45.799,1226522.0,43.097,"AstraZeneca - Vaxzevria,Gamaleya - Gam-Covid-V...",2021-01-13,5.0,271231.0,9.530
2,Algeria,DZA,AFRO,REPORTING,2022-03-09,13704895.0,7461932.0,31.253,17.017,6110712.0,13.935,"Beijing CNBG - BBIBP-CorV,Gamaleya - Gam-Covid...",2021-01-30,4.0,490676.0,1.119
3,American Samoa,ASM,WPRO,REPORTING,2022-03-17,97346.0,43637.0,176.361,79.057,39137.0,70.904,"Janssen - Ad26.COV 2-S,Moderna - Spikevax,Pfiz...",2020-12-21,3.0,15135.0,27.420
4,Andorra,AND,EURO,REPORTING,2022-03-27,152187.0,57835.0,197.000,75.922,53389.0,70.085,"AstraZeneca - Vaxzevria,Moderna - Spikevax,Pfi...",2021-01-20,3.0,40963.0,53.773
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,Viet Nam,VNM,WPRO,REPORTING,2022-03-24,204566009.0,79974238.0,210.159,82.161,76300975.0,78.387,"AstraZeneca - Vaxzevria,Beijing CNBG - BBIBP-C...",2021-03-08,6.0,46791620.0,48.071
224,Wallis and Futuna,WLF,WPRO,REPORTING,2022-04-01,16287.0,6576.0,144.825,58.474,6608.0,58.759,Moderna - Spikevax,2021-03-19,1.0,3103.0,27.592
225,Yemen,YEM,EMRO,REPORTING,2022-03-16,807502.0,643501.0,2.707,2.158,404781.0,1.357,"Janssen - Ad26.COV 2-S,SII - Covishield,Sinova...",2021-04-20,3.0,,
226,Zambia,ZMB,AFRO,REPORTING,2022-03-29,3383947.0,2919895.0,18.407,15.883,2139138.0,11.636,"Beijing CNBG - BBIBP-CorV,Janssen - Ad26.COV 2...",2021-04-14,3.0,58881.0,0.320


In [3]:
meta_vaccina

Unnamed: 0,ISO3,VACCINE_NAME,PRODUCT_NAME,COMPANY_NAME,AUTHORIZATION_DATE,START_DATE,END_DATE,COMMENT,DATA_SOURCE
0,JEY,Moderna - mRNA-1273,mRNA-1273,Moderna,,,,,OWID
1,JEY,AstraZeneca - AZD1222,AZD1222,AstraZeneca,,,,,OWID
2,JEY,Pfizer BioNTech - Comirnaty,Comirnaty,Pfizer BioNTech,,,,,OWID
3,GGY,Moderna - mRNA-1273,mRNA-1273,Moderna,,,,,OWID
4,GGY,AstraZeneca - AZD1222,AZD1222,AstraZeneca,,,,,OWID
...,...,...,...,...,...,...,...,...,...
882,SYR,Gamaleya - Sputnik-Light,Sputnik-Light,Gamaleya Research Institute,,,,,REPORTING
883,PHL,Julphar - Hayat-Vax,Hayat-Vax,,2021-08-11,2021-08-25,,,REPORTING
884,SYC,Julphar - Hayat-Vax,Hayat-Vax,,,,,,REPORTING
885,PRY,Julphar - Hayat-Vax,Hayat-Vax,,2020-12-30,2021-05-24,,,REPORTING


#### Population

In [23]:
population_total = population[population['Indicator Name'] == 'Population, total']

In [24]:
population_total.shape

(266, 96)

In [25]:
# Wide format
population_total.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2042,2043,2044,2045,2046,2047,2048,2049,2050,Unnamed: 95
174,Africa Eastern and Southern,AFE,"Population, total",SP.POP.TOTL,130836765.0,134159786.0,137614644.0,141202036.0,144920186.0,148769974.0,...,1113727000.0,1135460000.0,1157274000.0,1179158000.0,1201097000.0,1223090000.0,1245124000.0,1267187000.0,1289283000.0,
361,Africa Western and Central,AFW,"Population, total",SP.POP.TOTL,96396419.0,98407221.0,100506960.0,102691339.0,104953470.0,107289875.0,...,773484000.0,789816000.0,806260000.0,822806000.0,839444000.0,856170000.0,872979000.0,889863000.0,906826000.0,
548,Arab World,ARB,"Population, total",SP.POP.TOTL,92197715.0,94724540.0,97334438.0,100034191.0,102832792.0,105736428.0,...,611550000.0,619148000.0,626700000.0,634197000.0,641626000.0,648994000.0,656288000.0,663508000.0,670643000.0,
735,Caribbean small states,CSS,"Population, total",SP.POP.TOTL,4194711.0,4274052.0,4353623.0,4432240.0,4508189.0,4580382.0,...,7851000.0,7845000.0,7840000.0,7833000.0,7823000.0,7808000.0,7798000.0,7782000.0,7765000.0,
922,Central Europe and the Baltics,CEB,"Population, total",SP.POP.TOTL,91401764.0,92232738.0,93009498.0,93840016.0,94715795.0,95440988.0,...,92757000.0,92270000.0,91793000.0,91324000.0,90859000.0,90395000.0,89927000.0,89462000.0,88976000.0,


In [26]:
population_total.isnull().sum()

Country Name        0
Country Code        0
Indicator Name      0
Indicator Code      0
1960                6
                 ... 
2047               10
2048               10
2049               10
2050               10
Unnamed: 95       266
Length: 96, dtype: int64

In [34]:
# Long format
population_usefull_data = population_total[['Country Name', 'Country Code', '2018', '2019', '2020', '2021', '2022']]
population_usefull_data = pd.melt(population_usefull_data,
        id_vars= ['Country Name', 'Country Code'],
        value_vars=['2018', '2019', '2020', '2021', '2022'],
        ignore_index=False,
        var_name='year',
        value_name='number_population' 
)
population_usefull_data.set_index('year')

Unnamed: 0_level_0,Country Name,Country Code,number_population
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018,Africa Eastern and Southern,AFE,643090131
2018,Africa Western and Central,AFW,435229381
2018,Arab World,ARB,419851989
2018,Caribbean small states,CSS,7358929
2018,Central Europe and the Baltics,CEB,102538451
...,...,...,...
2022,Virgin Islands (U.S.),VIR,106000
2022,West Bank and Gaza,PSE,5034000
2022,"Yemen, Rep.",YEM,31155000
2022,Zambia,ZMB,19470000


In [28]:
dates_needed = pd.DataFrame({'Date':pd.date_range(start='1/1/2018', end='31/12/2022')})
dates_needed['year'] = pd.DatetimeIndex(dates_needed['Date']).year
dates_needed.set_index('year')

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,Date
year,Unnamed: 1_level_1
2018,2018-01-01
2018,2018-01-02
2018,2018-01-03
2018,2018-01-04
2018,2018-01-05
...,...
2022,2022-12-27
2022,2022-12-28
2022,2022-12-29
2022,2022-12-30
