# Pandas Demo: Vaccination data analysis

In [1]:
import pandas as pd
import numpy as np

### Read CSV file using Pandas, output is a data frame

In [2]:
df = pd.read_csv('./country_vaccinations_by_manufacturer.csv')

In [3]:
df

Unnamed: 0,location,date,vaccine,total_vaccinations
0,Austria,2021-01-08,Johnson&Johnson,0
1,Austria,2021-01-08,Moderna,0
2,Austria,2021-01-08,Oxford/AstraZeneca,0
3,Austria,2021-01-08,Pfizer/BioNTech,30874
4,Austria,2021-01-15,Johnson&Johnson,0
...,...,...,...,...
7418,Uruguay,2021-06-18,Pfizer/BioNTech,837620
7419,Uruguay,2021-06-18,Sinovac,2628437
7420,Uruguay,2021-06-19,Oxford/AstraZeneca,43098
7421,Uruguay,2021-06-19,Pfizer/BioNTech,837620


In [4]:
df.head(20)

Unnamed: 0,location,date,vaccine,total_vaccinations
0,Austria,2021-01-08,Johnson&Johnson,0
1,Austria,2021-01-08,Moderna,0
2,Austria,2021-01-08,Oxford/AstraZeneca,0
3,Austria,2021-01-08,Pfizer/BioNTech,30874
4,Austria,2021-01-15,Johnson&Johnson,0
5,Austria,2021-01-15,Moderna,88
6,Austria,2021-01-15,Oxford/AstraZeneca,0
7,Austria,2021-01-15,Pfizer/BioNTech,115106
8,Austria,2021-01-22,Johnson&Johnson,0
9,Austria,2021-01-22,Moderna,298


In [5]:
df.tail(10)

Unnamed: 0,location,date,vaccine,total_vaccinations
7413,Uruguay,2021-06-16,Sinovac,2563994
7414,Uruguay,2021-06-17,Oxford/AstraZeneca,43091
7415,Uruguay,2021-06-17,Pfizer/BioNTech,822534
7416,Uruguay,2021-06-17,Sinovac,2596584
7417,Uruguay,2021-06-18,Oxford/AstraZeneca,43098
7418,Uruguay,2021-06-18,Pfizer/BioNTech,837620
7419,Uruguay,2021-06-18,Sinovac,2628437
7420,Uruguay,2021-06-19,Oxford/AstraZeneca,43098
7421,Uruguay,2021-06-19,Pfizer/BioNTech,837620
7422,Uruguay,2021-06-19,Sinovac,2628437


In [6]:
df.iloc[4000]

location                   Italy
date                  2021-05-08
vaccine                  Moderna
total_vaccinations       1896594
Name: 4000, dtype: object

In [7]:
df.iloc[4000:4010]

Unnamed: 0,location,date,vaccine,total_vaccinations
4000,Italy,2021-05-08,Moderna,1896594
4001,Italy,2021-05-08,Oxford/AstraZeneca,5170680
4002,Italy,2021-05-08,Pfizer/BioNTech,16627258
4003,Italy,2021-05-09,Johnson&Johnson,184160
4004,Italy,2021-05-09,Moderna,1929371
4005,Italy,2021-05-09,Oxford/AstraZeneca,5261630
4006,Italy,2021-05-09,Pfizer/BioNTech,16877333
4007,Italy,2021-05-10,Johnson&Johnson,192241
4008,Italy,2021-05-10,Moderna,1971352
4009,Italy,2021-05-10,Oxford/AstraZeneca,5370059


In [8]:
df.iloc[4000:4010].iloc[0]

location                   Italy
date                  2021-05-08
vaccine                  Moderna
total_vaccinations       1896594
Name: 4000, dtype: object

In [9]:
type(df.iloc[4000:4010])

pandas.core.frame.DataFrame

### Get specific columns in Data Frame

In [10]:
all_vac = df['total_vaccinations']
all_vac.iloc[4000:4010]

4000     1896594
4001     5170680
4002    16627258
4003      184160
4004     1929371
4005     5261630
4006    16877333
4007      192241
4008     1971352
4009     5370059
Name: total_vaccinations, dtype: int64

In [11]:
some_data = df.iloc[4000:4010]['total_vaccinations']

In [12]:
some_data.mean()

5548067.8

In [13]:
some_data.std()

6218559.780673832

In [14]:
some_data.median()

3571016.0

In [15]:
df.iloc[-1]

location                 Uruguay
date                  2021-06-19
vaccine                  Sinovac
total_vaccinations       2628437
Name: 7422, dtype: object

In [16]:
italy_data = df[ df['location']=='Italy' ]

In [17]:
italy_data

Unnamed: 0,location,date,vaccine,total_vaccinations
3633,Italy,2020-12-27,Pfizer/BioNTech,7220
3634,Italy,2020-12-28,Pfizer/BioNTech,8649
3635,Italy,2020-12-29,Pfizer/BioNTech,9665
3636,Italy,2020-12-30,Pfizer/BioNTech,14397
3637,Italy,2020-12-31,Pfizer/BioNTech,39901
...,...,...,...,...
4162,Italy,2021-06-17,Pfizer/BioNTech,30937826
4163,Italy,2021-06-18,Johnson&Johnson,1177398
4164,Italy,2021-06-18,Moderna,4232971
4165,Italy,2021-06-18,Oxford/AstraZeneca,8204808


In [18]:
italy_data.describe()

Unnamed: 0,total_vaccinations
count,534.0
mean,4455262.0
std,6570672.0
min,1.0
25%,309883.2
50%,1809838.0
75%,5525791.0
max,31407080.0


In [19]:
italy_data.count()

location              534
date                  534
vaccine               534
total_vaccinations    534
dtype: int64

In [20]:
italy_data.median()

total_vaccinations    1809838.5
dtype: float64

### How many vaccinations in Italy up to date?

In [21]:
df[df['date']=='2021-06-18'] # All countries vaccinations (all types) in 18/6/2021

Unnamed: 0,location,date,vaccine,total_vaccinations
92,Austria,2021-06-18,Johnson&Johnson,87288
93,Austria,2021-06-18,Moderna,661323
94,Austria,2021-06-18,Oxford/AstraZeneca,1171324
95,Austria,2021-06-18,Pfizer/BioNTech,4740794
168,Belgium,2021-06-18,Johnson&Johnson,184774
...,...,...,...,...
7082,United States,2021-06-18,Moderna,130656013
7083,United States,2021-06-18,Pfizer/BioNTech,173280164
7417,Uruguay,2021-06-18,Oxford/AstraZeneca,43098
7418,Uruguay,2021-06-18,Pfizer/BioNTech,837620


In [22]:
italy_data[italy_data['date']=='2021-06-18'] # Italy vaccinations (all types) in 18/6/2021

Unnamed: 0,location,date,vaccine,total_vaccinations
4163,Italy,2021-06-18,Johnson&Johnson,1177398
4164,Italy,2021-06-18,Moderna,4232971
4165,Italy,2021-06-18,Oxford/AstraZeneca,8204808
4166,Italy,2021-06-18,Pfizer/BioNTech,31407077


### How many Pfizer vaccinations in Italy up to date?

In [23]:
italy_data[ (italy_data['date']=='2021-06-18') & (italy_data['vaccine']=='Pfizer/BioNTech')].total_vaccinations

4166    31407077
Name: total_vaccinations, dtype: int64

In [24]:
italy_data[italy_data['vaccine']=='Pfizer/BioNTech'].max()

location                        Italy
date                       2021-06-18
vaccine               Pfizer/BioNTech
total_vaccinations           31407077
dtype: object

In [25]:
italy_data[italy_data['vaccine']=='Pfizer/BioNTech'].max().total_vaccinations

31407077

In [26]:
italy_data[italy_data['vaccine']=='Pfizer/BioNTech'].max().date

'2021-06-18'

### When and how many vaccinations for AstraZeneca data recently collected in Chile?

In [27]:
chile_data = df[df['location'] == 'Chile']

In [28]:
chile_data.describe()

Unnamed: 0,total_vaccinations
count,373.0
mean,3969455.0
std,5121632.0
min,72.0
25%,118460.0
50%,1642198.0
75%,6135904.0
max,16514870.0


In [29]:
chile_data.count()

location              373
date                  373
vaccine               373
total_vaccinations    373
dtype: int64

In [30]:
chile_data_astrazeneca = chile_data[chile_data['vaccine']=='Oxford/AstraZeneca']

In [31]:
chile_data_astrazeneca.count()

location              49
date                  49
vaccine               49
total_vaccinations    49
dtype: int64

In [32]:
# When?
chile_data_astrazeneca.max().date

'2021-06-15'

In [33]:
#How many vaccinations?
chile_data_astrazeneca.max().total_vaccinations

373452

## Grouping

### What is the total vaccinations for all types in each country?

In [34]:
data_by_country = df.groupby('location')

In [35]:
data_by_country.count()

Unnamed: 0_level_0,date,vaccine,total_vaccinations
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Austria,96,96,96
Belgium,76,76,76
Bulgaria,79,79,79
Chile,373,373,373
Croatia,87,87,87
Cyprus,72,72,72
Czechia,517,517,517
Denmark,67,67,67
Estonia,77,77,77
Finland,99,99,99


In [36]:
data_by_country.describe()

Unnamed: 0_level_0,total_vaccinations,total_vaccinations,total_vaccinations,total_vaccinations,total_vaccinations,total_vaccinations,total_vaccinations,total_vaccinations
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
location,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Austria,96.0,589491.7,998949.3,0.0,300.25,167379.0,666670.0,4740794.0
Belgium,76.0,953573.9,1412168.0,0.0,70642.0,425840.5,1080058.0,6257829.0
Bulgaria,79.0,184200.4,222508.8,0.0,14146.5,108865.0,244545.0,898271.0
Chile,373.0,3969455.0,5121632.0,72.0,118460.0,1642198.0,6135904.0,16514870.0
Croatia,87.0,210265.2,317793.7,3.0,8771.5,103010.0,242329.5,1454685.0
Cyprus,72.0,80092.35,109369.1,0.0,6091.25,42231.5,97183.75,464507.0
Czechia,517.0,716932.2,1183262.0,1.0,49950.0,299043.0,589592.0,5763203.0
Denmark,67.0,501980.3,827481.4,0.0,67977.5,151347.0,407641.5,3625181.0
Estonia,77.0,78433.69,118480.2,0.0,2611.0,25789.0,119801.0,551119.0
Finland,99.0,332894.4,623983.6,0.0,12.5,68667.0,344072.0,2945167.0


In [37]:
data_by_country_and_vaccine = df.groupby(['location','vaccine'])

In [38]:
data_by_country_and_vaccine.describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,total_vaccinations,total_vaccinations,total_vaccinations,total_vaccinations,total_vaccinations,total_vaccinations,total_vaccinations,total_vaccinations
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
location,vaccine,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Austria,Johnson&Johnson,24.0,1.269842e+04,2.576316e+04,0.0,0.00,25.0,8.102000e+03,87288.0
Austria,Moderna,24.0,2.080116e+05,2.238080e+05,0.0,12978.25,121770.0,3.581978e+05,661323.0
Austria,Oxford/AstraZeneca,24.0,4.546528e+05,4.083775e+05,0.0,35264.00,399932.5,7.716372e+05,1171324.0
Austria,Pfizer/BioNTech,24.0,1.682604e+06,1.462671e+06,30874.0,501622.25,1195979.0,2.657775e+06,4740794.0
Belgium,Johnson&Johnson,9.0,6.633400e+04,6.375930e+04,0.0,23320.00,48666.0,9.556500e+04,184774.0
...,...,...,...,...,...,...,...,...,...
United States,Moderna,152.0,7.182282e+07,4.172076e+07,3835859.0,32037939.00,73855479.0,1.126445e+08,130656013.0
United States,Pfizer/BioNTech,152.0,8.637920e+07,5.482301e+07,5488697.0,33957312.75,81915893.0,1.400692e+08,173280164.0
Uruguay,Oxford/AstraZeneca,113.0,2.345065e+04,2.077058e+04,0.0,33.00,38382.0,4.285500e+04,43098.0
Uruguay,Pfizer/BioNTech,113.0,3.556079e+05,2.663575e+05,0.0,107699.00,361041.0,5.728950e+05,837620.0


In [39]:
data_by_country_and_vaccine.agg({'total_vaccinations':'max'})

Unnamed: 0_level_0,Unnamed: 1_level_0,total_vaccinations
location,vaccine,Unnamed: 2_level_1
Austria,Johnson&Johnson,87288
Austria,Moderna,661323
Austria,Oxford/AstraZeneca,1171324
Austria,Pfizer/BioNTech,4740794
Belgium,Johnson&Johnson,184774
...,...,...
United States,Moderna,130656013
United States,Pfizer/BioNTech,173280164
Uruguay,Oxford/AstraZeneca,43098
Uruguay,Pfizer/BioNTech,837620


In [40]:
?pd

[0;31mType:[0m        module
[0;31mString form:[0m <module 'pandas' from '/Users/ahmadalawad/Desktop/all-cf-material/401/python/amman-python-401d4/class-12/demo/vaccination/.venv/lib/python3.8/site-packages/pandas/__init__.py'>
[0;31mFile:[0m        ~/Desktop/all-cf-material/401/python/amman-python-401d4/class-12/demo/vaccination/.venv/lib/python3.8/site-packages/pandas/__init__.py
[0;31mDocstring:[0m  
pandas - a powerful data analysis and manipulation library for Python

**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with "relational" or "labeled" data both
easy and intuitive. It aims to be the fundamental high-level building block for
doing practical, **real world** data analysis in Python. Additionally, it has
the broader goal of becoming **the most powerful and flexible open source data
analysis / manipulation tool available in any language**. It is already well on
its way toward this goal.

Main Features
--

## Export data to files

In [41]:
exported_data = data_by_country_and_vaccine.agg({'total_vaccinations':'max'})

In [42]:
data = exported_data.to_json()

In [43]:
import json
parsed_data = json.loads(data)

In [44]:
json_dumped = json.dumps(parsed_data)

In [45]:
with open('data_by_country_and_vaccine.json', 'w') as file:
    file.write(json_dumped)