### TASKS 4&5

In [1]:
import pandas as pd

# Read the raw per-country vaccination records into the working frame `df`.
raw_csv_path = "country_vaccination_stats.csv"
df = pd.read_csv(raw_csv_path)

In [3]:
# Tasks 4 & 5: impute missing `daily_vaccinations` values.
#   * A missing value is replaced by the smallest observed value of the same country.
#   * A country with no observed value at all gets 0.
# The result stays in `df` and is also written to "task4_filled_dataset.csv".

# Parse dates so rows sort chronologically rather than as plain strings.
df['date'] = pd.to_datetime(df['date'])

# Order rows by country, then date, and renumber the index 0..n-1.
df = df.sort_values(by=['country', 'date']).reset_index(drop=True)

# Per-row minimum of the row's country. `transform('min')` returns a Series
# aligned to `df`'s index, so no helper column or merge is needed and no row
# is lost (an inner merge on `country` would drop rows whose country is missing).
# NaNs are ignored by `min`; a country with only NaNs yields NaN here.
country_min = df.groupby('country')['daily_vaccinations'].transform('min')

df['daily_vaccinations'] = (
    df['daily_vaccinations']
    .fillna(country_min)  # country-level minimum where one exists
    .fillna(0)            # countries without any valid number yet
    .astype(int)          # safe to cast only once no NaN remains
)

# Save the modified dataframe (index omitted so it is not stored as a column).
df.to_csv("task4_filled_dataset.csv", index=False)

df

Unnamed: 0,country,date,daily_vaccinations,vaccines
0,Argentina,2020-12-29,6483,Sputnik V
1,Argentina,2020-12-30,15656,Sputnik V
2,Argentina,2020-12-31,15656,Sputnik V
3,Argentina,2021-01-01,11070,Sputnik V
4,Argentina,2021-01-02,8776,Sputnik V
...,...,...,...,...
1497,Wales,2021-01-20,11105,"Oxford/AstraZeneca, Pfizer/BioNTech"
1498,Wales,2021-01-21,12318,"Oxford/AstraZeneca, Pfizer/BioNTech"
1499,Wales,2021-01-22,15148,"Oxford/AstraZeneca, Pfizer/BioNTech"
1500,Wales,2021-01-23,17371,"Oxford/AstraZeneca, Pfizer/BioNTech"


### TASK 6

In [4]:
# Task 6: rank countries by the median of their daily vaccination numbers.

# One row per country holding the median of its `daily_vaccinations` values.
median_daily_vaccinations = (
    df.groupby('country')['daily_vaccinations']
    .median()
    .reset_index()
)

# Highest medians first; keep the first three rows and renumber them from 0.
top_3_countries = (
    median_daily_vaccinations
    .sort_values(by='daily_vaccinations', ascending=False)
    .head(3)
    .reset_index(drop=True)
)

# Report the result as plain text.
print("Top 3 countries with highest median daily vaccination numbers:")
print(top_3_countries)

Top 3 countries with highest median daily vaccination numbers:
         country  daily_vaccinations
0  United States            378253.0
1          China            276786.0
2          India            173922.0


### TASK 7

In [6]:
# Task 7: total daily vaccinations across all rows dated 2021-01-06.

# Start from the imputed dataset saved to disk, with dates parsed to datetimes.
df = pd.read_csv("task4_filled_dataset.csv")
df['date'] = pd.to_datetime(df['date'])

# Boolean mask selecting the rows for January 6, 2021.
on_target_date = df['date'] == '2021-01-06'
vaccinations_on_date = df.loc[on_target_date]

# Sum the daily figures of the selected rows.
total_vaccinations_on_date = vaccinations_on_date['daily_vaccinations'].sum()

# Report the total.
print("Total number of vaccinations done on 2021-01-06:", total_vaccinations_on_date)

Total number of vaccinations done on 2021-01-06: 1485255
