In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Compare total vaccinations with cumulative excess deaths per country, using
# each country's most recent 2022 value, and draw a scatter plot with a
# fitted regression line.


def _last_valid_per_group(frame, key, date_col):
    """Return one row per ``key`` with each column's latest non-null value.

    Rows are ordered by ``date_col`` first so that "last" means "most recent"
    regardless of the order of rows in the input. ``GroupBy.last`` skips NaN
    on a per-column basis, which yields the same values as forward-filling a
    group and taking its final row. ``as_index=False`` keeps ``key`` as an
    ordinary column, so the result can be merged on it without depending on
    ``groupby.apply`` passing the grouping column through.

    Args:
        frame: DataFrame containing ``key`` and ``date_col`` columns.
        key: Name of the column identifying the group (e.g. the country).
        date_col: Name of the datetime column defining chronological order.

    Returns:
        DataFrame with one row per distinct non-null ``key`` value.
    """
    ordered = frame.sort_values(date_col, kind="stable")
    return ordered.groupby(key, as_index=False).last()


# Load the CSV files
df_owid = pd.read_csv('owid-covid-data.csv')
df_excess_deaths = pd.read_csv('cumulative-excess-deaths-covid.csv')

# Convert 'date' and 'Day' columns to datetime
df_owid['date'] = pd.to_datetime(df_owid['date'])
df_excess_deaths['Day'] = pd.to_datetime(df_excess_deaths['Day'])

# Filter data for the year 2022
df_owid_2022 = df_owid[df_owid['date'].dt.year == 2022]
df_excess_deaths_2022 = df_excess_deaths[df_excess_deaths['Day'].dt.year == 2022]

# Get the last available non-NaN value of every column for each country in 2022
df_owid_last_2022 = _last_valid_per_group(df_owid_2022, 'location', 'date')
df_excess_deaths_last_2022 = _last_valid_per_group(df_excess_deaths_2022, 'Entity', 'Day')

# Merge the datasets based on the country; the inner join keeps only the
# countries that appear in both files.
# NOTE(review): presumably both files may also contain aggregate rows (e.g.
# world or continent totals) that would dominate the fit - confirm and filter
# them out if so.
df_merged = pd.merge(
    df_owid_last_2022,
    df_excess_deaths_last_2022,
    left_on='location',
    right_on='Entity',
    how='inner',
)

# Plot only the rows where both variables are present, so the sample used for
# the regression is explicit rather than dropped implicitly by the plot call.
df_plot = df_merged.dropna(subset=['total_vaccinations', 'cum_excess_proj_all_ages'])

# Create the scatter plot with regression line
plt.figure(figsize=(10, 6))
sns.regplot(x='total_vaccinations', y='cum_excess_proj_all_ages', data=df_plot, scatter_kws={'alpha': 0.7})
plt.title('Total Vaccinations vs. Cumulative Excess Deaths in 2022')
plt.xlabel('Total Vaccinations')
plt.ylabel('Cumulative Excess Deaths')
plt.grid(True)
plt.show()
