In [21]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Read the dataset from docs/owid-covid-data.csv
df = pd.read_csv('docs/owid-covid-data.csv')

# Parse the 'date' strings into datetimes so the .dt accessor can be used below
df['date'] = pd.to_datetime(df['date'])

# Keep an independent copy of the rows whose date falls in 2022, so later
# column assignments do not touch (or warn about) the full frame
df_2022 = df.loc[df['date'].dt.year.eq(2022)].copy()

# Function to fill NaN values with the last available non-NaN value per country
def fill_last_available(df, col, group_col='location'):
    """Forward-fill missing values of ``col`` separately within each group.

    Each NaN in ``col`` is replaced by the closest preceding non-NaN value
    that belongs to the same ``group_col`` value. "Preceding" means earlier
    in the frame's current row order; the function does not sort, so the
    caller is responsible for ordering rows (e.g. by date) beforehand.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to fill. It is modified in place.
    col : str
        Name of the column whose gaps are filled.
    group_col : str, default 'location'
        Name of the column that defines the groups.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned so calls can be chained/reassigned.
    """
    filled = df.groupby(group_col)[col].ffill()
    # Rows whose group key is missing are excluded by groupby and come back as
    # NaN; fall back to the original value there so existing data is not wiped.
    df.loc[:, col] = filled.where(filled.notna(), df[col])
    return df

# Forward-fill the two plotted metrics within each country so every row
# carries the most recent value reported so far in 2022
df_2022 = fill_last_available(df_2022, 'excess_mortality_cumulative_per_million')
df_2022 = fill_last_available(df_2022, 'people_fully_vaccinated_per_hundred')

# Collapse to one row per country; groupby().last() takes, column by column,
# the last non-null 2022 value for that country
df_last_2022 = df_2022.groupby('location').last().reset_index()

# Titles, axis labels and x-ranges shared by the px figures, the initial layout
# and the toggle buttons (kept in one place so they cannot drift apart)
VACCINATED_LABEL = 'People fully vaccinated per hundred inhabitants'
MORTALITY_LABEL = 'Excess mortality cumulative per million inhabitants'
GDP_LABEL = 'GDP per capita'
MORTALITY_TITLE = ('Excess mortality per million inhabitants vs. People fully vaccinated '
                   'per hundred inhabitants, per country for 2022')
GDP_TITLE = 'GDP per capita vs. People fully vaccinated per hundred inhabitants, per country for 2022'
# Fixed (hard-coded) x-axis windows for the two views.
# NOTE(review): the mortality view starts at 18, so any country below 18 fully
# vaccinated per hundred is off-screen while still being part of the data
# handed to the OLS trendline -- confirm this is intended.
MORTALITY_X_RANGE = [18, 110]
GDP_X_RANGE = [0, 110]

# Scatter plot: excess deaths vs people fully vaccinated
fig1 = px.scatter(df_last_2022, x='people_fully_vaccinated_per_hundred', y='excess_mortality_cumulative_per_million',
                  trendline='ols', trendline_color_override='darkblue',
                  title=MORTALITY_TITLE,
                  labels={'people_fully_vaccinated_per_hundred': VACCINATED_LABEL,
                          'excess_mortality_cumulative_per_million': MORTALITY_LABEL},
                  hover_name='location', opacity=0.7)

# Per-country frame for the GDP plot. groupby().last() already skips NaN, so
# re-filtering and re-grouping the raw frame would yield the same values as
# df_last_2022; reuse it (as a copy) instead of repeating that work.
df_gdp = df_last_2022.copy()

# Scatter plot: GDP vs people fully vaccinated
fig2 = px.scatter(df_gdp, x='people_fully_vaccinated_per_hundred', y='gdp_per_capita',
                  trendline='ols', trendline_color_override='darkblue',
                  title=GDP_TITLE,
                  labels={'people_fully_vaccinated_per_hundred': VACCINATED_LABEL,
                          'gdp_per_capita': GDP_LABEL},
                  hover_name='location', opacity=0.7)

# Combine both plots in a single figure: mortality traces first, then GDP traces
fig = go.Figure()

for trace in fig1.data:
    fig.add_trace(trace)

for trace in fig2.data:
    fig.add_trace(trace)

n_mortality_traces = len(fig1.data)
n_gdp_traces = len(fig2.data)

# Per-trace visibility masks applied by the two buttons
show_mortality = [True] * n_mortality_traces + [False] * n_gdp_traces
show_gdp = [False] * n_mortality_traces + [True] * n_gdp_traces

# Buttons toggle between the two plots and swap the title, axis titles and
# x-axis range. Titles are given as {"text": ...} objects: button args are
# forwarded to plotly.js as-is (no Python-side coercion), and plotly.js v3
# (shipped with plotly.py 6) no longer accepts a bare string for `title`.
fig.update_layout(
    title={"text": MORTALITY_TITLE},  # initial title
    updatemenus=[
        dict(
            type="buttons",
            direction="left",
            buttons=[
                dict(
                    args=[{"visible": show_mortality},
                          {"title": {"text": MORTALITY_TITLE},
                           "xaxis": {"title": {"text": VACCINATED_LABEL}, "range": list(MORTALITY_X_RANGE)},
                           "yaxis": {"title": {"text": MORTALITY_LABEL}}}],
                    label="Excess Deaths vs Vaccinations",
                    method="update"
                ),
                dict(
                    args=[{"visible": show_gdp},
                          {"title": {"text": GDP_TITLE},
                           "xaxis": {"title": {"text": VACCINATED_LABEL}, "range": list(GDP_X_RANGE)},
                           "yaxis": {"title": {"text": GDP_LABEL}}}],
                    label="GDP vs Vaccinations",
                    method="update"
                )
            ],
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.57,
            xanchor="center",
            y=1.15,
            yanchor="top"
        ),
    ],
    # Initial axis titles and range match the first (mortality) button
    xaxis={"title": {"text": VACCINATED_LABEL}, "range": list(MORTALITY_X_RANGE)},
    yaxis={"title": {"text": MORTALITY_LABEL}}
)

# Start with only the mortality plot showing; the GDP traces stay hidden
# until their button is clicked
for trace in fig.data[n_mortality_traces:]:
    trace.visible = False

fig.show()

In [22]:
# Give the three analysed columns presentation-friendly names
df_renamed = df_last_2022.rename(columns={
    'people_fully_vaccinated_per_hundred': 'People fully vaccinated per hundred',
    'excess_mortality_cumulative_per_million': 'Excess mortality cumulative per million',
    'gdp_per_capita': 'GDP per capita',
})

# Correlation matrix: vaccination coverage vs. cumulative excess mortality
mortality_pair = ['People fully vaccinated per hundred', 'Excess mortality cumulative per million']
corr1 = df_renamed[mortality_pair].corr()

# Correlation matrix: vaccination coverage vs. GDP per capita
gdp_pair = ['People fully vaccinated per hundred', 'GDP per capita']
corr2 = df_renamed[gdp_pair].corr()

# Render both matrices, in order, as captioned tables
display(
    corr1.style.set_caption('Correlation between People fully vaccinated per hundred and Excess mortality cumulative per million'),
    corr2.style.set_caption('Correlation between People fully vaccinated per hundred and GDP per capita'),
)

Unnamed: 0,People fully vaccinated per hundred,Excess mortality cumulative per million
People fully vaccinated per hundred,1.0,-0.505233
Excess mortality cumulative per million,-0.505233,1.0


Unnamed: 0,People fully vaccinated per hundred,GDP per capita
People fully vaccinated per hundred,1.0,0.612648
GDP per capita,0.612648,1.0
