In [1]:
import pandas as pd
import plotly.express as px

# Load the CSV file
df = pd.read_csv('docs/owid-covid-data.csv')

# Convert the date column to datetime
df['date'] = pd.to_datetime(df['date'])

# Filter the data for the year 2021
df_2021 = df[df['date'].dt.year == 2021].copy()  # Ensure a copy to avoid chained indexing warnings

# Function to fill NaN values with the last available non-NaN value per country
def fill_last_available(df, col):
    return df[col].groupby(df['location']).ffill()

# Fill NaN values for the relevant columns
df_2021['excess_mortality_cumulative_per_million'] = fill_last_available(df_2021, 'excess_mortality_cumulative_per_million')
df_2021['total_vaccinations_per_hundred'] = fill_last_available(df_2021, 'total_vaccinations_per_hundred')

# Extract the last available data for each country
df_last_2021 = df_2021.groupby('location').last().reset_index()

# Plotting with Plotly
fig = px.scatter(df_last_2021, x='total_vaccinations_per_hundred', y='excess_mortality_cumulative_per_million', 
                 trendline='ols', trendline_color_override='darkblue',
                 title='Excess mortality per million inhabitants vs. Total vaccinations per hundred inhabitants, per country for 2021',
                 labels={'total_vaccinations_per_hundred': 'Total vaccinations per hundred inhabitants',
                         'excess_mortality_cumulative_per_million': 'Excess mortality cumulative per million inhabitants'},
                 hover_name='location', opacity=0.7,
                 color_discrete_sequence=['cornflowerblue'])

# Set x-axis range from 0 to around 330
fig.update_layout(xaxis=dict(range=[20, 340]))

fig.update_traces(
    line=dict(width=2, color='darkblue')
)

# Adjust the figure dimensions
fig.update_layout(width=1000,
                  height=600)

fig.show()

ModuleNotFoundError: No module named 'statsmodels'