In [14]:
import pandas as pd

# Workbook with the vaccination statistics; the path is relative to the notebook.
vaccine_file = "Folkhalsomyndigheten_Covid19_Vaccine.xlsx"

# Load the sheet "Vaccinerade kommun och ålder" into a dataframe.
veckodata_vaccin_df = pd.read_excel(
    vaccine_file,
    sheet_name = "Vaccinerade kommun och ålder",
)

In [15]:
# a)
# The number of distinct values in "Län_namn" is the number of län represented
# in the data set. nunique() counts distinct non-missing values, so a blank
# cell would not be counted as a län.
counties = veckodata_vaccin_df["Län_namn"].nunique()

print(f"Det finns {counties} län representerade i datasetet.")

Det finns 21 län representerade i datasetet.


In [16]:
# b)
# The number of distinct values in "Kommun_namn" is the number of kommuner
# represented in the data set. nunique() counts distinct non-missing values,
# so a blank cell would not be counted as a kommun.
municipalities = veckodata_vaccin_df["Kommun_namn"].nunique()

print(f"Det finns {municipalities} kommuner representerade i datasetet.")

Det finns 290 kommuner representerade i datasetet.


In [26]:
# c)
# Adding up every value in the column "Befolkning" gives the total number of
# people covered by the dataset.
population = veckodata_vaccin_df.loc[:, "Befolkning"].sum()

print(f"Det finns {population} individer representerade i datasetet.")

Det finns 8541497 individer representerade i datasetet.


In [27]:
# d)
# Everyone in Sweden who is not covered by the dataset is reported as being
# under 16 years old.
# NOTE(review): the national total is hard-coded; its source and reference
# date are not shown here - confirm before reusing the number.
swedish_population = 10427296
population_under_16 = swedish_population - population

print(f"Det finns {population_under_16} personer under 16 år i Sverige.")

Det finns 1885799 personer under 16 år i Sverige.


In [28]:
# e) - f)

# Sum "Antal minst 1 dos", "Antal färdigvaccinerade" and "Befolkning" per
# "Län_namn" into a new dataframe, with "Län_namn" kept as a regular column.
count_columns = ["Antal minst 1 dos", "Antal färdigvaccinerade", "Befolkning"]
counties_sum = (
    veckodata_vaccin_df
    .groupby("Län_namn")[count_columns]
    .sum()
    .reset_index()
)

# Each "Andel" column is the matching "Antal" column divided by "Befolkning".
share_of = {
    "Andel minst 1 dos": "Antal minst 1 dos",
    "Andel färdigvaccinerade": "Antal färdigvaccinerade",
}
for share_column, count_column in share_of.items():
    counties_sum[share_column] = counties_sum[count_column] / counties_sum["Befolkning"]

In [29]:
import plotly_express as px

def px_bar_plotter(df, plotdicts, output_dir = "./Visualiseringar"):
    """Plot grouped bar charts per län and save each one as an HTML file.

    One figure is created per entry in ``plotdicts``, with "Län_namn" on the
    x-axis, and written to
    ``<output_dir>/<y_label in lower case>_vaccinerade.html``.
    The output directory is created if it does not exist yet.

    Args:
        df: Dataframe holding a "Län_namn" column plus every column named in
            the plot dicts.
        plotdicts: List of dicts, one per figure. Required keys and values:
            "title"   - Figure title
            "y"       - List of columns on the y-axis
            "y_label" - Y-axis label; also used to build the file name
        output_dir: Directory the HTML files are written to.

    Note:
        The file name is derived from "y_label" only, so two plot dicts that
        share a label write to the same file and the later one wins.
    """
    import os  # local import so the function does not rely on another cell

    # write_html does not create missing directories; make sure the target exists.
    os.makedirs(output_dir, exist_ok = True)

    for plot in plotdicts:
        fig = px.bar(
            df,
            x = "Län_namn",
            y = plot["y"],
            barmode = "group",
            title = plot["title"],
            labels = {"variable": "Variabel"}
        )

        fig.update_xaxes(title_text = "Län")
        fig.update_yaxes(title_text = plot["y_label"])

        # If the y-axis represents an "Andel", show hover values and tick
        # labels as percentages instead of absolute numbers.
        if plot["y_label"] == "Andel":
            fig.update_traces(hovertemplate = "%{y:.2%}")
            fig.update_layout(yaxis = {"tickformat": ",.0%"})
        else:
            fig.update_traces(hovertemplate = "%{y:.0f}")

        file_name = f"{plot['y_label'].lower()}_vaccinerade.html"
        fig.write_html(os.path.join(output_dir, file_name))

# One dict per figure: the share of vaccinated people and the absolute counts,
# both per län.
share_plot = dict(
    title = "Andel vaccinerade, per län",
    y = ["Andel minst 1 dos", "Andel färdigvaccinerade"],
    y_label = "Andel",
)
count_plot = dict(
    title = "Vaccination och befolkning över 16 år, per län",
    y = ["Antal minst 1 dos", "Antal färdigvaccinerade", "Befolkning"],
    y_label = "Antal",
)
to_plot = [share_plot, count_plot]

# Render both figures from the per-län sums and save them as HTML files.
px_bar_plotter(counties_sum, to_plot)