In [1]:
import pandas as pd
import plotly_express as px

file_path = "Data/Folkhalsomyndigheten_Covid19_Vaccine.xlsx"
sheet_name = "Vaccinationer tidsserie"

# Read the vaccination time series sheet from the workbook.
df_vaccinationer = pd.read_excel(io=file_path, sheet_name=sheet_name)

# Preview the first rows to check which columns were loaded.
df_vaccinationer.head()

Unnamed: 0,Vecka,År,Region,Antal vaccinationer
0,52,2020,| Sverige |,2478
1,52,2020,Stockholm,289
2,52,2020,Uppsala,9
3,52,2020,Södermanland,116
4,52,2020,Östergötland,214


In [2]:
# The country-level rows are labelled "| Sverige |" in the sheet;
# normalise that label to plain "Sverige" wherever it occurs in the frame.
df_vaccinationer = df_vaccinationer.replace(to_replace="| Sverige |", value="Sverige")
df_vaccinationer

Unnamed: 0,Vecka,År,Region,Antal vaccinationer
0,52,2020,Sverige,2478
1,52,2020,Stockholm,289
2,52,2020,Uppsala,9
3,52,2020,Södermanland,116
4,52,2020,Östergötland,214
...,...,...,...,...
2107,42,2022,Gävleborg,726017
2108,42,2022,Västernorrland,632660
2109,42,2022,Jämtland,328655
2110,42,2022,Västerbotten,724409


In [3]:
# Number of distinct Region values: 22 regions (including Sweden as a whole).
df_vaccinationer["Region"].nunique(dropna=False)

22

In [4]:
# Combine year and week number into a single "<year>v<week>" label stored in
# "Vecka" (e.g. "2020v52"), then drop the now redundant "År" column.
df_vaccinationer = df_vaccinationer.assign(
    Vecka=lambda frame: frame["År"].astype(str) + "v" + frame["Vecka"].astype(str)
).drop(columns="År")
df_vaccinationer

Unnamed: 0,Vecka,Region,Antal vaccinationer
0,2020v52,Sverige,2478
1,2020v52,Stockholm,289
2,2020v52,Uppsala,9
3,2020v52,Södermanland,116
4,2020v52,Östergötland,214
...,...,...,...
2107,2022v42,Gävleborg,726017
2108,2022v42,Västernorrland,632660
2109,2022v42,Jämtland,328655
2110,2022v42,Västerbotten,724409


# Amount of Vaccinations by Region
As the graph below shows, the raw numbers are very hard to read.  
There is a huge gap between the highest number of vaccinations and all the lower ones, which is due to the large differences in population size.  
To make the graph more readable, we should instead look at vaccinations relative to the population of each region.  

In [5]:
# Number of vaccinations per county per week, drawn as one line per region.
px.line(
    data_frame=df_vaccinationer,
    x="Vecka",
    y="Antal vaccinationer",
    color="Region",
)

In [6]:
# The time series sheet lacks population data, so it is read from a
# different sheet of the same workbook.
file_path = "Data/Folkhalsomyndigheten_Covid19_Vaccine.xlsx"
sheet_name = "Vaccinerade kommun och ålder"

# Only Excel columns B and F are loaded (they appear as "Län_namn" and "Befolkning").
df_befolkning = pd.read_excel(io=file_path, sheet_name=sheet_name, usecols="B,F")

df_befolkning.head()

Unnamed: 0,Län_namn,Befolkning
0,Stockholms län,2422
1,Stockholms län,1203
2,Stockholms län,6692
3,Stockholms län,7332
4,Stockholms län,6946


In [7]:
# Collapse the population rows into one total per county name.
df_befolkning = df_befolkning.groupby("Län_namn").sum().reset_index()
df_befolkning.head()

Unnamed: 0,Län_namn,Befolkning
0,Blekinge län,139327
1,Dalarnas län,252075
2,Gotlands län,53924
3,Gävleborgs län,252216
4,Hallands län,295663


In [8]:
# Grouping by county name gives the regions: 21 of them (Sweden as a whole is not included).
df_befolkning["Län_namn"].nunique(dropna=False)

21

In [9]:
# Turn county names into region names so they match the vaccination data:
# rename the "Län_namn" column to "Region" and strip the trailing " län"
# together with an optional genitive "s" (e.g. "Gotlands län" -> "Gotland").
#
# The cleaned column is assigned back instead of calling
# `.replace(..., inplace=True)` on a selected column: that chained in-place
# call may act on a temporary copy and leave the DataFrame unchanged
# (pandas Copy-on-Write). Renaming first also keeps the cell re-runnable,
# because `rename` ignores a "Län_namn" column that is already gone.
df_befolkning = df_befolkning.rename(columns={"Län_namn": "Region"})
df_befolkning["Region"] = df_befolkning["Region"].replace(r"s? län$", "", regex=True)
df_befolkning.head()

Unnamed: 0,Region,Befolkning
0,Blekinge,139327
1,Dalarna,252075
2,Gotland,53924
3,Gävleborg,252216
4,Halland,295663


In [10]:
# Add a "Sverige" row whose population is the sum of all regions.
# Any "Sverige" row already present is excluded first, so re-running this cell
# neither appends a duplicate row nor counts the national total twice.
regional_population = df_befolkning[df_befolkning["Region"] != "Sverige"]
sweden_total = pd.DataFrame(
    {"Region": ["Sverige"], "Befolkning": [regional_population["Befolkning"].sum()]}
)
df_befolkning = pd.concat([regional_population, sweden_total], ignore_index=True)
df_befolkning.tail(1)

Unnamed: 0,Region,Befolkning
21,Sverige,9092790


In [11]:
# Show the first vaccination row to check the "Region" key used for the merge.
df_vaccinationer.iloc[:1]

Unnamed: 0,Vecka,Region,Antal vaccinationer
0,2020v52,Sverige,2478


In [12]:
# Attach each region's population to the vaccination rows, matching on "Region".
# - A "Befolkning" column left over from a previous run is dropped first, so
#   re-running the cell does not produce "Befolkning_x"/"Befolkning_y" columns.
# - validate="many_to_one" raises a MergeError if a region occurs more than once
#   in df_befolkning, which would otherwise silently duplicate vaccination rows.
# NOTE: this is the default inner join, so rows whose region name has no match
# in df_befolkning are dropped.
df_vaccinationer = df_vaccinationer.drop(columns="Befolkning", errors="ignore").merge(
    df_befolkning, on="Region", validate="many_to_one"
)
df_vaccinationer.head()

Unnamed: 0,Vecka,Region,Antal vaccinationer,Befolkning
0,2020v52,Sverige,2478,9092790
1,2020v53,Sverige,16581,9092790
2,2021v1,Sverige,53258,9092790
3,2021v2,Sverige,132915,9092790
4,2021v3,Sverige,249712,9092790


In [13]:
# New column: vaccinations divided by the region's population
# (a value of 1 means on average one vaccination per inhabitant, and so on).
df_vaccinationer["Vaccinationer per invånare"] = df_vaccinationer["Antal vaccinationer"].div(
    df_vaccinationer["Befolkning"]
)
df_vaccinationer.head()

Unnamed: 0,Vecka,Region,Antal vaccinationer,Befolkning,Vaccinationer per invånare
0,2020v52,Sverige,2478,9092790,0.000273
1,2020v53,Sverige,16581,9092790,0.001824
2,2021v1,Sverige,53258,9092790,0.005857
3,2021v2,Sverige,132915,9092790,0.014618
4,2021v3,Sverige,249712,9092790,0.027463


# Average Amount of Vaccinations (by region and week)
This shows a historical view of how the regions compare to each other in terms of average vaccinations per inhabitant.  
However, it is rather cluttered unless you deselect most of the lines (double-click a region in the legend to isolate it).  
Historical data is also rarely the most relevant view, as you would usually want to compare current standings rather than past ones.  
Still, it is good to have this historical perspective available.

In [14]:
# Number of vaccinations per inhabitant per county per week
# (based on population figures as of 27 Oct 2022).
px.line(
    data_frame=df_vaccinationer,
    x="Vecka",
    y="Vaccinationer per invånare",
    color="Region",
)

# Average Amount of Vaccinations by Region (most recent data)
Here we can see the most recent results much more clearly; this graph is far more telling and does not require a lot of selecting and zooming in.  
The historical perspective is still interesting, however, and should remain available for the cases where it is relevant to look back.  

Note that Sweden's average lies far towards the lower end. The national figure is weighted by population, so the most populous regions pull it towards their values, whereas each bar in this graph counts equally regardless of how many people live in the region.  
Some of the regions below the country's average are highly populated, while several of the regions with the highest averages have smaller populations.  

At a glance, one might think it would be more relevant to have a higher number of vaccinations in the more densely populated areas.  
However, as the earlier KPI explorations showed, older people generally have more doses because they are prioritised in the vaccination process.  
Larger cities tend to have a younger population, which might be a contributing factor to why the more populous regions tend to have a lower average number of doses.  

In [15]:
# Keep only the rows for week 42 of 2022 and order the regions from the
# lowest to the highest number of vaccinations per inhabitant.
latest_week_rows = df_vaccinationer[df_vaccinationer["Vecka"] == "2022v42"]
latest_week_sorted = latest_week_rows.sort_values(by="Vaccinationer per invånare")

# One bar per region; the y-axis label is replaced with a friendlier name.
fig = px.bar(
    latest_week_sorted,
    x="Region",
    y="Vaccinationer per invånare",
    title="Genomsnittlig Mängd Doser per Region (v42 2022)",
    labels={"Vaccinationer per invånare": "Genomsnittlig Mängd Doser"},
)

# Save a standalone HTML copy of the chart, then render it in the notebook.
fig.write_html("Visualiseringar/E3_KPI_Average_Amount_Doses_Region.html")
fig.show()