In [209]:
import pandas as pd
import plotly_express as px

# Workbook with the cumulative COVID-19 figures and the sheet holding the per-region totals.
file_path = "Data/Folkhalsomyndigheten_Covid19.xlsx"
sheet_name = "Totalt antal per region"

# Only the listed spreadsheet columns are read; the rest of the sheet is ignored.
df_antal_fall = pd.read_excel(
    io=file_path,
    sheet_name=sheet_name,
    usecols="A,B,E,D",
)

# Peek at the first row to confirm which columns were loaded.
df_antal_fall.head(n=1)

Unnamed: 0,Region,Totalt_antal_fall,Totalt_antal_intensivvårdade,Totalt_antal_avlidna
0,Blekinge,30829,85,184


In [210]:
# Workbook with the vaccination data; this sheet holds one row per week, year and region.
file_path = "Data/Folkhalsomyndigheten_Covid19_Vaccine.xlsx"
sheet_name = "Vaccinationer tidsserie"

df_vaccinationer = pd.read_excel(
    io=file_path,
    sheet_name=sheet_name,
)

# Default preview (first five rows).
df_vaccinationer.head()

Unnamed: 0,Vecka,År,Region,Antal vaccinationer
0,52,2020,| Sverige |,2478
1,52,2020,Stockholm,289
2,52,2020,Uppsala,9
3,52,2020,Södermanland,116
4,52,2020,Östergötland,214


In [211]:
# The national rows are labelled "| Sverige |" in the source sheet; strip the
# decoration so the label reads plain "Sverige".
df_vaccinationer = df_vaccinationer.replace(to_replace="| Sverige |", value="Sverige")
df_vaccinationer

Unnamed: 0,Vecka,År,Region,Antal vaccinationer
0,52,2020,Sverige,2478
1,52,2020,Stockholm,289
2,52,2020,Uppsala,9
3,52,2020,Södermanland,116
4,52,2020,Östergötland,214
...,...,...,...,...
2107,42,2022,Gävleborg,726017
2108,42,2022,Västernorrland,632660
2109,42,2022,Jämtland,328655
2110,42,2022,Västerbotten,724409


In [212]:
# Fold year and week into a single "<year>v<week>" label stored in the Vecka column,
# then discard the År column, which is not needed once the label exists.
df_vaccinationer = df_vaccinationer.assign(
    Vecka=df_vaccinationer["År"].astype(str).str.cat(
        df_vaccinationer["Vecka"].astype(str), sep="v"
    )
).drop(columns="År")
df_vaccinationer

Unnamed: 0,Vecka,Region,Antal vaccinationer
0,2020v52,Sverige,2478
1,2020v52,Stockholm,289
2,2020v52,Uppsala,9
3,2020v52,Södermanland,116
4,2020v52,Östergötland,214
...,...,...,...
2107,2022v42,Gävleborg,726017
2108,2022v42,Västernorrland,632660
2109,2022v42,Jämtland,328655
2110,2022v42,Västerbotten,724409


In [213]:
# Two regions are spelled differently here than in the vaccination data;
# map them onto the spelling used there so the region names match.
df_antal_fall["Region"] = df_antal_fall["Region"].replace(
    {
        "Sörmland": "Södermanland",
        "Jämtland Härjedalen": "Jämtland",
    }
)

df_antal_fall

Unnamed: 0,Region,Totalt_antal_fall,Totalt_antal_intensivvårdade,Totalt_antal_avlidna
0,Blekinge,30829,85,184
1,Dalarna,75091,260,544
2,Gotland,11874,46,82
3,Gävleborg,74803,332,754
4,Halland,108822,229,518
5,Jämtland,34347,95,197
6,Jönköping,89662,414,756
7,Kalmar,62810,135,385
8,Kronoberg,51460,162,410
9,Norrbotten,50755,267,459


In [214]:
# Append a national "Sverige" row whose values are the column-wise sums of the
# regional case, intensive-care and death counts (these are counts, not population).
#
# Any "Sverige" row that is already present is left out before summing and before
# concatenating, so re-running this cell neither duplicates the row nor
# double-counts the totals.
count_columns = [
    "Totalt_antal_fall",
    "Totalt_antal_intensivvårdade",
    "Totalt_antal_avlidna",
]
regional_rows = df_antal_fall[df_antal_fall["Region"] != "Sverige"]
national_totals = regional_rows[count_columns].sum()

df_antal_fall = pd.concat(
    [
        regional_rows,
        pd.DataFrame(
            {
                "Region": ["Sverige"],
                **{column: [national_totals[column]] for column in count_columns},
            }
        ),
    ],
    ignore_index=True,
)

# The appended national row is the last one.
df_antal_fall.tail(1)


Unnamed: 0,Region,Totalt_antal_fall,Totalt_antal_intensivvårdade,Totalt_antal_avlidna
21,Sverige,2611580,9544,20659


In [215]:
# Number of distinct Region labels: every region plus the appended "Sverige" row.
df_antal_fall["Region"].nunique(dropna=False)

22

In [216]:
# Use the region name as the row index so later operations can align on it.
df_antal_fall = df_antal_fall.set_index(keys="Region")
df_antal_fall

Unnamed: 0_level_0,Totalt_antal_fall,Totalt_antal_intensivvårdade,Totalt_antal_avlidna
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Blekinge,30829,85,184
Dalarna,75091,260,544
Gotland,11874,46,82
Gävleborg,74803,332,754
Halland,108822,229,518
Jämtland,34347,95,197
Jönköping,89662,414,756
Kalmar,62810,135,385
Kronoberg,51460,162,410
Norrbotten,50755,267,459


In [217]:
# Keep only the rows for week "2022v42", drop the now-constant Vecka column and
# index the result by region name.
df_vaccinationer = (
    df_vaccinationer
    .loc[df_vaccinationer["Vecka"].eq("2022v42")]
    .drop(columns="Vecka")
    .set_index("Region")
)
df_vaccinationer


Unnamed: 0_level_0,Antal vaccinationer
Region,Unnamed: 1_level_1
Sverige,24768511
Stockholm,5322674
Uppsala,986104
Södermanland,731135
Östergötland,1210948
Jönköping,882818
Kronoberg,470521
Kalmar,644639
Gotland,164795
Blekinge,402759


In [218]:
# NOTE(review): disabled merge-based variant, kept for reference only. The active
# cells instead filter df_vaccinationer to week "2022v42" and index both frames by
# Region, so nothing below is executed.
# # merging dataframes together using data from latest week
# df_antal_fall_vaccinationer = df_antal_fall.merge(df_vaccinationer[df_vaccinationer["Vecka"] == "2022v42"].reset_index(drop = True), on = "Region")
# df_antal_fall_vaccinationer #drop = True drops index,
# # TODO rename cols

In [219]:
# NOTE(review): disabled cleanup of df_antal_fall_vaccinationer. In the cells visible
# here that frame is only created by commented-out code, so uncommenting these lines
# alone would presumably fail with a NameError — confirm before re-enabling.
# df_antal_fall_vaccinationer.drop("Vecka", axis = "columns", inplace = True)
# # df_antal_fall_vaccinationer.drop(21, axis = "rows", inplace = True)
# df_antal_fall_vaccinationer

In [220]:
# Divide every count column (cases, intensive care, deaths) by the number of
# vaccinations recorded for the same region; rows are matched on the Region index.
df_andel_fall = df_antal_fall.divide(
    other=df_vaccinationer["Antal vaccinationer"],
    axis="index",
)
df_andel_fall

Unnamed: 0_level_0,Totalt_antal_fall,Totalt_antal_intensivvårdade,Totalt_antal_avlidna
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Blekinge,0.076545,0.000211,0.000457
Dalarna,0.101635,0.000352,0.000736
Gotland,0.072053,0.000279,0.000498
Gävleborg,0.103032,0.000457,0.001039
Halland,0.129144,0.000272,0.000615
Jämtland,0.104508,0.000289,0.000599
Jönköping,0.101563,0.000469,0.000856
Kalmar,0.097434,0.000209,0.000597
Kronoberg,0.109368,0.000344,0.000871
Norrbotten,0.076922,0.000405,0.000696


In [221]:
# One group of bars per region, one bar per measure. Grouped bars are used because
# px.bar's default barmode ("relative") stacks the series, and stacked segments
# cannot be compared on a logarithmic axis where the measures differ by orders of
# magnitude. "value" and "variable" are the names Plotly Express gives the melted
# wide-form columns; they are relabelled so the axis and legend are readable.
px.bar(
    df_andel_fall,
    barmode="group",
    log_y=True,
    title="COVID-19 cases, intensive-care admissions and deaths per vaccination, by region",
    labels={
        "value": "Count per vaccination (log scale)",
        "variable": "Measure",
    },
)

In [222]:
# TODO get population (befolkning) figures
# TODO plot