# Effect of Covid19 based on Age

In [1]:
from pathlib import Path

import pandas as pd
import plotly_express as px

# Source workbook and the sheet holding the totals per age group
file_path = "Data/Folkhalsomyndigheten_Covid19.xlsx"
sheet_name = "Totalt antal per åldersgrupp"

# Shorter display names for the three count columns
column_names = {
    "Totalt_antal_fall": "Antal Fall",
    "Totalt_antal_intensivvårdade": "Antal Intensivvårdade",
    "Totalt_antal_avlidna": "Antal Avlidna",
}
df = pd.read_excel(file_path, sheet_name=sheet_name).rename(columns=column_names)

# Tidy the age-group labels: strip the "Ålder_" prefix, turn "_plus" into "+",
# and turn the remaining underscores into dashes
# (so a label of the form "Ålder_0_9" becomes "0-9").
age_label_patterns = {"Ålder_": "", "_plus": "+", "_": "-"}
df["Åldersgrupp"] = df["Åldersgrupp"].replace(age_label_patterns, regex=True)

df

Unnamed: 0,Åldersgrupp,Antal Fall,Antal Intensivvårdade,Antal Avlidna
0,0-9,138071,109,17
1,10-19,355823,101,9
2,20-29,418506,285,41
3,30-39,493443,492,71
4,40-49,474702,997,172
5,50-59,378468,1932,523
6,60-69,180079,2595,1422
7,70-79,87096,2394,4654
8,80-89,58170,612,8326
9,90+,26677,21,5420


In [2]:
# Sanity check of the loaded frame: the summary below shows no nulls in any column
# and integer dtypes for the three count columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 4 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Åldersgrupp            11 non-null     object
 1   Antal Fall             11 non-null     int64 
 2   Antal Intensivvårdade  11 non-null     int64 
 3   Antal Avlidna          11 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 480.0+ bytes


---
### Looking at "Uppgift saknas"
Calculating the share of values reported as "Uppgift saknas" (information missing).  
It is roughly 0.02% (0.0002) of all reported values, which is negligible.  
Therefore it will not be taken into consideration in the following calculations.  

In [3]:
# Share of all reported counts (cases + intensive care + deaths) that sit in the
# "Uppgift saknas" row, relative to the grand total over every age group.
all_counts = df.drop(columns="Åldersgrupp")
missing_counts = all_counts[df["Åldersgrupp"] == "Uppgift saknas"]
missing_counts.sum().sum() / all_counts.sum().sum()

0.00021008538551425305

In [4]:
# Same check per column, in case one measure has a much larger missing share than the others.
# The largest share is ~0.06% (0.0006, intensive care), which is still negligible.
numeric_counts = df.drop(columns="Åldersgrupp")
is_missing_row = df["Åldersgrupp"] == "Uppgift saknas"
numeric_counts.loc[is_missing_row] / numeric_counts.sum()

Unnamed: 0,Antal Fall,Antal Intensivvårdade,Antal Avlidna
10,0.000209,0.000629,0.000194


In [5]:
# Remove the "Uppgift saknas" row: its share is tiny and it would only clutter the graphs.
# The remaining rows keep their original index labels.
df = df.loc[df["Åldersgrupp"] != "Uppgift saknas"].copy()
df

Unnamed: 0,Åldersgrupp,Antal Fall,Antal Intensivvårdade,Antal Avlidna
0,0-9,138071,109,17
1,10-19,355823,101,9
2,20-29,418506,285,41
3,30-39,493443,492,71
4,40-49,474702,997,172
5,50-59,378468,1932,523
6,60-69,180079,2595,1422
7,70-79,87096,2394,4654
8,80-89,58170,612,8326
9,90+,26677,21,5420


---
## Plotting amount of infections, intensive care patients and deaths in different age groups
This gives a fairly clear picture, but it becomes clearer when intensive care patients and deaths are expressed relative to the number of cases  

In [6]:
# Grouped bars of cases, intensive-care patients and deaths per age group.
# The y-axis is log-scaled so the three series can share one chart.
count_columns_to_plot = ["Antal Fall", "Antal Intensivvårdade", "Antal Avlidna"]
fig_counts = px.bar(
    data_frame=df,
    x="Åldersgrupp",
    y=count_columns_to_plot,
    title="Påverkan på olika åldersgrupper",
    labels={"value": "Antal Invånare", "variable": ""},
    barmode="group",
    log_y=True,
)
fig_counts


---
### Looking at numbers relative to amount of cases instead

In [7]:
# Express intensive-care patients and deaths as a fraction of the confirmed cases
# in each age group, so the groups can be compared independently of their size.
shares = (
    df[["Antal Intensivvårdade", "Antal Avlidna"]]
    .div(df["Antal Fall"], axis="index")
    .rename(columns={"Antal Intensivvårdade": "Intensivvårdade", "Antal Avlidna": "Avlidna"})
)
df_andel = pd.concat([df["Åldersgrupp"], shares], axis="columns")

df_andel.head(2)


Unnamed: 0,Åldersgrupp,Intensivvårdade,Avlidna
0,0-9,0.000789,0.000123
1,10-19,0.000284,2.5e-05


# KPI: Mortality Rates, Intensive Care Patients 

Looking at intensive care patients and deaths relative to the number of infected gives a better overview of how badly the virus affects people of different ages 

- We can see clearly that the mortality rate is much higher in people of higher ages  
- Younger people are less likely to receive intensive care, this is likely because they are not affected as badly
- Interestingly, the oldest age groups receive less intensive care even though the mortality rate keeps increasing.  
This could potentially be because they are less able to seek help,  
or because understaffed hospitals that cannot treat every patient requiring care prioritise patients with a higher chance of survival.

In [8]:
# Plot the share of infected people per age group who received intensive care or died
# (log-scaled y-axis), export the chart as a standalone HTML file, then display it.
fig = px.bar(
    df_andel,
    x="Åldersgrupp",
    y=["Intensivvårdade", "Avlidna"],
    barmode="group",
    log_y=True,
    labels={"value": "Andel av Smittade", "variable": "Andel Smittade som är:"},
    title="Andel Smittade per Åldersgrupp som Intensivvårdats eller Avlidit",
)

# Make sure the output folder exists before exporting; otherwise the export fails
# on a checkout where "Visualiseringar/" has not been created yet.
html_path = Path("Visualiseringar/E3_KPI_Mortality_Rate_Age.html")
html_path.parent.mkdir(parents=True, exist_ok=True)

fig.write_html(str(html_path))
fig.show()
