# Importing necessary packages

In [4]:
import pandas as pd  # For handling the data

import seaborn as sns  # For plotting
import matplotlib.pyplot as plt  # for subplots
import plotly_express as px  # for interactiv plots

# import warnings
# warnings.filterwarnings(action="ignore")

import plotly.io as pio
pio.templates.default = "gridon"  # Sets "gridon" as default for all Plotly graphs


# KPI 1

- amount of cases per each age group.
- amount of deaths in relation to cases per age group.
___
# Hypothesis

- I imagine that ages 20-40 will be the most likely to get sick but must likely to survive the virus

- There will be higher amounts of deaths as age goes up

- If the intensive care number is high. so should deaths.
___

In [5]:
df_age_group = pd.read_excel("../Lab_Data/Covid19.xlsx", sheet_name="Totalt antal per åldersgrupp")
#no nulls. "Ålder is object. rest are int."¨

df_age_group.drop([10][0], inplace = True) # Dropping "Uppgift saknas because it is negligible amount"

# Renaming columns to translate them into english
df_age_group.rename(
    columns={
        "Åldersgrupp": "Age group",
        "Totalt_antal_fall": "Total amount of cases",
        "Totalt_antal_intensivvårdade": "Total amount in intensive care",
        "Totalt_antal_avlidna": "Total deaths",
    },
    inplace=True,
)

# Translating Age group rows into english
df_age_group.replace(
    {
        "Ålder_0_9": "Ages 0-9",
        "Ålder_10_19": "Ages 10-19",
        "Ålder_20_29": "Ages 20-29",
        "Ålder_30_39": "Ages 30-39",
        "Ålder_40_49": "Ages 40-49",
        "Ålder_50_59": "Ages 50-59",
        "Ålder_60_69": "Ages 60-69",
        "Ålder_70_79": "Ages 70-79",
        "Ålder_80_89": "Ages 80-89",
        "Ålder_90_plus": "Age 90 plus",
    },
    inplace=True,
)


In [6]:
def percentage_func(df, column: str, lst, column2 = "NA",):
    """Function to get precentages out of a Dataframe and save it to a list.
    column2 can be used when you need percentages between two columns."""
    if column2 == "NA":
        col_sum = df[column].sum()
        for i in range(len(df[column])):
            lst.append(round(df[column][i] / col_sum*100))
    else:
        col_sum2 = df[column2].sum()
        for i in range(len(df[column])):
             lst.append(round(df[column][i] / col_sum2 *100))

In [15]:
cases_percent = [] # checking amount of cases procenteges by age group
intensive_care_percent = []
total_dead_percent = []

percentage_func(df_age_group, "Total amount of cases", cases_percent,"NA")
percentage_func(df_age_group, "Total amount in intensive care", intensive_care_percent)
percentage_func(df_age_group, "Total deaths", total_dead_percent)

cases_percent, intensive_care_percent, total_dead_percent

([5, 14, 16, 19, 18, 14, 7, 3, 2, 1],
 [1, 1, 3, 5, 10, 20, 27, 25, 6, 0],
 [0, 0, 0, 0, 1, 3, 7, 23, 40, 26])

In [22]:
fig = px.bar(
    x=df_age_group["Age group"],
    y=[cases_percent, intensive_care_percent, total_dead_percent],
    labels = {"x": "Age groups", "variable": "Datapoint(s)", "value": "Percentage of people"},
    text_auto=True, # Adds the values above the bars
    barmode="group", # groups the bars in plot
    title="Amount of cases, deaths and patients in intensive care, by percent",
    width = 1080, # Sets the size so that the graph is consistent for every user that runs the program
    height = 540,
    # pattern_shape = df_age_group["Åldersgrupp"],
    # pattern_shape_sequence=["/","/","/","/","/","/","/","/"],
)
bar_names = {
    "wide_variable_0": "Sick per age group per percentage",  # Todo fix names.
    "wide_variable_1": "Intensive care patients per percentage",
    "wide_variable_2": "Deaths per age group",
}
fig.update_traces( # rotates text and puts text outside of bar.
    textposition="outside",
)
fig.for_each_trace(
    lambda t: t.update(
        name=bar_names[t.name],
        legendgroup=bar_names[t.name],
        hovertemplate=t.hovertemplate.replace(t.name, bar_names[t.name]),
    )
)
fig.update_layout(  # https://plotly.com/python/setting-graph-size/
    margin=dict(l=40, r=50, t=60, b=70), # Changes the margins to make the graph look the for everyone who runs the program
    paper_bgcolor="white",
)
fig.add_annotation(  # adds an line and text to the barplot.
    text="age 80-89 is responsible for        %       of all deaths",
    x=8,
    y=38,  
)

fig.show()

Very interesting to see that almost 0 percent of ages 90 > gets intensive care.

As I Thought, Ages 10-59 (Ages that are most active and meets with the most people on average) gets sick the most but there are almost no deaths compared to 59 and above.

## KPI 2
___
Make a gapminder graph:

Cases per Län, = Size
x = "Kumulative cases"
y = Cumulative deaths
Color = Län
animation_frame = "year"
animation_group = "country"


In [23]:
df_lan = pd.read_excel("../Lab_Data/Covid19.xlsx", sheet_name="Veckodata Region")
df_lan.insert(0, "Vecka", df_lan["år"].astype(str) + "v" + df_lan["veckonummer"].astype(str)) # Cleaning data to get weeks and years combined again.
df_lan.pop("år") # Remove column year because it is not needed any more
df_lan.pop("veckonummer") # same as above

# RENAME

0        1
1        2
2        3
3        4
4        5
        ..
3082    38
3083    39
3084    40
3085    41
3086    42
Name: veckonummer, Length: 3087, dtype: int64

In [26]:
fig = px.scatter( # fix... 
    df_lan,
    x="Kum_antal_intensivvårdade",
    y="Kum_antal_avlidna",
    size="Kum_antal_fall",
    log_x=True,
    log_y=True,
    size_max=1000,
    color="Region",
    animation_frame = "Vecka",
    animation_group = "Region",
    title="Deaths and patients in Intensive care for every week",
    range_x = [40,250],
    range_y = [40,560],
)

fig.show()

## KPI 3
___
When were the most people in intensive care TOP 5. Also What were the deaths like. 

In [None]:
# Read in data and clean it as I did in task 1

kingdom_all = pd.read_excel("../Lab_Data/Covid19.xlsx", sheet_name="Veckodata Riket")
kingdom_all.insert(0,"Week",kingdom_all["år"].astype(str)+"v"+kingdom_all["veckonummer"].astype(str))
kingdom_all.pop("år")
kingdom_all.pop("veckonummer")

In [48]:
kingdom_all.head(10)

Unnamed: 0,Week,Antal_fall_vecka,Antal_fall_100000inv_vecka,Antal_fall_100000inv_14dagar,Kum_antal_fall,Kum_fall_100000inv,Antal_nyaintensivvårdade_vecka,Kum_antal_intensivvårdade,Antal_avlidna_vecka,Antal_avlidna_milj_inv_vecka,Kum_antal_avlidna,Kum_antal_avlidna_milj_inv
0,2020v6,1,0,0,1,0,0,0,0,0.0,0,0.0
1,2020v7,0,0,0,1,0,0,0,0,0.0,0,0.0
2,2020v8,0,0,0,1,0,0,0,0,0.0,0,0.0
3,2020v9,13,0,0,14,0,0,0,0,0.0,0,0.0
4,2020v10,211,2,2,225,2,3,3,0,0.0,0,0.0
5,2020v11,838,8,10,1063,10,16,19,5,0.5,5,0.5
6,2020v12,924,9,17,1987,19,88,107,44,4.3,49,4.7
7,2020v13,1957,19,28,3944,38,238,345,190,18.4,239,23.1
8,2020v14,3229,31,50,7173,69,277,622,451,43.700001,690,66.800003
9,2020v15,3740,36,67,10913,106,289,911,664,64.300003,1354,131.100006


In [55]:
# Using method chaining to get the 5 indexes of max values in intensive care and deaths.
top_5_intensive = kingdom_all["Antal_nyaintensivvårdade_vecka"].sort_values(ascending=False).head().index 
top_5_deaths = kingdom_all["Antal_avlidna_vecka"].sort_values(ascending=False).head().index 

In [59]:
kingdom_all.loc[9]["Week"]

'2020v15'

In [104]:
# Creating a loop to get a new dataframe with the correct indexes.
temp_top_5_intensive = []
temp_top_5_deaths = []
for value in top_5_intensive:
    temp_top_5_intensive.append(kingdom_all.loc[value])

for value in top_5_deaths:
    temp_top_5_deaths.append(kingdom_all.loc[value])



In [139]:
df_top_intensive = pd.DataFrame(temp_top_5_intensive, columns= ["Week","Antal_nyaintensivvårdade_vecka"])
df_top_deaths = pd.DataFrame(temp_top_5_deaths, columns= ["Week", "Antal_avlidna_vecka"])
df_top_intensive.sort_values(by="Week",inplace=True)
df_top_deaths.sort_values(by="Week",inplace=True)

In [140]:
df_top_intensive

Unnamed: 0,Week,Antal_nyaintensivvårdade_vecka
8,2020v14,277
9,2020v15,289
10,2020v16,250
61,2021v14,250
62,2021v15,246


In [141]:
# Combining previous data sets. 
df_deaths_IVA = pd.concat([df_top_deaths,df_top_intensive],join="outer",ignore_index=False)
df_deaths_IVA.head(10)


Unnamed: 0,Week,Antal_avlidna_vecka,Antal_nyaintensivvårdade_vecka
9,2020v15,664.0,
10,2020v16,657.0,
45,2020v51,645.0,
47,2020v53,663.0,
48,2021v1,644.0,
8,2020v14,,277.0
9,2020v15,,289.0
10,2020v16,,250.0
61,2021v14,,250.0
62,2021v15,,246.0


In [158]:
px.bar(df_top_intensive, x = "Week", y= "Antal_nyaintensivvårdade_vecka",title = "Top 5 weeks most people needed intensive care",)
#sns.barplot(df_top_deaths, x = "Week", y= "Antal_avlidna_vecka")
#px.line(, x = "Week", y= "Antal_nyaintensivvårdade_vecka")