# Importing necessary packages

In [1]:
import pandas as pd  # For handling the data

import seaborn as sns  # For plotting
import matplotlib.pyplot as plt  # for subplots
import plotly_express as px  # for interactiv plots
from Lab_functions import percentage_func
# import warnings
# warnings.filterwarnings(action="ignore")

import plotly.io as pio
pio.templates.default = "gridon"  # Sets "gridon" as default for all Plotly graphs


# Task 2
Warm up Vaccine Data
___

In [2]:
# read in data:
df_vaccine = pd.read_excel(
    "../Lab_Data/Vaccine.xlsx", sheet_name="Vaccinerade kommun och ålder"
)


In [3]:
# Initial information gathering.

# df_vaccine.info()  # Län_namn, Kommun_namn and Ålder are objects. rest are int or floats
df_vaccine.isnull().sum()  # Ammount of nulls are: Antal 3 doser 580, Antal 4 doser 2030, Andel 3 doser 580, Andel 4 doser 2030


Län                       0
Län_namn                  0
Kommun                    0
Kommun_namn               0
Ålder                     0
Befolkning                0
Antal minst 1 dos         0
Antal minst 2 doser       0
Antal 3 doser           580
Antal 4 doser          2030
Andel minst 1 dos         0
Andel minst 2 doser       0
Andel 3 doser           580
Andel 4 doser          2030
dtype: int64

In [4]:
df_vaccine.rename(
    columns={"Ålder": "Age group"}, inplace=True
)  # Renaming Ålder into Age group.
df_vaccine.replace(
    {"90 eller äldre": "90 or older"},
    inplace=True,
)


a) How many regional Regional Councils are represented in the data set.

In [5]:
len(df_vaccine["Län"].drop_duplicates().value_counts())
# answer = 21


21

b) How many municipalitys are represented in the data set?

In [6]:
len(df_vaccine["Kommun"].drop_duplicates().value_counts())
# Answer is 290 Kommuner.


290

c) How large is the population in the data set?

In [7]:
sheet_population = df_vaccine["Befolkning"].sum()
# Answer is 9 092 790
print(f"Population according to sheet = {sheet_population}")


Population according to sheet = 9092790


d) Calculate how many children that are under 18 years old in Sweden based on this data sheet.

In [8]:
# Håkan helped me with this code, my previous attempt was a for loop. Changed into this instead due to efficency.
sheet_population_under_18 = df_vaccine[
    (df_vaccine["Age group"] == "12-15") | (df_vaccine["Age group"] == "16-17")
]["Befolkning"].sum()
print(
    f"Amount of children under the age of 18 represented in sheet = {sheet_population_under_18}"
)


Amount of children under the age of 18 represented in sheet = 745370


d)2 Calculate how many children under the age of 18 when compared to actual data for population in sweden.

In [9]:
# Calculating how many children are below 12:
swedish_population = 10452326  # Source: https://www.scb.se/hitta-statistik/sverige-i-siffror/manniskorna-i-sverige/sveriges-befolkning/
age_0_11 = (
    swedish_population - df_vaccine["Befolkning"].sum()
)  # Calculates value for how many children between 0-11
print(
    f"Amount of children in age group 0-11: {age_0_11}. Total population: {sheet_population + age_0_11}"
)

swedish_population_all = df_vaccine.append(
    {"Age group": "0-11", "Befolkning": age_0_11}, ignore_index=True
)
# Creating a new variable with all agegroups
swedish_population_all.sort_values(
    by="Age group", ascending=True, inplace=True
)  # Sorts them so 0-11 is displayed first


Amount of children in age group 0-11: 1359536. Total population: 10452326


  swedish_population_all = df_vaccine.append(


In [10]:
swedish_population_under_18 = swedish_population_all[
    (swedish_population_all["Age group"] == "0-11")
    | (swedish_population_all["Age group"] == "12-15")
    | (swedish_population_all["Age group"] == "16-17")
]["Befolkning"].sum()
print(f"Real amount of children under the age of 18 = {swedish_population_under_18}")


Real amount of children under the age of 18 = 2104906


e) Draw a diagram that shows the age distribution.

In [11]:
fig = px.histogram(
    swedish_population_all,
    x="Age group",
    y="Befolkning",
    title="Age distribution Sweden 2022",
    width=1080,  # Sets the size so that the graph is consistent for every user that runs the program
    height=540,
).update_layout(yaxis_title="Population in millions")
# https://plotly.com/python/styling-plotly-express/
fig.add_annotation(  # adds an line and text to the barplot.
    text="Highest value",
    x=2.6,
    y=1505000,
)

# TODO SAVE FILE


1 f) Draw a barplot for: **Individuals with atleast one dose, two doses and three doses per regional council**

In [12]:
# 2)f
vaccine_lan = df_vaccine.groupby("Län")
befolkning_sum = vaccine_lan["Befolkning"].sum()

fig = px.bar(
    y=[
        round((vaccine_lan["Antal minst 1 dos"].sum() / befolkning_sum)*100), # precentage of vaccine lan.
        round(vaccine_lan["Antal minst 2 doser"].sum() / befolkning_sum*100),
        round(vaccine_lan["Antal 3 doser"].sum() / befolkning_sum*100),
    ],
    title="Amount of doses per Regional Council",
    labels={"value": "Percentage of Population Vaccinated", "x": "Regional Council", "variable": "Dose"},
    x=df_vaccine["Län_namn"].unique(),
    width=1080,  # Sets the size so that the graph is consistent for every user that runs the program
    height=540,
    barmode="group"
)
bar_names = {
    "wide_variable_0": "Dose 1",
    "wide_variable_1": "Dose 2",
    "wide_variable_2": "Dose 3",
}

# Source https://stackoverflow.com/questions/64371174/plotly-how-to-change-variable-label-names-for-the-legend-in-a-plotly-express-li
fig.for_each_trace(
    lambda t: t.update(
        name=bar_names[t.name],
        legendgroup=bar_names[t.name],
        hovertemplate=t.hovertemplate.replace(t.name, bar_names[t.name]),
    )
)

fig.show()
#TODO SAVE FILE

2 f) Take Västra Götalands regional council and Stockholms regional council and draw a barplot over proportion of atleast one, two, three and four doses.

In [21]:
vaccin_vg_sthlm = df_vaccine[
    (df_vaccine["Län_namn"] == "Västra Götalands län") 
    | (df_vaccine["Län_namn"] == "Stockholms län")
]
vaccin_city_group = vaccin_vg_sthlm.groupby("Län")
# Translating the regional councils that will be displayed in graph
vaccin_vg_sthlm.replace(
    {"Stockholms län": "Stockholm", "Västra Götalands län": "Gothenburg"}, 
    inplace=True,
)

dose_1 = (
    round(vaccin_city_group["Antal minst 1 dos"].sum() / vaccin_city_group["Befolkning"].sum()*100)
)
dose_2 = (
    round(vaccin_city_group["Antal minst 2 doser"].sum()
    / vaccin_city_group["Befolkning"].sum()*100)
)
dose_3 = (
    round(vaccin_city_group["Antal 3 doser"].sum() / vaccin_city_group["Befolkning"].sum()*100)
)
dose_4 = (
    round(vaccin_city_group["Antal 4 doser"].sum() / vaccin_city_group["Befolkning"].sum()*100)
)




fig = px.bar(
    x=vaccin_vg_sthlm.Län_namn.unique(),
    y=[dose_1, dose_2, dose_3, dose_4],
    labels={
        "value": "Population in Percent",
        "variable": "Dosage",
        "x": "Swedish Cities",
    },
    barmode="group",
    width=1080,  # Sets the size so that the graph is consistent for every user that runs the program
    height=540,
    title="Persons vaccinated per city",
    text_auto=True,
)
fig.update_layout(  # https://plotly.com/python/setting-graph-size/
    margin=dict(
        l=40, r=50, t=60, b=70
    ),  # Changes the margins to make the graph look the for everyone who runs the program

)

test = {
    "wide_variable_0": "Dose 1",
    "wide_variable_1": "Dose 2",
    "wide_variable_2": "Dose 3",
    "wide_variable_3": "Dose 4",
}
fig.for_each_trace(
    lambda t: t.update(
        name=test[t.name],
        legendgroup=test[t.name],
        hovertemplate=t.hovertemplate.replace(t.name, test[t.name]),
    )
)


fig.show()
# ADD TEXT TO BARS, OR ANNOTATIONS

# TODO SAVE FILE



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

