# Education in Slovakia

## Import packages

In [None]:
#%cd mathprofi-education

In [None]:
import numpy as np

import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from dash import Dash, dcc, html, Input, Output, State, ALL
from dash.exceptions import PreventUpdate
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

from IPython.display import display

import preprocessing

import warnings
# TODO: uncomment before publishing
#warnings.filterwarnings('ignore')

## Styling

To change plotly theme to dark, just change the `PLOTLY_DARK_THEME` constant to `True`.

In [None]:
PLOTLY_DARK_THEME = False

if PLOTLY_DARK_THEME:
    style = {
        "background-color": "#1b1b1b",  # rgb(27, 27, 27)
        "color": "white",  # font
    }
    pio.templates.default = "plotly_dark"
else:
    style = {}
    pio.templates.default = "plotly"

## Import data

In [None]:
#TODO: change links before publishing
#data_root = 'https://raw.githubusercontent.com/AndruKP/mathprofi-education/dev/data'
data_root = './data'
url_010_SR = data_root + '/RV_O_010_R_SR_SK.CSV'
url_040_SR = data_root + '/RV_O_040_R_SR_SK.CSV'
url_040_OK = data_root + '/RV_O_040_L_OK_SK.CSV'
url_047_SR = data_root + '/RV_O_047_R_SR_SK.CSV'
url_047_OK = data_root + '/RV_O_047_L_OK_SK.CSV'
url_067_SR = data_root + '/RV_O_067_R_SR_SK.CSV'
url_067_KR = data_root + '/RV_O_067_R_KR_SK.CSV'
url_067_OK = data_root + '/RV_O_067_L_OK_SK.CSV'

districts_url = data_root + '/districts.json'
districts_geojson_url = data_root + '/districts.geojson'

In [None]:
table_010_SR = pd.read_csv(url_010_SR, sep=';')
table_040_SR = pd.read_csv(url_040_SR, sep=';')
table_040_OK = pd.read_csv(url_040_OK, sep=';')
table_047_SR = pd.read_csv(url_047_SR, sep=';')
table_047_OK = pd.read_csv(url_047_OK, sep=';')
table_067_SR = pd.read_csv(url_067_SR, sep=';')
table_067_KR = pd.read_csv(url_067_KR, sep=';')
table_067_OK = pd.read_csv(url_067_OK, sep=';')

districts_table = gpd.read_file(districts_url)
districts_geojson = gpd.read_file(districts_geojson_url, crs="EPSG:4326")

display(table_010_SR.head())
display(table_040_SR.head())
display(table_040_OK.head())
display(table_047_SR.head())
display(table_047_OK.head())
display(table_067_SR.head())
display(table_067_KR.head())
display(table_067_OK.head())

display(districts_table.head())
display(districts_geojson.head())

## Preprocess data

To prepare geographic data, let's set the index, merge, and rename columns:

In [None]:
districts_geojson_indexed = districts_geojson.set_index("IDN3")
districts_indexed = districts_table.set_index("IDN3")
districts_indexed[["geometry", "Area", "AreaHA"]] = districts_geojson_indexed[
    ["geometry", "Shape_Area", "VYMERA_ha"]
]
geo_frame = districts_indexed
geo_frame['NUTS1_CODE'] = 'SK0'
geo_frame['NUTS1'] = 'Slovenská republika'

To preprocess the data (rename columns, group education by level, etc.), let's use the `preprocess` function from our local `preprocessing` module (not to be confused with the existing third-party package `preprocessing` for text preprocessing):

In [None]:
# Tables to preprocess. `preprocessing.preprocess` is called only for its side
# effect (the return value is discarded), so it presumably modifies every
# table in place -- TODO confirm against the `preprocessing` module.
preprocess_tables = [table_010_SR, table_040_OK, table_040_SR, table_047_OK,
                     table_047_SR, table_067_OK, table_067_KR, table_067_SR]

for table in preprocess_tables:
    preprocessing.preprocess(table)

# Preview every preprocessed table, in the same order as the raw previews in
# the import cell. `table_067_OK` used to be preprocessed but never shown.
preprocessed_previews = {
    'table_010_SR': table_010_SR,
    'table_040_SR': table_040_SR,
    'table_040_OK': table_040_OK,
    'table_047_SR': table_047_SR,
    'table_047_OK': table_047_OK,
    'table_067_SR': table_067_SR,
    'table_067_KR': table_067_KR,
    'table_067_OK': table_067_OK,
}

for preview_name, preview_table in preprocessed_previews.items():
    display(f'{preview_name}:')
    display(preview_table.head())

## Basic demographic analysis

### Population pyramid

To create population pyramid, data should be grouped by age and sex:

In [None]:
grouped_by_sex_age = table_040_SR.groupby(['sex', 'age'])['count'].sum()
women_counts = grouped_by_sex_age.loc['female']
men_counts = grouped_by_sex_age.loc['male']

min_counts = np.minimum(men_counts, women_counts)

To create a graph where the male and female bars point in opposite directions, negative values are used for men. Also, `matplotlib.patches` (imported as `mpatches`) helps to create the legend.

In [None]:
figure, axes = plt.subplots(figsize=(10, 10))
colors_ = sns.color_palette("Paired")

# Surplus
women_surplus_graph = sns.histplot(y=women_counts.index,
                                   weights=women_counts,
                                   binwidth=1,
                                   color=colors_[5],
                                   ax=axes,
                                   label='women_s')

men_surplus_graph = sns.histplot(y=men_counts.index,
                                 weights=-1 * men_counts,
                                 binwidth=1,
                                 color=colors_[1],
                                 ax=axes,
                                 label='men_s')

# Population
women_pop_graph = sns.histplot(y=min_counts.index,
                               weights=min_counts,
                               binwidth=1,
                               color=colors_[4],
                               ax=axes,
                               label='women')

men_pop_graph = sns.histplot(y=min_counts.index,
                             weights=-1 * min_counts,
                             binwidth=1,
                             color=colors_[0],
                             ax=axes,
                             label='men')

plt.title('Population pyramid')
plt.ylabel('Age')
plt.xticks([-40000, -20000, 0, 20000, 40000],
           labels=['40000', '20000', '0', '20000', '40000'])

men_pop_patch = mpatches.Patch(color=colors_[0], label='Men population')
men_surplus_patch = mpatches.Patch(color=colors_[1], label='Men surplus')
women_pop_patch = mpatches.Patch(color=colors_[4], label='Women population')
women_surplus_patch = mpatches.Patch(color=colors_[5], label='Women surplus')

axes.legend(handles=[women_surplus_patch, women_pop_patch, men_surplus_patch, men_pop_patch])

pass

## Education analysis

### Education specialization

To see how many people have some level of education, the data should be grouped by education category:

In [None]:
education_series = (table_010_SR
                    .query('`education_category` != "unspecified"')
                    .groupby('education_category')['count']
                    .sum())

counts_df = pd.DataFrame({'education_category': education_series.index,
                          'count': education_series.values})

In [None]:
figure, axes = plt.subplots(figsize=(10, 10))
sns.barplot(data=counts_df,
            x='education_category',
            y='count',
            color=sns.color_palette(as_cmap=True)[0],
            order=['primary', 'secondary', 'vocational', 'higher', 'without'],
            ax=axes)

axes.set_xlabel('Achieved education')
axes.set_ylabel('Count')
plt.ylim(0)

plt.title('The number of people by achieved education')
plt.show()

### Geographic specialization of education

To create the graphs below, the data has to be grouped in a specific way. The first function computes the groups as follows:

1. Queries the dataset and filters out some data (`filter_query`)
2. Queries the dataset and chooses data for the next calculation (`chosen_query`)
3. Groups dataset by `groupby` column
4. Calculates number of people and 2 types of percent:
    * `percent` means $\frac{\text{number of selected people with specific entry in `groupby` column}}{\text{number of all people with specific entry in `groupby` column}} \cdot 100\%$
    * `number_percent` means $\frac{\text{number of  selected people with specific entry in `groupby` column}}{\text{number of all selected people}} \cdot 100\%$

Let's note, that `number_percent` is used only in treemap graph in the last part of notebook.

To better understand the difference between these methods, let's illustrate it with one example. Let `groupby="LAU1_CODE"`, `chosen_query="education_category.isin(['higher'])"`. Then the values for `LAU1_CODE = "SK01"` are:
$$\text{percent} = \frac{\text{number of people with higher education in SK01}}{\text{number of all people in SK01}} \cdot 100\%$$
and
$$\text{number_percent} = \frac{\text{number of people with higher education in SK01}}{\text{number of all people with higher education}} \cdot 100\%$$

Also, we can define function, that can draw all graphs with respect to given `groupby` and queries. For geographic columns like district, it creates choropleth map. In other cases, it draws a bar graph when the value is percent, and treemap otherwise.

In [None]:
# dash_geo functions
def compute_groups(data, groupby, chosen_query="", filter_query=""):
    """Aggregate `count` per `groupby` entry and derive two percentages.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a numeric `count` column and the `groupby` column(s).
    groupby : str or list of str
        Column(s) to group by.
    chosen_query : str, optional
        `DataFrame.query` expression selecting the people of interest.
        An empty string selects everybody.
    filter_query : str, optional
        `DataFrame.query` expression restricting the whole population before
        anything else is computed. An empty string keeps all rows.

    Returns
    -------
    pandas.DataFrame
        One row per group that contains selected people, with columns
        `groupby`, `number` (selected people), `total` (all people of the
        filtered population in the group), `number_percent` (share of the
        group among all selected people) and `percent` (share of selected
        people within the group).
    """
    population = data if filter_query == "" else data.query(filter_query)
    chosen = population if chosen_query == "" else population.query(chosen_query)

    chosen_per_group = chosen.groupby(groupby, observed=True)["count"].sum()
    population_per_group = population.groupby(groupby, observed=True)["count"].sum()

    result = chosen_per_group.rename("number").to_frame()
    # Index-aligned assignment: only groups present in `result` get a total.
    result["total"] = population_per_group

    chosen_overall = chosen["count"].sum()
    result["number_percent"] = result["number"] / chosen_overall * 100
    result["percent"] = result["number"] / result["total"] * 100

    return result.reset_index()


def plot_groups(data, groupby, value, title=""):
    """Plot an aggregate table such as the one returned by `compute_groups`.

    The kind of figure depends on `groupby` and `value`:

    * `groupby` is a territorial code column (`NUTS1_CODE`, `NUTS2_CODE`,
      `NUTS3_CODE`, `LAU1_CODE`, `LAU2_CODE`): a choropleth map coloured by
      `value`. `data` is merged with the module-level `geo_frame` on
      `groupby` to obtain the geometries and hover columns.
    * any other `groupby` with `value == "percent"`: a horizontal bar chart.
    * any other `groupby` with `value == "number"`: a treemap.

    Parameters
    ----------
    data : pandas.DataFrame
        Aggregate with the `groupby` column plus `number` and `percent`
        (and `number_percent` for the treemap).
    groupby : str
        Column the aggregate was grouped by.
    value : str
        Column to display.
    title : str, optional
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure

    Raises
    ------
    ValueError
        If `groupby` is not a territorial column and `value` is neither
        "percent" nor "number" (previously this surfaced as an
        `AttributeError` on `None`).
    """
    geographic_columns = (
        "NUTS1_CODE", "NUTS2_CODE", "NUTS3_CODE", "LAU1_CODE", "LAU2_CODE",
    )

    if groupby in geographic_columns:
        merged = geo_frame.merge(data, on=groupby)
        figure = px.choropleth_mapbox(
            data_frame=merged,
            geojson=merged.geometry,
            # Rows and geometries are matched through the merged frame's index.
            locations=merged.index,
            color=value,
            mapbox_style="carto-positron",
            center={"lat": 48.6737532, "lon": 19.696058},
            zoom=6,
            opacity=0.5,
            hover_name="LAU1",
            hover_data={"number": True, "percent": ':.2f', "AreaHA": True},
        )
    else:
        # Fail early with a clear message instead of leaving the figure unset.
        if value not in ("percent", "number"):
            raise ValueError(
                f"value must be 'percent' or 'number' for a non-geographic "
                f"groupby, got {value!r}"
            )

        data = data.sort_values(by=value, ascending=False)
        if value == "percent":
            figure = px.bar(
                data,
                y=groupby,
                color=groupby,
                x="percent",
                orientation="h",
                hover_data=["number", "percent"],
            )
        else:
            figure = px.treemap(
                data,
                path=[px.Constant("all"), groupby],
                values="number",
                hover_data=["number", "number_percent"],
            )

    figure.update_layout(title=title)
    return figure

#### Where are people in Slovakia?

In [None]:
data = compute_groups(table_040_OK, groupby="LAU1_CODE")
figure = plot_groups(data, groupby="LAU1_CODE", value="number")
figure.update_layout(coloraxis_colorbar=dict(title='Number of people'))
figure.show()

That will help us to understand the following data
#### Where is the largest number of people with a degree?

In [None]:
data = compute_groups(table_040_OK,
                      groupby="LAU1_CODE",
                      chosen_query="`education_category`.isin(['higher'])")

In [None]:
figure = plot_groups(data,
                     groupby="LAU1_CODE",
                     value="number",
                     title="Number of people achieved higher education")
figure.show()

So after all it is just about the big cities  
#### Where is the largest *percent* of people with a degree?

In [None]:
figure = plot_groups(data,
                     groupby="LAU1_CODE",
                     value="percent",
                     title="Percent of people achieved higher education")
figure.show()

Now, if we were aliens, we would be convinced that there is more to Bratislava and Košice than we thought before.
#### Where is the largest number of students?

In [None]:
data = compute_groups(table_040_OK,
                      groupby='LAU1_CODE',
                      chosen_query="`current_economic_activity`.isin(['študent vysokej školy'])")

In [None]:
figure = plot_groups(data, groupby='LAU1_CODE', value='number')
figure.show()

Same story here
#### Percent?

In [None]:
figure = plot_groups(data, groupby='LAU1_CODE', value='percent')
figure.show()

Interesting
#### Where is the largest percent of people continuing their education among people after 22

In [None]:
data = compute_groups(table_040_OK,
                      groupby='LAU1_CODE',
                      chosen_query="`education_category`.isin(['higher']) and `current_economic_activity`.isin(['študent vysokej školy'])",
                      filter_query='22 <= `age` < 91')

In [None]:
figure = plot_groups(data, groupby='LAU1_CODE', value='percent')
figure.show()

So, where do young people (aged 18 to 25) live?

In [None]:
data = compute_groups(table_040_OK,
                      groupby='LAU1_CODE',
                      chosen_query="18<=age<=25"
                      )

In [None]:
figure = plot_groups(data, groupby='LAU1_CODE', value='percent')
figure.show()

#### Where degree means less?  
Where is your diploma likely not to get you the desired *type* of job?  
It depends on what you want. If you are thinking of the first three ISCO major groups, here you are:

In [None]:
data = compute_groups(table_047_OK,
                      groupby='LAU1_CODE',
                      chosen_query="`education_category`.isin(['higher']) and `ISCO_group`.isin(['Craft and related trades workers', 'Elementary occupations', 'Plant and machine operators and assemblers', 'Skilled agricultural and fishery workers', 'Armed forces', 'Service workers and shop and market sales workers', 'Clerks'])",
                      filter_query='')

In [None]:
figure = plot_groups(data,
                     groupby='LAU1_CODE',
                     value='percent',
                     title='Percent of people with higher education working on a simple job')
figure.show()

If working as a clerk suits you

In [None]:
data = compute_groups(table_047_OK,
                      groupby='LAU1_CODE',
                      chosen_query="`education_category`.isin(['higher']) and `ISCO_group`.isin(['Craft and related trades workers', 'Elementary occupations', 'Plant and machine operators and assemblers', 'Skilled agricultural and fishery workers', 'Armed forces', 'Service workers and shop and market sales workers'])",
                      filter_query='')

In [None]:
figure = plot_groups(data,
                     groupby='LAU1_CODE',
                     value='percent',
                     title='Percent of people with higher education working on a simple job')
figure.show()

If job as a service worker or shop and market sales worker works for you as well

In [None]:
data = compute_groups(table_047_OK,
                      groupby='LAU1_CODE',
                      chosen_query="`education_category`.isin(['higher']) and `ISCO_group`.isin(['Craft and related trades workers', 'Elementary occupations', 'Plant and machine operators and assemblers', 'Skilled agricultural and fishery workers', 'Armed forces'])",
                      filter_query='')

In [None]:
figure = plot_groups(data,
                     groupby='LAU1_CODE',
                     value='percent',
                     title='Percent of people with higher education working on a simple job')
figure.show()

At this point it is not so bad. Let's look at something different.  
#### What percent of people with higher education works in occupation group, in which 60% of workers do not have higher education?

In [None]:
# Number of people for every (ISCO group, education category) pair.
pair_counts = table_047_SR.groupby(["ISCO_group", "education_category"])["count"].sum()
grouped = pair_counts.reset_index()

# Share of each education category within its ISCO group. Despite the column
# name this is a fraction (no multiplication by 100 happens here).
isco_totals = grouped.groupby("ISCO_group")["count"].transform("sum")
grouped["percent"] = grouped["count"] / isco_totals

In [None]:
# Stacked horizontal bars: one bar per ISCO group, split by education category.
figure = px.bar(
    grouped,
    x="percent",
    y="ISCO_group",
    color="education_category",
    orientation="h",
    barmode="relative",
    title="Distribution of ISCO groups by education",
    labels={"education_category": "Education", "ISCO_group": ""},
    # Fixed display order for both the bars and the colour legend.
    category_orders={
        "ISCO_group": [
            "Legislators, senior officials and managers",
            "Professionals",
            "Technicians and associate professionals",
            "Clerks",
            "Service workers and shop and market sales workers",
            "Skilled agricultural and fishery workers",
            "Craft and related trades workers",
            "Plant and machine operators and assemblers",
            "Elementary occupations",
            "Armed forces",
            "unspecified"
        ],
        "education_category": [
            "higher",
            "vocational",
            "secondary",
            "primary",
            "without",
            "unspecified",
        ],
    },
)
figure.update_layout(
    autosize=False,
    width=1400,  # Adjust the width as needed
    height=600,  # Adjust the height as needed
    font=dict(size=18),  # Adjust the font size of the text
)
# `percent` holds fractions, so the axis spans 0..1 and is formatted as percent.
figure.update_xaxes(range=[0, 1], tickformat=',.1%')
figure

In [None]:
data = compute_groups(table_047_OK,
                      groupby='LAU1_CODE',
                      chosen_query="`ISCO_group`.isin(['Armed forces', 'Clerks', 'Craft and related trades workers', 'Elementary occupations', 'Plant and machine operators and assemblers', 'Service workers and shop and market sales workers', 'Skilled agricultural and fishery workers', 'Technicians and associate professionals'])",
                      filter_query="`education_category`.isin(['higher'])")

In [None]:
figure = plot_groups(data,
                     groupby='LAU1_CODE',
                     value='percent',
                     title='Percent of people working on simple job among people with a degree'
                     )
figure.show()

### Gender specialization of education

#### Distribution of education by gender

To see, how education is distributed over sex, let's group data by sex and education, and then create a barplot.

In [None]:
education_by_sex = (table_040_SR
                    .groupby(['sex', 'education_category'])['count']
                    .sum()
                    .reset_index()
                    )

education_by_sex.sort_values(by='count', ascending=False, inplace=True)

In [None]:
figure, axes = plt.subplots(figsize=(10, 10))

sns.barplot(data=education_by_sex,
            y="education_category",
            x="count",
            hue="sex",
            ax=axes)

axes.set_xlabel('Count')
axes.set_ylabel('Education')
axes.set_title('Distribution of education')
pass

#### When the higher education began to open to women in Slovakia

To see age differences while splitting the dataset by some other property, we can define the following functions. They work similarly to the previous functions, but there are some differences, so let's describe how they work.

`compute_age_based` computes a dataset grouped by age and some optional parameter, and performs the next steps:
1. Queries dataset by query (if query isn't trivial).
2. If `groupby` is `None` (which means that we're only grouping by age), then:
    1. Groups the filtered dataset by `'age'` columns and calculates the sum.
    2. Calculates the percent in one of 3 ways:
        * `category_percent` is the ratio of the number of people chosen by the query with a specific value of age to all people, chosen by the query.
        * `filtered_percent` is the ratio of the number of people chosen by query with a specific value of age to *all* people with this age.
        * `age_percent` is the ratio of the number of people chosen by query with a specific value of age to all people *chosen by query* with this age.
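3. If `groupby` isn't `None`, the same steps are performed, but the counts and the totals for `filtered_percent` are grouped by both `'age'` and `groupby`, the totals for `category_percent` are grouped by `groupby` alone, and the totals for `age_percent` are grouped by `'age'` alone.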




`plot_age_based` is just plotting a linegraph with specific parameters. 

In [None]:
# dash_age functions
def compute_age_based(data, query, groupby=None, filter_result=None):
    """Aggregate `count` by age (and optionally one more column).

    Parameters
    ----------
    data : pandas.DataFrame
        Table with `age` and `count` columns (and `groupby`, when given).
    query : str
        `DataFrame.query` expression choosing the people of interest; an
        empty string chooses everybody.
    groupby : str, optional
        Additional column to group by next to `age`.
    filter_result : int, optional
        When positive, rows whose `number` is below it are dropped.

    Returns
    -------
    pandas.DataFrame
        Columns `age`, (`groupby`), `number`, (`total_category`),
        `total_unfiltered`, `total_age`, `category_percent`,
        `filtered_percent`, `age_percent`.
    """
    chosen = data.query(query) if query != "" else data

    # Grouping keys of the result: age alone, or age plus the extra column.
    keys = ['age'] if groupby is None else ['age', groupby]

    aggregated = chosen.groupby(keys)['count'].sum().reset_index(name='number')

    # Everybody (chosen or not) per key, and the chosen people per age.
    unfiltered_totals = data.groupby(keys)['count'].sum().rename('total_unfiltered')
    age_totals = chosen.groupby(['age'])['count'].sum().rename('total_age')

    if groupby is not None:
        # Chosen people per category; merged first to keep the column order.
        category_totals = (chosen
                           .groupby([groupby])['count']
                           .sum()
                           .rename('total_category'))
        aggregated = pd.merge(aggregated, category_totals, on=[groupby], how='left')

    aggregated = pd.merge(aggregated, unfiltered_totals, on=keys, how='left')
    aggregated = pd.merge(aggregated, age_totals, on=['age'], how='left')

    if groupby is None:
        # Without categories the denominator is all chosen people.
        category_denominator = chosen['count'].sum()
    else:
        category_denominator = aggregated['total_category']

    aggregated['category_percent'] = aggregated['number'] / category_denominator * 100
    aggregated['filtered_percent'] = aggregated['number'] / aggregated['total_unfiltered'] * 100
    aggregated['age_percent'] = aggregated['number'] / aggregated['total_age'] * 100

    if filter_result is not None and filter_result > 0:
        keep = aggregated['number'] >= filter_result
        aggregated = aggregated[keep]

    return aggregated


def plot_age_based(
        data,
        groupby=None,
        title="",
        display_value="number",
        markers=False,
):
    """Draw a line chart of `display_value` over age.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with an `age` column, a `number` column (shown on hover) and
        the `display_value` column, e.g. the result of `compute_age_based`.
    groupby : str, optional
        Column used to colour the lines; `None` draws a single line.
    title : str, optional
        Figure title.
    display_value : str, optional
        Column plotted on the y-axis. "number" is labelled as a count, any
        other column as a percentage.
    markers : bool, optional
        Whether to draw a marker on every data point.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    figure = px.line(
        data, x="age", y=display_value, color=groupby, hover_data=["number"]
    )

    # `display_value` is a column name, so it has to be compared with
    # "number"; the former comparison with 0 never matched and every chart
    # was labelled as a percentage.
    if display_value == "number":
        yaxis_title = "Number of people"
    else:
        yaxis_title = "Percent of people"
    figure.update_layout(xaxis_title="Age", title=title, yaxis_title=yaxis_title)

    figure.update_traces(
        mode="lines+markers" if markers else "lines",
        connectgaps=True,
    )
    return figure

In [None]:
data = compute_age_based(table_040_SR,
                         query="`education_category`.isin(['higher']) and 0 <= `age` < 91",
                         groupby='sex',
                         filter_result=1)

In [None]:
figure = plot_age_based(data,
                        groupby='sex',
                        title='Percent of people with a degree',
                        display_value='filtered_percent',
                        markers=False)
figure.show()

Women started getting higher education at the same rate as men only about (60 - 18) = 42 years ago, i.e. around the year 1970.

In [None]:
data = compute_age_based(table_040_OK,
                         query="18 <= `age` < 90 and `education_category`.isin(['primary', 'without'])",
                         groupby='sex',
                         filter_result=1)

In [None]:
figure = plot_age_based(data,
                        groupby='sex',
                        title='Percent of people with primary education or without',
                        display_value='filtered_percent',
                        markers=False)
figure.show()

### Adult illiteracy

#### Geographic aspect of illiteracy

Here are some simple calculations to select all people who are older than 14, have no education and are not studying.

In [None]:
districts_illiteracy = districts_table.set_index('LAU1_CODE')

table_OK_population = table_040_OK.groupby('LAU1_CODE')['count'].sum()

table_OK_uneducated = (table_040_OK
                       .query(
    'education == "bez školského vzdelania – osoby vo veku 15 rokov a viac" & current_economic_activity != "žiak základnej školy"')
                       .groupby(['LAU1_CODE'])['count']
                       .sum())

table_uneducated_percent = 100 * (table_OK_uneducated / table_OK_population)

geotable_uneducated = gpd.GeoDataFrame(table_OK_uneducated, geometry=districts_illiteracy['geometry'])
geotable_uneducated_percent = gpd.GeoDataFrame(table_uneducated_percent, geometry=districts_illiteracy['geometry'])

display(table_OK_population.head(2))
display(geotable_uneducated.head(2))
display(geotable_uneducated_percent.head(2))

Let's do some simple maps, one with absolute values, and the second one with percents:

In [None]:
plot = geotable_uneducated.plot(column='count',
                                legend=True,
                                legend_kwds={"orientation": "horizontal"},
                                cmap=sns.color_palette("flare", as_cmap=True))

plot.set_title("Number of people without education")
plot.set_axis_off()

plot = geotable_uneducated_percent.plot(column='count',
                                        legend=True,
                                        legend_kwds={"orientation": "horizontal"},
                                        cmap=sns.color_palette("flare", as_cmap=True))

plot.set_title("Percent of people without education")
plot.set_axis_off()

#### Employment of uneducated people

Let's select info about employment, divide into 3 parts and analyze it:

In [None]:
table_uneducated_by_isco = (table_067_SR
                            .query("education == 'bez školského vzdelania – osoby vo veku 15 rokov a viac'")
                            .groupby('ISCO_occupation')['count']
                            .sum()
                            .sort_values(ascending=False)
                            .reset_index())

undefined_occupation_count = table_uneducated_by_isco.query('ISCO_occupation == "nezistené"')['count'].sum()

inapplicable_occupation_count = table_uneducated_by_isco.query('ISCO_occupation == "neaplikovateľné"')['count'].sum()

applicable_occupation = (table_uneducated_by_isco
                         .query('ISCO_occupation != "nezistené" & ISCO_occupation != "neaplikovateľné"'))

applicable_occupation_count = applicable_occupation['count'].sum()
display(applicable_occupation.head())

In [None]:
plt.pie([undefined_occupation_count, inapplicable_occupation_count, applicable_occupation_count],
        labels=['unknown', 'inapplicable', 'applicable'],
        autopct='%1.1f%%')

plt.show()

In [None]:
plot = sns.barplot(data=applicable_occupation.head(10),
                   x='count',
                   y='ISCO_occupation',
                   color=sns.color_palette(as_cmap=True)[0])

plot.bar_label(plot.containers[0], fontsize=8, padding=3)
plot.set_xlabel('Number of people')
plot.set_ylabel('')
plt.subplots_adjust(left=0.5, right=1.6)

#### Age distribution of illiteracy

Similarly, we can draw an age histogram:

In [None]:
table_SR_uneducated = (table_040_SR
                       .query('education == "bez školského vzdelania – osoby vo veku 15 rokov a viac" & current_economic_activity != "žiak základnej školy"'))

table_SR_uneducated

In [None]:
plot = sns.histplot(data=table_SR_uneducated, x="age", weights="count", binwidth=1)

plot.set_xlabel('Age')
plot.set_ylabel('Number of people')
plot.set_title('Number of people without education')

plt.xticks(rotation=45)
plt.show()

## Employment analysis

### Employment specialization

Let's copy the dataframe to do manipulations: 

In [None]:
table_067_KR_copy = table_067_KR.copy()

In [None]:
occupation_bar = table_067_KR_copy.dropna().groupby('ISCO_group')['count'].sum()

Occupation groups (barplot):

In [None]:
px.bar(y=occupation_bar.index,
       x=occupation_bar.values,
       labels={'x': 'count', 'y': 'occupation'},
       title='The number of people by type of occupation',
       orientation='h')

For the Parcats graph, we need to group the dataframe by education category and ISCO group:

In [None]:
# Number of people for every (education category, ISCO group) pair.
# NOTE(review): other cells of this notebook exclude unknown education with
# the label "unspecified"; confirm that "undefined" is a label that actually
# occurs, otherwise this filter keeps every row.
help_for_par1 = (table_067_KR_copy
                 .query('`education_category` != "undefined"')
                 .groupby(['education_category', 'ISCO_group'])['count']
                 .sum())

# Parcats takes one value list per dimension plus a parallel list of counts,
# so the two index levels are unpacked separately.
education_values = help_for_par1.index.get_level_values(0).tolist()
occupation_values = help_for_par1.index.get_level_values(1).tolist()

fig = go.Figure(go.Parcats(
    dimensions=[dict(label='education', values=education_values),
                dict(label='occupation', values=occupation_values)],
    counts=help_for_par1.values,
))

fig.update_layout(height=600, width=800)
fig.show()

### Sectoral structure of the economy

At first, filter out `undefined`:

In [None]:
# Drop rows whose education category or NACE group is labelled "undefined".
# NOTE(review): earlier cells use the label "unspecified" for unknown
# education (education bar chart filter, ISCO chart category order) -- confirm
# which label the preprocessing really produces; if it is "unspecified",
# these two filters remove nothing.
table_067_KR_copy = table_067_KR_copy.query('`education_category` != "undefined"')
table_067_KR_copy = table_067_KR_copy.query('`NACE_group` != "undefined"')

After that, calculate the number of people in one economic sector and in one economic sector with specific education category.

In [None]:
sector_sum = table_067_KR_copy.groupby('NACE_group')['count'].sum()
sector_sum_edu = table_067_KR_copy.groupby(['NACE_group', 'education_category'])['count'].sum()

After that, we can map these numbers into the dataframe and calculate the percentages:

In [None]:
# Total number of people in the row's economic sector.
table_067_KR_copy['count_for_sector'] = (table_067_KR_copy['NACE_group']
                                         .map(sector_sum)
                                         .astype('int64'))

# Number of people in the row's (sector, education category) pair. A grouped
# transform broadcasts each pair's sum back onto its rows in one vectorised
# step, replacing the former row-by-row `apply` lookup.
table_067_KR_copy['count_edu_sector'] = (table_067_KR_copy
                                         .groupby(['NACE_group', 'education_category'])
                                         ['count']
                                         .transform('sum'))

# Share of the education category within its sector, in percent.
table_067_KR_copy['percentage'] = (table_067_KR_copy['count_edu_sector']
                                   / table_067_KR_copy['count_for_sector']
                                   * 100)

For every economic sector let's create graph, which tells education distribution in this sector:

In [None]:
tmp_sectors = table_067_KR_copy['NACE_group'].unique()
# 'without' is left out because its share looks like 0 percent on the graph.
tmp_education = (table_067_KR_copy
                 [table_067_KR_copy['education_category'] != 'without']
                 ['education_category']
                 .unique())

# Sector x education table of percentages. Every (sector, education) pair
# carries a single percentage, so one row per pair is enough. Building an
# explicitly indexed table keeps each bar segment attached to its own sector;
# de-duplicating on the count value and stacking by position could drop or
# misalign a sector whenever two sectors shared a count or one had no rows.
sector_labels = list(tmp_sectors)
percent_by_sector = (table_067_KR_copy
                     .drop_duplicates(['NACE_group', 'education_category'])
                     .pivot(index='NACE_group',
                            columns='education_category',
                            values='percentage')
                     .reindex(index=sector_labels, columns=list(tmp_education))
                     .fillna(0))

# Left edge of the next segment for every sector, in `sector_labels` order.
starts = np.zeros(len(sector_labels))

figure, axes = plt.subplots(figsize=(10, 6))

for group in tmp_education:
    widths = percent_by_sector[group].to_numpy()

    rectangles = axes.barh(y=sector_labels,
                           width=widths,
                           left=starts,
                           label=group)

    axes.bar_label(rectangles, label_type='center', fmt="%.0f%%")
    # Rebind instead of `+=` so the array already handed to `barh` is untouched.
    starts = starts + widths

axes.set_title('Economic sectors + Level of education')
axes.legend(bbox_to_anchor=(1, 1), loc=2)
axes.xaxis.set_visible(False)
axes.set_frame_on(False)
pass

Now let's change our perspective - for each education group create a graph with distribution of this education over all economic sectors:

In [None]:
table_for_pie = table_067_KR_copy.drop_duplicates(['education_category', 'NACE_group', 'count_for_sector'])

In [None]:
figure, axes = plt.subplots(figsize=(8, 5))
axes.pie(x=table_for_pie.query('`education_category` == "without"')['count_edu_sector'],
         labels=table_for_pie.query('`education_category` == "without"')['NACE_group'],
         autopct="%.1f%%")

axes.set_title('People without education by economic sectors')
pass

In [None]:
# Rows of the "higher" education category, selected once and reused below.
higher_rows = table_for_pie[table_for_pie['education_category'] == "higher"]

figure, axes = plt.subplots(figsize=(8, 5))
axes.pie(x=higher_rows['count_edu_sector'],
         labels=higher_rows['NACE_group'],
         autopct="%.1f%%")
axes.set_title('People with higher education by economic sectors')
pass

In [None]:
# Rows of the "primary" education category, selected once and reused below.
primary_rows = table_for_pie[table_for_pie['education_category'] == "primary"]

figure, axes = plt.subplots(figsize=(8, 5))
axes.pie(x=primary_rows['count_edu_sector'],
         labels=primary_rows['NACE_group'],
         autopct="%.1f%%")
axes.set_title('People with primary education by economic sectors')
pass

### Geographic specialization of economy

As in previous graphs, let's create choropleth graphs:

In [None]:
# Grouped by `LAU1_CODE`, with the primary sector as the chosen subset and
# no additional filter.
data = compute_groups(
    table_067_OK,
    groupby='LAU1_CODE',
    chosen_query="`NACE_group`.isin(['primary'])",
    filter_query='',
)

In [None]:
# Primary sector, displayed as absolute numbers.
figure = plot_groups(
    data,
    groupby='LAU1_CODE',
    value='number',
    title='Number of people working in primary economic sector',
)
figure.show()

Why are there so many people in Prievidza?

In [None]:
# Rows of the primary sector in the Prievidza district ...
in_prievidza = table_067_OK['district_name'] == 'Okres Prievidza'
in_primary_sector = table_067_OK['NACE_group'] == 'primary'
prievidza_primary_sector = table_067_OK[in_prievidza & in_primary_sector]

# ... reduced to the total `count` of every NACE section.
section_totals = prievidza_primary_sector.groupby('NACE_section')['count'].sum()
prievidza_primary_sector = section_totals.reset_index()

In [None]:
# Share of each NACE section within Prievidza's primary sector.
plt.pie(
    x=prievidza_primary_sector['count'],
    labels=prievidza_primary_sector['NACE_section'],
    autopct="%.1f%%",
)
pass

Our assumption is that this is a combination of two factors: "Nestle" (agronomy) and "Hornonitrianske bane Prievidza" (mining and quarrying).

The primary sector of the economy is small in comparison to the secondary and tertiary sectors, which is why absolute numbers are more informative here. Let's look at the percentages as well:

In [None]:
# Primary sector, displayed as percentages.
figure = plot_groups(
    data,
    groupby='LAU1_CODE',
    value='percent',
    title='Percent of people working in primary economic sector',
)
figure.show()

Now let's do the same for secondary and tertiary sectors:

In [None]:
# Grouped by `LAU1_CODE`, with the secondary sector as the chosen subset and
# no additional filter.
data = compute_groups(
    table_067_OK,
    groupby='LAU1_CODE',
    chosen_query="`NACE_group`.isin(['secondary'])",
    filter_query='',
)

In [None]:
# Secondary sector, displayed as absolute numbers.
figure = plot_groups(
    data,
    groupby='LAU1_CODE',
    value='number',
    title='Number of people working in secondary economic sector',
)
figure.show()

In [None]:
# Secondary sector, displayed as percentages.
figure = plot_groups(
    data,
    groupby='LAU1_CODE',
    value='percent',
    title='Percent of people working in secondary economic sector',
)
figure.show()

In [None]:
# Grouped by `LAU1_CODE`, with the tertiary sector as the chosen subset and
# no additional filter.
data = compute_groups(
    table_067_OK,
    groupby='LAU1_CODE',
    chosen_query="`NACE_group`.isin(['tertiary'])",
    filter_query='',
)

In [None]:
# Tertiary sector, displayed as absolute numbers.
figure = plot_groups(
    data,
    groupby='LAU1_CODE',
    value='number',
    title='Number of people working in tertiary economic sector',
)
figure.show()

In [None]:
# Tertiary sector, displayed as percentages.
figure = plot_groups(
    data,
    groupby='LAU1_CODE',
    value='percent',
    title='Percent of people working in tertiary economic sector',
)
figure.show()

### Analysis of unemployment

Let's take a look at the values of current economic activity:

In [None]:
# Distinct values of the economic-activity column (shown as the cell output).
economic_activity = table_040_SR['current_economic_activity']
economic_activity.unique()

We will use the classic definition of the unemployment rate: $u = \frac{U}{U+E} \cdot 100\%$, where $E$ is the number of employed people and $U$ is the number of unemployed people. Note that unemployed workers, by the ILO definition, are those who are not working but are willing, available, and actively searching for work.

So, we will be interested in workers excluding retirees (`pracujúci (okrem dôchodcov)`), working retirees (`pracujúci dôchodca`) and the unemployed (`nezamestnaný`).

Let's define a function that calculates the number of people of the same age:

In [None]:
def calculate_counts(data):
    """Total the `count` column for every distinct value of `age`.

    Returns a new DataFrame with the columns `age` and `total_count`.
    """
    totals_by_age = data.groupby('age')['count'].sum()
    return totals_by_age.rename('total_count').reset_index()

Now we can create a dataframe in which each row holds the number of employed or unemployed people of one specific age:

In [None]:
# Boolean masks for the two groups that make up the labour force.
activity = table_040_SR['current_economic_activity']
working_idx = activity.isin(['pracujúci (okrem dôchodcov)', 'pracujúci dôchodca'])
unemployed_idx = activity.isin(['nezamestnaný'])

working_age_groups = table_040_SR[working_idx]
not_working_groups = table_040_SR[unemployed_idx]

# Per-age totals, each tagged with the status it belongs to.
employed_table = calculate_counts(working_age_groups).assign(**{'Economic status': 'employed'})
unemployed_table = calculate_counts(not_working_groups).assign(**{'Economic status': 'unemployed'})

labor_forces = pd.concat([unemployed_table, employed_table])
labor_forces.sample(5)

In [None]:
plt.figure(figsize=(10, 6))

# First two default colours in reverse: this fixes the yellow-blue order
# (hue_order otherwise messes with the stack order, which is ugly).
status_palette = sns.color_palette()[:2][::-1]

age_axes = sns.histplot(data=labor_forces,
                        x='age',
                        weights='total_count',
                        hue='Economic status',
                        hue_order=['unemployed', 'employed'],
                        multiple="stack",
                        binwidth=5,
                        palette=status_palette)

age_axes.set_xlabel('Age')
age_axes.set_ylabel('Total Count')
age_axes.set_title('Employed and Unemployed Counts by Age')
age_axes.legend(labels=['Employed', 'Unemployed'])
pass

Now let's see how many of the unemployed are men and how many are women:

In [None]:
colors = sns.color_palette('pastel')

# Unemployed people aged 20 to 70 (both bounds inclusive) ...
aged_20_to_70 = not_working_groups['age'].between(20, 70)
not_working_groups_gender = not_working_groups[aged_20_to_70]

# ... totalled per sex.
count_by_sex = not_working_groups_gender.groupby('sex')['count'].sum()
not_working_groups_gender = count_by_sex.reset_index()

plt.figure(figsize=(6, 6))
plt.pie(x=not_working_groups_gender['count'],
        labels=not_working_groups_gender['sex'],
        colors=colors,
        autopct='%1.1f%%')
plt.title('Sex Distribution over Unemployment from 20 to 70', fontweight='bold')
plt.show()
pass

Let's add age categorization by introducing 5-year-groups:

In [None]:
def categorize_age(age):
    """Return the label of the 5-year group that `age` falls into.

    `age` is converted with `int()`. Ages of 90 and above share the label
    '90 and greater'; any other age gets '<start> to <start + 4>', where
    <start> is the age rounded down to a multiple of 5.
    """
    years = int(age)
    if years < 90:
        lower_bound = years - years % 5
        return f'{lower_bound} to {lower_bound + 4}'
    return '90 and greater'

In [None]:
# `not_working_groups` is a boolean-indexed selection of `table_040_SR`, so
# setting a column on it in place raises SettingWithCopyWarning. `assign`
# returns a new frame with the extra column instead.
not_working_groups = not_working_groups.assign(
    **{'5_year_groups': not_working_groups['age'].apply(categorize_age)}
)

Now we can add an interactive graph to see how unemployment depends on age and education.

`update_plot` is a function that prepares the data for the graph: it restricts the dataframe to the selected age groups, groups it by age group and education level, sorts the education levels and draws the chart:

In [None]:
app = Dash(__name__)

# One dropdown entry per 5-year age group found among the unemployed.
age_group_options = [
    {'label': group_label, 'value': group_label}
    for group_label in not_working_groups['5_year_groups'].unique()
]

age_group_dropdown = dcc.Dropdown(
    id='age-group-dropdown',
    options=age_group_options,
    value=[age_group_options[0]['value']],  # start with the first group selected
    multi=True,
)
unemployment_graph = dcc.Graph(id='education-vs-unemployment-plot',
                               style={'height': '500px'})

app.layout = html.Div([
    html.H1("Education vs. Unemployment by Age Group"),
    html.Label("Select Age Group(s)"),
    age_group_dropdown,
    unemployment_graph,
])


@app.callback(
    Output('education-vs-unemployment-plot', 'figure'),
    [Input('age-group-dropdown', 'value')]
)
def update_plot(selected_age_groups):
    """Draw unemployed counts per education level for the selected age groups.

    `selected_age_groups` is the value of the multi-select dropdown: a list
    of `5_year_groups` labels. Returns a grouped plotly bar chart with one
    bar colour per age group and the education levels ordered by their total
    count, largest first.
    """
    # `isin(None)` raises TypeError, so treat a missing selection as empty;
    # the chart is then simply drawn without bars.
    selected_age_groups = selected_age_groups or []
    filtered_data = not_working_groups[not_working_groups['5_year_groups'].isin(selected_age_groups)]

    aggregated_data = (filtered_data
                       .groupby(['education_category', '5_year_groups'])['count']
                       .sum()
                       .reset_index())

    # Education levels ordered by total count over the selected groups.
    sorted_education = (aggregated_data
                        .groupby('education_category')['count']
                        .sum()
                        .sort_values(ascending=False)
                        .index)

    fig = px.bar(aggregated_data,
                 x='education_category', y='count',
                 color='5_year_groups',
                 barmode='group',
                 title='Education vs. Unemployment by Age Group',
                 labels={'count': 'Total Count'},
                 # plotly documents a list of values here, not a pandas Index
                 category_orders={'education_category': list(sorted_education)})
    return fig


if __name__ == '__main__':
    app.run_server(debug=True, port=8045)

## Interactive section

While analysing data, our team produced interactive graphs.

Let's start with a graph that shows occupations by education level, optionally divided by economic sector.

The implementation is straightforward: the data is filtered by the selected education level and then grouped by ISCO and NACE groups.

In [None]:
app = Dash(__name__)

# Colour assigned to each economic sector (NACE group).
COLORS_DICTIONARY = {
    'other activities': 'orange',
    'primary': 'royalblue',
    'secondary': 'green',
    'tertiary': 'red',
}

education_choices = ['Without education', 'Primary education', 'Secondary education',
                     'Vocational education', 'Higher education']

education_selector = html.Div(children=[
    html.Label('Education: '),
    dcc.Dropdown(options=education_choices, value='Primary education', id='education-type'),
])
color_selector = html.Div(children=[
    html.Label('Color by economic sector: '),
    dcc.RadioItems(options=['yes', 'no'], value='no', id='color-choice'),
])

app.layout = html.Div([
    education_selector,
    color_selector,
    dcc.Graph(id='graph-content'),
])


@app.callback(
    Output('graph-content', 'figure'),
    [
        Input('education-type', 'value'),
        Input('color-choice', 'value')
    ]
)
def update_figure(selected_education, color_c):
    """Draw a horizontal bar chart of occupations for one education level.

    `selected_education` is the label chosen in the education dropdown and
    `color_c` is 'yes' or 'no' from the radio buttons. Counts are summed per
    (`ISCO_group`, `NACE_group`); with 'yes' the bars are split and coloured
    by economic sector.
    """
    # Dropdown label -> value used in the `education_category` column.
    education_dict = {
        'Without education': 'without',
        'Primary education': 'primary',
        'Secondary education': 'secondary',
        'Higher education': 'higher',
        'Vocational education': 'vocational'
    }

    education = education_dict.get(selected_education)

    education_subset = (table_067_KR_copy
                        .query('`education_category` == @education')
                        .groupby(['ISCO_group', 'NACE_group'])
                        ['count']
                        .sum()
                        .reset_index())

    if color_c == 'yes':
        color_c = 'NACE_group'
        categories = {'NACE_group': ['primary', 'secondary', 'tertiary', 'other activities']}
        # Fixed sector colours; the constant was defined but never applied.
        color_map = COLORS_DICTIONARY
    else:
        color_c = None
        categories = None
        color_map = None

    figure = px.histogram(education_subset,
                          x='count', y='ISCO_group',
                          width=900, height=600,
                          color=color_c,
                          orientation='h',
                          # `labels` is keyed by column name; the former keys
                          # ('y', 'sum of count', 'color') matched no column
                          # and were silently ignored.
                          labels={'ISCO_group': 'occupation', 'NACE_group': 'economic sector'},
                          category_orders=categories,
                          color_discrete_map=color_map)

    # The aggregated x axis would read "sum of count"; set the intended title.
    figure.update_layout(title_text='Occupations by level of education',
                         xaxis_title_text='count')
    #figure.update_layout(legend_traceorder="reversed")

    return figure


if __name__ == '__main__':
    app.run_server(debug=True, port=8051)

In [None]:
def range_slider_marks(low, high, target_count=10):
    """Return slider marks ``{value: label}`` from `low` up to `high` inclusive.

    The step is ``(high - low) // target_count`` but never less than 1, so a
    range narrower than `target_count` no longer produces a zero step (which
    makes `range()` raise ValueError).
    """
    step = max(1, (high - low) // target_count)
    return {i: str(i) for i in range(low, high + 1, step)}


def _category_selector(data, attr, type):
    """Build a multi-select dropdown over the categories of `data[attr]`."""
    return dcc.Dropdown(
        # Mark selector element with type and attr to find it later
        id={"type": type, "attr": attr},
        # attr must be of type `category`
        options=data[attr].cat.categories,
        persistence=True,
        multi=True,
    )


def _int_range_selector(data, attr, type):
    """Build a range slider spanning the values of the integer column `data[attr]`."""
    low = int(data[attr].min())
    high = int(data[attr].max())
    return dcc.RangeSlider(
        id={"type": type, "attr": attr},
        min=low,
        # One past the maximum: the generated query uses an exclusive upper bound.
        max=high + 1,
        step=1,
        marks=range_slider_marks(low, high),
        value=[low, high + 1],
        persistence=True,
    )


# Column dtype name -> factory `(data, attr, type)` that builds the Dash
# component used to select values of that column. The components are built
# lazily, only when a factory is called.
ATTR_SELECTOR_MAP = {
    "category": _category_selector,
    "int64": _int_range_selector,
}

def _category_clause(attr, entry):
    """Membership clause; `entry` is the Dropdown value (a list of options)."""
    return f"`{attr}`.isin({entry})"


def _int_range_clause(attr, entry):
    """Half-open range clause; `entry` is the Range Slider value (two bounds)."""
    lower, upper = entry[0], entry[1]
    return f"{lower} <= `{attr}` < {upper}"


# Column dtype name -> builder `(attr, entry)` of one query-expression clause.
ATTR_QUERY_EXPR_MAP = dict(
    category=_category_clause,
    int64=_int_range_clause,
)

def _has_selection(entry):
    """True when at least one option is selected."""
    return len(entry) != 0


def _is_bound_pair(entry):
    """True when the value holds exactly two bounds."""
    return len(entry) == 2


# Column dtype name -> predicate telling whether a selector value should
# contribute a clause to the query.
TEST_ATTR_FILTER_MAP = dict(
    category=_has_selection,
    int64=_is_bound_pair,
)


def get_selectivity(data, attributes, type):
    """Build one labelled selector component per requested column.

    Args:
        data: table whose columns are being selected on.
        attributes: iterable of column names of `data`.
        type: tag stored in each selector's id (together with the column
            name) so the selectors can be found again by pattern matching.

    Returns:
        A list of `html.Div`, each holding a heading and the selector chosen
        from `ATTR_SELECTOR_MAP` by the column's dtype.

    Raises:
        TypeError: if a column's dtype has no entry in `ATTR_SELECTOR_MAP`.
    """
    children = []
    for attr in attributes:
        # Choose appropriate 'selector' according to attr type
        dtype_name = str(data[attr].dtype).lower()
        make_selector = ATTR_SELECTOR_MAP.get(dtype_name)
        if make_selector is None:
            # Same exception type as the former "call None" trick, but with a
            # message that names the offending column.
            raise TypeError(
                f"No selector is defined for column {attr!r} of dtype {dtype_name!r}"
            )
        children.append(
            html.Div(
                [
                    html.H4(f"Select {data[attr].name}"),
                    make_selector(data, attr, type),
                ]
            )
        )
    return children


def form_query(data, selectors, attrs):
    """Join the clauses of all active selectors with " and ".

    `attrs` and `selectors` are paired up position by position. A selector
    whose value is None, or whose value is rejected by the dtype's predicate
    in `TEST_ATTR_FILTER_MAP`, contributes nothing. Returns an empty string
    when no selector is active.
    """
    clauses = []
    for attr, entry in zip(attrs, selectors):
        if entry is None:
            continue
        dtype_name = str(data[attr].dtype).lower()
        # An unmatched dtype yields None here, and calling it raises TypeError.
        if not TEST_ATTR_FILTER_MAP.get(dtype_name)(entry):
            continue
        clauses.append(ATTR_QUERY_EXPR_MAP.get(dtype_name)(attr, entry))
    return " and ".join(clauses)


def big_annotation(text: str, color: str):
    """Return annotation settings for a large, faint, tilted text mark.

    The text is upper-cased and centred on the figure (paper coordinates),
    drawn in `color` without an arrow.
    """
    font_settings = {"color": color, "size": 100}
    return {
        "name": "draft watermark",
        "text": text.upper(),
        "textangle": -30,
        "opacity": 0.1,
        "font": font_settings,
        "xref": "paper",
        "yref": "paper",
        "x": 0.5,
        "y": 0.5,
        "showarrow": False,
    }

In [None]:
# District-level CSV files offered in the interactive plots; the position in
# this list is the table index used by the Dash callbacks.
table_names = [
    "RV_O_010_L_OK_SK.CSV",
    "RV_O_040_L_OK_SK.CSV",
    "RV_O_047_L_OK_SK.CSV",
    "RV_O_067_L_OK_SK.CSV",
]
interactive_tables = [pd.read_csv(data_root + '/' + table, sep=";") for table in table_names]

# Table used by the age based plots: index 1, i.e. "RV_O_040_L_OK_SK.CSV".
age_table = interactive_tables[1]

# District attributes looked up by district code.
districts_indexed = geo_frame.set_index('LAU1_CODE')
for table in interactive_tables:
    # The return value is discarded, so this presumably modifies `table`
    # in place - TODO confirm against the `preprocessing` module.
    preprocessing.preprocess(table)

    # Turn every text column into a `category` column (via `string`), which is
    # the dtype the dropdown selectors are keyed on.
    object_columns = [column for column in table.columns if table[column].dtype in ['object', 'string']]
    table[object_columns] = table[object_columns].astype('string')
    table[object_columns] = table[object_columns].astype('category')

    if "LAU1_CODE" in table:
        # Temporarily index by district code so that the district attributes
        # are matched to the rows by code, then restore the plain index.
        table.set_index("LAU1_CODE", inplace=True)
        table[["region_name", "NUTS3_CODE", "ecoregion_name", "NUTS2_CODE", "state_name", "NUTS1_CODE"]] = (
            districts_indexed[["NUTS3", "NUTS3_CODE", "NUTS2", "NUTS2_CODE", "NUTS1", "NUTS1_CODE"]]
        )
        table.reset_index(inplace=True)

### Age based plots

In [None]:
# %%script true # Skip

app = Dash(__name__)

# Initial content of the graph component.
figure = go.Figure()
figure.add_annotation(big_annotation("START", "black"))

# The dropdown value is the position of the label in this list.
display_value_labels = [
    "Number",
    "Category percent",
    "Filtered percent",
    "Age percent",
]

# Left column: all inputs that control the plot.
controls_panel = html.Div(
    [
        html.H4("Enter title"),
        dcc.Input(id="title", type="text", persistence=True, style={"width": "100%"}),
        html.H4("Select groupby"),
        dcc.Dropdown(
            id="groupby",
            options=["None"] + list(age_table.columns),
            persistence=True,
        ),
        html.H4("Select display value"),
        dcc.Dropdown(
            options=[
                {"value": position, "label": label}
                for position, label in enumerate(display_value_labels)
            ],
            value=0,
            id="display-value",
            persistence=True,
        ),
        html.H4("Enter lower bound"),
        dcc.Input(id="lower-bound", type="number", persistence=True),
        dcc.Checklist(
            id="markers-checkbox",
            options=[{"label": "Add markers", "value": True}],
            value=[],
            persistence=True,
        ),
        html.H4("Select chosen/percented attributes"),
        dcc.Dropdown(
            id="chosen-attributes",
            multi=True,
            persistence=True,
            options=age_table.columns,
        ),
        # Filled by a callback with one selector per chosen attribute.
        html.Div(id="choose-zone"),
    ],
    style={"flex": 1, "minWidth": 400, "padding": 10},
)

# Right column: the plot and the code that reproduces it.
plot_panel = html.Div(
    [
        dcc.Graph(id="line-plot", style={"aspect-ratio": "1.6"}, figure=figure),
        dcc.Textarea(id="function-call", style={"width": "100%"}),
    ],
    style={"flex": 2, "padding": 10},
)

app.layout = html.Div(
    [controls_panel, plot_panel],
    style=style | {"padding": 10, "display": "flex", "flexDirection": "row"},
)


@app.callback(
    Output("choose-zone", "children"),
    Input("chosen-attributes", "value"),
)
def update_chosen(chosen_attributes):
    """Rebuild the selectors of the choose zone for the chosen columns."""
    if chosen_attributes is not None:
        return get_selectivity(data=age_table, attributes=chosen_attributes, type="chosen")
    # Nothing has been chosen yet: leave the zone untouched.
    raise PreventUpdate


@app.callback(
    Output("line-plot", "figure"),
    Output("function-call", "value"),
    Input("groupby", "value"),
    Input("lower-bound", "value"),
    Input("title", "value"),
    Input("display-value", "value"),
    Input("markers-checkbox", "value"),
    Input({"type": "chosen", "attr": ALL}, "value"),
    State({"type": "chosen", "attr": ALL}, "id"),
)
def update_figure(groupby, lower_bound,
                  title,
                  display_value, checkbox, chosen, chosen_id):
    """Redraw the age based plot and the code snippet that reproduces it.

    Args:
        groupby: column to group by, or the string "None" for no grouping.
        lower_bound: number passed on as `filter_result`.
        title: title of the plot.
        display_value: position of the selected entry in `compute_values`.
        checkbox: value of the markers checklist; non-empty means markers on.
        chosen: values of all "chosen" selectors.
        chosen_id: ids of those selectors, carrying the column name in "attr".

    Returns:
        `[figure, code]`. If computing or plotting fails, the figure only
        carries an "ERROR" annotation and the exception is printed.
    """
    # A cleared dropdown has no value; indexing `compute_values` with None
    # would raise TypeError, so keep the current plot instead.
    if display_value is None:
        raise PreventUpdate

    query = form_query(age_table, chosen, [a["attr"] for a in chosen_id])
    groupby = groupby if groupby != "None" else None
    compute_values = ["number", "category_percent", "filtered_percent", "age_percent"]
    display_value = compute_values[display_value]
    # Also False when the checklist has no value at all.
    markers = bool(checkbox)
    figure = go.Figure()
    try:
        data = compute_age_based(age_table, query, groupby, filter_result=lower_bound)
        figure = plot_age_based(data, groupby, title, display_value, markers)
    except Exception as e:
        # Deliberately best effort: an invalid combination of inputs must not
        # break the app, it is only flagged on the (empty) figure.
        figure.add_annotation(big_annotation("ERROR", "red"))
        print(e)

    return [
        figure,
        f"data = compute_age_based(age_table, {query=!r}, {groupby=!r}, filter_result={lower_bound!r})\n"
        f"figure = plot_age_based(data, {groupby=!r}, {title=!r}, {display_value=!r}, markers={markers!r})",
    ]


app.run_server(port=8053, use_reloader=True, debug=True)

### Grouped plots

In [None]:
# %%script true # Skip

app = Dash(__name__)

# Initial content of the graph component.
figure = go.Figure()
figure.add_annotation(big_annotation("START", "black"))

# Left column: all inputs that control the plot. The options of the
# attribute and groupby dropdowns are filled in by a callback.
controls_panel = html.Div(
    [
        html.H4("Enter title"),
        dcc.Input(id="title", type="text", persistence=True, style={"width": "100%"}),
        html.H4("Select table"),
        dcc.Dropdown(
            id="table-index",
            options=[
                {"value": position, "label": file_name}
                for position, file_name in enumerate(table_names)
            ],
            persistence=True,
        ),
        html.H4("Select groupby"),
        dcc.Dropdown(id="groupby", persistence=True),
        html.H4("Select display value"),
        dcc.Dropdown(
            options=["number", "percent"],
            value="number",
            id="display-value",
            persistence=True,
        ),
        html.H4("Select chosen/percented attributes"),
        dcc.Dropdown(id="chosen-attributes", multi=True, persistence=True),
        html.H4("Select filter attributes"),
        dcc.Dropdown(id="filter-attributes", multi=True, persistence=True),
        html.H4("Choose zone"),
        html.Div(id="choose-zone"),
        html.H4("Filter zone"),
        html.Div(id="filter-zone"),
    ],
    style={"flex": 1, "minWidth": 400, "padding": 10},
)

# Right column: the plot and the code that reproduces it.
plot_panel = html.Div(
    [
        dcc.Graph(id="line-plot", style={"aspect-ratio": "1.6"}, figure=figure),
        dcc.Textarea(id="function-call", style={"width": "100%"}),
    ],
    style={"flex": 2, "padding": 10},
)

app.layout = html.Div(
    [controls_panel, plot_panel],
    style=style | {"padding": 10, "display": "flex", "flexDirection": "row"},
)


@app.callback(
    Output("chosen-attributes", "options"),
    Output("filter-attributes", "options"),
    Output("groupby", "options"),
    Input("table-index", "value"),
)
def update_attributes(table_index):
    """Offer the columns of the selected table in all three dropdowns.

    Each option is labelled "<dtype>: <column>" and has the column name as
    its value.
    """
    if table_index is None:
        raise PreventUpdate
    table = interactive_tables[table_index]
    column_options = {
        column: f"{table[column].dtype}: {column}" for column in table.columns
    }
    return [column_options, column_options, column_options]


# update filters and groupby
@app.callback(
    Output("choose-zone", "children"),
    Output("filter-zone", "children"),
    Input("chosen-attributes", "value"),
    Input("filter-attributes", "value"),
    State("table-index", "value"),
)
def update_fg(chosen_attributes, filter_attributes, table_index):
    """Rebuild the selectors of the choose zone and of the filter zone.

    Args:
        chosen_attributes: columns picked for the choose zone, or None.
        filter_attributes: columns picked for the filter zone, or None.
        table_index: position of the selected table in `interactive_tables`.

    Returns:
        `[choose_zone_children, filter_zone_children]`.
    """
    if table_index is None:
        raise PreventUpdate
    table = interactive_tables[table_index]
    # A dropdown that was never touched has the value None. Treat it as "no
    # columns" so that filling only one of the two dropdowns is enough to get
    # its selectors rendered.
    return [
        get_selectivity(table, chosen_attributes or [], type="chosen"),
        get_selectivity(table, filter_attributes or [], type="filter"),
    ]


@app.callback(
    Output("line-plot", "figure"),
    Output("function-call", "value"),
    Input("title", "value"),
    Input("display-value", "value"),
    Input("groupby", "value"),
    Input({"type": "chosen", "attr": ALL}, "value"),
    State({"type": "chosen", "attr": ALL}, "id"),
    Input({"type": "filter", "attr": ALL}, "value"),
    State({"type": "filter", "attr": ALL}, "id"),
    State("table-index", "value"),
)
def update_figure(
        title,
        display_value,
        groupby,
        chosen,
        chosen_id,
        filter,
        filter_id,
        table_index,
):
    """Redraw the grouped plot and the code snippet that reproduces it.

    Nothing is updated until a display value, a groupby column and a table
    are all selected. If computing or plotting fails, the returned figure
    only carries an "ERROR" annotation and the exception is printed.
    """
    required_inputs = (display_value, groupby, table_index)
    if any(value is None for value in required_inputs):
        raise PreventUpdate

    table = interactive_tables[table_index]
    chosen_attrs = [selector_id["attr"] for selector_id in chosen_id]
    filter_attrs = [selector_id["attr"] for selector_id in filter_id]
    chosen_query = form_query(table, chosen, chosen_attrs)
    filter_query = form_query(table, filter, filter_attrs)

    figure = go.Figure()
    try:
        grouped = compute_groups(table, groupby, chosen_query, filter_query)
        figure = plot_groups(grouped, groupby, display_value, title)
    except Exception as error:
        figure.add_annotation(big_annotation("ERROR", "red"))
        print(error)

    reproduce_code = f"data = compute_groups({table_names[table_index]!r}, {groupby=!r}, {chosen_query=!r}, {filter_query=!r})\nfigure = plot_groups(data, {groupby=!r}, value={display_value!r}, {title=!r})"
    return [figure, reproduce_code]


app.run_server(port=8054, debug=True, use_reloader=True)