In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
import scipy.stats
import geopandas as gpd
%matplotlib inline
from IPython.display import Markdown
from functools import reduce
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

In [None]:
# Theme switch for the whole notebook: keep the flag True for the dark look,
# set it to False to fall back to Plotly's default template.
DARK_THEME = True

if not DARK_THEME:
    # No inline CSS for the Dash layout; stock Plotly colours.
    style = None
    pio.templates.default = "plotly"
else:
    # Inline CSS handed to the outermost Dash container further down.
    style = {
        "background-color": "#1b1b1b",  # rgb(27, 27, 27)
        "color": "white",  # font
    }
    pio.templates.default = "plotly_dark"

In [None]:
import sys

# Extend the import search path with the parent directory *before* the project
# imports below. NOTE(review): presumably categorize_education, isco_occupation
# and preprocessing live there - confirm.
sys.path.append("..")

from os.path import join

from categorize_education import EDUCATION_CATEGORY_MAP
from isco_occupation import OCCUPATION_ISCO_MAP
import preprocessing

# Load the four semicolon-separated CSV tables from data_path. Each one is passed
# through preprocessing.rename_columns and then preprocessing.translate_sex
# (project helpers; their exact effect is not visible in this notebook).
data_path = "../data"
tables = [
    preprocessing.translate_sex(
        preprocessing.rename_columns(pd.read_csv(join(data_path, table), sep=";"))
    )
    for table in [
        "RV_O_010_L_OK_SK.CSV",
        "RV_O_040_L_OK_SK.CSV",
        "RV_O_047_L_OK_SK.CSV",
        "RV_O_067_L_OK_SK.CSV",
    ]
]
# Unpack in the same order as the file list above.
table_10, table_40, table_47, table_67 = tables

# Remote district table (read over HTTP by geopandas below).
districts_url = "https://bbrejova.github.io/viz/data/districts.json"
# NOTE(review): the URL below is presumably the upstream source of the local
# districts.geojson file - confirm.
# https://raw.githubusercontent.com/drakh/slovakia-gps-data/master/GeoJSON/epsg_4326/districts_epsg_4326.geojson
# Despite the "_url" suffix this is a local file path under data_path.
districts_geojson_url = join(data_path, "districts.geojson")

# Replace the open-ended label "90 a viac rokov" with "90" so the whole column
# can be converted to numbers. Note: this mutates table_40 in place.
table_40["age"] = pd.to_numeric(table_40["age"].replace({"90 a viac rokov": "90"}))
# Derive a category column via EDUCATION_CATEGORY_MAP; values missing from the
# map become NaN (standard Series.map behaviour).
table_40["education_category"] = table_40["education"].map(EDUCATION_CATEGORY_MAP)
districts = gpd.read_file(districts_url)
districts_geojson = gpd.read_file(districts_geojson_url, crs="EPSG:4326")

In [None]:
# Quick schema overview: show the column index of every loaded frame, in load order.
display(
    *(
        frame.columns
        for frame in (
            table_10,
            table_40,
            table_47,
            table_67,
            districts,
            districts_geojson,
        )
    )
)

In [None]:
# Step 1: build geo_frame - the district table keyed by "IDN3", with geometry and
# area columns taken from the local GeoJSON (also keyed by "IDN3").
districts_geojson_indexed = districts_geojson.set_index("IDN3")
districts_indexed = districts.set_index("IDN3")
# Source columns are paired with target columns by position:
# geometry -> geometry, Shape_Area -> Area, VYMERA_ha -> AreaHA.
# NOTE(review): rows are expected to line up via the shared IDN3 index - confirm
# both sources use the same IDN3 values.
districts_indexed[["geometry", "Area", "AreaHA"]] = districts_geojson_indexed[
    ["geometry", "Shape_Area", "VYMERA_ha"]
]
# geo_frame keeps referring to the IDN3-indexed frame even though the name
# districts_indexed is rebound two lines below.
geo_frame = districts_indexed
# Step 2: attach region information to table_40, this time keyed by "LAU1_CODE".
table_40_indexed = table_40.set_index("LAU1_CODE")
# Rebinding: from here on districts_indexed is indexed by LAU1_CODE, not IDN3.
districts_indexed = districts.set_index("LAU1_CODE")
# Positional column pairing again: NUTS3 -> region_name, NUTS3_CODE -> NUTS3_CODE,
# NUTS2 -> ecoregion_name, NUTS2_CODE -> NUTS2_CODE.
table_40_indexed[["region_name", "NUTS3_CODE", "ecoregion_name", "NUTS2_CODE"]] = (
    districts_indexed[["NUTS3", "NUTS3_CODE", "NUTS2", "NUTS2_CODE"]]
)

In [None]:
# Centroids computed in a cylindrical equal-area ("cea") projection.
# NOTE(review): `centroid` is not referenced by any other cell visible here; the
# map centre below is hard-coded instead - confirm whether it is still needed.
centroid = districts_geojson.to_crs({"proj": "cea"}).centroid
# Choropleth of the districts in geo_frame, coloured by the "AreaHA" column.
# Shapes and location ids both come from geo_frame (its geometry and its index).
fig = px.choropleth_mapbox(
    geo_frame,
    geojson=geo_frame.geometry,
    locations=geo_frame.index,
    color="AreaHA",
    mapbox_style="carto-positron",
    center={"lat": 48.6737532, "lon": 19.696058},
    zoom=6,
    opacity=0.5,
    # NOTE(review): `labels` maps column names to display labels, and no column
    # called "color" is passed here, so this entry may have no effect - confirm.
    labels={"color": "NM3"},
    hover_data=["NM3"],
)
# Compact figure without margins.
fig.update_layout(height=300, margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

In [None]:
def compute_groups(data, query, groupby, value, restriction=""):
    """Aggregate the "count" column per group and relate it to the group total.

    Arguments
        data - DataFrame with a "count" column and the column (or index
            level) named by `groupby`
        query - pandas query expression selecting the rows of interest;
            an empty string or None selects every row
        groupby - column / index level name (or list of names) to group by
        value - number | percent; accepted so the signature matches the
            plotting helper, but not used by this function - all columns
            are always returned
        restriction - pandas query expression applied *before* anything
            else; it narrows both the selection and the totals. An empty
            string or None means no restriction

    Returns
        DataFrame indexed by group with the columns
            number - sum of "count" over rows matching `query`
            total - sum of "count" over all (restricted) rows of the group
            percent - number / total * 100
        Groups without any row matching `query` are absent from the result.
    """
    # Falsy checks (rather than `!= ""`) so that None is treated as "no
    # filter" instead of being passed to DataFrame.query, which raises on it.
    if restriction:
        data = data.query(restriction)
    filtered = data.query(query) if query else data

    matched = filtered.groupby(groupby)["count"].sum()
    aggregated = matched.rename("number").to_frame()
    # Totals come from the restricted but unfiltered data; the assignment
    # aligns them to the groups present in `aggregated`.
    aggregated["total"] = data.groupby(groupby)["count"].sum()
    aggregated["percent"] = aggregated["number"] / aggregated["total"] * 100
    return aggregated


def plot_treemap(data, query, groupby, value, restriction=""):
    """Plot the aggregated groups as a treemap or, for geographic codes, as a map.

    The arguments are forwarded unchanged to compute_groups. When `groupby`
    is one of "NUTS2_CODE", "NUTS3_CODE" or "LAU1_CODE" the aggregate is
    merged into the module-level geo_frame and drawn as a choropleth coloured
    by `value`; any other `groupby` yields a treemap sized by `value`.

    Returns the plotly figure without showing it.
    """
    groups = compute_groups(data, query, groupby, value, restriction)

    # Geographic codes: draw the district shapes instead of tiles.
    if groupby in ("NUTS2_CODE", "NUTS3_CODE", "LAU1_CODE"):
        joined = geo_frame.merge(groups, on=groupby)
        return px.choropleth_mapbox(
            joined,
            geojson=joined.geometry,
            locations=joined.index,
            color=value,
            mapbox_style="carto-positron",
            center={"lat": 48.6737532, "lon": 19.696058},
            zoom=6,
            opacity=0.5,
            hover_data=["LAU1", "number", "percent"],
        )

    # Everything else: one tile per group; the group key becomes a column again.
    flat = groups.reset_index()
    return px.treemap(
        flat,
        path=[groupby],
        values=value,
        hover_data=["number", "percent"],
    )


# Example: number of people holding a bachelor's degree, grouped by region.
fig = plot_treemap(
    data=table_40_indexed,
    query="`education` == 'vysokoškolské vzdelanie - 1. stupeň (Bc.)'",
    groupby="region_name",
    value="number",
)
fig.show()

In [None]:
display(table_40)

# Columns that get a checklist filter in the dashboard (one checklist each).
filter_attributes = [
    "current_economic_activity",
    "region_name",
    "education_category",
    "education",
    "sex",
]

# Columns offered in the "group by" radio selector.
groupby_attributes = [
    "current_economic_activity",
    "education",
    "education_category",
    "sex",
    "ecoregion_name",
    "region_name",
    "district_name",
    "NUTS2_CODE",
    "NUTS3_CODE",
    "LAU1_CODE",
]

# Distinct values per filter column, in the same order as filter_attributes;
# checklist entries refer to these values by position.
unique_data = [table_40_indexed[attribute].unique() for attribute in filter_attributes]

# Start with nothing ticked. To start with every option ticked instead, use
# [list(range(len(values))) for values in unique_data].
selected_options = [[] for _ in filter_attributes]

In [None]:
# %%script true # Skip
app = Dash(__name__)

# One titled checklist per filter attribute. Option values are positions into
# the matching unique_data entry; the callback maps them back to real values.
checklists = [
    html.Div(
        [
            html.H4(f"Select {attribute}"),
            dcc.Checklist(
                id=f"selected_{position}",
                options=[
                    {"label": label, "value": index}
                    for index, label in enumerate(choices)
                ],
                value=preselected,
            ),
        ]
    )
    for position, (attribute, choices, preselected) in enumerate(
        zip(filter_attributes, unique_data, selected_options)
    )
]

# The age range slider is the last filter widget in the list.
checklists.append(
    html.Div(
        [
            html.H4("Select age"),
            dcc.RangeSlider(
                id="age-filter",
                min=0,
                max=90,
                step=1,
                marks={tick: str(tick) for tick in range(0, 91, 10)},
                value=[0, 90],  # Initial range
            ),
        ]
    )
)

# Page structure: filter widgets split into two half-width columns, then the
# graph, then a flex row with the title, group-by and display-value controls.
app.layout = html.Div(
    [
        # Even-positioned filter widgets on the left, odd-positioned on the right.
        html.Div(
            children=checklists[0::2],
            style=dict(width="50%", display="inline-block"),
        ),
        html.Div(
            children=checklists[1::2],
            style=dict(width="50%", display="inline-block"),
        ),
        html.Br(),
        html.Br(),
        # Output target of the update_figure callback.
        dcc.Graph(id="line-plot", style=dict(height="50vw")),
        html.Div(
            children=[
                html.Div(
                    children=[
                        html.H4("Enter title"),
                        dcc.Input(id="title", type="text"),
                    ],
                    style=dict(flex=1, padding=10),
                ),
                html.Div(
                    children=[
                        html.H4("Select groupby"),
                        dcc.RadioItems(
                            groupby_attributes,
                            groupby_attributes[0],
                            id="groupby",
                        ),
                    ],
                    style=dict(flex=1, padding=10),
                ),
                html.Div(
                    children=[
                        html.H4("Select display value"),
                        dcc.RadioItems(
                            id="display_value",
                            options=["number", "percent"],
                            value="number",
                        ),
                    ],
                    style=dict(flex=1, padding=10),
                ),
            ],
            style=dict(display="flex", flexDirection="row"),
        ),
    ],
    style=style,
)


@app.callback(
    Output("line-plot", "figure"),
    [
        Input("groupby", "value"),
        Input("title", "value"),
        Input("display_value", "value"),
        Input("age-filter", "value"),
        *(Input(f"selected_{i}", "value") for i in range(len(filter_attributes))),
    ],
)
def update_figure(groupby, title, display_value, age_filter, *arg):
    """Rebuild the figure whenever any control of the dashboard changes.

    groupby, title, display_value and age_filter are the current values of
    the controls with the matching ids; `arg` carries one list of ticked
    option positions per entry of filter_attributes, in that order.

    Side effect: the module-level selected_options list is overwritten in
    place with the current selections.
    """
    selected_options[:] = list(arg)

    # One `isin` clause per attribute that has at least one option ticked;
    # attributes with nothing ticked do not constrain the data.
    clauses = []
    for position, selected in enumerate(arg):
        if len(selected) == 0:
            continue
        # Translate the ticked positions back into the actual column values.
        chosen = [unique_data[position][s] for s in selected]
        clauses.append(f"`{filter_attributes[position]}`.isin({chosen})")
    query = " and ".join(clauses)

    # The age range is passed as a restriction, so it also narrows the totals
    # that percentages are computed against.
    age_restriction = f"({age_filter[0]} <= `age` <= {age_filter[1]})"

    figure: go.Figure = plot_treemap(
        table_40_indexed,
        query,
        groupby,
        display_value,
        restriction=age_restriction,
    )
    figure.update_layout(title=title)
    return figure


# Start the dashboard embedded in the notebook output. `run_server` is the
# deprecated alias of `run` (removed in Dash 3), and `mode=` was the keyword of
# the separate jupyter-dash package; with the core `Dash` class imported above,
# the display mode is selected via `jupyter_mode`.
app.run(jupyter_mode="inline", port=8054)