In [1]:
import sqlite3

import altair as alt
import ipywidgets as widgets
import matplotlib.pyplot as plt
import nivapy3 as nivapy
import numpy as np
import pandas as pd
import seaborn as sn
from IPython.display import Image, Markdown, clear_output, display

# Lift Altair's default row limit so larger DataFrames can be passed to charts
alt.data_transformers.disable_max_rows()
# Use the "ggplot" style sheet for any matplotlib figures
plt.style.use("ggplot")

# Tiltaksovervakingen: opsjon for kvalitetskontroll av analysedata
## Data visualisation

In [2]:
# Connect to the SQLite database file.
# PARSE_DECLTYPES asks sqlite3 to convert values based on each column's declared type.
dbname = "kalk_data.db"
eng = sqlite3.connect(dbname, detect_types=sqlite3.PARSE_DECLTYPES)

In [3]:
# Load the station and parameter/unit lookup tables in full
stn_df = pd.read_sql("SELECT * FROM stations", eng)
par_df = pd.read_sql("SELECT * FROM parameters_units", eng)

# Load the water chemistry table and parse the sample dates in the same step,
# so re-running the cell never operates on an already-converted column
wc_df = pd.read_sql("SELECT * FROM water_chemistry", eng).assign(
    sample_date=lambda tbl: pd.to_datetime(
        tbl["sample_date"], format="%Y-%m-%d %H:%M:%S"
    )
)

In [4]:
def make_tick_plot(df, ax_scale):
    """Make the tick (strip) plot with quantile markers.

    Args:
        df: DataFrame to plot. The chart reads the columns ``value``, ``lab``,
            ``vannmiljo_code``, ``sample_date``, ``parameter`` and ``unit``.
        ax_scale: Name of the x-axis scale type. It is lower-cased and passed
            to ``alt.Scale`` (the notebook's radio buttons supply "Linear" or
            "Log").

    Returns:
        A layered Altair chart: one tick per row of ``df`` (one strip per
        lab), overlaid with dashed vertical rules at the 1st/99th (red) and
        5th/95th (black) percentiles of ``value``. The quantile transforms
        have no ``groupby``, so the percentiles are computed over all rows of
        ``df`` rather than per lab.
    """
    base = alt.Chart(df, height=150, width=450, title="Strip plot")

    def _quantile_rules(probs, color):
        """Return dashed vertical rules at the given quantiles of ``value``."""
        return (
            base.transform_quantile("value", probs=probs)
            # Rule width is set by strokeWidth; `thickness` only affects tick
            # marks and was silently ignored here
            .mark_rule(strokeWidth=5, color=color, strokeDash=[5, 5])
            .encode(x=alt.X("value:Q", title="Value"))
        )

    # Ticks: one semi-transparent mark per sample, with labs in a fixed order
    ticks = (
        base.mark_tick(thickness=2, size=30, opacity=0.3)
        .encode(
            x=alt.X("value:Q", title="Value", scale=alt.Scale(type=ax_scale.lower())),
            y=alt.Y(
                "lab:N",
                title="",
                sort=[
                    "NIVA (historic)",
                    "VestfoldLAB (historic)",
                    "VestfoldLAB (2020)",
                ],
            ),
            color="lab:N",
            tooltip=[
                "vannmiljo_code:N",
                "sample_date:T",
                "lab:N",
                "parameter:N",
                "unit:N",
                "value:Q",
            ],
        )
        .interactive()
    )

    # Q1 and Q99
    q_1_99 = _quantile_rules([0.01, 0.99], "red")

    # Q5 and Q95
    q_5_95 = _quantile_rules([0.05, 0.95], "black")

    # Build plot
    return ticks + q_1_99 + q_5_95


def make_qq_plot(df, ax_scale):
    """Make the Q-Q plot comparing historic and new values.

    Args:
        df: DataFrame with a ``period`` column (rows equal to "historic" or
            "new" are used) and a numeric ``value`` column.
        ax_scale: Name of the axis scale type. It is lower-cased and passed to
            ``alt.Scale`` for both axes.

    Returns:
        A layered Altair chart: points of new-data quantiles against
        historic-data quantiles, coloured by percentile, plus a 1:1 reference
        line. If either period has no non-missing values, the chart is
        returned empty with a title that says so.
    """
    # Drop missing values so they cannot turn every quantile into NaN
    his_vals = df.loc[df["period"] == "historic", "value"].dropna()
    new_vals = df.loc[df["period"] == "new", "value"].dropna()

    title = "Q-Q plot"
    if his_vals.empty or new_vals.empty:
        # Quantiles are undefined for an empty sample; show an empty chart
        # instead of letting np.percentile fail inside the button callback
        qq_df = pd.DataFrame(columns=["percentile", "historic", "new"], dtype=float)
        title = "Q-Q plot (no data for one or both periods)"
    else:
        # 0.05 % steps from 0 to 100 inclusive, so that both the minimum and
        # the maximum of each sample are represented
        percs = np.linspace(0, 100, 2001)
        qq_df = pd.DataFrame(
            {
                "percentile": percs,
                "historic": np.percentile(his_vals, percs),
                "new": np.percentile(new_vals, percs),
            }
        )

    # Plot
    base = alt.Chart(qq_df, height=300, width=450, title=title)

    scatter = (
        base.mark_point()
        .encode(
            x=alt.X(
                "historic:Q",
                title="Historic data",
                scale=alt.Scale(type=ax_scale.lower()),
            ),
            y=alt.Y(
                "new:Q",
                title="New data",
                scale=alt.Scale(type=ax_scale.lower()),
            ),
            color=alt.Color("percentile:Q", scale=alt.Scale(scheme="turbo")),
            tooltip=["percentile:Q", "historic:Q", "new:Q"],
        )
        .interactive()
    )

    # 1:1 reference line: historic quantiles plotted against themselves
    line = base.mark_line().encode(
        x=alt.X("historic:Q", title="", scale=alt.Scale(type=ax_scale.lower())),
        y=alt.Y("historic:Q", title="", scale=alt.Scale(type=ax_scale.lower())),
    )

    return scatter + line


def make_kde_plot(df, ax_scale):
    """Build per-lab density curves of ``value``, one facet row per lab.

    Args:
        df: DataFrame with ``value`` and ``lab`` columns.
        ax_scale: Name of the x-axis scale type; lower-cased and passed to
            ``alt.Scale``.

    Returns:
        An interactive, row-faceted Altair area chart of the density estimate
        computed separately for each lab.
    """
    # Fixed facet order for the labs
    lab_order = [
        "NIVA (historic)",
        "VestfoldLAB (historic)",
        "VestfoldLAB (2020)",
    ]

    # Encoding channels
    value_axis = alt.X(
        "value:Q", title="Value", scale=alt.Scale(type=ax_scale.lower())
    )
    density_axis = alt.Y("density:Q", title="")
    lab_rows = alt.Row("lab:N", title="", sort=lab_order)

    # Density estimate per lab, drawn as semi-transparent areas
    chart = alt.Chart(df, height=160, width=450, title="Density plot")
    densities = chart.transform_density(density="value", groupby=["lab"])
    areas = densities.mark_area(opacity=0.3).encode(
        x=value_axis,
        y=density_axis,
        color="lab:N",
        row=lab_rows,
    )

    return areas.interactive()

In [5]:
def filter_data(b):
    """Button callback: rebuild the plots for the current widget selections.

    Reads the selected parameter and axis scale from the module-level
    widgets, filters ``wc_df`` to that parameter, and renders the strip, Q-Q
    and density plots into the ``output`` widget, replacing whatever was
    shown before.

    Args:
        b: The argument passed by the button's ``on_click`` hook (unused).
    """
    with output:
        clear_output()

        # Get user options
        par = pars_dropdown.value
        ax_scale = log_radio.value

        # Get data for par
        df = wc_df.query("parameter == @par")

        # Non-positive values cannot be shown on a log axis
        if (ax_scale == "Log") and (df["value"].min() <= 0):
            display(Markdown("#### **WARNING:** The dataset contains values less than or equal to zero. These will be removed before applying the log-transform."))
            df = df.query("value > 0")

        # Nothing left to plot (e.g. every value was <= 0 on a log axis)
        if df.empty:
            display(Markdown("#### No data available for the selected parameter and axis scale."))
            return

        ticks = make_tick_plot(df, ax_scale)
        qq = make_qq_plot(df, ax_scale)
        kde = make_kde_plot(df, ax_scale)

        # configure_* methods return a new chart rather than modifying in
        # place, so the result must be kept for the font sizes to take effect
        plot = (
            ((ticks & qq) | kde)
            .configure_axis(labelFontSize=16, titleFontSize=20)
            .configure_legend(labelFontSize=16)
        )

        display(plot)

In [6]:
# Shared style: size the description label to fit its text, so that longer
# labels are not cut off
style = {"description_width": "initial"}

# Parameter choices: unique, non-missing parameter names in sorted order
par_list = sorted(wc_df["parameter"].dropna().unique())
pars_dropdown = widgets.Dropdown(
    options=par_list,
    # Guard against an empty table, where there is no first option to select
    value=par_list[0] if par_list else None,
    description="Select parameter:",
    style=style,
)

log_radio = widgets.RadioButtons(
    options=["Linear", "Log"],
    description="Axis scale:",
    style=style,
)

start = widgets.Button(description="Start", style={"font_weight": "bold"})

# Plots are rendered into this widget by the button callback
output = widgets.Output()

# Register the callback before showing the controls
start.on_click(filter_data)

display(pars_dropdown, log_radio, start, output)

Dropdown(description='Select parameter:', options=('ALK', 'ANC', 'CA', 'CL', 'ILAL', 'K', 'KOND', 'LAL', 'MG',…

RadioButtons(description='Axis scale:', options=('Linear', 'Log'), style=DescriptionStyle(description_width='i…

Button(description='Start', style=ButtonStyle(font_weight='bold'))

Output()

In [7]:
# grid = sn.FacetGrid(
#     df, col="par_unit", hue="lab", col_wrap=4, sharex=False, sharey=False
# )
# grid.map(sn.kdeplot, "value")
# [ax.set_xscale("log") for ax in grid.axes]
# grid.add_legend()

In [8]:
# g = grid = sn.catplot(
#     data=df,
#     x="lab",
#     y="value",
#     col="par_unit",
#     col_wrap=4,
#     kind="box",
#     sharex=False,
#     sharey=False,
# )
# g.set(yscale="log")

In [9]:
# import altair as alt
# alt.data_transformers.enable('json')

# alt.Chart(df).mark_boxplot().encode(
#     x='lab:O',
#     y='value',
#     color=alt.Color('lab'),
#     facet=alt.Facet('par_unit:N',
#                     columns=4),
# ).interactive()

In [10]:
# alt.data_transformers.disable_max_rows()

# # Build drop-down list
# par_list = ["None"] + sorted(df["par_unit"].unique())
# input_dropdown = alt.binding_select(options=par_list)
# selection = alt.selection_single(
#     fields=["par_unit"], bind=input_dropdown, name="Select"
# )

# alt.Chart(df).mark_tick(thickness=2, size=40, opacity=0.3).encode(
#     y="lab:N",
#     x="value:Q",
#     color="lab:N",  # alt.Color("value:Q", scale=alt.Scale(scheme="reds")),
#     tooltip=["vannmiljo_code:N", "sample_date:T", "lab:N", "par_unit:N", "value:Q"],
# ).properties(width=600, height=200,).add_selection(selection).transform_filter(
#     selection
# ).interactive()