In [None]:
import json
from pathlib import Path

import altair as alt
import numpy as np
import pandas as pd
from ipywidgets import Button, FloatSlider, IntSlider, Output, VBox
from sklearn.decomposition import PCA

In [None]:
# Route all Altair charts in this notebook through the "vegafusion" data transformer.
# NOTE(review): presumably needed because the line charts below hold one row per
# (line, step) pair, which likely exceeds Altair's default row limit — confirm.
alt.data_transformers.enable("vegafusion")

In [None]:
# Parse the JSON file and wrap the decoded value in a NumPy array.
# NOTE(review): PCA is fit on this array below, so it is presumably a 2-D
# (samples x features) numeric array — confirm against the file contents.
data = np.array(json.loads(Path("results/real-ice.json").read_bytes()))

In [None]:
# Display the array dimensions as a quick sanity check on the loaded data.
data.shape

In [None]:
# Fit an 8-component PCA on the loaded data using the exact ("full") SVD solver.
# whiten=True asks scikit-learn to rescale the projected components (to unit
# variance, per the PCA documentation); get_clusters below samples its starting
# point from a standard normal in this reduced space.
pca = PCA(n_components=8, svd_solver="full", whiten=True)
pca.fit(data)

In [None]:
# Project the data onto the fitted components and summarise each component's
# distribution; the final expression renders the describe() table as cell output.
reduced = pca.transform(data)
df_reduced = pd.DataFrame(reduced)
df_reduced.describe()

In [None]:
def get_clusters(
    pca,
    cluster_sizes,
    between_deviation,
    within_deviation,
    seed=None,
):
    """Sample synthetic clusters of lines in PCA space and map them back.

    A single starting point is drawn from a standard normal in the reduced
    space. For every requested cluster, a centre is placed around that starting
    point (standard deviation ``between_deviation``) and ``cluster_size`` lines
    are scattered around the centre (standard deviation ``within_deviation``).
    Each batch of lines is then passed through ``pca.inverse_transform``.

    Parameters
    ----------
    pca
        Fitted model exposing ``n_components_`` and ``inverse_transform``.
    cluster_sizes
        Iterable with the number of lines to generate for each cluster.
    between_deviation
        Standard deviation of the cluster centres around the shared start.
    within_deviation
        Standard deviation of the lines around their cluster centre.
    seed
        Optional seed or ``numpy.random.Generator``. Anything accepted by
        ``numpy.random.default_rng`` works; the default ``None`` keeps the
        previous behaviour of drawing fresh entropy on every call. Pass a
        fixed value to make the generated clusters reproducible.

    Returns
    -------
    list
        One entry per cluster, each being the result of
        ``pca.inverse_transform`` applied to that cluster's sampled points.
    """
    # default_rng returns an existing Generator unchanged, so callers may pass
    # either a seed or a generator they manage themselves.
    rng = np.random.default_rng(seed)
    n_components = pca.n_components_

    # Shared anchor: all cluster centres are perturbations of this one point.
    start = rng.normal(loc=0, scale=1, size=n_components)

    clusters = []

    for cluster_size in cluster_sizes:
        center = start + rng.normal(
            loc=0, scale=between_deviation, size=n_components
        )
        # Broadcasting adds the centre to every sampled row.
        cluster_lines = center + rng.normal(
            loc=0, scale=within_deviation, size=(cluster_size, n_components)
        )
        clusters.append(pca.inverse_transform(cluster_lines))

    return clusters


def plot_clusters(clusters):
    """Draw every generated line, once overlaid and once split by cluster.

    Each entry of ``clusters`` is treated as a 2-D array with one line per row
    and one sample ("step") per column. The arrays are reshaped into a single
    long-form table with the columns ``id`` (unique across all clusters),
    ``step``, ``value`` and ``cluster`` (the entry's position in ``clusters``).

    Returns the vertical concatenation of two Altair charts: all lines in
    black on one pair of axes, above the same lines coloured by cluster and
    faceted into one column per cluster.
    """
    long_frames = []
    next_line_id = 0  # running id so lines stay distinct across clusters

    for cluster_index, values in enumerate(clusters):
        line_count, step_count = values.shape

        line_ids = np.arange(line_count) + next_line_id
        steps = np.arange(step_count)

        long_frames.append(
            pd.DataFrame(
                {
                    # Row-major flattening: each line id is repeated for all
                    # of its steps, and the step sequence restarts per line.
                    "id": np.repeat(line_ids, step_count),
                    "step": np.tile(steps, line_count),
                    "value": values.flatten(),
                    "cluster": cluster_index,
                }
            )
        )

        next_line_id += line_count

    df = pd.concat(long_frames)

    # Encodings common to both charts; `detail` keeps each line separate.
    line_encoding = {"x": "step", "y": "value", "detail": "id"}

    overlay = (
        alt.Chart(df)
        .mark_line(opacity=0.1, stroke="black")
        .encode(**line_encoding)
    )

    per_cluster = (
        alt.Chart(df)
        .mark_line(opacity=0.1)
        .encode(
            **line_encoding,
            color=alt.Color("cluster:N").legend(None),
            column="cluster:N",
        )
    )

    return overlay & per_cluster

In [None]:
def on_generate_click(_):
    """Button callback: sample new clusters from the slider values and redraw.

    A budget of 1200 lines is divided (integer division) equally between the
    number of clusters chosen on the slider. The deviations are read from the
    "Between" and "Within" sliders. The resulting chart replaces whatever the
    output widget currently shows.
    """
    cluster_count = num_clusters_slider.value
    lines_per_cluster = 1200 // cluster_count

    clusters = get_clusters(
        pca,
        cluster_sizes=[lines_per_cluster] * cluster_count,
        between_deviation=between_deviation_slider.value,
        within_deviation=within_deviation_slider.value,
    )

    # wait=True defers the clear until the new chart arrives, avoiding flicker.
    output.clear_output(wait=True)
    with output:
        display(plot_clusters(clusters))


# Output area that on_generate_click renders the charts into.
output = Output()

# Controls read by on_generate_click each time the button is pressed.
num_clusters_slider = IntSlider(description="Num clusters", min=2, max=5, step=1)
between_deviation_slider = FloatSlider(
    description="Between", value=0.7, min=0, max=1, step=0.05
)
within_deviation_slider = FloatSlider(
    description="Within", value=0.2, min=0, max=1, step=0.05
)

generate_button = Button(description="Generate")
generate_button.on_click(on_generate_click)

# Stack the controls vertically, with the button underneath the sliders.
inputs = VBox(
    children=[
        num_clusters_slider,
        between_deviation_slider,
        within_deviation_slider,
        generate_button,
    ]
)

display(inputs, output)