In [None]:
# Select matplotlib's interactive "notebook" backend for figures in this notebook.
%matplotlib notebook
# Load the dotenv IPython extension and read variables from `mantik.env`
# (presumably the settings `mantik.init_tracking()` relies on — TODO confirm).
%load_ext dotenv
%dotenv mantik.env

import functools
import itertools

import hdbscan
import mantik
import mlflow
from sklearn import cluster

import lifetimes

# Direct the lifetimes package's log output to stdout.
lifetimes.utils.log_to_stdout()
# Initialise mantik tracking (presumably configures MLflow tracking from the
# environment variables loaded via dotenv — TODO confirm).
mantik.init_tracking()

In [None]:
# Input data.
#
# Option A (disabled): synthetic dataset with two temporally varying
# elliptical data regions on a grid.
#
#     dataset = lifetimes.testing.create_dummy_ecmwf_ifs_hres_dataset(
#         grid_size=(10, 10)
#     )
#     ds = dataset.as_xarray()
#
# Option B (active): read a local file.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path = "/home/fabian/Documents/MAELSTROM/data/pca/temperature_level_128_daily_averages_2020.nc"
ds = lifetimes.features.EcmwfIfsHresDataset(paths=[path], overlapping=False)

# Select the variable "t" and show it as the cell output.
dataset_as_xarray = ds.as_xarray()
data = dataset_as_xarray["t"]
data

In [None]:
# Animate `data` and keep the returned object in `anim` (presumably so the
# animation stays referenced while the interactive figure is shown — TODO confirm).
anim = lifetimes.plotting.animate_timeseries(data)

In [None]:
# Pre-bind the coordinate names so the PCA routine can be handed to
# `determine_modes` as a ready-to-call method.
pca_method = lifetimes.modes.methods.spatio_temporal_pca
pca_partial_method = functools.partial(
    pca_method, time_coordinate="time", latitude_coordinate="latitude"
)

# Exactly one Modes object goes in, so exactly one result is unpacked;
# the unpacking raises if the number of results differs.
modes = [lifetimes.modes.Modes(feature=data)]
(pca,) = lifetimes.modes.determine_modes(modes=modes, method=pca_partial_method)

In [None]:
# Plot the time series of the first three PCA components.
lifetimes.plotting.plot_first_three_components_timeseries(pca)

In [None]:
# Scree plot for the PCA with a variance ratio of 0.95 (presumably drawn as the
# cumulative explained-variance threshold — TODO confirm).
lifetimes.plotting.plot_scree_test(pca, variance_ratio=0.95)

In [None]:
# Parameter grid for the HDBSCAN scan. Both ranges currently hold a single
# value (3 components, minimum cluster size 30), so the loop runs exactly once.
n_components_range = range(3, 4)
min_cluster_size_range = range(30, 31)
parameter_grid = itertools.product(n_components_range, min_cluster_size_range)

# `clusters` and the loop variable `n_components` stay bound after the loop
# and are read by later cells.
for n_components, min_cluster_size in parameter_grid:
    # MLflow logging is currently disabled. To re-enable it, wrap the loop
    # body in `with mlflow.start_run():` and uncomment the log calls below.
    algorithm = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size)
    clusters = lifetimes.modes.methods.find_pc_space_clusters(
        algorithm=algorithm,
        pca=pca,
        n_components=n_components,
        use_varimax=False,
    )
    # mlflow.log_param("n_components", n_components)
    # mlflow.log_param("hdbscan_min_cluster_size", min_cluster_size)
    # mlflow.log_metric("n_clusters", clusters.n_clusters)

In [None]:
# Plot the first three PCA component time series together with the cluster result.
lifetimes.plotting.plot_first_three_components_timeseries_clusters(clusters)

In [None]:
# Plot the condensed tree of the clustering (presumably HDBSCAN's condensed
# cluster hierarchy — TODO confirm).
lifetimes.plotting.plot_condensed_tree(clusters)

In [None]:
# Plot the single-linkage tree of the clustering (presumably HDBSCAN's
# single-linkage hierarchy — TODO confirm).
lifetimes.plotting.plot_single_linkage_tree(clusters)

In [None]:
import xarray as xr

# Wrap the cluster labels in a DataArray and plot them.
label_series = xr.DataArray(clusters.labels)
label_series.plot()

In [None]:
# Derive lifetimes from the cluster labels along the "time" coordinate
# and show the result as the cell output.
cluster_labels = clusters.labels
cluster_lifetimes = lifetimes.modes.methods.determine_lifetimes_of_modes(
    modes=cluster_labels, time_coordinate="time"
)
cluster_lifetimes

In [None]:
algorithm = cluster.KMeans(n_clusters=4)
clusters = lifetimes.modes.methods.find_pc_space_clusters(
    algorithm=algorithm, 
    pca=pca, 
    n_components=n_components, 
    use_varimax=False,
)

In [None]:
lifetimes.plotting.plot_first_three_components_timeseries_clusters(clusters)

In [None]:
import pandas as pd
# CSV of runs (presumably an export of the MLflow parameter scan — TODO confirm).
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path = "/home/fabian/Downloads/runs.csv"
df = pd.read_csv(path)

In [None]:
# Reshape the run table: one row per HDBSCAN minimum cluster size, one column
# per number of components, cells holding the number of clusters.
pivoted = df.pivot(
    index="hdbscan_min_cluster_size",
    columns="n_components",
    values="n_clusters",
)
pivoted

In [None]:
# Line plot of the pivoted table on a linear y axis, labelled with the plotted quantity.
ax = pivoted.plot(logy=False)
ax.set_ylabel("n_clusters")

In [None]:
import xarray as xr
import numpy as np

# Plot the first entry of the PCA components in their original shape.
first_component = pca.components_in_original_shape[0]
xr.DataArray(first_component).plot()

In [None]:
# Weighted centroids of the clusters labelled 0 and 1, taken from the
# clustering model (NOTE(review): `weighted_cluster_centroid` is an HDBSCAN
# method, so `clusters` must hold an HDBSCAN result here — confirm).
center_1, center_2 = (
    clusters.model.weighted_cluster_centroid(label) for label in (0, 1)
)
print(center_1, center_2)

In [None]:
# Inverse-transform the first centroid with the PCA (presumably from PC space
# back to the original data space — TODO confirm) and plot the result.
xr.DataArray(pca.inverse_transform(center_1, n_components=clusters.n_components)).plot()

In [None]:
# Inverse-transform the second centroid with the PCA (presumably from PC space
# back to the original data space — TODO confirm) and plot the result.
xr.DataArray(pca.inverse_transform(center_2, n_components=clusters.n_components)).plot()

In [None]:
# Difference between the inverse-transformed centroids (second minus first).
n_used_components = clusters.n_components
field_2 = pca.inverse_transform(center_2, n_components=n_used_components)
field_1 = pca.inverse_transform(center_1, n_components=n_used_components)
xr.DataArray(field_2 - field_1).plot()

In [None]:
# Note: pressure, humidity, vorticity/turbulence (3 terms)
# (translated from German; meaning of the note is not evident from the code — TODO confirm).
# Plot the field at the first time step.
data.isel({"time": 0}).plot()