In [2]:
import os
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import keyfi as kf
from keyfi.dimred import UMAP
from keyfi.cluster import HDBSCAN


Parameters: input data location and output paths

In [9]:
# Input: root directory of the VTK data and the snapshot sub-directory to read.
SNAPSHOT = "285140.078369"
DATA_PATH = "/home/azureuser/project/data/plane"

# Output: where figures are written, and where embeddings are cached
# (one cache directory per snapshot).
figpath = "/home/azureuser/project/figures/test"
embedding_path = os.path.join("/home/azureuser/project/data/embeddings", SNAPSHOT)

# Create both output directories up front; an existing directory is not an error.
for _out_dir in (figpath, embedding_path):
    os.makedirs(_out_dir, exist_ok=True)

helper functions

In [5]:
def get_data(snapshot):
    """Import the VTK data belonging to one snapshot.

    Parameters
    ----------
    snapshot : str
        Name of the sub-directory of ``DATA_PATH`` that contains ``data.vtk``.

    Returns
    -------
    The result of ``kf.import_vtk_data`` for that file, passed through
    unchanged (the notebook unpacks it as ``df, mesh``).
    """
    vtk_file = os.path.join(DATA_PATH, snapshot, "data.vtk")
    return kf.import_vtk_data(vtk_file)

data prep

In [6]:
# Read the raw snapshot: a table of field values plus the mesh object.
df, mesh = get_data(SNAPSHOT)

# Variables excluded from the embedding input.
dropped_vars = ["N2", "NO2", "rho"]

# In the recorded output the cleaned frame keeps T, N2O4, Qdot, U:0 and U:1,
# i.e. U:2 is removed as well -- presumably a consequence of dim=2 (confirm
# against kf.clean_data).
cleaned_data = kf.clean_data(df, dim=2, vars_to_drop=dropped_vars)

# Column labels of the cleaned frame; the plotting cell iterates over these.
variables = cleaned_data.columns

print(df.head())
variables

            T      N2O4    N2       NO2       rho      Qdot       U:0  \
0  299.994415  0.747802  0.05  0.202198  2.839309 -1.619457  0.006443   
1  299.999786  0.747809  0.05  0.202191  2.839273  1.347651  0.009065   
2  300.000031  0.747810  0.05  0.202190  2.839273 -4.644562  0.005039   
3  300.004242  0.747824  0.05  0.202176  2.839263  0.473055  0.001704   
4  299.994385  0.747803  0.05  0.202197  2.839310 -2.447919  0.009460   

        U:1       U:2  
0 -0.000145  0.000348  
1 -0.000294  0.000466  
2 -0.000062  0.000203  
3 -0.000006  0.000202  
4 -0.000088  0.000608  


Index(['T', 'N2O4', 'Qdot', 'U:0', 'U:1'], dtype='object')

UMAP hyperparameter grid (n_neighbors and min_dist values to sweep)

In [7]:
# n_neighbors values to sweep: 50, 100, ..., 400.
n_neighbors_range = list(range(50, 401, 50))

# min_dist values to sweep. Kept as literals on purpose: these floats are
# formatted into the cached embedding file names, so they must print exactly
# as "0.05", "0.1" and "0.15".
min_dist_range = [
    0.05,
    0.1,
    0.15,
]

dimensionality reduction

In [None]:
print("Starting embedding")
# The cache directory is listed once, before the sweep; this listing serves
# as the index of embeddings that already exist on disk.
saved_embeddings = os.listdir(embedding_path)

for n_neighbors in n_neighbors_range:
    for min_dist in min_dist_range:
        start_time = time.time()
        print(n_neighbors, min_dist)

        # One .npy file per (n_neighbors, min_dist) combination.
        cache_name = f"{n_neighbors}_{min_dist}.npy"
        cache_file = os.path.join(embedding_path, cache_name)

        if cache_name not in saved_embeddings:
            # Not cached yet: compute the embedding and store it for later runs.
            embedding, mapper = kf.embed_data(
                data=cleaned_data,
                algorithm=UMAP,
                scale=True,
                n_neighbors=n_neighbors,
                min_dist=min_dist,
                # Original author's note: setting random_state ensures
                # reproducibility; it is left disabled for faster compute.
                # random_state=0,
                # Number of dimensions to reduce to.
                n_components=2,
            )
            np.save(cache_file, embedding)
        else:
            embedding = np.load(cache_file)
            print("loaded from existing")

        # Wall-clock time for this combination (load or compute + save).
        print("time:", time.time() - start_time)

Starting embedding
50 0.05
loaded from existing
time: 0.0013496875762939453
50 0.1
loaded from existing
time: 0.0011224746704101562
50 0.15
loaded from existing
time: 0.0007863044738769531
100 0.05
loaded from existing
time: 0.0006401538848876953
100 0.1
loaded from existing
time: 0.000640869140625
100 0.15
loaded from existing
time: 0.0009827613830566406
150 0.05
loaded from existing
time: 0.0010030269622802734
150 0.1
loaded from existing
time: 0.0009617805480957031
150 0.15
loaded from existing
time: 0.0009753704071044922
200 0.05
time: 118.36742925643921
200 0.1
time: 103.94423866271973
200 0.15


figure creation

In [None]:
# Create one figure per (n_neighbors, min_dist, variable) combination.
#
# The embedding is loaded from its cache file on every iteration. Without
# this, the loop would plot whichever `embedding` happened to be left in the
# kernel by an earlier cell, so every figure would show the same embedding
# under a different title and file name.
for n_neighbors in n_neighbors_range:
    for min_dist in min_dist_range:

        print(n_neighbors, min_dist)

        # Same file naming as used when the embeddings are saved:
        # "<n_neighbors>_<min_dist>.npy" inside embedding_path.
        embedding_file = os.path.join(
            embedding_path, f"{n_neighbors}_{min_dist}.npy"
        )
        if not os.path.exists(embedding_file):
            # The embedding sweep may not have finished for this combination;
            # report it and carry on rather than plotting a stale embedding.
            print("no saved embedding found, skipping:", embedding_file)
            continue

        embedding = np.load(embedding_file)

        for var in variables:

            # Qdot is plotted with a fixed colour range; all other variables
            # pass an empty list (presumably this lets plot_embedding choose
            # the range itself -- confirm against keyfi).
            cmap_minmax = (-300, 300) if var == "Qdot" else []

            kf.plot_embedding(
                embedding=embedding,
                data=cleaned_data,
                scale_points=True,
                cmap_var=var,
                cmap_minmax=cmap_minmax,
                save=True,
                title=f"{n_neighbors}_{min_dist}_{var}",
                figname=f"{n_neighbors}_{min_dist}_{var}",
                figpath=figpath,
                view=(None, None)
            )

50 0.05
50 0.1
50 0.15
