In [1]:
import os
import time
from io import StringIO

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import keyfi as kf
from keyfi.dimred import UMAP
from keyfi.cluster import HDBSCAN

from sklearn.preprocessing import MaxAbsScaler


In [2]:
# UMAP hyper-parameters; both are forwarded to kf.embed_data in the embedding loop.
n_neighbors = 250
min_dist = 0.1
# Symmetric bound passed to the Qdot clip call as (-qdot_clip, qdot_clip).
# Also embedded in the name of the embeddings cache directory.
qdot_clip = 300

In [3]:
# Input snapshots: DATA_PATH is listed once here to count the snapshots.
DATA_PATH = "/home/azureuser/project/data/plane"

# Plain-text progress log.
logfile = "/home/azureuser/project/all_snapshots.log"

# Embedding cache directory; its name encodes the hyper-parameters so that
# runs with different settings do not share cached results.
embeddings_path = os.path.join(
    "/home/azureuser/project/data/embeddings/all_snapshots",
    f"{n_neighbors}_{min_dist}_{qdot_clip}c",
)
os.makedirs(embeddings_path, exist_ok=True)

# Listing of the cache directory taken once, when this cell runs.
saved_embeddings = os.listdir(embeddings_path)

# Number of entries in DATA_PATH, taken once when this cell runs.
num_snapshots = len(os.listdir(DATA_PATH))

In [4]:
def get_data(snapshot):
    """Load the VTK data of one snapshot.

    Args:
        snapshot: Name of the snapshot sub-directory inside DATA_PATH.

    Returns:
        Whatever kf.import_vtk_data returns for
        ``DATA_PATH/<snapshot>/data.vtk``.
    """
    vtk_file = os.path.join(DATA_PATH, snapshot, "data.vtk")
    return kf.import_vtk_data(vtk_file)

def log(*msg):
    """Append one line to the file named by ``logfile``.

    The arguments are converted with ``str`` and joined with single spaces,
    exactly as ``print`` would format them, then terminated with a newline.
    """
    line = " ".join(map(str, msg)) + "\n"
    with open(logfile, "a") as handle:
        handle.write(line)

In [5]:
# Features handed to kf.scale_data. The nested list is presumably scaled as
# one group (both velocity components together) - confirm against kf.scale_data.
features_to_scale = ['T', 'N2O4', 'Qdot', ['U:0', 'U:1']]
# One scaler class per entry of features_to_scale. The count is derived from
# the list so the two cannot drift apart when a feature is added or removed.
scalers = [MaxAbsScaler] * len(features_to_scale)

In [None]:
# Compute (or load from cache) a 2-D UMAP embedding for every snapshot found
# in DATA_PATH, reporting progress both to the log file and to stdout.
for index, snapshot in enumerate(os.listdir(DATA_PATH)):
    start_time = time.time()

    # --- data prep ---
    df, mesh = get_data(snapshot)

    data = kf.clean_data(
        df,
        dim=2,
        vars_to_drop=["N2", "NO2", "rho"],
    )
    # clip() returns a new object instead of modifying in place, so the result
    # has to be assigned back; previously it was discarded and Qdot was never
    # actually clipped.
    # NOTE(review): embeddings already cached in embeddings_path were produced
    # without the clip taking effect - delete them if they should be
    # recomputed with clipped Qdot.
    data["Qdot"] = data["Qdot"].clip(-qdot_clip, qdot_clip)

    data = kf.scale_data(data, features_to_scale, scalers)

    log(snapshot)
    print(snapshot)

    # --- embedding: reuse the cached array when present, else compute it ---
    embedding_name = f"{snapshot}.npy"
    embedding_file = os.path.join(embeddings_path, embedding_name)

    if embedding_name in saved_embeddings:
        embedding = np.load(embedding_file)
        log("loaded from existing")
        print("loaded from existing")

    else:
        embedding, mapper = kf.embed_data(
            data=data,
            algorithm=UMAP,
            n_neighbors=n_neighbors,
            min_dist=min_dist,
            # fixed seed so that the embedding is reproducible
            random_state=0,
            n_components=2,
        )

        # The path already ends in ".npy", so np.save writes exactly the file
        # that the cache check above looks for.
        np.save(embedding_file, embedding)

    # Measure once so that the log file and stdout report the same numbers.
    elapsed = time.time() - start_time
    progress = f"{100*(index+1)/num_snapshots:.2f}%"
    log("time:", elapsed)
    log(progress)
    print("time:", elapsed)
    print(progress)

285573.578369
loaded from existing
time: 0.03415656089782715
0.12%
285104.078369
loaded from existing
time: 0.03274941444396973
0.23%
286301.078369
loaded from existing
time: 0.031613826751708984
0.35%
285276.578369
loaded from existing
time: 0.0316617488861084
0.46%
285651.578369
loaded from existing
time: 0.030477523803710938
0.58%
285308.078369
loaded from existing
time: 0.03209638595581055
0.69%
285068.078369
loaded from existing
time: 0.0320279598236084
0.81%
285791.078369
loaded from existing
time: 0.03326702117919922
0.92%
286037.078369
loaded from existing
time: 0.03137826919555664
1.04%
286254.578369
loaded from existing
time: 0.031597137451171875
1.15%
285309.578369
loaded from existing
time: 0.03172636032104492
1.27%
285636.578369
loaded from existing
time: 0.03241467475891113
1.38%
286113.578369
loaded from existing
time: 0.031780242919921875
1.50%
285198.578369
loaded from existing
time: 0.03151988983154297
1.61%
285687.578369
loaded from existing
time: 0.03293108940124512

286251.578369
loaded from existing
time: 0.03235650062561035
14.65%
285900.578369
loaded from existing
time: 0.032813072204589844
14.76%
285849.578369
loaded from existing
time: 0.032256126403808594
14.88%
286191.578369
loaded from existing
time: 0.032094717025756836
14.99%
286119.578369
loaded from existing
time: 0.0324099063873291
15.11%
285240.578369
loaded from existing
time: 0.0323641300201416
15.22%
286293.578369
loaded from existing
time: 0.031701087951660156
15.34%
286299.578369
loaded from existing
time: 0.03293943405151367
15.46%
286193.078369
loaded from existing
time: 0.03208351135253906
15.57%
285503.078369
loaded from existing
time: 0.03219914436340332
15.69%
285507.578369
loaded from existing
time: 0.03225970268249512
15.80%
285437.078369
loaded from existing
time: 0.032178640365600586
15.92%
285809.078369
loaded from existing
time: 0.031187057495117188
16.03%
285972.578369
loaded from existing
time: 0.03167271614074707
16.15%
285236.078369
loaded from existing
time: 0.0

285050.078369
loaded from existing
time: 0.03429818153381348
29.18%
286061.078369
loaded from existing
time: 0.0335538387298584
29.30%
285992.078369
loaded from existing
time: 0.032538414001464844
29.41%
285480.578369
loaded from existing
time: 0.03240823745727539
29.53%
285405.578369
loaded from existing
time: 0.03217029571533203
29.64%
285156.578369
loaded from existing
time: 0.03236865997314453
29.76%
285810.578369
loaded from existing
time: 0.032289981842041016
29.87%
285540.578369
loaded from existing
time: 0.03346848487854004
29.99%
285210.578369
loaded from existing
time: 0.03279519081115723
30.10%
285693.578369
loaded from existing
time: 0.032814979553222656
30.22%
285470.078369
loaded from existing
time: 0.03272867202758789
30.33%
285845.078369
loaded from existing
time: 0.032263755798339844
30.45%
285593.078369
loaded from existing
time: 0.03226923942565918
30.57%
285723.578369
loaded from existing
time: 0.032285451889038086
30.68%
285681.578369
loaded from existing
time: 0.0

285072.578369
loaded from existing
time: 0.032232046127319336
43.71%
285642.578369
loaded from existing
time: 0.03150486946105957
43.83%
285780.578369
loaded from existing
time: 0.03244447708129883
43.94%
285500.078369
loaded from existing
time: 0.03204226493835449
44.06%
285504.578369
loaded from existing
time: 0.03148961067199707
44.18%
285398.078369
loaded from existing
time: 0.032056331634521484
44.29%
285623.078369
loaded from existing
time: 0.031906843185424805
44.41%
285788.078369
loaded from existing
time: 0.03339719772338867
44.52%
285993.578369
loaded from existing
time: 0.03128838539123535
44.64%
286002.578369
loaded from existing
time: 0.03231239318847656
44.75%
285143.078369
loaded from existing
time: 0.03160858154296875
44.87%
285884.078369
loaded from existing
time: 0.03145885467529297
44.98%
286292.078369
loaded from existing
time: 0.03160738945007324
45.10%
285627.578369
loaded from existing
time: 0.03202962875366211
45.21%
285042.578369
loaded from existing
time: 0.03

285969.578369
loaded from existing
time: 0.032665252685546875
58.25%
285929.078369
loaded from existing
time: 0.0318913459777832
58.36%
285647.078369
loaded from existing
time: 0.03174901008605957
58.48%
285345.578369
loaded from existing
time: 0.03183126449584961
58.59%
285134.078369
loaded from existing
time: 0.03209209442138672
58.71%
285117.578369
loaded from existing
time: 0.031062602996826172
58.82%
285705.578369
loaded from existing
time: 0.030965805053710938
58.94%
285149.078369
loaded from existing
time: 0.033281564712524414
59.05%
285699.578369
loaded from existing
time: 0.031319379806518555
59.17%
285180.578369
loaded from existing
time: 0.031463623046875
59.28%
286131.578369
loaded from existing
time: 0.03136086463928223
59.40%
286170.578369
loaded from existing
time: 0.031622886657714844
59.52%
285899.078369
loaded from existing
time: 0.03215456008911133
59.63%
285828.578369
loaded from existing
time: 0.03139162063598633
59.75%
285674.078369
loaded from existing
time: 0.03

285279.578369
loaded from existing
time: 0.03301548957824707
72.78%
285566.078369
loaded from existing
time: 0.034223079681396484
72.90%
285873.578369
loaded from existing
time: 0.032919883728027344
73.01%
285749.078369
loaded from existing
time: 0.03307628631591797
73.13%
285864.578369
loaded from existing
time: 0.03242182731628418
73.24%
285905.078369
loaded from existing
time: 0.032135009765625
73.36%
285597.578369
loaded from existing
time: 0.032189130783081055
73.47%
285254.078369
loaded from existing
time: 0.03538966178894043
73.59%
285299.078369
loaded from existing
time: 0.03301548957824707
73.70%
285386.078369
loaded from existing
time: 0.032945871353149414
73.82%
285645.578369
loaded from existing
time: 0.03221917152404785
73.93%
285756.578369
loaded from existing
time: 0.03314089775085449
74.05%
286140.578369
loaded from existing
time: 0.03232884407043457
74.16%
286022.078369
loaded from existing
time: 0.03269147872924805
74.28%
285581.078369
loaded from existing
time: 0.032

286122.578369
loaded from existing
time: 0.033211469650268555
87.08%
285690.578369
loaded from existing
time: 0.03195500373840332
87.20%
285440.078369
loaded from existing
time: 0.03201150894165039
87.31%
285329.078369
loaded from existing
time: 0.032007694244384766
87.43%
286308.578369
loaded from existing
time: 0.030972003936767578
87.54%
285567.578369
loaded from existing
time: 0.032640695571899414
87.66%
285486.578369
loaded from existing
time: 0.03195309638977051
87.77%
285246.578369
loaded from existing
time: 0.03385329246520996
87.89%
285867.578369
loaded from existing
time: 0.032705068588256836
88.00%
285599.078369
loaded from existing
time: 0.03229856491088867
88.12%
286085.078369
loaded from existing
time: 0.03155946731567383
88.24%
285932.078369
loaded from existing
time: 0.03234386444091797
88.35%
285381.578369
loaded from existing
time: 0.032163143157958984
88.47%
285611.078369
loaded from existing
time: 0.032569169998168945
88.58%
285779.078369
loaded from existing
time: 