In [1]:
from load_data import parameters_dict, simulation_file_paths

import numpy as np
# Load the previously saved feature array from "feature_arr.npy"; the path is
# relative, so the file must be in the notebook's working directory.
# NOTE(review): presumably one row per entry of simulation_file_paths (a later
# cell indexes the cluster labels by position in that list) — TODO confirm.
feature_arr = np.load("feature_arr.npy")

In [2]:
# Check whether TensorFlow can see a GPU.
# Uses the stable tf.config.list_physical_devices API rather than the
# tf.config.experimental alias.
import tensorflow as tf
print("Num GPUs Available: ", len(
    tf.config.list_physical_devices('GPU')))

2024-03-27 10:58:18.769527: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Num GPUs Available:  1


2024-03-27 10:58:19.920589: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-27 10:58:19.947678: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-27 10:58:19.947982: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [3]:
from sklearn.cluster import DBSCAN


def std_distance_centroids(feature_arr, cluster_labels):
    """Return the standard deviation of the pairwise distances between cluster centroids.

    Parameters
    ----------
    feature_arr : array-like
        Samples along the first axis; any trailing feature axes are flattened
        when distances are computed.
    cluster_labels : array-like of int
        One label per sample. The label ``-1`` (DBSCAN's noise marker) is
        ignored because noise points do not form a cluster.

    Returns
    -------
    float
        Population standard deviation (``np.std``) of the Euclidean distances
        between every unordered pair of distinct centroids. Returns ``0.0``
        when fewer than two clusters are present (including empty input), as
        there are no inter-centroid distances to spread.

    Notes
    -----
    Exactly one centroid is computed per *unique* label. Iterating over the
    per-sample labels instead would duplicate each centroid once per sample,
    weighting the statistic by cluster size and making the pairwise step
    quadratic in the number of samples rather than the number of clusters.
    """
    feature_arr = np.asarray(feature_arr)
    cluster_labels = np.asarray(cluster_labels)

    # One entry per real cluster; drop the noise label.
    cluster_ids = [label for label in np.unique(cluster_labels) if label != -1]
    if len(cluster_ids) < 2:
        return 0.0

    # Centroid of each cluster, flattened to a (n_clusters, n_features) matrix.
    centroids = np.array(
        [feature_arr[cluster_labels == label].mean(axis=0) for label in cluster_ids]
    ).reshape(len(cluster_ids), -1)

    # Each unordered pair (i < j) once: no zero self-distances, no double counting.
    first, second = np.triu_indices(len(cluster_ids), k=1)
    distances = np.linalg.norm(centroids[first] - centroids[second], axis=1)
    return float(np.std(distances))

In [4]:
import optuna


def objective(trial):
    """Optuna objective: cluster ``feature_arr`` with DBSCAN and score the result.

    Samples ``eps`` from [0.5, 1.5] and ``min_samples`` from [2, 10], fits
    DBSCAN on the module-level ``feature_arr`` and returns the value of
    ``std_distance_centroids`` for the resulting labels.
    """
    model = DBSCAN(
        eps=trial.suggest_float("eps", 0.5, 1.5),
        min_samples=trial.suggest_int("min_samples", 2, 10),
    )
    labels = model.fit_predict(feature_arr)
    return std_distance_centroids(feature_arr, labels)


# Open the study persisted in the local SQLite file, creating it if it does
# not exist yet (load_if_exists=True).
study = optuna.create_study(
    study_name="dbscan_centroids_min",
    storage="sqlite:///test.db",
    direction="minimize",
    load_if_exists=True,
)
# n_trials=0 requests no new trials from this call.
study.optimize(objective, n_trials=0)

[I 2024-03-27 10:58:20,629] Using an existing study with name 'dbscan_centroids_min' instead of creating a new one.


In [13]:
# Refit DBSCAN on the features using the best hyper-parameters of the study.
dbscan = DBSCAN(
    eps=study.best_params["eps"],
    min_samples=study.best_params["min_samples"],
).fit(feature_arr)

# Number of distinct labels, not counting the noise label (-1).
print(len(set(dbscan.labels_) - {-1}))

import plotly.graph_objs as go
import plotly.express as ptx

# Group the simulation file paths by their DBSCAN label.
# Keys are created only for real cluster labels: the noise label (-1) has its
# own "Outliers" bucket, so counting it via len(set(labels_)) would add a
# spurious, always-empty "Class N" entry (and an empty legend item later).
class_dict = {
    f"Class {cluster_id}": list()
    for cluster_id in sorted(set(dbscan.labels_) - {-1})
}
class_dict["Outliers"] = list()
# NOTE(review): assumes dbscan.labels_ is aligned position-by-position with
# simulation_file_paths — confirm feature_arr was built in that order.
for index, path in enumerate(simulation_file_paths):
    class_ = dbscan.labels_[index]
    key = "Outliers" if class_ == -1 else f"Class {class_}"
    class_dict[key].append(str(path))

# Build one scatter trace per class: x is the "H" parameter and y the "E"
# parameter of every simulation file assigned to that class.
# The Dark24 palette is finite, and a plain zip() stops at the shorter input,
# silently dropping every class beyond the palette length (including
# "Outliers", which is last). Cycling the palette keeps every class on the
# plot; colours repeat once the palette is exhausted.
from itertools import cycle

traces = list()
for class_id, colour in zip(class_dict, cycle(ptx.colors.qualitative.Dark24)):
    class_paths = class_dict[class_id]
    traces.append(
        go.Scatter(
            x=[parameters_dict[f]["H"] for f in class_paths],
            y=[parameters_dict[f]["E"] for f in class_paths],
            mode="markers",
            name=class_id,
            opacity=0.7,
            marker=dict(
                size=11,
                color=colour,
            ),
        )
    )

# Assemble the figure from the per-class traces.
fig = go.Figure(data=traces)

# Fixed-size canvas with tight margins and LaTeX axis titles. The call is the
# last expression of the cell, so the figure it returns is what gets rendered.
fig.update_layout(
    xaxis_title=r"$\mu_0 \mathbf{H} \text{ (T)}$",
    yaxis_title=r"$\Delta \text{E (J)}$",
    autosize=False,
    width=990,
    height=600,
    margin={"l": 10, "r": 10, "b": 25, "t": 25},
)

25


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed