In [None]:
def load_data_by_dimension(dimension, root="./datasets/"):
    """Load the kryptonite feature and label arrays for one dimension.

    The files are looked up as ``<root>/kryptonite-<dimension>-X.npy`` and
    ``<root>/kryptonite-<dimension>-y.npy``.

    Args:
        dimension: Value substituted into the two file names.
        root: Directory holding the ``.npy`` files. Defaults to ``./datasets/``,
            which is resolved against the current working directory.

    Returns:
        ``(data, labels)`` as returned by ``np.load``, or ``(None, None)`` when
        either file does not exist.
    """
    import os  # local import so the function does not rely on another cell

    # os.path.join tolerates a `root` with or without a trailing separator.
    data_filename = os.path.join(root, f'kryptonite-{dimension}-X.npy')
    labels_filename = os.path.join(root, f'kryptonite-{dimension}-y.npy')

    # Only the two loads can raise FileNotFoundError, so keep the try body small.
    try:
        data = np.load(data_filename)
        labels = np.load(labels_filename)
    except FileNotFoundError:
        print(f"Dataset for dimension {dimension} not found.")
        return None, None

    print(f"Loaded dataset with dimension {dimension}. Data shape: {data.shape}, Labels shape: {labels.shape}")
    return data, labels

In [None]:
import time
from datetime import datetime
from ray import train, tune
from ray.tune.search.optuna import OptunaSearch
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

# Search space sampled by Ray Tune for the MLP.
# The key names must match what `objective` reads from `config` and what the
# final evaluation loop reads from the best config: "width", "depth",
# "activation", "lr", "momentum". The previous "layer_width"/"layer_depth"
# keys did not match and made those lookups raise KeyError.
cs = {
    "width": tune.randint(32, 256),  # upper bound is exclusive
    "depth": tune.randint(3, 6),     # i.e. 3, 4 or 5
    "activation": tune.choice(['relu', 'tanh', 'logistic']),
    "lr": tune.loguniform(1e-4, 1e-2),
    # NOTE(review): scikit-learn documents `momentum` as only used with
    # solver='sgd'; the MLPClassifier calls in this notebook do not set a
    # solver, so confirm whether tuning it has any effect.
    "momentum": tune.uniform(0.1, 0.9),
    # Every other MLPClassifier parameter (alpha, batch_size, learning_rate,
    # tol, early_stopping, beta_1, beta_2, ...) is not part of the search.
}

def objective(config):
    """Ray Tune trainable: fit one MLP for `config` and report its hold-out accuracy.

    The dataset is selected through the module-level variable `dim`, which must
    be set before the search is started.

    Args:
        config: Sampled hyperparameters with the keys "width", "depth",
            "activation", "lr" and "momentum".

    Raises:
        FileNotFoundError: If `load_data_by_dimension` could not find the
            dataset for `dim`.
    """
    data, labels = load_data_by_dimension(dim)
    if data is None or labels is None:
        # Fail with a clear message instead of an obscure error inside
        # train_test_split.
        # NOTE(review): the loader uses a path relative to the working
        # directory; confirm Ray Tune workers run where the datasets live.
        raise FileNotFoundError(f"Dataset for dimension {dim} could not be loaded.")

    # Fixed seed so every trial is scored on the same hold-out split and the
    # reported accuracies are comparable across trials.
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.2, random_state=0
    )

    # NOTE(review): this builds ONE hidden layer with width*depth units, not
    # `depth` layers of `width` units ([width] * depth). Kept as is because the
    # final evaluation loop builds its model the same way; change both together.
    layers = [config["width"] * config["depth"]]
    model = MLPClassifier(
        hidden_layer_sizes=layers,
        max_iter=1000,
        activation=config["activation"],
        learning_rate_init=config["lr"],
        momentum=config["momentum"],
    )

    # Fit once and report once. The earlier `while True:` loop refitted the same
    # estimator forever, so no trial ever completed on its own and the searcher
    # could only evaluate as many configurations as fit in the first batch.
    model.fit(X_train, y_train)
    acc = model.score(X_test, y_test)
    train.report({"mean_accuracy": acc})  # Report to Tune

In [None]:
# hyperparameter search
def search():
    """Run a time-boxed Ray Tune search (Optuna sampler) over `cs`.

    Trials execute `objective`, which reads the module-level `dim`, so `dim`
    must be set before calling this function. Timing and the best trial are
    printed.

    Returns:
        The config dict of the trial with the highest "mean_accuracy".
    """
    import os  # local import so the function does not rely on another cell

    def _trial_dirname(trial):
        # A timestamp alone has one-second resolution, so trials launched in
        # the same second would share a directory; the trial id keeps it unique.
        return f"{datetime.now():%H_%M_%S}_{trial.trial_id}"

    tuner = tune.Tuner(
        objective,
        tune_config=tune.TuneConfig(
            metric="mean_accuracy",
            mode="max",
            search_alg=OptunaSearch(),
            num_samples=-1,  # no cap on trial count; the time budget ends the search
            time_budget_s=60 * 2,
            trial_dirname_creator=_trial_dirname,
        ),
        run_config=train.RunConfig(
            # Resolve up front so the location does not depend on how Ray
            # interprets a relative path.
            storage_path=os.path.abspath("./results"),
            name="simpleneuralnetwork",
            verbose=0,
        ),
        param_space=cs,
    )

    started = time.time()
    results = tuner.fit()
    elapsed = time.time() - started

    best_result = results.get_best_result(metric="mean_accuracy", mode="max")
    best_config = best_result.config
    print("auto optimization finished")
    print('time for optimisation (seconds):' + str(elapsed))
    print("Best trial config: {}".format(best_config))
    print("Best accuracy: {}".format(best_result.metrics["mean_accuracy"]))
    return best_config

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# For every dataset dimension: tune the MLP, retrain it with the best config on
# a fresh split, and record the hold-out accuracy.
dimensions = [9, 12, 15, 18, 24, 30, 45]
best_ks = []  # not used by the code in this cell; kept in case other cells read it
accuracies = []
evaluated_dimensions = []  # dimensions that actually produced an accuracy

# Iterate over every entry. The earlier `range(6)` skipped the last dimension,
# which left `accuracies` shorter than `dimensions` and broke the plot below.
# `dim` is deliberately a module-level name: `objective` reads it.
for dim in dimensions:
    data, labels = load_data_by_dimension(dim)
    if data is None or labels is None:
        # The loader already printed which dataset is missing; skip it so the
        # remaining dimensions are still evaluated.
        continue

    best = search()
    X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2)
    # NOTE(review): one hidden layer with width*depth units, mirroring what
    # `objective` trains during the search; change both together if `depth`
    # stacked layers ([width] * depth) were intended.
    layers = [best["width"] * best["depth"]]
    model = MLPClassifier(
        hidden_layer_sizes=layers,
        max_iter=1000,
        activation=best["activation"],
        learning_rate_init=best["lr"],
        momentum=best["momentum"],
    )
    model.fit(X_train, y_train)
    accuracy = model.score(X_test, y_test)
    accuracies.append(accuracy)
    evaluated_dimensions.append(dim)

# Plot the accuracy of the tuned MLP for each evaluated dimension
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(evaluated_dimensions, accuracies, marker='o', linestyle='-', color='b')
ax.set_xlabel('Number of Dimensions')
ax.set_ylabel('Accuracy')
ax.set_title('MLP Performance vs. Dimensionality')
ax.grid(True)
plt.show()