In [None]:
import pathlib

import autosklearn.classification
import numpy as np
import pandas as pd
import sklearn.datasets
import sklearn.metrics
import tensorflow as tf
from sklearn.utils.multiclass import type_of_target

In [None]:
# Load the manually cleaned CTG spreadsheet; the first sheet row is the header.
cleaned_data_path = pathlib.Path("../data/interim/ctg_cleaned_manually.xlsx")
data = pd.read_excel(cleaned_data_path, header=0)

# Feature matrix: every column from "Min" through "SUSP". Label-based slices
# in pandas include BOTH endpoints.
# NOTE(review): confirm that no column inside this range is derived from the
# CLASS / NSP targets, otherwise the model sees leaked label information.
feature_frame = data.loc[:, "Min":"SUSP"]
x_raw = feature_frame.to_numpy()

# The two candidate target vectors, one entry per row of `data`.
y_raw_class = data["CLASS"].to_numpy()
y_raw_nsp = data["NSP"].to_numpy()

In [None]:
# One-hot encode both label vectors.
#
# tf.one_hot expects indices in the half-open range [0, depth); any index
# outside that range is encoded as an all-zero row. Using depth=max(labels)
# therefore silently zeroes out every sample carrying the largest label.
# Shifting by the smallest label and sizing the encoding as
# (max - min + 1) gives each observed label value its own column:
# column j corresponds to label (min_label + j).
class_label_min = y_raw_class.min()
y_class_one_hot = tf.one_hot(
    indices=y_raw_class - class_label_min,
    depth=int(y_raw_class.max() - class_label_min) + 1,
).numpy()

nsp_label_min = y_raw_nsp.min()
y_nsp_one_hot = tf.one_hot(
    indices=y_raw_nsp - nsp_label_min,
    depth=int(y_raw_nsp.max() - nsp_label_min) + 1,
).numpy()

In [None]:
# Sequential hold-out split: rows before the boundary are used for training,
# the remaining rows for testing. Rows keep their original order (no shuffle).
# The NSP labels are the prediction target.
n_train_rows = 1913
x_train, x_test = np.split(x_raw, [n_train_rows])
y_train, y_test = np.split(y_raw_nsp, [n_train_rows])

In [None]:
# Search settings for auto-sklearn, gathered in one place so they are easy to
# tune. The budget is deliberately tiny: an overall time limit of 60, a
# per-model limit of 30, no meta-learning warm start, and SMAC capped at a
# single run via `runcount_limit`.
automl_settings = {
    "time_left_for_this_task": 60,
    "per_run_time_limit": 30,
    "initial_configurations_via_metalearning": 0,
    "smac_scenario_args": {"runcount_limit": 1},
}
automl = autosklearn.classification.AutoSklearnClassifier(**automl_settings)

In [None]:
# Run the model search on the training rows; the fitted estimator is the
# cell's displayed value.
automl.fit(X=x_train, y=y_train)

In [None]:
# Print whatever auto-sklearn reports about the models it kept.
models_report = automl.show_models()
print(models_report)

In [None]:
# Print the statistics summary produced for the search run.
search_statistics = automl.sprint_statistics()
print(search_statistics)

In [None]:
# Predict labels for the held-out rows and report the fraction that match
# the true labels.
predictions = automl.predict(x_test)
test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", test_accuracy)

In [None]:
# Confusion matrix on the held-out rows (scikit-learn puts true labels on
# the rows and predicted labels on the columns).
sklearn.metrics.confusion_matrix(y_true=y_test, y_pred=predictions)

In [None]:
%%timeit
automl.predict(x_test[0:1])