In [1]:
import optuna

In [2]:
from pathlib import Path
import numpy as np
import pandas as pd

In [3]:
from graph_description.training_utils import my_accuracy, LinearScheduler, ExponentialScheduler

In [4]:
prefix = "../"

In [5]:
train_per_class = 50
round = 0
dataset="citeseer"

In [6]:
input = [Path(prefix+f"/snakemake_base/splits/{dataset}_planetoid/{train_per_class}_500_rest_0.npz").resolve().absolute(),
         Path(prefix+f"snakemake_base/aggregated_datasets/{dataset}_planetoid_{round}_dense.pkl").resolve().absolute()]

In [7]:
splits = np.load(input[0])
train_mask = splits["train_mask"]
val_mask = splits["val_mask"]

df  = pd.read_pickle(input[1])
train_df = df[train_mask]
#print("number_of_columns", len(df.columns))
X_train = train_df.drop("labels", axis=1)

y_train = train_df["labels"]
print(df.shape)

(3327, 3704)


In [8]:
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model._stochastic_gradient import DEFAULT_EPSILON, MAX_INT, _prepare_fit_binary, _get_plain_sgd_function
from sklearn.base import clone
from sklearn.utils import check_random_state, compute_class_weight, deprecated
from sklearn.utils.parallel import Parallel, delayed
from sklearn.linear_model._base import make_dataset

class SGDClassifierFixedSplit(SGDClassifier):
    def __init__(
        self,
        validation_mask,
        loss="hinge",
        *,
        penalty="l2",
        alpha=0.0001,
        l1_ratio=0.15,
        fit_intercept=True,
        max_iter=1000,
        tol=1e-3,
        shuffle=True,
        verbose=0,
        epsilon=DEFAULT_EPSILON,
        n_jobs=None,
        random_state=None,
        learning_rate="optimal",
        eta0=0.0,
        power_t=0.5,
        early_stopping=False,
        n_iter_no_change=5,
        class_weight=None,
        warm_start=False,
        average=False,
    ):
        self.validation_mask=validation_mask
        super().__init__(
            loss=loss,
            penalty=penalty,
            alpha=alpha,
            l1_ratio=l1_ratio,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            shuffle=shuffle,
            verbose=verbose,
            epsilon=epsilon,
            n_jobs=n_jobs,
            random_state=random_state,
            learning_rate=learning_rate,
            eta0=eta0,
            power_t=power_t,
            early_stopping=early_stopping,
            validation_fraction=0,
            n_iter_no_change=n_iter_no_change,
            class_weight=class_weight,
            warm_start=warm_start,
            average=average,
        )

    def _make_validation_split(self, y, sample_mask):
        return self.validation_mask

    def _make_validation_score_cb(
            self, validation_mask, X, y, sample_weight, classes=None
        ):
            if not self.early_stopping:
                return None
    
            return _ValidationScoreCallback(
                self,
                X[validation_mask],
                y[validation_mask],
                sample_weight[validation_mask],
                classes=classes,
            )




    def _fit_multiclass(self, X, y, alpha, C, learning_rate, sample_weight, max_iter):
        """Fit a multi-class classifier by combining binary classifiers

        Each binary classifier predicts one class versus all others. This
        strategy is called OvA (One versus All) or OvR (One versus Rest).
        """
        # Precompute the validation split using the multiclass labels
        # to ensure proper balancing of the classes.
        validation_mask = self._make_validation_split(y, sample_mask=sample_weight > 0)

        # Use joblib to fit OvA in parallel.
        # Pick the random seed for each job outside of fit_binary to avoid
        # sharing the estimator random state between threads which could lead
        # to non-deterministic behavior
        random_state = check_random_state(self.random_state)
        seeds = random_state.randint(MAX_INT, size=len(self.classes_))
        result = Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose, require="sharedmem"
        )(
            delayed(fit_binary)(
                self,
                i,
                X,
                y,
                alpha,
                C,
                learning_rate,
                max_iter,
                self._expanded_class_weight[i],
                1.0,
                sample_weight,
                validation_mask=validation_mask,
                random_state=seed,
            )
            for i, seed in enumerate(seeds)
        )

        # take the maximum of n_iter_ over every binary fit
        n_iter_ = 0.0
        for i, (_, intercept, n_iter_i) in enumerate(result):
            self.intercept_[i] = intercept
            n_iter_ = max(n_iter_, n_iter_i)

        self.t_ += n_iter_ * X.shape[0]
        self.n_iter_ = n_iter_

        if self.average > 0:
            if self.average <= self.t_ - 1.0:
                self.coef_ = self._average_coef
                self.intercept_ = self._average_intercept
            else:
                self.coef_ = self._standard_coef
                self._standard_intercept = np.atleast_1d(self.intercept_)
                self.intercept_ = self._standard_intercept

In [9]:
def fit_binary(
    est,
    i,
    X,
    y,
    alpha,
    C,
    learning_rate,
    max_iter,
    pos_weight,
    neg_weight,
    sample_weight,
    validation_mask=None,
    random_state=None,
):
    """Fit a single binary classifier.

    The i'th class is considered the "positive" class.

    Parameters
    ----------
    est : Estimator object
        The estimator to fit

    i : int
        Index of the positive class

    X : numpy array or sparse matrix of shape [n_samples,n_features]
        Training data

    y : numpy array of shape [n_samples, ]
        Target values

    alpha : float
        The regularization parameter

    C : float
        Maximum step size for passive aggressive

    learning_rate : str
        The learning rate. Accepted values are 'constant', 'optimal',
        'invscaling', 'pa1' and 'pa2'.

    max_iter : int
        The maximum number of iterations (epochs)

    pos_weight : float
        The weight of the positive class

    neg_weight : float
        The weight of the negative class

    sample_weight : numpy array of shape [n_samples, ]
        The weight of each sample

    validation_mask : numpy array of shape [n_samples, ], default=None
        Precomputed validation mask in case _fit_binary is called in the
        context of a one-vs-rest reduction.

    random_state : int, RandomState instance, default=None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.
    """
    # if average is not true, average_coef, and average_intercept will be
    # unused
    y_i, coef, intercept, average_coef, average_intercept = _prepare_fit_binary(
        est, y, i, input_dtye=X.dtype
    )
    assert y_i.shape[0] == y.shape[0] == sample_weight.shape[0]

    random_state = check_random_state(random_state)
    dataset, intercept_decay = make_dataset(
        X, y_i, sample_weight, random_state=random_state
    )

    penalty_type = est._get_penalty_type(est.penalty)
    learning_rate_type = est._get_learning_rate_type(learning_rate)

    if validation_mask is None:
        validation_mask = est._make_validation_split(y_i, sample_mask=sample_weight > 0)
    classes = np.array([-1, 1], dtype=y_i.dtype)
    validation_score_cb = est._make_validation_score_cb(
        validation_mask, X, y_i, sample_weight, classes=classes
    )

    # numpy mtrand expects a C long which is a signed 32 bit integer under
    # Windows
    seed = random_state.randint(MAX_INT)

    tol = est.tol if est.tol is not None else -np.inf

    _plain_sgd = _get_plain_sgd_function(input_dtype=coef.dtype)
    coef, intercept_out, average_coef, average_intercept, n_iter_ = _plain_sgd(
        coef,
        intercept,
        average_coef,
        average_intercept,
        est._loss_function_,
        penalty_type,
        alpha,
        C,
        est.l1_ratio,
        dataset,
        validation_mask,
        est.early_stopping,
        validation_score_cb,
        int(est.n_iter_no_change),
        max_iter,
        tol,
        int(est.fit_intercept),
        int(est.verbose),
        int(est.shuffle),
        seed,
        pos_weight,
        neg_weight,
        learning_rate_type,
        est.eta0,
        est.power_t,
        0,
        est.t_,
        intercept_decay,
        est.average,
    )
    if est.verbose>=2:
        print(f"best epoch was epoch {validation_score_cb.best_call} of {n_iter_} epochs. Best score was {validation_score_cb.best_score}")

        #print(validation_score_cb(coef, intercept_out), validation_score_cb(validation_score_cb.best_coef_, validation_score_cb.best_intercept_))
        #print(validation_score_cb.best_intercept_)
    
    coef[:]=validation_score_cb.best_coef_[:]
    intercept=validation_score_cb.best_intercept_[0]

    if est.average:
        if len(est.classes_) == 2:
            est._average_intercept[0] = average_intercept
        else:
            est._average_intercept[i] = average_intercept

    return coef, intercept, n_iter_

In [27]:
class _ValidationScoreCallback:
    """Callback for early stopping based on validation score"""

    def __init__(self, estimator, X_val, y_val, sample_weight_val, classes=None, verbose=False):
        self.estimator = clone(estimator)
        self.estimator.t_ = 1  # to pass check_is_fitted
        if classes is not None:
            self.estimator.classes_ = classes
        self.X_val = X_val
        self.y_val = y_val
        self.sample_weight_val = sample_weight_val
        self.best_score=-1
        self.best_coef_ = None
        self.best_intercept_ = None
        self.verbose=verbose
        self.num_calls = 0
        self.best_call = -1

    def __call__(self, coef, intercept):
        est = self.estimator
        #print(type(est.coef_))
        est.coef_ = coef.reshape(1, -1)
        est.intercept_ = np.atleast_1d(intercept)
        score =  est.score(self.X_val, self.y_val, self.sample_weight_val)
        if score > self.best_score:
            if self.verbose>=3:
                print("new_best_score", score)
            self.best_score=score
            self.best_call=self.num_calls
            if self.best_coef_ is None:
                self.best_coef_ = coef.reshape(1, -1).copy()
            else:
                self.best_coef_[:] = coef.reshape(1, -1)
            if self.best_intercept_ is None:
                self.best_intercept_ =  np.atleast_1d(intercept).copy()
            else:
                self.best_intercept_[:] =  np.atleast_1d(intercept)
        self.num_calls+=1
        return score

In [10]:
import os
print(os.environ.get('OMP_NUM_THREADS'))
print(os.environ.get('MKL_NUM_THREADS'))
print(os.environ.get('OPENBLAS_NUM_THREADS'))
print(os.environ.get('BLIS_NUM_THREADS'))

None
None
None
None


In [11]:
def load_dataset_splitted(path_splits, path_df, return_train=True, return_val=True, return_test=False, return_full=False):
    splits = np.load(input[0])
    df  = pd.read_pickle(input[1])

    def get_by_split(split_name):
        mask = splits[split_name]
        mask_df = df[mask]
        X = mask_df.drop("labels", axis=1)
        y = mask_df["labels"]
        return X, y

    out = tuple()
    if return_train:
        out += get_by_split("train_mask")
    if return_val:
        out += get_by_split("val_mask")
    if return_test:
        out += get_by_split("test_mask")
    if return_full:
        out +=(df,)
    return out

In [12]:
(X_train, y_train, X_val, y_val)=load_dataset_splitted(input[0], input[1])

In [13]:
# sklearn expects a unified train/val dataset
X_train_val = pd.concat((X_train, X_val), axis=0)
y_train_val  = pd.concat((y_train, y_val), axis=0)

sklearn_val_mask = np.hstack( (np.zeros(len(y_train),dtype=bool),  np.ones(len(y_val),dtype=bool)) )

In [14]:
num_classes = len(np.bincount(y_train))

In [15]:
from sklearn.metrics import accuracy_score

In [16]:
import warnings
warnings.filterwarnings("ignore", message="invalid value encountered in scalar subtract")
warnings.filterwarnings("ignore", message="overflow encountered in reduce")

In [17]:
max_rules=10

In [18]:
import time

In [19]:
from threadpoolctl import threadpool_limits
from threadpoolctl import threadpool_info

In [20]:
print(threadpool_info())

[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 64, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 64, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 64, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


In [34]:
def SGDClassifier_objective(trial, sklearn_val_mask):
    loss = trial.suggest_categorical("loss", ["hinge", "log_loss", "modified_huber", "squared_hinge", 
                                                "perceptron", "squared_error", "huber", "epsilon_insensitive",
                                                "squared_epsilon_insensitive"])
    if loss in ("huber", "epsilon_insensitive","squared_epsilon_insensitive"):
        epsilon=trial.suggest_float("epsilon", 0.01, 10, log=True)
    else:
        epsilon=0.1

    penalty = trial.suggest_categorical("penalty", ["l2", "l1", "elasticnet"])
    if penalty == "elasticnet":
        l1_ratio = trial.suggest_float("l1_ratio", 0, 1)
    else:
        l1_ratio=0.15
    params = dict(
        loss=loss,
        penalty=penalty,
        alpha=trial.suggest_float("alpha", 1e-6, 10, log=True),
        l1_ratio=l1_ratio,
        tol = trial.suggest_float("tol", 1e-6, 0.1, log=True),
        epsilon=epsilon,
        random_state=0,

        early_stopping=True,
        validation_mask=sklearn_val_mask,
        n_iter_no_change=trial.suggest_int('n_iter_no_change',1,100),
        verbose=False,
    )
    with threadpool_limits(limits=1, user_api='blas'):
        with threadpool_limits(limits=1, user_api='openmp'):
            print(threadpool_info())
            clf = SGDClassifierFixedSplit(**params)
            clf.fit(X_train_val, y_train_val)
    prediction = clf.predict(X_val)
    score= accuracy_score(prediction, y_val)
    #print("final_score", score)
    return score

In [35]:
from functools import partial
objective = partial(SGDClassifier_objective, sklearn_val_mask=sklearn_val_mask)

In [36]:
import os
try:
    this_file = Path(__file__)
except NameError:
    this_file = Path(os.path.abspath(''))
if this_file.stem in ("notebooks", "scripts"):
    root_folder = this_file.parent
else:
    root_folder = this_file

In [37]:
journal_path = root_folder/"hyper_param_journal.log"
print("journal_path", journal_path)
storage = optuna.storages.JournalStorage(
    optuna.storages.JournalFileStorage(str(journal_path)),
)

study = optuna.create_study(
    storage=storage,  # Specify the storage URL here.
    study_name=f"{dataset}-{round}-{train_per_class}-sklearnSGDClassifier",
    load_if_exists=True,
    direction='maximize'
)

journal_path /home/stamm/projects/graph_description/hyper_param_journal.log


  storage = optuna.storages.JournalStorage(
[I 2024-02-01 15:59:03,793] Using an existing study with name 'citeseer-0-50-sklearnSGDClassifier' instead of creating a new one.


In [38]:
# 3. Create a study object and optimize the objective function.
#study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:08,061] Trial 192 finished with value: 0.546 and parameters: {'loss': 'log_loss', 'penalty': 'l2', 'alpha': 0.980009892814962, 'tol': 0.00044935713597376193, 'n_iter_no_change': 64}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:10,611] Trial 193 finished with value: 0.66 and parameters: {'loss': 'huber', 'epsilon': 0.04813906877756914, 'penalty': 'l2', 'alpha': 0.2826633489315314, 'tol': 5.912258738220381e-06, 'n_iter_no_change': 58}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:13,390] Trial 194 finished with value: 0.654 and parameters: {'loss': 'log_loss', 'penalty': 'l2', 'alpha': 0.46414720557881994, 'tol': 1.564701966625357e-06, 'n_iter_no_change': 63}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:16,267] Trial 195 finished with value: 0.668 and parameters: {'loss': 'log_loss', 'penalty': 'l2', 'alpha': 0.25028875903703857, 'tol': 4.021192248490968e-06, 'n_iter_no_change': 68}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:18,841] Trial 196 finished with value: 0.666 and parameters: {'loss': 'log_loss', 'penalty': 'l2', 'alpha': 0.2008461598340967, 'tol': 2.870580876712535e-06, 'n_iter_no_change': 61}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:21,785] Trial 197 finished with value: 0.668 and parameters: {'loss': 'log_loss', 'penalty': 'l2', 'alpha': 0.3777608613399328, 'tol': 0.01348860776148731, 'n_iter_no_change': 70}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:24,563] Trial 198 finished with value: 0.578 and parameters: {'loss': 'squared_error', 'penalty': 'l2', 'alpha': 0.5972047044165811, 'tol': 0.07443459483501881, 'n_iter_no_change': 66}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:27,209] Trial 199 finished with value: 0.664 and parameters: {'loss': 'log_loss', 'penalty': 'l2', 'alpha': 0.1422838847839563, 'tol': 1.577063320156328e-05, 'n_iter_no_change': 63}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:29,776] Trial 200 finished with value: 0.668 and parameters: {'loss': 'log_loss', 'penalty': 'l2', 'alpha': 0.26736678162180005, 'tol': 7.469271819609049e-06, 'n_iter_no_change': 60}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:31,791] Trial 201 finished with value: 0.414 and parameters: {'loss': 'squared_hinge', 'penalty': 'l2', 'alpha': 0.08802386224551728, 'tol': 0.022011826779880773, 'n_iter_no_change': 58}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:35,592] Trial 202 finished with value: 0.072 and parameters: {'loss': 'log_loss', 'penalty': 'l1', 'alpha': 0.3883041332536753, 'tol': 0.03390623957703605, 'n_iter_no_change': 64}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[I 2024-02-01 15:59:38,591] Trial 203 finished with value: 0.67 and parameters: {'loss': 'log_loss', 'penalty': 'l2', 'alpha': 0.1834735303459163, 'tol': 4.788045737526218e-06, 'n_iter_no_change': 68}. Best is trial 48 with value: 0.674.


[{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so', 'version': '0.3.23.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}, {'user_api': 'openmp', 'internal_api': 'openmp', 'num_threads': 1, 'prefix': 'libgomp', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0', 'version': None}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 1, 'prefix': 'libopenblas', 'filepath': '/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so', 'version': '0.3.21.dev', 'threading_layer': 'pthreads', 'architecture': 'SkylakeX'}]


[W 2024-02-01 15:59:39,044] Trial 204 failed with parameters: {'loss': 'log_loss', 'penalty': 'l2', 'alpha': 0.29621677585696543, 'tol': 9.626062537406411e-06, 'n_iter_no_change': 57} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_2268610/2426869387.py", line 33, in SGDClassifier_objective
    clf.fit(X_train_val, y_train_val)
  File "/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/sklearn/linear_model/_stochastic_gradient.py", line 917, in fit
    return self._fit(
  File "/home/stamm/projects/graph_description/env/lib/python3.10/site-packages/sklearn/linear_model/_sto

In [None]:
#optuna.study.delete_study(study_name=f"{dataset}-{round}-{train_per_class}-sklearnSGDClassifier", storage=storage)