In [1]:
import optuna

In [2]:
from pathlib import Path
import numpy as np
import pandas as pd

In [3]:
from graph_description.training_utils import my_accuracy, LinearScheduler, ExponentialScheduler

In [4]:
prefix = "../"

In [5]:
train_per_class = 20
round = 0
dataset="citeseer"

In [6]:
input = [Path(prefix+f"/snakemake_base/splits/{dataset}_planetoid/{train_per_class}_500_rest_0.npz").resolve().absolute(),
         Path(prefix+f"snakemake_base/aggregated_datasets/{dataset}_planetoid_{round}.pkl").resolve().absolute()]

In [7]:
splits = np.load(input[0])
train_mask = splits["train_mask"]
val_mask = splits["val_mask"]

df  = pd.read_pickle(input[1])
train_df = df[train_mask]
#print("number_of_columns", len(df.columns))
X_train = train_df.drop("labels", axis=1)

y_train = train_df["labels"]
print(df.shape)

(3327, 3704)


In [8]:
val_df = df[val_mask]
X_val = val_df.drop("labels", axis=1)
y_val = val_df["labels"].to_numpy()


In [9]:
from graph_description.gnn.run import main

import numpy as np
import pandas as pd
splits = np.load(input[0])
splits = {"train" : splits["train_mask"],
     "valid" : splits["val_mask"],
     "test" : splits["test_mask"]}


In [10]:
num_classes = len(np.bincount(y_train))

In [11]:
import os

In [12]:
try:
    this_file = Path(__file__)
except NameError:
    this_file = Path(os.path.abspath(''))
if this_file.stem in ("notebooks", "scripts"):
    root_folder = this_file.parent
else:
    root_folder = this_file



In [13]:
gt_labels = df["labels"].to_numpy()
from sklearn.metrics import accuracy_score

In [21]:
from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf

In [17]:
def objective(trial):

    with initialize_config_dir(config_dir=str(config_dir), job_name="test_app"):
        cfg = compose(config_name="main",
                      overrides=["cuda=0",
                                 f"model={gnn_kind}",
                                 f"dataset={dataset}",
                                 f"data_root={data_root}",
                                 f"patience={trial.suggest_int('patience',0,100)}",
                                 f"optim.learning_rate={trial.suggest_float('lr',1e-3,100,log=True)}",
                                 f"optim.weight_decay={trial.suggest_float('lr_wdecay',0,.1)}",
                                 f"model.hidden_dim={trial.suggest_categorical('hidden_dim',[32,64,128,265])}",
                                 f"model.dropout_p={trial.suggest_float('dropout', 0,1)}",
                                 f"model.n_layers={trial.suggest_int('n_layers', 2,4)}",
        ])


        prediction = main(cfg, splits, init_seed=0, train_seed=0, silent=True)
        val_prediction = prediction[val_mask]
        return accuracy_score(val_prediction, y_val)

In [18]:
gnn_kind = "gat2017"
dataset="citeseer"


config_dir = root_folder/"src"/"graph_description"/"gnn"/"config"
#print(config_dir)
data_root = root_folder/"pytorch_datasets"
    


In [19]:
journal_path = root_folder/"hyper_param_journal.log"
print("journal_path", journal_path)
storage = optuna.storages.JournalStorage(
    optuna.storages.JournalFileStorage(str(journal_path)),
)

study = optuna.create_study(
    storage=storage,  # Specify the storage URL here.
    study_name=f"{dataset}-{round}-{train_per_class}-{gnn_kind}",
    load_if_exists=True,
    direction='maximize'
)

journal_path /home/stamm/projects/graph_description/hyper_param_journal.log


  storage = optuna.storages.JournalStorage(
[I 2024-01-29 10:47:39,465] Using an existing study with name 'citeseer-0-20-gat2017' instead of creating a new one.


In [22]:
# 3. Create a study object and optimize the objective function.
#study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  with initialize_config_dir(config_dir=str(config_dir), job_name="test_app"):
Seed set to 0
Seed set to 0
Seed set to 0
Seed set to 0
[I 2024-01-29 10:48:27,689] Trial 211 finished with value: 0.676 and parameters: {'patience': 81, 'lr': 0.00845959384583575, 'lr_wdecay': 0.08471272292056373, 'hidden_dim': 128, 'dropout': 0.4772958399754069, 'n_layers': 2}. Best is trial 144 with value: 0.722.
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  with initialize_config_dir(config_dir=str(config_dir), job_name="test_app"):
Seed set to 0
Seed set to 0
Seed set to 0
Seed set to 0
[W 2024-01-29 10:48:30,687] Trial 212 failed with parameters: {'patience': 83, 'lr': 0.0037829978252185877, 'lr_wdecay': 0.08332428195582818, 'hidden_dim': 64, 'dropout': 0.4588421045256036, 'n_

KeyboardInterrupt: 