In [1]:
import os
import sys
import platform
from pathlib import Path
from GraphTsetlinMachine.graphs import Graphs
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine
import pickle
import logging
import optuna
from optuna.exceptions import TrialPruned 
from tqdm.auto import tqdm
from sklearn.metrics import f1_score, precision_score, recall_score
import math
from functools import partial

In [2]:
# Raise Optuna's log threshold to WARNING so messages below that level are not emitted.
optuna.logging.set_verbosity(optuna.logging.WARNING)

In [3]:
def get_machine_info():
    """Print and return the host name, operating system name and current user.

    The user name is taken from the ``USER`` environment variable, falling
    back to ``USERNAME`` when ``USER`` is unset or empty.

    Returns:
        tuple: ``(machine_name, os_name, user)``. ``user`` is ``None`` when
        neither environment variable is set.
    """
    host = platform.node()
    account = os.getenv("USER") or os.getenv("USERNAME")
    system = platform.system()

    # Echo the detected values, one per line.
    for label, value in (("Machine", host), ("OS", system), ("User", account)):
        print(f"{label}: {value}")

    return host, system, account

In [4]:
# Detect the current host, OS and user; these values select the storage
# locations and database file name in the configuration cell below.
machine_name, os_name, user = get_machine_info()

Machine: jupyter-jalarssen
OS: Linux
User: jalarssen


In [5]:
# Choose where data, models and graphs are stored, and which Optuna database
# file name to use, based on the machine the notebook is running on.
if (machine_name, os_name, user) == ("Corsair", "Linux", "jon"):
    # Known local workstation: keep everything under the mounted drive.
    windows_drive = Path("/mnt/b/TsetlinModels")
    storage_root = windows_drive
    DB = "ja_tsehex_local.db"
else:
    # Any other machine: use directories relative to the working directory.
    storage_root = Path()
    DB = "ja_tsehex.db"

paths = {
    folder: storage_root / folder
    for folder in ("data", "models", "graphs")
}

# Make sure every storage directory exists before it is used.
for storage_dir in paths.values():
    os.makedirs(storage_dir, exist_ok=True)

In [6]:
# Hypervector settings.
# NOTE(review): neither value is referenced by the cells visible in this notebook.
hypervector_size = 64
hypervector_bits = 2

# Dataset grid: one Optuna study is run for every combination of the values below.
open_pos = [40]                       # 50 and 60 were listed as alternatives but are disabled
samples = [1_000, 10_000, 100_000]
board_sizes = [13, 14, 15]            # 11 and 12 were listed as alternatives but are disabled
moves_before = [0, 2, 5]

In [7]:
def stop_when_100_accuracy(study, trial):
    """Optuna callback that stops the study once a trial reaches 100% accuracy.

    Args:
        study: The running study; ``study.stop()`` is called when the
            threshold is reached.
        trial: The trial that just finished. Its ``value`` is the objective
            value (accuracy in percent) and may be ``None`` when the trial
            ended without a recorded value.
    """
    value = trial.value
    # Comparing None with an int raises TypeError, which would abort the whole
    # optimization from inside the callback, so skip value-less trials.
    if value is not None and value >= 100:
        study.stop()

In [8]:
def objective(trial, graphs_train, graphs_test, X_train, Y_train, X_test, Y_test, board_size, mbf, n_samples, dataset):
    """Optuna objective: train a graph Tsetlin machine and report test accuracy.

    Hyperparameters (number of clauses, T, s, depth, epochs) are sampled from
    ``trial``. The machine is trained one epoch at a time; after each epoch
    the test accuracy is reported to Optuna so the study's pruner can stop
    unpromising trials early.

    Args:
        trial: Optuna trial used to sample hyperparameters, report
            intermediate accuracy and store the ``f1``/``precision``/``recall``
            user attributes (overwritten every epoch with that epoch's values).
        graphs_train: Training graphs passed to ``tm.fit``.
        graphs_test: Test graphs passed to ``tm.predict``.
        X_train: Unused here; kept for interface compatibility.
        Y_train: Training labels.
        X_test: Unused here; kept for interface compatibility.
        Y_test: Test labels the predictions are compared against.
        board_size: Board size; only used for the upper bound of ``s``.
        mbf: Unused here; kept for interface compatibility.
        n_samples: Unused here; kept for interface compatibility.
        dataset: Dataset name shown as the progress-bar description.

    Returns:
        Test accuracy in percent of the last epoch that was run (not the best
        accuracy seen during the trial).

    Raises:
        TrialPruned: When the study's pruner decides to stop the trial.
    """
    number_of_clauses = trial.suggest_int('number_of_clauses', 5000, 200000)
    # NOTE(review): although the parameter is stored as 'T_factor', the sampled
    # value is passed to the machine directly as T (it is not multiplied by
    # anything). The name is kept so existing studies stay compatible.
    T = trial.suggest_float('T_factor', 0.5, number_of_clauses * 1.2)
    s = trial.suggest_float('s', 0.01, board_size*0.4)
    depth = trial.suggest_int('depth', 3, 16)
    epochs = trial.suggest_int('epochs', 15, 100)
    message_size = 32
    message_bits = 2

    # NOTE(review): grid is fixed at 16*13 regardless of board_size - confirm
    # this is intended for the 14x14 and 15x15 datasets.
    tm = MultiClassGraphTsetlinMachine(
        number_of_clauses,
        T,
        s,
        depth=depth,
        message_size=message_size,
        message_bits=message_bits,
        number_of_state_bits=8,
        boost_true_positive_feedback=1,
        grid=(16*13,1,1),
        block=(128,1,1),
    )

    print(f"Start trial with c={number_of_clauses}, T={T}, s={s}, d={depth}, e={epochs}")

    # Metrics of the epoch with the highest test accuracy (progress bar only).
    best_test_acc = 0
    best_f1 = 0
    best_prec = 0
    best_rec = 0

    progress_bar = tqdm(range(epochs), desc=f"{dataset}", leave=True)
    for epoch in progress_bar:
        tm.fit(graphs_train, Y_train, epochs=1, incremental=True)

        # Run inference once per epoch and reuse the predictions for every
        # metric instead of calling predict() again for each of them.
        predictions = tm.predict(graphs_test)

        result_test = 100 * (predictions == Y_test).mean()

        f1_score_test = f1_score(Y_test, predictions, average='weighted', zero_division=0)
        precision_test = precision_score(Y_test, predictions, average='weighted', zero_division=0)
        recall_test = recall_score(Y_test, predictions, average='weighted', zero_division=0)

        if result_test > best_test_acc:
            best_test_acc = result_test
            best_f1 = f1_score_test
            best_prec = precision_test
            best_rec = recall_test

        trial.set_user_attr("f1", f1_score_test)
        trial.set_user_attr("precision", precision_test)
        trial.set_user_attr("recall", recall_test)

        progress_bar.set_postfix({
            'Acc':f'{result_test:.2f}%',
            'BestAcc': f'{best_test_acc:.2f}%',
            'F1': f'{best_f1:.2f}',
            'Prec': f'{best_prec:.2f}',
            'Rec': f'{best_rec:.2f}'
        })

        # Perfect score: no point in training further.
        if result_test >= 100 and f1_score_test >= 1:
            return result_test

        # Let the pruner see this epoch's accuracy and stop the trial if asked.
        trial.report(result_test, epoch)

        if trial.should_prune():
            raise TrialPruned()

    return result_test

In [None]:
# Running multiple studies for different configurations of the dataset

# The SQLite storage URL used below lives in results/optuna. SQLite creates the
# database file but not missing parent directories, so create them up front.
os.makedirs("results/optuna", exist_ok=True)

for pos in tqdm(open_pos, desc="Open positions"):
    for n_samples in tqdm(samples, desc="Samples", leave=False):
        for board_size in tqdm(board_sizes, desc="Board Sizes", leave=False):
            for mbf in tqdm(moves_before, desc="Moves Before", leave=False):
                dataset = f"{board_size}x{board_size}_{n_samples}_{pos}_{mbf}"

                # One persistent study per dataset; an existing study is resumed.
                study = optuna.create_study(
                    direction="maximize",
                    study_name=f"Study_{dataset}",
                    storage=f"sqlite:///results/optuna/{DB}",
                    load_if_exists=True,
                    pruner=optuna.pruners.PatientPruner(optuna.pruners.SuccessiveHalvingPruner(), patience=5)
                )

                if len(study.trials) > 0:
                    try:
                        if study.best_trial.value >= 100:
                            print(f"Study {study.study_name} already has 100% accuracy. Skipping further optimization.")
                            continue
                    except ValueError:
                        # best_trial raises ValueError when no trial has completed yet.
                        print(f"No valid trials found for {study.study_name}, continuing with optimization.")
                else:
                    print(f"No trials found for {study.study_name}. Running new optimization.")

                # Load the dataset only after the skip check above, so solved
                # studies do not pay for unpickling data that is never used.
                # NOTE: pickle.load can execute arbitrary code; only load
                # dataset files that were produced by a trusted source.
                file_path = paths["graphs"] / f"{dataset}.pkl"
                with open(file_path, 'rb') as f:
                    graphs_train, graphs_test, X_train, Y_train, X_test, Y_test = pickle.load(f)

                objective_with_params = partial(objective, graphs_train=graphs_train, graphs_test=graphs_test,
                                                X_train=X_train, Y_train=Y_train, X_test=X_test, Y_test=Y_test,
                                                board_size=board_size, mbf=mbf, n_samples=n_samples, dataset=dataset)

                try:
                    study.optimize(objective_with_params, n_trials=1000, callbacks=[stop_when_100_accuracy])
                except KeyboardInterrupt:
                    # An interrupt only abandons the current study; the loop
                    # then moves on to the next dataset configuration.
                    print("Optimization interrupted!")
                    try:
                        print(f"Best result so far: {study.best_params}")
                    except ValueError:
                        # best_params raises ValueError when no trial has completed.
                        print("No completed trials yet, so there is no best result to report.")

Open positions:   0%|          | 0/1 [00:00<?, ?it/s]

Samples:   0%|          | 0/3 [00:00<?, ?it/s]

Board Sizes:   0%|          | 0/3 [00:00<?, ?it/s]

Moves Before:   0%|          | 0/3 [00:00<?, ?it/s]

  pruner=optuna.pruners.PatientPruner(optuna.pruners.SuccessiveHalvingPruner(), patience=5)


No trials found for Study_13x13_1000_40_0. Running new optimization.
Initialization of sparse structure.
Start trial with c=94189, T=5787.588623774524, s=1.2415460446638984, d=8, e=63


13x13_1000_40_0:   0%|          | 0/63 [00:00<?, ?it/s]

Initialization of sparse structure.
Start trial with c=104776, T=71851.0090415516, s=3.091469006878781, d=14, e=54


13x13_1000_40_0:   0%|          | 0/54 [00:00<?, ?it/s]