In [1]:
import fcntl
import os
import pickle
import platform
from concurrent.futures import ProcessPoolExecutor, as_completed
from itertools import product
from pathlib import Path

from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm

from src.datahandler import DataHandler
from src.graphhandler import GraphHandler

In [2]:
# Storage root used when the notebook runs on the "Corsair" machine (see the
# machine check further down); other machines use folders relative to the
# working directory instead.
# NOTE(review): /mnt/f looks like a mounted Windows F: drive -- confirm.
windows_drive = Path("/mnt/f/TsetlinModels")

In [3]:
def get_machine_info():
    """Print and return the identity of the machine running the notebook.

    Returns:
        tuple: ``(machine_name, os_name, user)`` where ``machine_name`` is
        ``platform.node()``, ``os_name`` is ``platform.system()`` and ``user``
        is the ``USER`` environment variable, falling back to ``USERNAME``
        (``None`` when neither is set).
    """
    machine_name, os_name = platform.node(), platform.system()
    # USER is checked first, USERNAME is the fallback.
    user = os.getenv("USER") or os.getenv("USERNAME")

    # One line per field, in the order machine / OS / user.
    print(f"Machine: {machine_name}", f"OS: {os_name}", f"User: {user}", sep="\n")

    return machine_name, os_name, user

In [4]:
def process_combination(board_size, hv_size, mb, double_hash, n_samples, op, mbf, paths, random_state=None):
    """Build the train/test graphs for one parameter combination and pickle them.

    The result is written to ``paths['graphs'] / "<dataset>.pkl"``, where the
    dataset name encodes every argument. If that file already exists the
    combination is skipped, so re-running the whole grid only builds what is
    missing.

    Args:
        board_size: Board dimension; file names use ``{board_size}x{board_size}``
            and the value is passed to ``GraphHandler``.
        hv_size: Passed to ``GraphHandler`` as ``hypervector_size``.
        mb: Passed to ``GraphHandler`` as ``hypervector_bits``.
        double_hash: Passed to ``GraphHandler`` as ``double_hashing``; also
            selects the ``dh2`` / ``dh1`` suffix of the output file name.
        n_samples: Number of rows kept from the loaded data. The source data
            set is looked up under a label that contains ``n_samples * 2``.
        op: Only used to build the data set and output file names here.
        mbf: Only used to build the data set and output file names here.
        paths: Mapping of storage locations. ``paths['graphs']`` must support
            the ``/`` operator (e.g. a ``pathlib.Path``); the whole mapping is
            forwarded to ``DataHandler`` and ``GraphHandler``.
        random_state: Optional seed forwarded to ``train_test_split``. The
            default ``None`` keeps the split unseeded, as before.

    Returns:
        str: ``"<N>x<N> - skipping..."`` when the output file already exists,
        otherwise ``"<N>x<N> - building..."`` after the file has been written.
    """
    dh_string = "dh2" if double_hash else "dh1"
    dataset = f"{board_size}x{board_size}_{n_samples}_{op}_{mbf}_{hv_size}_{mb}_{dh_string}"
    file_path = paths['graphs'] / f"{dataset}.pkl"

    # The output file on disk is the only record of finished work. (A per-call
    # in-memory set cannot remember anything between calls or processes.)
    if os.path.exists(file_path):
        return f"{board_size}x{board_size} - skipping..."

    # Load data
    dataset_label = f"{board_size}x{board_size}_{n_samples*2}_{op}_{mbf}"
    data_handler = DataHandler(paths=paths, files={'data': dataset_label}, dataloader='np.genfromtxt', n_samples=n_samples)
    data = data_handler.data[:n_samples]

    # Prepare training and testing sets: last column is the label.
    X_data, Y_data = data[:, :-1], data[:, -1]
    X_train, X_test, Y_train, Y_test = train_test_split(
        X_data, Y_data, test_size=0.2, random_state=random_state
    )

    # Prepare graph configuration
    graphs_train = GraphHandler(paths=paths,
                                board_size=board_size,
                                data_array=X_train,
                                symbols=['RED', 'BLUE', 'UP', 'DOWN', 'RIGHT', 'LEFT'],
                                hypervector_size=hv_size,
                                hypervector_bits=mb,
                                double_hashing=double_hash)
    graphs_train.build_complete_graphs()

    # The test graphs are initialised from the training handler.
    graphs_test = GraphHandler(paths=paths,
                               board_size=board_size,
                               data_array=X_test,
                               init_with=graphs_train)
    graphs_test.build_complete_graphs()

    # Write to a process-unique temp file and rename it into place. Opening the
    # final path with 'wb' would truncate it before any lock could be taken, and
    # a worker killed mid-dump would leave a partial .pkl that the exists-check
    # above then treats as finished. os.replace is atomic, so the final path only
    # ever holds a complete pickle.
    tmp_path = f"{file_path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump((graphs_train.graphs, graphs_test.graphs, X_train, Y_train, X_test, Y_test), f)
        os.replace(tmp_path, file_path)
    finally:
        # After a successful replace the temp file is gone; this only cleans up
        # after a failed dump.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return f"{board_size}x{board_size} - building..."

In [5]:
machine_name, os_name, user = get_machine_info()

# On the one known workstation everything lives under windows_drive; on any
# other machine the folders are created relative to the working directory.
on_workstation = (machine_name, os_name, user) == ("Corsair", "Linux", "jon")
storage_root = windows_drive if on_workstation else Path()

folders = ("data", "models", "graphs")
for folder in folders:
    os.makedirs(storage_root / folder, exist_ok=True)

# Maps each folder name to its location; passed on to the data/graph handlers.
paths = {folder: storage_root / folder for folder in folders}

Machine: Corsair
OS: Linux
User: jon


In [6]:
# Parameter grid: one graph file is built for every combination of these lists.
board_sizes = list(range(4, 16))  # 4 through 15 inclusive
hv_sizes = [128, 512]  # 256 kills the kernel; other candidates: 1024, 2048, 4096, 8192, 16384
double_hashing = [True]  # False is the other option
hv_mbs = [2, 4, 8, 16, 32]  # other candidates: 1, 64
open_pos = [40]  # other candidates: 0, 5, 10, 15, 20, 25, 35, 45, 50
samples = [1000, 10000]
moves_before = [0]

In [7]:
# Every parameter combination, in the same order nested loops would produce
# (board_sizes varies slowest, moves_before fastest). The tuple order matches the
# leading positional parameters of process_combination.
combinations = list(product(
    board_sizes, hv_sizes, hv_mbs, double_hashing, samples, open_pos, moves_before
))

with ProcessPoolExecutor(max_workers=1) as executor:
    # Remember which combination each future belongs to so failures can be
    # reported with their parameters.
    futures = {
        executor.submit(process_combination, *combo, paths): combo
        for combo in combinations
    }

    with tqdm(total=len(futures)) as pbar:
        for future in as_completed(futures):
            try:
                result = future.result()
                pbar.set_description(f"{result}")
            except Exception as e:
                # Keep going: one failed combination should not abort the grid.
                print(f"Error processing combination {futures[future]}: {e}")
            finally:
                # Count failed combinations too, so the bar reaches its total.
                pbar.update(1)

  0%|          | 0/1 [00:00<?, ?it/s]