In [1]:
import os
import sys
from pathlib import Path
from GraphTsetlinMachine.graphs import Graphs
import numpy as np
from tqdm.auto import tqdm 
from src.datahandler import DataHandler
from sklearn.model_selection import train_test_split
import pickle

In [2]:
# Make sure the working folders used by this notebook exist before anything
# is read from or written to them; existing folders are left untouched.
for folder_name in ("data", "models", "graphs"):
    os.makedirs(folder_name, exist_ok=True)

In [3]:
# Logical name -> relative folder, each folder being named after its key.
paths = {name: Path(name) for name in ("data", "models", "graphs")}

In [4]:
# Hypervector settings forwarded to the Graphs constructor.
hypervector_size = 64
hypervector_bits = 2

# Sweep definition: one graph file is produced per
# (sample count, moves-before value, board size) combination.
samples = [100_000]
moves_before = [0, 2, 5]
board_sizes = list(range(3, 16))  # 3x3 up to and including 15x15

In [5]:
def getConnections(size, index):
    """Return the outgoing links of node ``index`` on a ``size`` x ``size`` board.

    Cells are numbered row-major (``index = col + row * size``). Four extra
    "border" node ids follow the cells: right = size*size, left = size*size + 1,
    down = size*size + 2 and up = size*size + 3.

    Links are produced in a fixed order:

    1. towards the row above: the cell straight above plus one more link
       (or just the ``up`` border node when the cell is in the first row),
    2. the left neighbour, or the ``left`` border node in the first column,
    3. the right neighbour, or the ``right`` border node in the last column,
    4. towards the row below, mirroring step 1 with the ``down`` border node.

    The "one more link" in steps 1 and 4 is the diagonal-left cell for even
    rows (when a column exists to the left), the diagonal-right cell for odd
    rows (when a column exists to the right), and otherwise the up/down
    border node.

    NOTE(review): that last fallback links edge-column cells of non-boundary
    rows to the up/down border node - confirm this is the intended topology.

    Parameters
    ----------
    size : int
        Side length of the board.
    index : int
        Row-major node id. No range check is performed.

    Returns
    -------
    tuple[list[int], list[str]]
        Target node ids and, in parallel, the label of each link.
    """
    col, row = index % size, index // size

    cell_count = size * size
    border_right = cell_count
    border_left = cell_count + 1
    border_down = cell_count + 2
    border_up = cell_count + 3

    has_left = col > 0
    has_right = col < size - 1
    even_row = row % 2 == 0

    def toward(adjacent_row, labels, border_id):
        """Build the two links pointing at ``adjacent_row``."""
        straight, diag_left, diag_right, border = labels
        pair = [(col + adjacent_row * size, straight)]
        if even_row and has_left:
            pair.append((col - 1 + adjacent_row * size, diag_left))
        elif not even_row and has_right:
            pair.append((col + 1 + adjacent_row * size, diag_right))
        else:
            pair.append((border_id, border))
        return pair

    links = []

    # Row above (or the top border).
    if row > 0:
        links.extend(toward(row - 1, ("dir_up", "up_left", "up_right", "up"), border_up))
    else:
        links.append((border_up, "up"))

    # Same row: left first, then right.
    links.append((col - 1 + row * size, "dir_left") if has_left else (border_left, "left"))
    links.append((col + 1 + row * size, "dir_right") if has_right else (border_right, "right"))

    # Row below (or the bottom border).
    if row < size - 1:
        links.extend(toward(row + 1, ("dir_down", "down_left", "down_right", "down"), border_down))
    else:
        links.append((border_down, "down"))

    connections = [target for target, _ in links]
    directions = [label for _, label in links]
    return connections, directions

In [None]:
# Fixed seed so the train/test split (and therefore the pickled graphs) is
# identical on every Restart & Run All.
SPLIT_SEED = 42


def _board_topology(board_size):
    """Precompute the graph layout shared by every sample of one board size.

    Parameters
    ----------
    board_size : int
        Side length of the board.

    Returns
    -------
    cell_links : list[tuple[list[int], list[str]]]
        ``getConnections(board_size, node_id)`` for every board cell, indexed
        by cell id.
    border_nodes : list[tuple[int, list[int], int, str]]
        ``(node_id, neighbour_ids, edge_type, symbol)`` for the four border
        nodes, in the order right, left, down, up. Their ids directly follow
        the board cells, matching the ids ``getConnections`` links to.
    """
    number_of_nodes = board_size * board_size
    cell_links = [getConnections(board_size, node_id) for node_id in range(number_of_nodes)]
    border_nodes = [
        # last column
        (number_of_nodes, list(range(board_size - 1, number_of_nodes, board_size)), 0, 'RIGHT'),
        # first column
        (number_of_nodes + 1, list(range(0, number_of_nodes, board_size)), 1, 'LEFT'),
        # last row
        (number_of_nodes + 2, list(range(number_of_nodes - board_size, number_of_nodes)), 2, 'DOWN'),
        # first row
        (number_of_nodes + 3, list(range(board_size)), 3, 'UP'),
    ]
    return cell_links, border_nodes


def _populate_graphs(graphs, X, board_size, cell_links, border_nodes, label):
    """Fill ``graphs`` with one graph per row of ``X`` and encode it.

    The same routine is used for the train and the test split so both are
    built with the same node order, edges and properties.

    Parameters
    ----------
    graphs : Graphs
        Freshly constructed container with ``X.shape[0]`` graphs.
    X : array
        Indexed as ``X[graph_id, node_id]``; a value of 1 marks the cell
        'RED', -1 marks it 'BLUE', anything else adds no colour property.
    board_size : int
        Side length of the board.
    cell_links, border_nodes
        Output of ``_board_topology(board_size)``.
    label : str
        Prefix for the progress-bar descriptions (e.g. "Train").
    """
    number_of_graphs = X.shape[0]
    nodes_per_graph = len(cell_links) + len(border_nodes)

    for graph_id in range(number_of_graphs):
        graphs.set_number_of_graph_nodes(graph_id, nodes_per_graph)
    graphs.prepare_node_configuration()

    # Declare every node together with its number of outgoing edges:
    # board cells first, then the border nodes.
    for graph_id in tqdm(range(number_of_graphs), desc=f"{label} nodes", leave=True):
        for node_id, (neighbors, _) in enumerate(cell_links):
            graphs.add_graph_node(graph_id, node_id, len(neighbors))
        for node_id, neighbors, _, _ in border_nodes:
            graphs.add_graph_node(graph_id, node_id, len(neighbors))
    graphs.prepare_edge_configuration()

    for graph_id in tqdm(range(number_of_graphs), desc=f"{label} edges & props", leave=True):
        for node_id, (neighbors, directions) in enumerate(cell_links):
            for neighbor_id, direction in zip(neighbors, directions):
                graphs.add_graph_node_edge(graph_id, node_id, neighbor_id, direction)

            node_value = X[graph_id, node_id]
            if node_value == 1:
                graphs.add_graph_node_property(graph_id, node_id, 'RED')
            elif node_value == -1:
                graphs.add_graph_node_property(graph_id, node_id, 'BLUE')

            graphs.add_graph_node_property(graph_id, node_id, f'ROW_{node_id // board_size}')
            graphs.add_graph_node_property(graph_id, node_id, f'COL_{node_id % board_size}')

        for node_id, neighbors, edge_type, symbol in border_nodes:
            for neighbor_id in neighbors:
                graphs.add_graph_node_edge(graph_id, node_id, neighbor_id, edge_type)
            graphs.add_graph_node_property(graph_id, node_id, symbol)

    graphs.encode()


# For every (sample count, moves-before, board size) combination: load the
# dataset, split it 80/20, build train/test graphs and pickle the result.
for n_samples in tqdm(samples, desc="Samples"):
    for mbf in tqdm(moves_before, desc="Moves Before", leave=False):
        for board_size in tqdm(board_sizes, desc="Board Sizes", leave=False):
            dataset = f"{board_size}x{board_size}_{mbf}"
            dh = DataHandler(paths=paths, files={'data': dataset}, dataloader='np.genfromtxt', n_samples=n_samples)
            data = dh.data[:n_samples]

            # Last column is the label, everything before it is the board.
            X_data = data[:, :-1]
            Y_data = data[:, -1]
            X_train, X_test, Y_train, Y_test = train_test_split(
                X_data, Y_data, test_size=0.2, random_state=SPLIT_SEED
            )

            symbol_names = ['RED', 'BLUE', 'UP', 'DOWN', 'RIGHT', 'LEFT']
            for i in range(board_size):
                symbol_names.append(f'ROW_{i}')
                symbol_names.append(f'COL_{i}')

            # The layout depends only on board_size, so compute it once here
            # instead of once per graph and node.
            cell_links, border_nodes = _board_topology(board_size)

            graphs_train = Graphs(
                X_train.shape[0],
                symbols=symbol_names,
                hypervector_size=hypervector_size,
                hypervector_bits=hypervector_bits,
                double_hashing=False,
            )
            _populate_graphs(graphs_train, X_train, board_size, cell_links, border_nodes, "Train")

            # The test container reuses the configuration of the encoded train graphs.
            graphs_test = Graphs(X_test.shape[0], init_with=graphs_train)
            _populate_graphs(graphs_test, X_test, board_size, cell_links, border_nodes, "Test")

            output_file = paths["graphs"] / f'{board_size}x{board_size}_{mbf}_{n_samples}.pkl'
            with open(output_file, 'wb') as f:
                pickle.dump((graphs_train, graphs_test, X_train, Y_train, X_test, Y_test), f)

Samples:   0%|          | 0/1 [00:00<?, ?it/s]

Moves Before:   0%|          | 0/3 [00:00<?, ?it/s]

Board Sizes:   0%|          | 0/13 [00:00<?, ?it/s]

Train nodes:   0%|          | 0/80000 [00:00<?, ?it/s]

Train edges & props:   0%|          | 0/80000 [00:00<?, ?it/s]

Test nodes:   0%|          | 0/20000 [00:00<?, ?it/s]

Test edges & props:   0%|          | 0/20000 [00:00<?, ?it/s]

Train nodes:   0%|          | 0/80000 [00:00<?, ?it/s]

Train edges & props:   0%|          | 0/80000 [00:00<?, ?it/s]

Test nodes:   0%|          | 0/20000 [00:00<?, ?it/s]

Test edges & props:   0%|          | 0/20000 [00:00<?, ?it/s]

Train nodes:   0%|          | 0/80000 [00:00<?, ?it/s]

Train edges & props:   0%|          | 0/80000 [00:00<?, ?it/s]

Test nodes:   0%|          | 0/20000 [00:00<?, ?it/s]

Test edges & props:   0%|          | 0/20000 [00:00<?, ?it/s]

Train nodes:   0%|          | 0/80000 [00:00<?, ?it/s]

Train edges & props:   0%|          | 0/80000 [00:00<?, ?it/s]

Test nodes:   0%|          | 0/20000 [00:00<?, ?it/s]

Test edges & props:   0%|          | 0/20000 [00:00<?, ?it/s]

Train nodes:   0%|          | 0/80000 [00:00<?, ?it/s]

Train edges & props:   0%|          | 0/80000 [00:00<?, ?it/s]

Test nodes:   0%|          | 0/20000 [00:00<?, ?it/s]

Test edges & props:   0%|          | 0/20000 [00:00<?, ?it/s]

Train nodes:   0%|          | 0/80000 [00:00<?, ?it/s]

Train edges & props:   0%|          | 0/80000 [00:00<?, ?it/s]

Test nodes:   0%|          | 0/20000 [00:00<?, ?it/s]

Test edges & props:   0%|          | 0/20000 [00:00<?, ?it/s]

Train nodes:   0%|          | 0/80000 [00:00<?, ?it/s]

Train edges & props:   0%|          | 0/80000 [00:00<?, ?it/s]