In [1]:
pip install graphtsetlinmachine

Note: you may need to restart the kernel to use updated packages.


In [2]:
from GraphTsetlinMachine.graphs import Graphs
import numpy as np
from scipy.sparse import csr_matrix
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine
from time import time
import argparse
import random
import csv

In [3]:
def default_args(**kwargs):
    """Build the experiment configuration as an ``argparse`` namespace.

    Each hyperparameter is registered as a command-line option with a
    default value. The command line is parsed with ``parse_known_args``, so
    arguments that are not registered here are tolerated.

    Args:
        **kwargs: Overrides applied after parsing. A keyword is applied only
            when its name matches an attribute already present on the parsed
            namespace (e.g. ``epochs``, ``number_of_clauses``); any other
            keyword is silently ignored.

    Returns:
        argparse.Namespace: The parsed options with the overrides applied.

    Note:
        The ``--T`` default is a float. argparse applies ``type=`` only to
        string values, so the default is returned unconverted even though
        the option is declared with ``type=int``.
    """
    # (flag, add_argument keyword arguments), in registration order.
    option_specs = [
        ("--epochs", dict(default=25, type=int)),
        ("--number-of-clauses", dict(default=20000, type=int)),
        ("--T", dict(default=np.power(20000 / 0.8377, 1 / 2.2099), type=int)),
        ("--s", dict(default=2.534 * np.log(20000 / 3.7579), type=float)),
        ("--depth", dict(default=2, type=int)),
        ("--hypervector-size", dict(default=128, type=int)),
        ("--hypervector-bits", dict(default=2, type=int)),
        ("--message-size", dict(default=256, type=int)),
        ("--message-bits", dict(default=2, type=int)),
        ("--double-hashing", dict(dest="double_hashing", default=False, action="store_true")),
        ("--noise", dict(default=0.01, type=float)),
        ("--number-of-examples", dict(default=10000, type=int)),
        ("--max-included-literals", dict(default=32, type=int)),
    ]

    parser = argparse.ArgumentParser()
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    args, _unrecognised = parser.parse_known_args()

    # Only overrides that name an existing option are applied.
    recognised = vars(args)
    for name, override in kwargs.items():
        if name in recognised:
            setattr(args, name, override)
    return args


# Notebook-wide configuration built from the defaults above.
args = default_args()

In [4]:
print("Loading data")
input_file = 'hex_games_1_000_000_size_7.csv'

# Pull the whole CSV into memory; every row is a list of strings.
with open(input_file, mode='r', newline='') as infile:
    reader = csv.reader(infile)
    data = [row for row in reader]

# Keep only the final 2% of the rows.
delete_index = int((0.98 * len(data)))
data = data[delete_index:]

# 80/20 train/test partition of the retained rows, in file order.
split_index = int(0.8 * len(data))
train_data, test_data = data[:split_index], data[split_index:]


def _features_and_labels(rows):
    """Split rows into an integer feature matrix and an integer label column.

    All but the last field of each row become features; the last field is
    kept as a one-element slice, so the labels come back two-dimensional.
    """
    features = np.array([row[:-1] for row in rows], dtype=int)
    labels = np.array([row[-1:] for row in rows], dtype=int)
    return features, labels


def _print_shapes():
    """Print the current shapes of the four train/test arrays."""
    print("X_train shape:", X_train.shape)
    print("Y_train shape:", Y_train.shape)
    print("X_test shape:", X_test.shape)
    print("Y_test shape:", Y_test.shape)


X_train, Y_train = _features_and_labels(train_data)
X_test, Y_test = _features_and_labels(test_data)

# Shapes before the label column is flattened.
_print_shapes()

#X_train = X_train.reshape(-1, 7, 7)
#X_test = X_test.reshape(-1, 7, 7)

# Flatten the label column to one dimension, then map label -1 to 0.
Y_train = Y_train.ravel()
Y_test = Y_test.ravel()

Y_train = np.where(Y_train == -1, 0, Y_train)
Y_test = np.where(Y_test == -1, 0, Y_test)

# Shapes after flattening the labels.
_print_shapes()


Loading data
X_train shape: (16000, 49)
Y_train shape: (16000, 1)
X_test shape: (4000, 49)
Y_test shape: (4000, 1)
X_train shape: (16000, 49)
Y_train shape: (16000,)
X_test shape: (4000, 49)
Y_test shape: (4000,)


In [5]:
# Side length used for the board in the dimension computation below.
board_side = 7
patch_size = 1

# Number of patch positions along one side, and in total.
dim = board_side - patch_size + 1
number_of_nodes = dim ** 2

print(patch_size, dim, number_of_nodes)

1 7 49


In [6]:
print("Creating training data")

# One symbol per cell index (0 .. dim*dim - 1); these are the property
# names later attached to the nodes.
symbol_names = list(range(dim * dim))

graphs_train = Graphs(
    X_train.shape[0],
    symbols=symbol_names,
    hypervector_size=args.hypervector_size,
    hypervector_bits=args.hypervector_bits,
    double_hashing=args.double_hashing,
)

# Every graph consists of exactly three nodes (ids 0, 1, 2), added further
# down. Declare that count: the previous code declared `number_of_nodes`
# (dim*dim = 49) nodes per graph while only three were ever added, leaving
# the remaining declared nodes unconfigured.
nodes_per_graph = 3

for graph_id in range(X_train.shape[0]):
    graphs_train.set_number_of_graph_nodes(graph_id, nodes_per_graph)

graphs_train.prepare_node_configuration()
print("Finished preparing node configuration")

# Board geometry constants.
# NOTE(review): none of these are referenced by the graph construction in
# this cell; kept in case other code relies on them.
map_size = 7 # 7 x 7
data_size = 10000
directions = [
    (-1, 0), (1, 0),  # Left and Right
    (0, -1), (0, 1),  # Up and Down
    (-1, 1), (1, -1)  # Upper-left and Lower-right for staggered rows
]

# The three nodes are fully connected, so each has an edge to both others.
number_of_outgoing_edges = nodes_per_graph - 1

for graph_id in range(X_train.shape[0]):
    for node_id in range(nodes_per_graph):
        graphs_train.add_graph_node(graph_id, node_id, number_of_outgoing_edges)

print("All nodes are acounted for")
graphs_train.prepare_edge_configuration()
edge_type = "Plain"

# Connect every ordered pair of distinct nodes with a "Plain" edge.
for graph_id in range(X_train.shape[0]):
    for node_id in range(nodes_per_graph):
        for neighbor_id in range(nodes_per_graph):
            if node_id != neighbor_id:
                graphs_train.add_graph_node_edge(graph_id, node_id, neighbor_id, edge_type)
print("Added neighbor edges")


Creating training data
Finished preparing node configuration
All nodes are acounted for
Added neighbor edges


In [7]:
print("Preparing to add bricks to the board")

# Cell value handled by each node: node 0 <- 0, node 1 <- 1, node 2 <- -1.
bricks = np.array([0, 1, -1])

for graph_id, board in enumerate(X_train):
    for node_id, brick in enumerate(bricks):
        # Every cell index holding this value becomes a property of the node.
        for cell in np.flatnonzero(board == brick):
            graphs_train.add_graph_node_property(graph_id, node_id, cell)

graphs_train.encode()
print("All cells now have a brick")


Preparing to add bricks to the board
All cells now have a brick


In [8]:
print("Creating test data")

# Create test data.
# Size the container by the number of *test* samples (it was previously
# sized by X_train), and initialise it from graphs_train via `init_with` so
# the test graphs reuse the training graphs' symbol, hypervector and edge-type
# configuration instead of building an independent one.
graphs_test = Graphs(X_test.shape[0], init_with=graphs_train)

# Every graph consists of exactly three nodes (ids 0, 1, 2), added further
# down. Declare that count: the previous code declared `number_of_nodes`
# (dim*dim = 49) nodes per graph while only three were ever added, leaving
# the remaining declared nodes unconfigured.
nodes_per_graph = 3

for graph_id in range(X_test.shape[0]):
    graphs_test.set_number_of_graph_nodes(graph_id, nodes_per_graph)

graphs_test.prepare_node_configuration()
print("Finished preparing node configuration")

# The three nodes are fully connected, so each has an edge to both others.
number_of_outgoing_edges = nodes_per_graph - 1

for graph_id in range(X_test.shape[0]):
    for node_id in range(nodes_per_graph):
        graphs_test.add_graph_node(graph_id, node_id, number_of_outgoing_edges)

print("All nodes are acounted for")
graphs_test.prepare_edge_configuration()
edge_type = "Plain"

# Connect every ordered pair of distinct nodes with a "Plain" edge.
for graph_id in range(X_test.shape[0]):
    for node_id in range(nodes_per_graph):
        for neighbor_id in range(nodes_per_graph):
            if node_id != neighbor_id:
                graphs_test.add_graph_node_edge(graph_id, node_id, neighbor_id, edge_type)
print("Added neighbor edges")


Creating test data
Finished preparing node configuration
All nodes are acounted for
Added neighbor edges


In [None]:
print("Preparing to add bricks to the board")

# Cell value handled by each node: node 0 <- 0, node 1 <- 1, node 2 <- -1.
bricks = np.array([0, 1, -1])

for graph_id, board in enumerate(X_test):
    for node_id, brick in enumerate(bricks):
        # Every cell index holding this value becomes a property of the node.
        for cell in np.flatnonzero(board == brick):
            graphs_test.add_graph_node_property(graph_id, node_id, cell)

graphs_test.encode()
print("All cells now have a brick")


Preparing to add bricks to the board


In [None]:
tm = MultiClassGraphTsetlinMachine(
    args.number_of_clauses,
    args.T,
    args.s,
    depth=args.depth,
    message_size=args.message_size,
    message_bits=args.message_bits,
    max_included_literals=args.max_included_literals,
)


def _accuracy(graphs, labels):
    """Return the percentage of predictions from ``tm`` that equal ``labels``."""
    return 100 * (tm.predict(graphs) == labels).mean()


# One incremental training pass per epoch. Each epoch prints:
# epoch index, train accuracy, test accuracy, fit seconds, test-predict seconds.
for epoch in range(args.epochs):
    start_training = time()
    tm.fit(graphs_train, Y_train, epochs=1, incremental=True)
    stop_training = time()

    # Only the test-set evaluation is timed.
    start_testing = time()
    result_test = _accuracy(graphs_test, Y_test)
    stop_testing = time()

    result_train = _accuracy(graphs_train, Y_train)

    print("%d %.2f %.2f %.2f %.2f" % (epoch, result_train, result_test, stop_training-start_training, stop_testing-start_testing))

# Clause weights reshaped into two rows.
# NOTE(review): presumably one row per class - confirm against get_state().
weights = tm.get_state()[1].reshape(2, -1)
literal_count = args.hypervector_size * 2

for clause in range(tm.number_of_clauses):
    print("Clause #%d W:(%d %d)" % (clause, weights[0, clause], weights[1, clause]), end=' ')

    # Indices below hypervector_size are printed as plain literals, the rest
    # as negated ones. Only ta_action(0, ...) is inspected; the message
    # literals (ta_action(1, ...), message_size * 2 of them) are not printed.
    included = [
        "x%d" % (k) if k < args.hypervector_size else "NOT x%d" % (k - args.hypervector_size)
        for k in range(literal_count)
        if tm.ta_action(0, clause, k)
    ]

    print(" AND ".join(included))

print(graphs_test.hypervectors)
print(tm.hypervectors)
print(graphs_test.edge_type_id)