In [1]:
from GraphTsetlinMachine.graphs import Graphs
import numpy as np


In [2]:

# Hypervector encoding parameters; both are passed to Graphs(...) when the
# training graphs object is created.
hypervector_bits = 2
hypervector_size = 128
# Side length of the board (the data file holds 7x7 games, 49 cells per row).
size = 7

In [3]:
def getConnections(size, index):
    """Return the flat indices of the cells adjacent to ``index`` on a ``size`` x ``size`` board.

    Cells are numbered row by row: ``index = x + y * size`` where ``x`` is the
    column and ``y`` is the row. Rows are treated as an offset ("staggered")
    hexagonal layout: besides the cells straight above and below, a cell in an
    even row touches the upper-left and lower-left cells, while a cell in an
    odd row touches the upper-right and lower-right cells. The resulting
    adjacency is symmetric (if ``a`` lists ``b`` then ``b`` lists ``a``).

    This is the single definition of the function. An earlier duplicate that
    was silently shadowed (and used an asymmetric neighbour table) has been
    removed; the behaviour callers actually got is unchanged.

    NOTE(review): the classic Hex board is a rhombus whose diagonal neighbour
    is on the same side in every row; confirm that the offset-row layout used
    here matches how the data file lays out its cells.

    Args:
        size: Number of cells per row and per column.
        index: Flat cell index, expected in ``range(size * size)``.

    Returns:
        list[int]: Neighbour indices in the order upper row (straight,
        diagonal), same row (left, right), lower row (straight, diagonal).
        Positions that fall off the board are omitted.
    """
    x = index % size
    y = index // size

    # Column of the diagonal neighbour in the rows above and below:
    # one step left for even rows, one step right for odd rows.
    diag_x = x - 1 if y % 2 == 0 else x + 1
    has_diag = 0 <= diag_x < size

    connections = []

    # Upper row
    if y > 0:
        connections.append(x + (y - 1) * size)
        if has_diag:
            connections.append(diag_x + (y - 1) * size)

    # Same row
    if x > 0:
        connections.append(x - 1 + y * size)
    if x < size - 1:
        connections.append(x + 1 + y * size)

    # Lower row
    if y < size - 1:
        connections.append(x + (y + 1) * size)
        if has_diag:
            connections.append(diag_x + (y + 1) * size)

    return connections

In [4]:
# Load the recorded games from the CSV file: comma separated, the header row
# is skipped and every field is parsed as a 32-bit integer.
data = np.genfromtxt(
    'data/hex_games_1_000_000_size_7.csv',
    delimiter=',',
    dtype=np.int32,
    skip_header=1,
)

# Labels of the 49 cell columns, 'cell<a>_<b>' with a and b in 0..6; the first
# number varies slowest ('cell0_0', 'cell0_1', ..., 'cell6_6').
collum_names = [f'cell{first}_{second}' for first in range(7) for second in range(7)]


In [5]:
#sumsamples = 30
#np.random.shuffle(data)
#data = data[:sumsamples]

# duplicate the data 100 times
#data = np.repeat(data, 1000, axis=0)

In [6]:
# Shuffle the rows in place. A seeded generator is used so that the subset
# selected from the shuffled data is the same on every Restart & Run All
# (the unseeded global np.random state made each run pick different games).
np.random.default_rng(42).shuffle(data)



In [7]:
# Spot check: neighbours of cell 28 on a 7x7 board (column 28 % 7 = 0, row 28 // 7 = 4).
getConnections(7, 28)


[21, 29, 35]

In [8]:
# Work on the first 100000 rows only.
data = data[:100000]

# Shuffle the selected rows in place with a fixed seed so their order (and
# everything derived from it) is reproducible between runs.
np.random.default_rng(42).shuffle(data)

In [9]:
# The last column is the label; all columns before it are the input values.
X_data, Y_data = data[:, :-1], data[:, -1]
# Show the first input row as a quick sanity check.
X_data[0]

array([ 1,  1,  1,  1, -1, -1,  1,  1, -1,  1,  1,  1, -1,  1,  1,  1,  1,
       -1,  1, -1,  0, -1, -1, -1,  1, -1, -1, -1,  1, -1,  0,  1,  1,  1,
       -1,  0,  1, -1, -1,  1, -1, -1, -1,  1, -1, -1,  1, -1, -1])

In [10]:
# Convert the labels to class ids: 1 stays 1, everything else (-1) becomes 0.
# The test is written as `== 1` so the cell is idempotent: with the previous
# `== -1` test, running the cell a second time (when the labels are already
# 0/1) mapped every label to 1.
Y_data = np.where(Y_data == 1, 1, 0)

In [11]:
# Display the converted labels.
Y_data

array([0, 1, 1, ..., 0, 1, 1])

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split itself reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=42)

# One graph node per board cell. Derived from the configured board side length
# instead of repeating the literal 7, so it cannot drift from `size`.
number_of_nodes = size * size
# Symbols a node can carry as its feature.
symbol_names = ['RED', 'BLUE', 'EMPTY']

In [13]:
# Container for the training graphs: one graph per training sample.
graphs_train = Graphs(
    X_train.shape[0],
    symbol_names=symbol_names,
    hypervector_size=hypervector_size,
    hypervector_bits=hypervector_bits,
)

In [14]:
# Every graph has the same board topology, so the number of outgoing edges per
# node is computed once here instead of calling getConnections for every node
# of every graph.
edges_per_node = [len(getConnections(size, node_id)) for node_id in range(number_of_nodes)]

# Declare how many nodes each graph has.
for graph_id in range(X_train.shape[0]):
    graphs_train.set_number_of_graph_nodes(graph_id, number_of_nodes)

graphs_train.prepare_node_configuration()

# Add the nodes; the third argument is the number of outgoing edges that will
# be added for that node later.
for graph_id in range(X_train.shape[0]):
    for node_id, nr_neighbours in enumerate(edges_per_node):
        graphs_train.add_graph_node(graph_id, node_id, nr_neighbours)

graphs_train.prepare_edge_configuration()



In [15]:

# Add edges and node features to each training graph.
# The neighbour lists depend only on the board, not on the sample, so they are
# built once instead of once per node per graph.
neighbours_by_node = [getConnections(size, node_id) for node_id in range(number_of_nodes)]
edge_type = 0  # every edge uses the same edge type
# Cell value -> node symbol; any value other than 1 or -1 is treated as EMPTY.
symbol_by_value = {1: 'RED', -1: 'BLUE'}

for graph_id in range(X_train.shape[0]):
    for node_id in range(number_of_nodes):
        # add_graph_node_edge(graph_id, node_id, destination_node_id, edge_type)
        for neighbor_id in neighbours_by_node[node_id]:
            graphs_train.add_graph_node_edge(graph_id, node_id, neighbor_id, edge_type)

        # Tag the node with the symbol matching the cell's value.
        node_value = X_train[graph_id, node_id]
        graphs_train.add_graph_node_feature(graph_id, node_id, symbol_by_value.get(int(node_value), 'EMPTY'))
       

In [16]:
# Called once all nodes, edges and node features of the training graphs have been added.
# NOTE(review): encode() is implemented in GraphTsetlinMachine; presumably it must run before the graphs are used for training — confirm.
graphs_train.encode()

In [17]:
# Number of held-out samples; one test graph is created for each of them.
X_test.shape[0]

20000

In [18]:
# Create the test graphs object, one graph per test sample.
# NOTE(review): init_with=graphs_train presumably makes the test graphs reuse the
# symbol/hypervector setup of the training graphs — confirm against the library.
graphs_test = Graphs(X_test.shape[0], init_with=graphs_train)


In [19]:

# Every graph has the same board topology, so the number of outgoing edges per
# node is computed once here instead of calling getConnections for every node
# of every graph.
edges_per_node = [len(getConnections(size, node_id)) for node_id in range(number_of_nodes)]

# Declare how many nodes each graph has.
for graph_id in range(X_test.shape[0]):
    graphs_test.set_number_of_graph_nodes(graph_id, number_of_nodes)

graphs_test.prepare_node_configuration()

# Add the nodes; the third argument is the number of outgoing edges that will
# be added for that node later.
for graph_id in range(X_test.shape[0]):
    for node_id, nr_neighbours in enumerate(edges_per_node):
        graphs_test.add_graph_node(graph_id, node_id, nr_neighbours)

graphs_test.prepare_edge_configuration()

In [20]:
# Add edges and node features to each test graph.
# The neighbour lists depend only on the board, not on the sample, so they are
# built once instead of once per node per graph.
neighbours_by_node = [getConnections(size, node_id) for node_id in range(number_of_nodes)]
edge_type = 0  # every edge uses the same edge type
# Cell value -> node symbol; any value other than 1 or -1 is treated as EMPTY.
symbol_by_value = {1: 'RED', -1: 'BLUE'}

for graph_id in range(X_test.shape[0]):
    for node_id in range(number_of_nodes):
        # add_graph_node_edge(graph_id, node_id, destination_node_id, edge_type)
        for neighbor_id in neighbours_by_node[node_id]:
            graphs_test.add_graph_node_edge(graph_id, node_id, neighbor_id, edge_type)

        # Tag the node with the symbol matching the cell's value.
        node_value = X_test[graph_id, node_id]
        graphs_test.add_graph_node_feature(graph_id, node_id, symbol_by_value.get(int(node_value), 'EMPTY'))

In [21]:
# Called once all nodes, edges and node features of the test graphs have been added.
# NOTE(review): encode() is implemented in GraphTsetlinMachine; presumably it must run before the graphs are used for prediction — confirm.
graphs_test.encode()

In [22]:
import pickle

# Cache the built graph objects together with the raw train/test arrays in
# 'data.pkl' so they can be restored without rebuilding the graphs.
artifacts = (graphs_train, graphs_test, X_train, Y_train, X_test, Y_test)
with open('data.pkl', 'wb') as handle:
    pickle.dump(artifacts, handle)


In [23]:

# Optional: restore the objects cached in 'data.pkl' instead of rebuilding them.
# The load is left disabled; uncomment the two lines below to use it. Only
# unpickle files you created yourself (pickle.load can execute arbitrary code).
import pickle

# with open('data.pkl', 'rb') as f:
#     graphs_train, graphs_test, X_train, Y_train, X_test, Y_test = pickle.load(f)

In [24]:
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine

In [25]:
# Hyperparameters for the Graph Tsetlin Machine and the training loop.
number_of_nodes = 7*7  # nodes per graph; same value as set before the graphs were built
# The following six values are passed to the MultiClassGraphTsetlinMachine constructor.
number_of_clauses = 2000
T = 25000
depth =5
s = 10.0
message_size = 256
message_bits = 2
# NOTE(review): max_included_literals and number_of_states are defined here but
# are not passed to the model anywhere in this notebook — confirm whether they
# were meant to be used.
max_included_literals = 32
number_of_states = 200
epochs = 25  # number of single-epoch fit() calls made by the training loop

In [26]:
# Sanity report on the encoded training graphs: overall sizes and array shapes
# first, then the per-graph node offsets and per-node edge offsets.
report_lines = [
    f"Number of Graphs: {graphs_train.number_of_graphs}",
    f"Number of Nodes: {graphs_train.number_of_nodes}",
    f"X Shape: {graphs_train.X.shape}",
    f"Edge Array Shape: {graphs_train.edge.shape}",
    f"Node Index Array: {graphs_train.node_index}",
    f"Edge Index Array: {graphs_train.edge_index}",
]
for report_line in report_lines:
    print(report_line)


Number of Graphs: 80000
Number of Nodes: 3920000
X Shape: (3920000, 8)
Edge Array Shape: (19200000, 2)
Node Index Array: [      0      49      98 ... 3919853 3919902 3919951]
Edge Index Array: [       0        2        6 ... 19199989 19199993 19199997]


In [27]:
# Build the classifier from the hyperparameters defined above.
tm = MultiClassGraphTsetlinMachine(
    number_of_clauses,
    T,
    s,
    depth=depth,
    message_size=message_size,
    message_bits=message_bits,
)


Initialization of sparse structure.


In [64]:
# Mean of the 0/1 test labels, i.e. the fraction of test samples in class 1.
# NOTE(review): this cell carries execution count 64 (out of sequence) and its
# saved output differs from the same expression evaluated at the end of the
# notebook, so the saved outputs come from different kernel states. Re-run the
# notebook top to bottom before trusting the recorded numbers.
Y_test.mean()

0.406

In [None]:


# Train one epoch at a time (incremental fit) so that test and training
# accuracy can be reported after every pass over the data.
for epoch in range(1, epochs + 1):
    tm.fit(graphs_train, Y_train, epochs=1, incremental=True)

    test_predictions = tm.predict(graphs_test)
    result_test = 100 * (test_predictions == Y_test).mean()
    train_predictions = tm.predict(graphs_train)
    result_train = 100 * (train_predictions == Y_train).mean()
    print("#%d Testing Accuracy: %.2f%% Training Accuracy: %.2f%%" % (epoch, result_test, result_train))
   
  

#1 Testing Accuracy: 54.43% Training Accuracy: 57.29%
#2 Testing Accuracy: 58.55% Training Accuracy: 58.20%


In [29]:
# Show the predicted class for each test graph.
tm.predict(graphs_test)

array([1, 0, 1, ..., 0, 1, 0], dtype=int64)

In [30]:
# Mean of the 0/1 test labels, i.e. the fraction of test samples in class 1
# (the accuracy a constant "always 1" prediction would reach).
Y_test.mean()

0.5433