In [1]:
from GraphTsetlinMachine.graphs import Graphs
import numpy as np


In [2]:

hypervector_bits = 2
hypervector_size = 64
size = 11

In [3]:

def getConnections(size, index):
    x = index % size
    y = index // size
    max_index = size*size-1
    right = max_index + 1
    left = max_index + 2
    down = max_index + 3
    up = max_index + 4
    
    connections = []
    directions = []

    # Upper connections
    if y > 0:
        connections.append(x + (y - 1) * size)  # Directly above
        directions.append("dir_up")
        if y % 2 == 0 and x > 0:  # Even row: diagonal left
            connections.append(x - 1 + (y - 1) * size)
            directions.append("up_left")
        elif y % 2 == 1 and x < size - 1:  # Odd row: diagonal right
            connections.append(x + 1 + (y - 1) * size)
            directions.append("up_right")
        else:
            connections.append(up)
            directions.append("up")
    else :
        connections.append(up)
        directions.append("up")
        
    

    # Left and right connections
    if x > 0:
        connections.append(x - 1 + y * size)  # Left
        directions.append("dir_left")
    else:
        connections.append(left)
        directions.append("left")
    if x < size - 1:
        connections.append(x + 1 + y * size)  # Right
        directions.append("dir_right")
    else:
        connections.append(right)
        directions.append("right")

    # Lower connections
    if y < size - 1:
        connections.append(x + (y + 1) * size)  # Directly below
        directions.append("dir_down")
        if y % 2 == 0 and x > 0:  # Even row: diagonal left
            connections.append(x - 1 + (y + 1) * size)
            directions.append("down_left")
        elif y % 2 == 1 and x < size - 1:  # Odd row: diagonal right
            connections.append(x + 1 + (y + 1) * size)
            directions.append("down_right")
        else:
            connections.append(down)
            directions.append("down")
    else:
        connections.append(down)
        directions.append("down")

    return connections, directions

In [4]:
# read the data from data/hex_games_1_000_000_size_7.csv
data = np.genfromtxt('data/hex_games_11.csv', delimiter=',', dtype=np.int32, skip_header=1, max_rows=1100000)
data

collum_names =['cell0_0', 'cell0_1', 'cell0_2', 'cell0_3', 'cell0_4', 'cell0_5', 'cell0_6', 'cell1_0', 'cell1_1', 'cell1_2', 'cell1_3', 'cell1_4', 'cell1_5', 'cell1_6', 'cell2_0', 'cell2_1', 'cell2_2', 'cell2_3', 'cell2_4', 'cell2_5', 'cell2_6', 'cell3_0', 'cell3_1', 'cell3_2', 'cell3_3', 'cell3_4', 'cell3_5', 'cell3_6', 'cell4_0', 'cell4_1', 'cell4_2', 'cell4_3', 'cell4_4', 'cell4_5', 'cell4_6', 'cell5_0', 'cell5_1', 'cell5_2', 'cell5_3', 'cell5_4', 'cell5_5', 'cell5_6', 'cell6_0', 'cell6_1', 'cell6_2', 'cell6_3', 'cell6_4', 'cell6_5', 'cell6_6']


In [5]:
#sumsamples = 30
#np.random.shuffle(data)
#data = data[:sumsamples]

# duplicate the data 100 times
#data = np.repeat(data, 1000, axis=0)

In [6]:
data.shape


(1100000, 122)

In [7]:
getConnections(11, 28)


([17, 16, 27, 29, 39, 38],
 ['dir_up', 'up_left', 'dir_left', 'dir_right', 'dir_down', 'down_left'])

In [8]:
# select the first 100000 samples
data = data[:100000]

#data = np.repeat(data, 1000, axis=0)
# shuffle the data



In [9]:
# separate the last column from the rest of the data
X_data = data[:, :-1]
Y_data = data[:, -1]
X_data[0].shape

(121,)

In [10]:
# for y data we need to convert the -1 t0 0



In [11]:
Y_data 

array([1, 0, 1, ..., 0, 1, 1], dtype=int32)

In [12]:
from sklearn.model_selection import train_test_split
# train test splitt

X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2)
board_size = 11
number_of_nodes = board_size*board_size
symbol_names = ['RED', 'BLUE','UP', 'DOWN', 'RIGHT','LEFT']

for i in range(board_size):
    symbol_names.append(f'ROW_{i}')
    symbol_names.append(f'COL_{i}')
max_index = number_of_nodes-1
right_index = max_index + 1
left_index = max_index + 2
down_index = max_index + 3
up_index = max_index + 4
    


In [13]:
graphs_train = Graphs(X_train.shape[0],symbols=symbol_names, hypervector_size=hypervector_size, hypervector_bits=hypervector_bits, double_hashing = False)

In [14]:
for graph_id in range(X_train.shape[0]):
    graphs_train.set_number_of_graph_nodes(graph_id, number_of_nodes+4)




graphs_train.prepare_node_configuration()
# add up, down, left, right nodes


# Add nodes to each graph
for graph_id in range(X_train.shape[0]):
    for node_id in range(number_of_nodes):
        nr_neighbours = len(getConnections(board_size, node_id)[0])
        graphs_train.add_graph_node(graph_id, node_id, nr_neighbours) 
    graphs_train.add_graph_node(graph_id, right_index, board_size)
    graphs_train.add_graph_node(graph_id, left_index, board_size)
    graphs_train.add_graph_node(graph_id, down_index,board_size)
    graphs_train.add_graph_node(graph_id, up_index, board_size)

graphs_train.prepare_edge_configuration()



In [15]:


for graph_id in range(X_train.shape[0]):
    for node_id in range(number_of_nodes+4):
      
        neighbors, directions = getConnections(board_size,node_id) 
        edge_type = 0
        if node_id<number_of_nodes:
            
            for neighbor_id,dir in zip(neighbors,directions):
                graphs_train.add_graph_node_edge(graph_id, node_id, neighbor_id,dir)

        

            node_value = X_train[graph_id, node_id]
            

        
            if node_value == 1:
                graphs_train.add_graph_node_property(graph_id, node_id, 'RED')
                
            elif node_value == -1:
                graphs_train.add_graph_node_property(graph_id, node_id, 'BLUE')

                
            row = node_id // board_size
            col = node_id % board_size
            
                
    
     
            graphs_train.add_graph_node_property(graph_id, node_id, f'ROW_{row}')
            graphs_train.add_graph_node_property(graph_id, node_id, f'COL_{col}')
        if node_id == right_index:
            neighbors = [i for i in range(board_size-1,board_size*board_size,board_size)]
            edge_type = 0
            for neighbor_id in neighbors:
                graphs_train.add_graph_node_edge(graph_id, node_id, neighbor_id,edge_type)
            graphs_train.add_graph_node_property(graph_id, node_id, 'RIGHT')
        if node_id == left_index:
            neighbors = [i for i in range(0,board_size*board_size,board_size)]
            edge_type = 1
            for neighbor_id in neighbors:
                graphs_train.add_graph_node_edge(graph_id, node_id, neighbor_id,edge_type)
            graphs_train.add_graph_node_property(graph_id, node_id, 'LEFT')
        if node_id == down_index:
            neighbors = [i for i in range(board_size*board_size-board_size,board_size*board_size,1)]
            edge_type = 2
            for neighbor_id in neighbors:
                graphs_train.add_graph_node_edge(graph_id, node_id, neighbor_id,edge_type)
            graphs_train.add_graph_node_property(graph_id, node_id, 'DOWN')
        if node_id == up_index:
            neighbors = [i for i in range(board_size)]
            edge_type = 3
            for neighbor_id in neighbors:
                graphs_train.add_graph_node_edge(graph_id, node_id, neighbor_id,edge_type)
            graphs_train.add_graph_node_property(graph_id, node_id, 'UP')



       

In [16]:
[i for i in range(board_size-1,board_size*board_size,board_size)]

[10, 21, 32, 43, 54, 65, 76, 87, 98, 109, 120]

In [17]:
graphs_train.encode()

In [18]:
X_test.shape[0]

20000

In [19]:

graphs_test = Graphs(X_test.shape[0], init_with=graphs_train)


In [20]:

for graph_id in range(X_test.shape[0]):
    graphs_test.set_number_of_graph_nodes(graph_id, number_of_nodes+4)


 
graphs_test.prepare_node_configuration()


for graph_id in range(X_test.shape[0]):
    graphs_test.add_graph_node(graph_id, right_index, board_size)
    graphs_test.add_graph_node(graph_id, left_index, board_size)
    graphs_test.add_graph_node(graph_id, down_index,board_size)
    graphs_test.add_graph_node(graph_id, up_index, board_size)
    for node_id in range(number_of_nodes):
        nr_neighbours = len(getConnections(board_size, node_id)[0])
        graphs_test.add_graph_node(graph_id, node_id, nr_neighbours)  

graphs_test.prepare_edge_configuration()

In [21]:


for graph_id in range(X_test.shape[0]):
    for node_id in range(number_of_nodes+4):
     
        neighbors, directions = getConnections(board_size,node_id) 
        edge_type = 0
        if node_id<number_of_nodes:
            for neighbor_id, dir in zip(neighbors,directions):
                graphs_test.add_graph_node_edge(graph_id, node_id, neighbor_id,dir)
    
            node_value = X_test[graph_id, node_id]
    
            if node_value == 1:
                graphs_test.add_graph_node_property(graph_id, node_id, 'RED')
        
            elif node_value == -1:
                
                graphs_test.add_graph_node_property(graph_id, node_id, 'BLUE')
       
            row = node_id // board_size
            col = node_id % board_size

            
    
     
            graphs_test.add_graph_node_property(graph_id, node_id, f'ROW_{row}')
            graphs_test.add_graph_node_property(graph_id, node_id, f'COL_{col}')
    
        if node_id == right_index:
            neighbors = [i for i in range(board_size-1,board_size*board_size,board_size)]
            edge_type = 0
            for neighbor_id in neighbors:
                graphs_test.add_graph_node_edge(graph_id, node_id, neighbor_id,edge_type)
            graphs_test.add_graph_node_property(graph_id, node_id, 'RIGHT')
        if node_id == left_index:
            neighbors = [i for i in range(0,board_size*board_size,board_size)]
            edge_type = 1
            for neighbor_id in neighbors:
                graphs_test.add_graph_node_edge(graph_id, node_id, neighbor_id,edge_type)
            graphs_test.add_graph_node_property(graph_id, node_id, 'LEFT')
        if node_id == down_index:
            neighbors = [i for i in range(board_size*board_size-board_size,board_size*board_size,1)]
            edge_type = 2
            for neighbor_id in neighbors:
                graphs_test.add_graph_node_edge(graph_id, node_id, neighbor_id,edge_type)
            graphs_test.add_graph_node_property(graph_id, node_id, 'DOWN')
        if node_id == up_index:
            neighbors = [i for i in range(board_size)]
            edge_type = 3
            for neighbor_id in neighbors:
                graphs_test.add_graph_node_edge(graph_id, node_id, neighbor_id,edge_type)
            graphs_test.add_graph_node_property(graph_id, node_id, 'UP')
       

In [22]:
graphs_test.encode()

In [23]:
# pickle dump all the  (graphs_train, graphs_test, X_train, Y_train, X_test, Y_test)
import pickle
with open('data.pkl', 'wb') as f:
    pickle.dump((graphs_train, graphs_test, X_train, Y_train, X_test, Y_test), f)


In [24]:

# load all the data
import pickle

   # graphs_train, graphs_test, X_train, Y_train, X_test, Y_test = pickle.load(f)

In [25]:
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine

In [26]:
number_of_nodes = board_size*board_size
number_of_clauses = 100000
T = number_of_clauses*0.875
depth = 7
s = 2.0
message_size = 32
message_bits = 2
max_included_literals = 32
number_of_states = 200
epochs = 40

In [27]:
# Verify sizes and shapes
print(f"Number of Graphs: {graphs_train.number_of_graphs}")
print(f"Number of Nodes: {graphs_train.number_of_nodes}")
print(f"X Shape: {graphs_train.X.shape}")
print(f"Edge Array Shape: {graphs_train.edge.shape}")

# Check indices
print(f"Node Index Array: {graphs_train.node_index}")
print(f"Edge Index Array: {graphs_train.edge_index}")


Number of Graphs: 80000
Number of Nodes: 10000000
X Shape: (10000000, 4)
Edge Array Shape: (59840000, 2)
Node Index Array: [      0     125     250 ... 9999625 9999750 9999875]
Edge Index Array: [       0        5       10 ... 59839967 59839978 59839989]


In [28]:
tm = MultiClassGraphTsetlinMachine(
    number_of_clauses, T, s, depth=depth, message_size = message_size,
    message_bits = message_bits, number_of_state_bits = 8, boost_true_positive_feedback = 1,
  #  grid=(16*13,1,1), block=(128,1,1)
)


Initialization of sparse structure.


In [29]:
Y_test.mean()

0.4443

In [None]:


#torch.cuda.empty_cache()<
#torch.cuda.reset_max_memory_allocated() # Clear GPU memory
for i in range(epochs):
    tm.fit(graphs_train, Y_train, epochs=1, incremental=True)
  
    result_test = 100 * ( tm.predict(graphs_test) == Y_test).mean()
    result_train = 100 * (tm.predict(graphs_train) == Y_train).mean()
    
    print("#%d Testing Accuracy: %.2f%% Training Accuracy: %.2f%%" % (i+1, result_test, result_train))
    
   
  

#1 Testing Accuracy: 68.03% Training Accuracy: 67.95%
#2 Testing Accuracy: 66.45% Training Accuracy: 66.30%


In [None]:
tm.score(graphs_test)

In [56]:
predictions = tm.predict(graphs_test)

In [None]:
predictions.mean()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

print(classification_report(Y_test,predictions))


In [None]:
confusion_matrix(Y_test,predictions)

In [None]:
graphs_train.hypervectors

In [None]:
weights = tm.get_state()[1].reshape(2, -1)
for i in range(tm.number_of_clauses):
        print("Clause #%d W:(%d %d)" % (i, weights[0,i], weights[1,i]), end=' ')
        l = []
        for k in range(hypervector_size * 2):
            if tm.ta_action(0, i, k):
                if k < hypervector_size:
                    l.append("x%d" % (k))
                else:
                    l.append("NOT x%d" % (k - hypervector_size))
        print(" AND ".join(l))
