In [1]:
#%pip install graphtsetlinmachine pycuda pandas numpy keras tensorflow scikit-learn scipy tqdm pillow opencv-python optuna
#%pip install git+https://github.com/cair/tmu.git


# !pip uninstall numpy
# !pip install numpy==2.0.0

In [2]:
import os
from pathlib import Path
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine
from sklearn.metrics import classification_report, confusion_matrix
from src.datahandler import DataHandler
from src.graphhandler import GraphHandler

#from tqdm.auto import tqdm 

In [3]:
os.makedirs("data", exist_ok=True)
os.makedirs("models", exist_ok=True)
os.makedirs("graphs", exist_ok=True)

In [4]:
paths = {
    "data": Path("data"),
    "models": Path("models"),
    "graphs": Path("graphs"),
}

In [5]:
hypervector_bits = 2
hypervector_size = 64
board_size = 11
missing_moves = 1

n_samples = 100000

dataset = f"{board_size}x{board_size}_{missing_moves}"

In [6]:
dh = DataHandler(paths=paths, files={'data': dataset}, dataloader='np.genfromtxt', n_samples=n_samples)

In [7]:
dh.dataset

'11x11_1'

In [8]:
print(dh.n_samples)
print(dh)
print(dh.X_train.shape)
print(dh.y_train.shape)
print(dh.X_test.shape)
print(dh.y_test.shape)
print(dh.get_class_distribution())
print(f"Data balanced? {dh.is_data_balanced}")
dh.show_data_info()

100000
<src.datahandler.DataHandler object at 0x7f9e826ec340>
(80000, 121)
(80000,)
(20000, 121)
(20000,)
[50000 50000]
Data balanced? True
Data shape: (200000, 122)
X shape: (100000, 121)
y shape: (100000,)
Headers: ['cell0_0', 'cell0_1', 'cell0_2', 'cell0_3', 'cell0_4', 'cell0_5', 'cell0_6', 'cell0_7', 'cell0_8', 'cell0_9', 'cell0_10', 'cell1_0', 'cell1_1', 'cell1_2', 'cell1_3', 'cell1_4', 'cell1_5', 'cell1_6', 'cell1_7', 'cell1_8', 'cell1_9', 'cell1_10', 'cell2_0', 'cell2_1', 'cell2_2', 'cell2_3', 'cell2_4', 'cell2_5', 'cell2_6', 'cell2_7', 'cell2_8', 'cell2_9', 'cell2_10', 'cell3_0', 'cell3_1', 'cell3_2', 'cell3_3', 'cell3_4', 'cell3_5', 'cell3_6', 'cell3_7', 'cell3_8', 'cell3_9', 'cell3_10', 'cell4_0', 'cell4_1', 'cell4_2', 'cell4_3', 'cell4_4', 'cell4_5', 'cell4_6', 'cell4_7', 'cell4_8', 'cell4_9', 'cell4_10', 'cell5_0', 'cell5_1', 'cell5_2', 'cell5_3', 'cell5_4', 'cell5_5', 'cell5_6', 'cell5_7', 'cell5_8', 'cell5_9', 'cell5_10', 'cell6_0', 'cell6_1', 'cell6_2', 'cell6_3', 'cell6

In [9]:
g_train = GraphHandler(paths=paths,
                        files={'data': dataset},
                        board_size=board_size,
                        data_array=dh.X_train,
                        symbols=['RED', 'BLUE','UP', 'DOWN', 'RIGHT','LEFT'],
                        hypervector_size=hypervector_size,
                        hypervector_bits=hypervector_bits,
                        verbose_level=2)

g_train.build_complete_graphs()

Setting number of nodes to 125
Adding nodes
Adding edges and properties
Encoding graphs


In [10]:
print(g_train.n_nodes)
print(g_train.symbols)
print(g_train.max_index)
print(g_train.right_index)
print(g_train.left_index)
print(g_train.down_index)
print(g_train.up_index)
g_train.graphs.print_graph(0)

121
['RED', 'BLUE', 'UP', 'DOWN', 'RIGHT', 'LEFT', 'ROW_0', 'COL_0', 'ROW_1', 'COL_1', 'ROW_2', 'COL_2', 'ROW_3', 'COL_3', 'ROW_4', 'COL_4', 'ROW_5', 'COL_5', 'ROW_6', 'COL_6', 'ROW_7', 'COL_7', 'ROW_8', 'COL_8', 'ROW_9', 'COL_9', 'ROW_10', 'COL_10']
120
121
122
123
124
RED * * * * * ROW_0 COL_0 * * * * * * * * * * * * * * * * ROW_9 * * * RED * * * * * ROW_0 * * COL_1 * * * * * * * * * * * * * * * * * * * * * * * * ROW_0 * * * * COL_2 * * * * * * * * * * * * * * * * * * * * * * ROW_0 * * * * * * COL_3 * * * * * * * * * * * * * * RED * * * * * ROW_0 * * * * * * * * COL_4 * * * * * COL_7 * * * * * * * BLUE * * * * ROW_0 * * * * * * * * * * COL_5 * * * * * * * * * * RED * * * * * ROW_0 * * * * * * * * * * * * COL_6 * * * * * * * * * * * * * * ROW_0 * * * * * * * * * * * * * * COL_7 * * * * * * RED * * * * * ROW_0 * * * * * * * * * * * * * * * * COL_8 * * * * * BLUE * * * * ROW_0 * * * * * * * * * * * * * * * * * * COL_9 * * * BLUE * * * * ROW_0 * * * * * * * * * * * * * * * * * * * * COL_

In [11]:
g_test = GraphHandler(data_array=dh.X_test, init_with=g_train.graphs, verbose_level=2)
g_test.build_complete_graphs()

Setting number of nodes to 13
Adding nodes
Adding edges and properties
Encoding graphs


In [12]:
print(g_test.n_nodes)
print(g_test.symbols)
print(g_test.max_index)
print(g_test.right_index)
print(g_test.left_index)
print(g_test.down_index)
print(g_test.up_index)
g_test.graphs.print_graph(0)

9
['RED', 'BLUE', 'UP', 'DOWN', 'RIGHT', 'LEFT', 'ROW_0', 'COL_0', 'ROW_1', 'COL_1', 'ROW_2', 'COL_2']
8
9
10
11
12
* * * * * * ROW_0 COL_0 * * * * * * * * * * * * * * * * ROW_9 * * * * * * * * * ROW_0 * * COL_1 * * * * * * * * * * * * * * * * * * * BLUE * * * * ROW_0 * * * * COL_2 * * * * * * ROW_6 * * * * * * * * * * BLUE * * * * * COL_0 ROW_1 * * * * * * * * * * * * * * * * * * * * * * * * * * * ROW_1 COL_1 * * * * * * * * * * * * * * * * * * * * * * * * * * ROW_1 * * COL_2 * * * * * * * * * * * * * * * * * BLUE * * * * * COL_0 * * ROW_2 * * * * * * * * * * * * * * * * * * * * * * * * * * COL_1 ROW_2 * * * * * * * * * * * * * * * * * * * * * * * * * * * ROW_2 COL_2 * * * * * * * * * * * * * * * * * * * * RIGHT * * * * * * * * * * * * * * * * * * * * * * * * * * * * LEFT * * * * * * * * * * * * * * * * * * * * * * * * * DOWN * * * * * * * * * * * * * * * * * * * * * * * * * * UP * * * * * * * * * * * * * * * * * * * * * * * * * 


In [13]:
# Save and load graphs in order to verify that they will work in other scripts/notebooks
dh.save_graphs(g_train.graphs, g_test.graphs)
graphs_train, graphs_test, X_train, y_train, X_test, y_test = dh.load_graphs()

In [14]:
# Verify sizes and shapes
print(f"Number of Graphs: {graphs_train.number_of_graphs}")
print(f"Number of Nodes: {graphs_train.number_of_nodes}")
print(f"X Shape: {graphs_train.X.shape}")
print(f"Edge Array Shape: {graphs_train.edge.shape}")

# Check indices
print(f"Node Index Array: {graphs_train.node_index}")
print(f"Edge Index Array: {graphs_train.edge_index}")


Number of Graphs: 80000
Number of Nodes: 10000000
X Shape: (10000000, 4)
Edge Array Shape: (59840000, 2)
Node Index Array: [      0     125     250 ... 9999625 9999750 9999875]
Edge Index Array: [       0        5       10 ... 59839967 59839978 59839989]


In [15]:
number_of_nodes = board_size*board_size
number_of_clauses = 100000
T = number_of_clauses*0.875
depth = 7
s = 2.0
message_size = 32
message_bits = 2
max_included_literals = 32
number_of_states = 200
epochs = 40

In [16]:
tm = MultiClassGraphTsetlinMachine(
    number_of_clauses,
    T,
    s,
    depth=depth, 
    message_size = message_size,
    message_bits = message_bits,
    number_of_state_bits = 8,
    boost_true_positive_feedback = 1,
    #grid=(16*13,1,1),
    #block=(128,1,1)
)

Initialization of sparse structure.


In [17]:
for i in range(epochs):
    tm.fit(graphs_train, y_train, epochs=1, incremental=True)
  
    result_test = 100 * (tm.predict(graphs_test) == y_test).mean()
    result_train = 100 * (tm.predict(graphs_train) == y_train).mean()
    
    print("#%d Testing Accuracy: %.2f%% Training Accuracy: %.2f%%" % (i+1, result_test, result_train))
  

#1 Testing Accuracy: 57.77% Training Accuracy: 71.90%


LogicError: cuCtxSynchronize failed: an illegal memory access was encountered

In [None]:
tm.score(graphs_test)

In [None]:
predictions = tm.predict(graphs_test)
predictions.mean()

In [None]:
print(classification_report(y_test, predictions))

In [None]:
confusion_matrix(y_test, predictions)

In [None]:
graphs_train.hypervectors

In [None]:
weights = tm.get_state()[1].reshape(2, -1)
for i in range(tm.number_of_clauses):
        print("Clause #%d W:(%d %d)" % (i, weights[0,i], weights[1,i]), end=' ')
        l = []
        for k in range(hypervector_size * 2):
            if tm.ta_action(0, i, k):
                if k < hypervector_size:
                    l.append("x%d" % (k))
                else:
                    l.append("NOT x%d" % (k - hypervector_size))
        print(" AND ".join(l))