In [1]:
#%pip install graphtsetlinmachine pycuda pandas numpy keras tensorflow scikit-learn scipy tqdm pillow opencv-python optuna
#%pip install git+https://github.com/cair/tmu.git


# Optional: pin numpy to 2.0.0 (uncomment and run manually if needed).
# %pip uninstall -y numpy
# %pip install numpy==2.0.0

In [2]:
import os
from pathlib import Path
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine
from sklearn.metrics import classification_report, confusion_matrix
from src.datahandler import DataHandler
from src.graphhandler import GraphHandler

#from tqdm.auto import tqdm 

In [3]:
# Make sure the working folders used by this notebook exist (no error if they already do).
for folder in ("data", "models", "graphs"):
    os.makedirs(folder, exist_ok=True)

In [4]:
# Map each working-folder key to a relative Path of the same name.
paths = {name: Path(name) for name in ("data", "models", "graphs")}

In [5]:
# --- Dataset selection ---
# Board edge length; also used to build the dataset name below.
board_size = 3
# Number of samples requested from the DataHandler.
n_samples = 10_000
# Dataset identifier handed to DataHandler / GraphHandler via files={'data': dataset}.
dataset = f"{board_size}x{board_size}_2"

# --- Symbol encoding (forwarded to GraphHandler) ---
hypervector_size = 64
hypervector_bits = 2

In [6]:
# Load the configured dataset; the handler exposes the train/test splits used below.
dh = DataHandler(
    paths=paths,
    files={"data": dataset},
    dataloader="np.genfromtxt",
    n_samples=n_samples,
)

In [7]:
# Display the handler's `dataset` attribute as a quick check of which dataset is in use.
dh.dataset

'3x3_2'

In [8]:
# Quick overview of the loaded data: sample count, handler repr, split shapes,
# class distribution and balance flag, followed by the handler's own summary.
print(dh.n_samples)
print(dh)
for split in (dh.X_train, dh.y_train, dh.X_test, dh.y_test):
    print(split.shape)
print(dh.get_class_distribution())
print(f"Data balanced? {dh.is_data_balanced}")
dh.show_data_info()

10000
<src.datahandler.DataHandler object at 0x7faaf424bd30>
(8000, 9)
(8000,)
(2000, 9)
(2000,)
[5000 5000]
Data balanced? True
Data shape: (20000, 10)
X shape: (10000, 9)
y shape: (10000,)
Headers: ['cell0_0', 'cell0_1', 'cell0_2', 'cell1_0', 'cell1_1', 'cell1_2', 'cell2_0', 'cell2_1', 'cell2_2', 'winner']
X_train shape: (8000, 9)
y_train shape: (8000,)
X_test shape: (2000, 9)
y_test shape: (2000,)


In [9]:
# Build the training graphs from the training boards. The symbol list and the
# hypervector settings defined here are later reused for the test graphs via
# `init_with=g_train.graphs`.
g_train = GraphHandler(
    paths=paths,
    files={"data": dataset},
    data_array=dh.X_train,
    board_size=board_size,
    symbols=["RED", "BLUE", "UP", "DOWN", "RIGHT", "LEFT"],
    hypervector_size=hypervector_size,
    hypervector_bits=hypervector_bits,
    verbose_level=2,
)

g_train.build_complete_graphs()

Setting number of nodes to 13
Adding nodes
Adding edges and properties
Encoding graphs


In [10]:
# Print the handler's bookkeeping attributes one per line (same order as before),
# then dump the first training graph for a visual check.
for attribute in (
    "n_nodes",
    "symbols",
    "max_index",
    "right_index",
    "left_index",
    "down_index",
    "up_index",
):
    print(getattr(g_train, attribute))
g_train.graphs.print_graph(0)

9
['RED', 'BLUE', 'UP', 'DOWN', 'RIGHT', 'LEFT', 'ROW_0', 'COL_0', 'ROW_1', 'COL_1', 'ROW_2', 'COL_2']
8
9
10
11
12
* * * * * * ROW_0 COL_0 * * * * * * * * * * ROW_0 * * COL_1 * * * * * * * * ROW_0 * * * * COL_2 * * * * * * * COL_0 ROW_1 * * * * BLUE * * * * * * ROW_1 COL_1 * * * BLUE * * * * * * ROW_1 * * COL_2 * * * * * * * COL_0 * * ROW_2 * RED * * * * * * * * COL_1 ROW_2 * RED * * * * * * * * * ROW_2 COL_2 * * * * RIGHT * * * * * * * * * * * * LEFT * * * * * * * * * DOWN * * * * * * * * * * UP * * * * * * * * * 


In [11]:
# Build the test graphs, initialised from the training graphs (`init_with`).
g_test = GraphHandler(
    data_array=dh.X_test,
    init_with=g_train.graphs,
    verbose_level=2,
)
g_test.build_complete_graphs()

Setting number of nodes to 13
Adding nodes
Adding edges and properties
Encoding graphs


In [12]:
# Print the test handler's bookkeeping attributes one per line (same order as before),
# then dump the first test graph for a visual check.
for attribute in (
    "n_nodes",
    "symbols",
    "max_index",
    "right_index",
    "left_index",
    "down_index",
    "up_index",
):
    print(getattr(g_test, attribute))
g_test.graphs.print_graph(0)

9
['RED', 'BLUE', 'UP', 'DOWN', 'RIGHT', 'LEFT', 'ROW_0', 'COL_0', 'ROW_1', 'COL_1', 'ROW_2', 'COL_2']
8
9
10
11
12
* BLUE * * * * ROW_0 COL_0 * * * * * BLUE * * * * ROW_0 * * COL_1 * * RED * * * * * ROW_0 * * * * COL_2 RED * * * * * * COL_0 ROW_1 * * * * * * * * * * * ROW_1 COL_1 * * * BLUE * * * * * * ROW_1 * * COL_2 * * * * * * * COL_0 * * ROW_2 * RED * * * * * * * * COL_1 ROW_2 * * * * * * * * * * * ROW_2 COL_2 * * * * RIGHT * * * * * * * * * * * * LEFT * * * * * * * * * DOWN * * * * * * * * * * UP * * * * * * * * * 


In [13]:
# Save and load graphs in order to verify that they will work in other scripts/notebooks
dh.save_graphs(g_train.graphs, g_test.graphs)
graphs_train, graphs_test, X_train, y_train, X_test, y_test = dh.load_graphs()

In [14]:
# Verify sizes and shapes
print(f"Number of Graphs: {graphs_train.number_of_graphs}")
print(f"Number of Nodes: {graphs_train.number_of_nodes}")
print(f"X Shape: {graphs_train.X.shape}")
print(f"Edge Array Shape: {graphs_train.edge.shape}")

# Check indices
print(f"Node Index Array: {graphs_train.node_index}")
print(f"Edge Index Array: {graphs_train.edge_index}")


Number of Graphs: 8000
Number of Nodes: 104000
X Shape: (104000, 4)
Edge Array Shape: (480000, 2)
Node Index Array: [     0     13     26 ... 103961 103974 103987]
Edge Index Array: [     0      5     10 ... 479991 479994 479997]


In [15]:
# --- Graph Tsetlin Machine hyperparameters ---
number_of_clauses = 300
T = 0.875 * number_of_clauses  # threshold, expressed as a fraction of the clause count
s = 2.0
depth = 3  # 7
message_size = 32
message_bits = 2
epochs = 15

# NOTE(review): the three values below are defined here but are not used by any
# cell visible in this notebook (the model cell hard-codes number_of_state_bits=8
# and does not pass max_included_literals) - confirm whether they should be wired in.
number_of_nodes = board_size ** 2
max_included_literals = 32
number_of_states = 200

In [16]:
# Instantiate the classifier from the hyperparameters defined in the previous cell.
# NOTE(review): `max_included_literals` and `number_of_states` from the config cell
# are not forwarded here, and number_of_state_bits is hard-coded - confirm intent.
tm = MultiClassGraphTsetlinMachine(
    number_of_clauses,
    T,
    s,
    number_of_state_bits=8,
    boost_true_positive_feedback=1,
    depth=depth,
    message_size=message_size,
    message_bits=message_bits,
    grid=(16*13, 1, 1),
    block=(128, 1, 1),
)

Initialization of sparse structure.


In [None]:
# Train one epoch at a time (incremental fit) so accuracy can be reported after
# every epoch. Test accuracy is evaluated before training accuracy.
for i in range(epochs):
    tm.fit(graphs_train, y_train, epochs=1, incremental=True)

    # Element-wise "prediction equals label" masks; their mean is the accuracy.
    test_hits = tm.predict(graphs_test) == y_test
    result_test = 100 * test_hits.mean()
    train_hits = tm.predict(graphs_train) == y_train
    result_train = 100 * train_hits.mean()

    print("#%d Testing Accuracy: %.2f%% Training Accuracy: %.2f%%" % (i+1, result_test, result_train))
  

#1 Testing Accuracy: 61.75% Training Accuracy: 63.51%
#2 Testing Accuracy: 65.40% Training Accuracy: 65.75%


In [18]:
# Raw scores for the test graphs. The displayed array has one row per sample and
# two columns (presumably one score per class, as used by predict - TODO confirm).
tm.score(graphs_test)

array([[ 100,  -20],
       [ -69,  137],
       [ 225, -252],
       ...,
       [   9,   97],
       [-107,  253],
       [-318,  333]], dtype=int32)

In [19]:
# Predicted labels for the test graphs; reused by the report cells below.
predictions = tm.predict(graphs_test)
# NOTE(review): with 0/1 labels this is the fraction of samples predicted as
# class 1, not the accuracy. Use (predictions == y_test).mean() for accuracy.
predictions.mean()

0.6215

In [20]:
# Per-class precision / recall / F1 of the test predictions against the true labels.
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.73      0.55      0.63      1012
           1       0.63      0.80      0.71       988

    accuracy                           0.67      2000
   macro avg       0.68      0.67      0.67      2000
weighted avg       0.68      0.67      0.67      2000



In [21]:
# Confusion matrix of the test predictions (rows: true label, columns: predicted label).
confusion_matrix(y_test, predictions)

array([[556, 456],
       [201, 787]])

In [22]:
# Show the hypervector table of the training graphs. The displayed array has one row
# per symbol and `hypervector_bits` columns (presumably the bit positions, within
# `hypervector_size`, that encode each symbol - TODO confirm).
graphs_train.hypervectors

array([[39, 30],
       [28, 59],
       [60, 34],
       [40, 35],
       [46, 42],
       [29, 18],
       [61,  8],
       [40, 33],
       [ 0, 34],
       [39, 14],
       [59, 50],
       [33, 60]], dtype=uint32)

In [23]:
# Dump every clause: its two weights followed by the literals it includes.
# The second element of the model state is reshaped to two rows, printed as W:(row0 row1)
# (presumably one row per class - TODO confirm).
weights = tm.get_state()[1].reshape(2, -1)
for i in range(tm.number_of_clauses):
    print("Clause #%d W:(%d %d)" % (i, weights[0,i], weights[1,i]), end=' ')
    # Literal indices below hypervector_size are printed as plain features, the
    # remaining ones as negations of feature (index - hypervector_size). Only literals
    # for which ta_action(0, i, k) is truthy are listed.
    included_literals = [
        "x%d" % (k) if k < hypervector_size else "NOT x%d" % (k - hypervector_size)
        for k in range(hypervector_size * 2)
        if tm.ta_action(0, i, k)
    ]
    print(" AND ".join(included_literals))

Clause #0 W:(-36 11) x14 AND x39 AND x50 AND x59 AND NOT x0 AND NOT x1 AND NOT x2 AND NOT x3 AND NOT x4 AND NOT x5 AND NOT x6 AND NOT x7 AND NOT x8 AND NOT x9 AND NOT x10 AND NOT x11 AND NOT x12 AND NOT x13 AND NOT x15 AND NOT x16 AND NOT x17 AND NOT x18 AND NOT x19 AND NOT x20 AND NOT x21 AND NOT x22 AND NOT x23 AND NOT x24 AND NOT x25 AND NOT x26 AND NOT x27 AND NOT x29 AND NOT x30 AND NOT x31 AND NOT x32 AND NOT x33 AND NOT x34 AND NOT x35 AND NOT x36 AND NOT x37 AND NOT x38 AND NOT x40 AND NOT x41 AND NOT x42 AND NOT x43 AND NOT x44 AND NOT x45 AND NOT x46 AND NOT x47 AND NOT x48 AND NOT x49 AND NOT x51 AND NOT x52 AND NOT x53 AND NOT x54 AND NOT x55 AND NOT x56 AND NOT x57 AND NOT x58 AND NOT x60 AND NOT x61 AND NOT x62 AND NOT x63
Clause #1 W:(-29 4) x33 AND x50 AND x59 AND x60 AND NOT x0 AND NOT x1 AND NOT x2 AND NOT x3 AND NOT x4 AND NOT x5 AND NOT x6 AND NOT x7 AND NOT x8 AND NOT x9 AND NOT x10 AND NOT x11 AND NOT x12 AND NOT x13 AND NOT x14 AND NOT x15 AND NOT x16 AND NOT x17