In [5]:
from GraphTsetlinMachine.graphs import Graphs
import numpy as np
from scipy.sparse import csr_matrix
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine
from time import time
import argparse
import random
import csv

In [3]:
def default_args(**kwargs):
    """Build the experiment's hyperparameter namespace.

    The options are declared on an ``argparse`` parser and parsed from the
    process command line with ``parse_known_args``, so flags the parser does
    not recognise (for example the ones a notebook kernel is started with)
    are ignored instead of raising an error.

    Args:
        **kwargs: Overrides applied after parsing. A keyword only takes
            effect when it matches an attribute that already exists on the
            parsed namespace (underscore form, e.g. ``number_of_clauses``);
            any other keyword is silently ignored.

    Returns:
        argparse.Namespace: The parsed options with the overrides applied.
    """
    # (flag, add_argument keyword arguments), kept in declaration order.
    option_specs = [
        ("--epochs", dict(default=10, type=int)),
        ("--number-of-clauses", dict(default=100, type=int)),
        ("--T", dict(default=100, type=int)),
        ("--s", dict(default=1.0, type=float)),
        ("--depth", dict(default=2, type=int)),
        ("--hypervector-size", dict(default=32, type=int)),
        ("--hypervector-bits", dict(default=2, type=int)),
        ("--message-size", dict(default=256, type=int)),
        ("--message-bits", dict(default=2, type=int)),
        ("--double-hashing", dict(dest='double_hashing', default=False, action='store_true')),
        ("--noise", dict(default=0.01, type=float)),
        ("--number-of-examples", dict(default=10000, type=int)),
        ("--max-included-literals", dict(default=4, type=int)),
    ]

    parser = argparse.ArgumentParser()
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    # The second element holds the unrecognised arguments; it is not used.
    namespace, _unrecognised = parser.parse_known_args()

    known_options = vars(namespace)
    for name, override in kwargs.items():
        if name in known_options:
            setattr(namespace, name, override)
    return namespace

# Parse the hyperparameters once with no overrides; later cells read them from `args`.
args = default_args()

In [11]:
print("Loading data")
input_file = 'hex_games_1_000_000_size_7.csv'

# Load every CSV row into memory as a list of string fields.
with open(input_file, mode='r', newline='') as infile:
    reader = csv.reader(infile)
    data = [row for row in reader]

# Keep only the final 0.5% of the rows; everything before `delete_index` is dropped.
delete_index = int(0.995 * len(data))
data = data[delete_index:]

# Split the retained rows: the first 2% become the training set and the
# remaining 98% the test set.
# NOTE(review): an 80%/20% split may have been intended here -- confirm
# before changing the 0.02 factor.
split_index = int(0.02 * len(data))
train_data, test_data = data[:split_index], data[split_index:]

# For each set, the last column is kept as Y (presumably the label) with
# shape (n, 1); all other columns form X, reshaped to one 7 x 7 grid per row.
X_train = np.array([row[:-1] for row in train_data], dtype=int).reshape(-1, 7, 7)
Y_train = np.array([row[-1:] for row in train_data], dtype=int)

X_test = np.array([row[:-1] for row in test_data], dtype=int).reshape(-1, 7, 7)
Y_test = np.array([row[-1:] for row in test_data], dtype=int)

# Report the resulting array shapes.
print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train.shape)
print("X_test shape:", X_test.shape)
print("Y_test shape:", Y_test.shape)


Loading data
X_train shape: (100, 7, 7)
Y_train shape: (100, 1)
X_test shape: (4900, 7, 7)
Y_test shape: (4900, 1)


In [14]:
print("Creating training data")

# Create train data

# Allocate exactly one graph per training board. Sizing the container from
# X_train keeps the number of allocated graphs equal to the number of graphs
# that are configured below (and to the number of training labels).
graphs_train = Graphs(
    X_train.shape[0],
    symbols=['A', 'B', 'O'],
    hypervector_size=args.hypervector_size,
    hypervector_bits=args.hypervector_bits,
)

patch_size = 1
dim = 7 - patch_size + 1

number_of_nodes = dim * dim

print(patch_size, dim, number_of_nodes)

for graph_id in range(X_train.shape[0]):
    graphs_train.set_number_of_graph_nodes(graph_id, number_of_nodes)

graphs_train.prepare_node_configuration()
print("Finished preparing node configuration")

map_size = 7 # 7 x 7
data_size = 10000
# Neighbour offsets as (dx, dy), where node_id = y * map_size + x.
directions = [
    (-1, 0), (1, 0),  # previous / next cell along x
    (0, -1), (0, 1),  # previous / next cell along y
    (-1, 1), (1, -1)  # the two diagonal neighbours of the hexagonal layout
]


def _in_bounds_neighbor_ids(x, y):
    """Return the node ids of every cell adjacent to (x, y) that lies on the board."""
    neighbor_ids = []
    for dx, dy in directions:
        neighbor_x = x + dx
        neighbor_y = y + dy
        # The bounds check has to run for each direction, not just the last one.
        if 0 <= neighbor_x < map_size and 0 <= neighbor_y < map_size:
            neighbor_ids.append(neighbor_y * map_size + neighbor_x)
    return neighbor_ids


# The adjacency depends only on the board size, so it is computed once and
# reused for every graph. Nodes are listed with x as the outer and y as the
# inner loop.
neighbors_by_node = {
    y * map_size + x: _in_bounds_neighbor_ids(x, y)
    for x in range(map_size)
    for y in range(map_size)
}

for graph_id in range(X_train.shape[0]):
    for node_id, neighbor_ids in neighbors_by_node.items():
        # The edge count is derived per node, so nothing carries over between
        # nodes, graphs, or earlier runs of this cell.
        number_of_outgoing_edges = len(neighbor_ids)
        graphs_train.add_graph_node(graph_id, node_id, number_of_outgoing_edges)


print("All nodes are acounted for")
graphs_train.prepare_edge_configuration()
edge_type = "Plain"

# Add exactly the edges that were counted above: one per in-bounds neighbour.
for graph_id in range(X_train.shape[0]):
    for node_id, neighbor_ids in neighbors_by_node.items():
        for neighbor_id in neighbor_ids:
            graphs_train.add_graph_node_edge(graph_id, node_id, neighbor_id, edge_type)


Creating training data
1 7 49
Finished preparing node configuration
All nodes are acounted for
