In [1]:
import os
import pandas as pd
from clickhouse_driver import Client
import glob
from tqdm import tqdm
import re

In [2]:
CLICKHOUSE_HOST = 'localhost'
CLICKHOUSE_PORT = 9000
CLICKHOUSE_USER = 'admin'
CLICKHOUSE_PASSWORD = 'Helloworld'
CLICKHOUSE_DATABASE = 'tictactoe'

DATA_DIR = './data/data/data_5x5'

BOARD_SIZE = 5

BATCH_SIZE = 2000000

# Mapping from CSV values (0,1,2) to board symbols
VALUE_MAP = {0: '-', 1: 'X', 2: 'O'}

# Pattern to validate converted board values
CELL_VALUE_PATTERN = re.compile(r'^[XO\-]$')


In [3]:
try:
    client = Client(
        host=CLICKHOUSE_HOST,
        port=CLICKHOUSE_PORT,
        user=CLICKHOUSE_USER,
        password=CLICKHOUSE_PASSWORD
    )
    print("Connected to ClickHouse successfully!")
    # print(f"Server version: {client.connection.server_info.version_major}.{client.connection.server_info.version_minor}")
except Exception as e:
    print(f"Error connecting to ClickHouse: {e}")
    raise

Connected to ClickHouse successfully!


In [4]:
# Create database if it doesn't exist
client.execute(f"CREATE DATABASE IF NOT EXISTS {CLICKHOUSE_DATABASE}")
print(f"Database '{CLICKHOUSE_DATABASE}' created or already exists.")


Database 'tictactoe' created or already exists.


In [4]:
def check_database():
    print("="*80)
    print(f"DATABASE: {CLICKHOUSE_DATABASE}")
    print("="*80)
    
    databases = client.execute("SHOW DATABASES")
    db_exists = any(db[0] == CLICKHOUSE_DATABASE for db in databases)
    
    if not db_exists:
        print(f"Database '{CLICKHOUSE_DATABASE}' does not exist yet.")
        print("Run the database creation cell first.")
        return
    
    # Get all tables
    tables = client.execute(f"SHOW TABLES FROM {CLICKHOUSE_DATABASE}")
    
    if not tables:
        print(f"Database exists but contains no tables yet.")
        return
    
    print(f"\nFound {len(tables)} table(s):\n")
    print(f"{'Table Name':<25} {'Layer':>8} {'Row Count':>15} {'Unique Keys':>15}")
    print("-" * 80)
    
    total_rows = 0
    ttt_tables = []
    other_tables = []
    
    for table in tables:
        table_name = table[0]
        
        # Get row count
        count_query = f"SELECT COUNT(*) FROM {CLICKHOUSE_DATABASE}.{table_name}"
        row_count = client.execute(count_query)[0][0]
        total_rows += row_count
        
        if table_name.startswith('ttt_'):
            # Extract layer number
            layer_match = re.search(r'l(\d+)', table_name)
            layer_num = int(layer_match.group(1)) if layer_match else 0
            
            # Get unique canonical forms count
            unique_query = f"SELECT COUNT(DISTINCT canonical_form) FROM {CLICKHOUSE_DATABASE}.{table_name}"
            unique_count = client.execute(unique_query)[0][0]
            
            ttt_tables.append((table_name, layer_num, row_count, unique_count))
        else:
            other_tables.append((table_name, row_count))
    
    ttt_tables.sort(key=lambda x: x[1])
    
    for table_name, layer_num, row_count, unique_count in ttt_tables:
        print(f"{table_name:<25} {layer_num:>8} {row_count:>15,} {unique_count:>15,}")
    
    for table_name, row_count in other_tables:
        print(f"{table_name:<25} {'N/A':>8} {row_count:>15,} {'N/A':>15}")
    
    print("-" * 80)
    print(f"{'TOTAL ROWS':<25} {total_rows:>32,}")
    print("="*80)

# Run the check
check_database()


DATABASE: tictactoe

Found 4 table(s):

Table Name                   Layer       Row Count     Unique Keys
--------------------------------------------------------------------------------
ttt_5_l9                         9           7,428           7,428
ttt_5_l10                       10          23,462          23,462
ttt_5_l11                       11         349,302         349,302
ttt_5_l12                       12         200,000         200,000
--------------------------------------------------------------------------------
TOTAL ROWS                                         580,192


In [5]:
def clear_database(mode='all', layer_numbers=None):
    print("=" * 80)
    print("CLEAR DATABASE WARNING")
    print("=" * 80)

    # Get list of databases
    databases = client.execute("SHOW DATABASES")

    db_exists = any(db[0] == CLICKHOUSE_DATABASE for db in databases)

    if not db_exists:
        print(f"Database '{CLICKHOUSE_DATABASE}' does not exist.")
        return

    tables = client.execute(f"SHOW TABLES FROM {CLICKHOUSE_DATABASE}")

    if not tables:
        print(f"Database '{CLICKHOUSE_DATABASE}' is already empty.")
        return

    if mode == 'database':
        print(f"\nDropping entire database: {CLICKHOUSE_DATABASE}")
        print(f"This will delete {len(tables)} table(s).")
        print("Uncomment the DROP DATABASE line in the code to execute.")
        # client.execute(f"DROP DATABASE {CLICKHOUSE_DATABASE}")  # Uncomment to run

    elif mode == 'all':
        print(f"\nDropping all {len(tables)} table(s) from database: {CLICKHOUSE_DATABASE}\n")

        for table in tables:
            table_name = table[0]
            # print(f"Dropping table: {table_name}... skipped (safety mode)")
            client.execute(f"DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE}.{table_name}")  # Uncomment to run
            print(f"Dropping table: {table_name}")

        print("\nUncomment the DROP TABLE line in the code to execute.")

    elif mode == 'specific' and layer_numbers:
        print(f"\nDropping tables for layers: {layer_numbers}\n")

        for layer_num in layer_numbers:
            table_name = get_table_name(BOARD_SIZE, layer_num)
            table_exists = any(t[0] == table_name for t in tables)

            if table_exists:
                print(f"Dropping table: {table_name}... skipped (safety mode)")
                # client.execute(f"DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE}.{table_name}")  # Uncomment to run
            else:
                print(f"Table {table_name} does not exist - skipped")

        print("\nUncomment the DROP TABLE line in the code to execute.")

    else:
        print("Invalid mode or missing parameters.")
        print("Use mode='all', mode='database', or mode='specific' with layer_numbers=[9, 10, 11, ...]")

    print("=" * 80)

In [8]:
# ============================================================================
# CLEAR DATABASE OPTIONS - Choose one option below and uncomment it to run
# ============================================================================

# Option 1: Drop ALL tables (keeps database, removes all tables)
clear_database(mode='all')

# Option 2: Drop specific layer tables (e.g., layers 9, 10, 11)
# clear_database(mode='specific', layer_numbers=[9, 10, 11])

# Option 3: Drop the ENTIRE database (removes everything)
# clear_database(mode='database')

# Option 4: Check what's in the database first (safe to run)

Database 'tictactoe' is already empty.


In [9]:
def get_table_name(board_size, layer_num):
    """Generate table name like ttt_5_l9, ttt_5_l10, etc."""
    return f"ttt_{board_size}_l{layer_num}"

def create_layer_table(client, database, board_size, layer_num):
    """
    Create a table for a specific layer with canonical form as primary key
    and win_actor column
    """
    table_name = get_table_name(board_size, layer_num)
    
    # Using FixedString(1) for board cells (stores 'X', 'O', or '-')
    columns = []
    for row in range(1, board_size + 1):
        for col in range(1, board_size + 1):
            columns.append(f"i{row}{col} FixedString(1)")
    
    columns_str = ",\n    ".join(columns)
    
    create_table_query = f"""
    CREATE TABLE IF NOT EXISTS {database}.{table_name} (
        canonical_form String,
        win_actor FixedString(1),
        {columns_str}
    ) ENGINE = MergeTree()
    PRIMARY KEY (canonical_form)
    ORDER BY (canonical_form)
    """
    
    client.execute(create_table_query)
    return table_name

print("Table creation functions defined.")


Table creation functions defined.


In [12]:
# Get all CSV files sorted by layer number
csv_files = sorted(
    glob.glob(os.path.join(DATA_DIR, 'tic_tac_toe_layer_*.csv')),
    key=lambda x: int(re.search(r'layer_(\d+)', x).group(1))
)

print(f"Found {len(csv_files)} CSV files to process:")
for csv_file in csv_files:
    file_size = os.path.getsize(csv_file) / (1024 ** 2)  # Convert to MB
    print(f"  - {os.path.basename(csv_file)} ({file_size:.2f} MB)")


Found 17 CSV files to process:
  - tic_tac_toe_layer_9.csv (0.36 MB)
  - tic_tac_toe_layer_10.csv (1.14 MB)
  - tic_tac_toe_layer_11.csv (16.99 MB)
  - tic_tac_toe_layer_12.csv (39.58 MB)
  - tic_tac_toe_layer_13.csv (257.17 MB)
  - tic_tac_toe_layer_14.csv (439.31 MB)
  - tic_tac_toe_layer_15.csv (1610.47 MB)
  - tic_tac_toe_layer_16.csv (1997.82 MB)
  - tic_tac_toe_layer_17.csv (4491.46 MB)
  - tic_tac_toe_layer_18.csv (3931.41 MB)
  - tic_tac_toe_layer_19.csv (5484.89 MB)
  - tic_tac_toe_layer_20.csv (3199.15 MB)
  - tic_tac_toe_layer_21.csv (2641.43 MB)
  - tic_tac_toe_layer_22.csv (914.60 MB)
  - tic_tac_toe_layer_23.csv (384.28 MB)
  - tic_tac_toe_layer_24.csv (59.05 MB)
  - tic_tac_toe_layer_25.csv (7.13 MB)


In [14]:
def convert_to_symbols(numeric_values):
    """Convert numeric values (0,1,2) to symbols ('-','X','O')"""
    return [VALUE_MAP[v] for v in numeric_values]

def generate_canonical_form(board_values):
    """Generate canonical form string from board values"""
    return ''.join(board_values)

def check_winner(board_values, board_size):
    """
    Check who won the game on the board
    Returns: 'X', 'O', or '-' (tie/ongoing)
    
    board_values: list of 'X', 'O', '-' symbols (flattened board)
    board_size: size of the board (e.g., 5 for 5x5)
    """
    # Convert flat list to 2D board for easier checking
    board = []
    for i in range(board_size):
        row = board_values[i * board_size:(i + 1) * board_size]
        board.append(row)
    
    # Check rows
    for row in board:
        if all(cell == 'X' for cell in row):
            return 'X'
        if all(cell == 'O' for cell in row):
            return 'O'
    
    # Check columns
    for col in range(board_size):
        column = [board[row][col] for row in range(board_size)]
        if all(cell == 'X' for cell in column):
            return 'X'
        if all(cell == 'O' for cell in column):
            return 'O'
    
    # Check main diagonal (top-left to bottom-right)
    main_diag = [board[i][i] for i in range(board_size)]
    if all(cell == 'X' for cell in main_diag):
        return 'X'
    if all(cell == 'O' for cell in main_diag):
        return 'O'
    
    # Check anti-diagonal (top-right to bottom-left)
    anti_diag = [board[i][board_size - 1 - i] for i in range(board_size)]
    if all(cell == 'X' for cell in anti_diag):
        return 'X'
    if all(cell == 'O' for cell in anti_diag):
        return 'O'
    
    # No winner yet (tie or ongoing game)
    return '-'

def validate_board_values(board_values):
    """Validate that all board values are 'X', 'O', or '-'"""
    invalid_values = []
    for idx, value in enumerate(board_values):
        if not CELL_VALUE_PATTERN.match(str(value)):
            invalid_values.append((idx, value))
    
    return len(invalid_values) == 0, invalid_values

def insert_batch_with_validation(client, database, table_name, data_batch):
    """Insert data batch with validation and winner detection"""
    if not data_batch:
        return 0, 0
    
    valid_rows = []
    invalid_count = 0
    
    for board_values in data_batch:
        # Validate using regex
        is_valid, invalid_vals = validate_board_values(board_values)
        
        if is_valid:
            # Generate canonical form
            canonical_form = generate_canonical_form(board_values)
            
            # Detect winner
            win_actor = check_winner(board_values, BOARD_SIZE)
            
            # Prepare row: [canonical_form, win_actor, i11, i12, ..., i55]
            row = [canonical_form, win_actor] + board_values
            valid_rows.append(row)
        else:
            invalid_count += 1
            # Optionally log invalid rows
            # print(f"Invalid row with values at positions: {invalid_vals}")
    
    # Insert valid rows
    if valid_rows:
        insert_query = f"INSERT INTO {database}.{table_name} VALUES"
        client.execute(insert_query, valid_rows)
    
    return len(valid_rows), invalid_count

print("Data validation, winner detection, and insertion functions defined.")


Data validation, winner detection, and insertion functions defined.


In [15]:
# Process each CSV file
total_rows_inserted = 0
total_rows_invalid = 0
layer_stats = []

for csv_file in csv_files:
    # Extract layer number from filename
    layer_match = re.search(r'layer_(\d+)', os.path.basename(csv_file))
    layer_num = int(layer_match.group(1))
    
    print(f"\n{'='*70}")
    print(f"Processing Layer {layer_num}")
    print(f"{'='*70}")
    
    # Create table for this layer
    table_name = create_layer_table(client, CLICKHOUSE_DATABASE, BOARD_SIZE, layer_num)
    print(f"✓ Table '{table_name}' created/verified")
    
    # Count total lines for progress bar
    with open(csv_file, 'r') as f:
        total_lines = sum(1 for _ in f)
    
    print(f"Total rows to process: {total_lines:,}")
    
    # Read and insert data in batches
    batch = []
    rows_read = 0
    layer_valid = 0
    layer_invalid = 0
    
    with open(csv_file, 'r') as f:
        with tqdm(total=total_lines, desc=f"Loading Layer {layer_num}") as pbar:
            for line in f:
                # Parse the line (comma-separated values)
                try:
                    # Read numeric values from CSV
                    numeric_values = [int(x.strip()) for x in line.strip().split(',')]
                    
                    if len(numeric_values) == BOARD_SIZE * BOARD_SIZE:  # Ensure correct number of values
                        # Convert numeric values (0,1,2) to symbols ('-','X','O')
                        symbol_values = convert_to_symbols(numeric_values)
                        batch.append(symbol_values)
                        rows_read += 1
                        
                        # Insert batch when it reaches BATCH_SIZE
                        if len(batch) >= BATCH_SIZE:
                            valid, invalid = insert_batch_with_validation(
                                client, CLICKHOUSE_DATABASE, table_name, batch
                            )
                            layer_valid += valid
                            layer_invalid += invalid
                            batch = []
                except (ValueError, KeyError) as e:
                    # Skip rows that can't be parsed or have invalid values
                    layer_invalid += 1
                
                pbar.update(1)
            
            # Insert remaining data
            if batch:
                valid, invalid = insert_batch_with_validation(
                    client, CLICKHOUSE_DATABASE, table_name, batch
                )
                layer_valid += valid
                layer_invalid += invalid
    
    total_rows_inserted += layer_valid
    total_rows_invalid += layer_invalid
    
    layer_stats.append({
        'layer': layer_num,
        'table': table_name,
        'valid': layer_valid,
        'invalid': layer_invalid,
        'total': rows_read
    })
    
    print(f"✓ Layer {layer_num} completed:")
    print(f"  - Valid rows inserted: {layer_valid:,}")
    print(f"  - Invalid rows skipped: {layer_invalid:,}")
    print(f"  - Total processed: {rows_read:,}")

print(f"\n{'='*70}")
print(f"SUMMARY")
print(f"{'='*70}")
print(f"Total valid rows inserted: {total_rows_inserted:,}")
print(f"Total invalid rows skipped: {total_rows_invalid:,}")
print(f"{'='*70}")



Processing Layer 9
✓ Table 'ttt_5_l9' created/verified
Total rows to process: 7,428


Loading Layer 9: 100%|█████████████████████████████████████████████████████████████████████████████████| 7428/7428 [00:00<00:00, 57669.24it/s]


✓ Layer 9 completed:
  - Valid rows inserted: 7,428
  - Invalid rows skipped: 0
  - Total processed: 7,428

Processing Layer 10
✓ Table 'ttt_5_l10' created/verified
Total rows to process: 23,462


Loading Layer 10: 100%|██████████████████████████████████████████████████████████████████████████████| 23462/23462 [00:00<00:00, 61635.28it/s]


✓ Layer 10 completed:
  - Valid rows inserted: 23,462
  - Invalid rows skipped: 0
  - Total processed: 23,462

Processing Layer 11
✓ Table 'ttt_5_l11' created/verified
Total rows to process: 349,302


Loading Layer 11: 100%|████████████████████████████████████████████████████████████████████████████| 349302/349302 [00:05<00:00, 58370.11it/s]


✓ Layer 11 completed:
  - Valid rows inserted: 349,302
  - Invalid rows skipped: 0
  - Total processed: 349,302

Processing Layer 12
✓ Table 'ttt_5_l12' created/verified
Total rows to process: 813,720


Loading Layer 12: 100%|████████████████████████████████████████████████████████████████████████████| 813720/813720 [00:13<00:00, 60100.80it/s]


✓ Layer 12 completed:
  - Valid rows inserted: 813,720
  - Invalid rows skipped: 0
  - Total processed: 813,720

Processing Layer 13
✓ Table 'ttt_5_l13' created/verified
Total rows to process: 5,287,422


Loading Layer 13: 100%|██████████████████████████████████████████████████████████████████████████| 5287422/5287422 [01:27<00:00, 60183.61it/s]


✓ Layer 13 completed:
  - Valid rows inserted: 5,287,422
  - Invalid rows skipped: 0
  - Total processed: 5,287,422

Processing Layer 14
✓ Table 'ttt_5_l14' created/verified
Total rows to process: 9,032,256


Loading Layer 14: 100%|██████████████████████████████████████████████████████████████████████████| 9032256/9032256 [02:30<00:00, 60203.33it/s]


✓ Layer 14 completed:
  - Valid rows inserted: 9,032,256
  - Invalid rows skipped: 0
  - Total processed: 9,032,256

Processing Layer 15
✓ Table 'ttt_5_l15' created/verified
Total rows to process: 33,111,744


Loading Layer 15: 100%|████████████████████████████████████████████████████████████████████████| 33111744/33111744 [09:11<00:00, 60060.92it/s]


✓ Layer 15 completed:
  - Valid rows inserted: 33,111,744
  - Invalid rows skipped: 0
  - Total processed: 33,111,744

Processing Layer 16
✓ Table 'ttt_5_l16' created/verified
Total rows to process: 41,075,840


Loading Layer 16: 100%|████████████████████████████████████████████████████████████████████████| 41075840/41075840 [11:26<00:00, 59826.14it/s]


✓ Layer 16 completed:
  - Valid rows inserted: 41,075,840
  - Invalid rows skipped: 0
  - Total processed: 41,075,840

Processing Layer 17
✓ Table 'ttt_5_l17' created/verified
Total rows to process: 92,345,838


Loading Layer 17: 100%|████████████████████████████████████████████████████████████████████████| 92345838/92345838 [25:40<00:00, 59942.83it/s]


✓ Layer 17 completed:
  - Valid rows inserted: 92,345,838
  - Invalid rows skipped: 0
  - Total processed: 92,345,838

Processing Layer 18
✓ Table 'ttt_5_l18' created/verified
Total rows to process: 80,831,050


Loading Layer 18: 100%|████████████████████████████████████████████████████████████████████████| 80831050/80831050 [22:54<00:00, 58813.35it/s]


✓ Layer 18 completed:
  - Valid rows inserted: 80,831,050
  - Invalid rows skipped: 0
  - Total processed: 80,831,050

Processing Layer 19
✓ Table 'ttt_5_l19' created/verified
Total rows to process: 112,771,060


Loading Layer 19: 100%|██████████████████████████████████████████████████████████████████████| 112771060/112771060 [32:59<00:00, 56971.04it/s]


✓ Layer 19 completed:
  - Valid rows inserted: 112,771,060
  - Invalid rows skipped: 0
  - Total processed: 112,771,060

Processing Layer 20
✓ Table 'ttt_5_l20' created/verified
Total rows to process: 65,775,534


Loading Layer 20: 100%|████████████████████████████████████████████████████████████████████████| 65775534/65775534 [23:25<00:00, 46805.61it/s]


✓ Layer 20 completed:
  - Valid rows inserted: 65,775,534
  - Invalid rows skipped: 0
  - Total processed: 65,775,534

Processing Layer 21
✓ Table 'ttt_5_l21' created/verified
Total rows to process: 54,308,678


Loading Layer 21: 100%|████████████████████████████████████████████████████████████████████████| 54308678/54308678 [19:07<00:00, 47308.21it/s]


✓ Layer 21 completed:
  - Valid rows inserted: 54,308,678
  - Invalid rows skipped: 0
  - Total processed: 54,308,678

Processing Layer 22
✓ Table 'ttt_5_l22' created/verified
Total rows to process: 18,804,464


Loading Layer 22: 100%|████████████████████████████████████████████████████████████████████████| 18804464/18804464 [06:44<00:00, 46503.07it/s]


✓ Layer 22 completed:
  - Valid rows inserted: 18,804,464
  - Invalid rows skipped: 0
  - Total processed: 18,804,464

Processing Layer 23
✓ Table 'ttt_5_l23' created/verified
Total rows to process: 7,900,866


Loading Layer 23: 100%|██████████████████████████████████████████████████████████████████████████| 7900866/7900866 [02:45<00:00, 47839.24it/s]


✓ Layer 23 completed:
  - Valid rows inserted: 7,900,866
  - Invalid rows skipped: 0
  - Total processed: 7,900,866

Processing Layer 24
✓ Table 'ttt_5_l24' created/verified
Total rows to process: 1,213,996


Loading Layer 24: 100%|██████████████████████████████████████████████████████████████████████████| 1213996/1213996 [00:25<00:00, 46850.12it/s]


✓ Layer 24 completed:
  - Valid rows inserted: 1,213,996
  - Invalid rows skipped: 0
  - Total processed: 1,213,996

Processing Layer 25
✓ Table 'ttt_5_l25' created/verified
Total rows to process: 146,629


Loading Layer 25: 100%|████████████████████████████████████████████████████████████████████████████| 146629/146629 [00:02<00:00, 50716.76it/s]

✓ Layer 25 completed:
  - Valid rows inserted: 146,629
  - Invalid rows skipped: 0
  - Total processed: 146,629

SUMMARY
Total valid rows inserted: 523,799,289
Total invalid rows skipped: 0





In [16]:
# Verify data insertion by querying each layer table
print("\n" + "="*70)
print("DATA VERIFICATION")
print("="*70)
print(f"{'Table Name':<20} {'Layer':>8} {'Row Count':>15} {'Status':>15}")
print("-" * 70)

verification_total = 0
for stat in layer_stats:
    table_name = stat['table']
    layer_num = stat['layer']
    
    # Query the actual count from the database
    count_query = f"SELECT COUNT(*) FROM {CLICKHOUSE_DATABASE}.{table_name}"
    actual_count = client.execute(count_query)[0][0]
    verification_total += actual_count
    
    # Check if count matches expected
    status = "✓ OK" if actual_count == stat['valid'] else "⚠ MISMATCH"
    
    print(f"{table_name:<20} {layer_num:>8} {actual_count:>15,} {status:>15}")

print("-" * 70)
print(f"{'TOTAL':<20} {verification_total:>24,}")
print("="*70)



DATA VERIFICATION
Table Name              Layer       Row Count          Status
----------------------------------------------------------------------
ttt_5_l9                    9           7,428            ✓ OK
ttt_5_l10                  10          23,462            ✓ OK
ttt_5_l11                  11         349,302            ✓ OK
ttt_5_l12                  12         813,720            ✓ OK
ttt_5_l13                  13       5,287,422            ✓ OK
ttt_5_l14                  14       9,032,256            ✓ OK
ttt_5_l15                  15      33,111,744            ✓ OK
ttt_5_l16                  16      41,075,840            ✓ OK
ttt_5_l17                  17      92,345,838            ✓ OK
ttt_5_l18                  18      80,831,050            ✓ OK
ttt_5_l19                  19     112,771,060            ✓ OK
ttt_5_l20                  20      65,775,534            ✓ OK
ttt_5_l21                  21      54,308,678            ✓ OK
ttt_5_l22                  22      18,804,

In [17]:
# Sample query: Get some sample data from first layer
if layer_stats:
    first_layer = layer_stats[0]
    sample_table = first_layer['table']
    sample_layer = first_layer['layer']
    
    print(f"\n{'='*70}")
    print(f"SAMPLE DATA from {sample_table} (Layer {sample_layer})")
    print("="*70)
    
    sample_data = client.execute(f"""
        SELECT * FROM {CLICKHOUSE_DATABASE}.{sample_table} 
        LIMIT 5
    """)
    
    print(f"\nShowing first 5 rows:")
    print("-" * 70)
    for idx, row in enumerate(sample_data, 1):
        canonical_form = row[0]
        board_values = row[1:]
        print(f"\nRow {idx}:")
        print(f"  Canonical Form: {canonical_form}")
        print(f"  Board State (5x5):")
        for i in range(0, 25, 5):
            print(f"    {' '.join(str(v) for v in board_values[i:i+5])}")
    
    print("\n" + "="*70)
    
    # Query to check uniqueness of canonical forms
    print("\nChecking canonical form uniqueness...")
    for stat in layer_stats[:3]:  # Check first 3 layers
        table_name = stat['table']
        total_count = client.execute(f"SELECT COUNT(*) FROM {CLICKHOUSE_DATABASE}.{table_name}")[0][0]
        unique_count = client.execute(f"SELECT COUNT(DISTINCT canonical_form) FROM {CLICKHOUSE_DATABASE}.{table_name}")[0][0]
        print(f"  {table_name}: {unique_count:,} unique / {total_count:,} total")
else:
    print("No data to sample.")



SAMPLE DATA from ttt_5_l9 (Layer 9)

Showing first 5 rows:
----------------------------------------------------------------------

Row 1:
  Canonical Form: ----------------OOOOXXXXX
  Board State (5x5):
    X - - - -
    - - - - -
    - - - - -
    - - O O O
    O X X X X

Row 2:
  Canonical Form: ---------------O-OOOXXXXX
  Board State (5x5):
    X - - - -
    - - - - -
    - - - - -
    - O - O O
    O X X X X

Row 3:
  Canonical Form: ---------------OO-OOXXXXX
  Board State (5x5):
    X - - - -
    - - - - -
    - - - - -
    - O O - O
    O X X X X

Row 4:
  Canonical Form: ---------------XXXXX-OOOO
  Board State (5x5):
    X - - - -
    - - - - -
    - - - - -
    - X X X X
    X - O O O

Row 5:
  Canonical Form: ---------------XXXXXO-OOO
  Board State (5x5):
    X - - - -
    - - - - -
    - - - - -
    - X X X X
    X O - O O


Checking canonical form uniqueness...
  ttt_5_l9: 7,428 unique / 7,428 total
  ttt_5_l10: 23,462 unique / 23,462 total
  ttt_5_l11: 349,302 unique / 349

In [18]:
# List all tables in the database
print("\n" + "="*70)
print("ALL TABLES IN DATABASE")
print("="*70)

tables = client.execute(f"SHOW TABLES FROM {CLICKHOUSE_DATABASE}")
print(f"\nFound {len(tables)} tables:")
for table in tables:
    table_name = table[0]
    if table_name.startswith('ttt_'):
        # Extract layer number from table name
        layer_match = re.search(r'l(\d+)', table_name)
        if layer_match:
            count = client.execute(f"SELECT COUNT(*) FROM {CLICKHOUSE_DATABASE}.{table_name}")[0][0]
            print(f"  - {table_name}: {count:,} rows")
    else:
        print(f"  - {table_name}")

print("="*70)



ALL TABLES IN DATABASE

Found 17 tables:
  - ttt_5_l10: 23,462 rows
  - ttt_5_l11: 349,302 rows
  - ttt_5_l12: 813,720 rows
  - ttt_5_l13: 5,287,422 rows
  - ttt_5_l14: 9,032,256 rows
  - ttt_5_l15: 33,111,744 rows
  - ttt_5_l16: 41,075,840 rows
  - ttt_5_l17: 92,345,838 rows
  - ttt_5_l18: 80,831,050 rows
  - ttt_5_l19: 112,771,060 rows
  - ttt_5_l20: 65,775,534 rows
  - ttt_5_l21: 54,308,678 rows
  - ttt_5_l22: 18,804,464 rows
  - ttt_5_l23: 7,900,866 rows
  - ttt_5_l24: 1,213,996 rows
  - ttt_5_l25: 146,629 rows
  - ttt_5_l9: 7,428 rows


In [19]:
# Close connection
client.disconnect()
print("\n" + "="*70)
print("Disconnected from ClickHouse.")
print("="*70)



Disconnected from ClickHouse.
