## Distance predictor with simple NN

A simple self-supervised setting for predicting the number of moves it takes to reach the final position.

This is a starting place to check things like state space representation (does using state = [1, 2, 3, 1] work), and just generally how well NNs can approximate permutation problems.

### Basic outline:

- Generate a set of `n'` moves by uniformly sampling from available actions, with `n'` randomly sampled from some probability distribution
- Use greedy_reduce to simplify those moves, leaving `n` moves with $n \leq n'$
- Apply these `n` moves on a puzzle to reach `start_state`
- Batch produce pairs of `x = start_state, y = n`
- A network $\mathcal{F}$ takes `start_state` as input, and the target output is `n` (the reduced move count, i.e. the `y` of each pair)

### Some preliminary details

- Loss function: mean square loss
- Neural network weights: $|s| \times 128$, $128 \times 128$, $128 \times 1$
- For 2x2 puzzles, we only need up to 10 - 14 moves
- State-space representation should be normalized?

### Some hypotheses

- Expecting better performance on low `n` over high `n`
- Expecting weird things to happen at `n` > 10.


In [86]:
import json
from typing import Dict, List
from collections import OrderedDict

import numpy as np
import torch

from tqdm import tqdm

%pprint

Pretty printing has been turned ON


In [2]:
from src.mechanism.permute import reverse_perm, permute_with_swap, perm_to_swap
from src.mechanism.utils import get_inverse_move
from src.mechanism.reduce import iterate_reduce_sequence

### Load a puzzle 
Each puzzle is trained separately, so only one puzzle is loaded at a time.

In [97]:
def load_puzzle_moves(
    puzzle_name: str, convert_to_swaps=True
) -> (Dict[str, List[int]], int):
    """Load a puzzle's move table and the number of positions it acts on.

    Reads ``puzzles/<puzzle_name>/moves.json``, adds a ``-<name>`` entry
    holding the reverse of every move that differs from its own reverse, and
    optionally converts every permutation with ``perm_to_swap``.

    Args:
        puzzle_name: Directory name under ``puzzles/`` containing ``moves.json``.
        convert_to_swaps: When true, every permutation in the table is replaced
            by the result of ``perm_to_swap``.

    Returns:
        A ``(moves, num_states)`` pair: the move table keyed by move name, and
        the length of the first permutation found in the file.
    """
    with open(f"puzzles/{puzzle_name}/moves.json") as handle:
        move_table = json.load(handle)

    # The size is read off the first permutation only.
    first_perm = list(move_table.values())[0]
    num_states = len(first_perm)

    # Reverses are gathered in a separate dict so the table keeps its size
    # while it is being iterated; self-inverse moves get no "-" entry.
    inverse_table = {}
    for name, perm in move_table.items():
        inverse = reverse_perm(perm)
        if inverse != perm:
            inverse_table[f"-{name}"] = inverse
    move_table.update(inverse_table)

    if convert_to_swaps:
        for name in move_table:
            move_table[name] = perm_to_swap(move_table[name])

    # The solved position from puzzles/<puzzle_name>/puzzles.csv is deliberately
    # not loaded: only the structure of the puzzle (its size) is needed here.
    return move_table, num_states


# Puzzle whose move table is used by the following cells.
puzzle_name = "cube_2x2x2"
move_dict, num_states = load_puzzle_moves(puzzle_name)

# Move names as an array (table iteration order) so they can be sampled from.
move_names = np.array(list(move_dict))

# Identity arrangement 0 .. num_states - 1, used as the reference state.
final_state = [*range(num_states)]

print(f"Loaded {puzzle_name} with {len(move_names)} moves and {num_states} states")

Loaded cube_2x2x2 with 12 moves and 24 states


### Data generation

The self-supervised part: generate a sequence of `n'` moves by uniformly sampling from the available actions, with `n'` randomly sampled from some probability distribution. The sequence is then reduced, and the reduced length `n` is used as the label.

In [98]:
def sample_moves(move_names: List[str], n: int) -> np.ndarray:
    """Draw ``n`` move names uniformly at random, with replacement.

    Args:
        move_names: Candidate move names to draw from.
        n: Number of names to draw.

    Returns:
        A NumPy array containing the ``n`` sampled names. Callers that need a
        plain list should wrap the result in ``list(...)``.
    """
    return np.random.choice(move_names, n)


def generate_state_from_moves(move_names, move_dict, state, inverse=False):
    """Apply a sequence of named moves to ``state`` and return the result.

    Args:
        move_names: Iterable of move names, applied in the given order.
        move_dict: Mapping from move name to the move passed on to
            ``permute_with_swap``.
        state: Starting state; returned as-is when ``move_names`` is empty.
        inverse: When true, each name is first mapped through
            ``get_inverse_move`` before it is looked up.

    Returns:
        The state produced by the last ``permute_with_swap`` call.

    NOTE(review): with ``inverse=True`` every move is inverted but the order of
    the sequence is kept; confirm callers reverse the sequence themselves if
    the goal is to undo it.
    """
    for name in move_names:
        lookup_key = get_inverse_move(name) if inverse else name
        state = permute_with_swap(state, move_dict[lookup_key])
    return state


def normalize_state(state):
    """Divide every entry of ``state`` by ``len(state)``.

    Args:
        state: A list or tuple of numbers, or an array-like object (for
            example a NumPy array or torch tensor) that supports ``len`` and
            division by an int.

    Returns:
        A new list of floats for list/tuple input (including list subclasses);
        otherwise the result of ``state / len(state)``.
    """
    size = len(state)
    # isinstance (rather than an exact type comparison) also covers tuples and
    # list subclasses, which do not support the ``/`` operator themselves.
    if isinstance(state, (list, tuple)):
        return [value / size for value in state]
    return state / size


# Worked example: sample 15 random move names and keep them as a plain list.
path = list(sample_moves(move_names, 15))

print(path)

# Simplify the sequence; n is the length after reduction.
path = iterate_reduce_sequence(path, puzzle_name)
n = len(path)
print(path)
print(n)

# Apply the reduced sequence starting from final_state.
state = generate_state_from_moves(path, move_dict, final_state)

# Last expression of the cell, so the notebook displays the normalized tensor.
normalize_state(torch.tensor(state))

['d1', 'f0', 'r0', 'r1', '-d0', '-d0', '-r0', '-d0', 'r0', 'f1', '-r1', '-f1', 'f0', 'r1', 'r0']
['d1', 'f0', 'r0', 'r1', '-d0', '-d0', '-r0', '-d0', 'r0', 'f1', '-r1', '-f1', 'f0', 'r0', 'r1']
15


tensor([0.2917, 0.1250, 0.5833, 0.0417, 0.9583, 0.3750, 0.6250, 0.7083, 0.5000,
        0.2083, 0.0833, 0.0000, 0.3333, 0.8750, 0.6667, 0.2500, 0.4167, 0.4583,
        0.8333, 0.9167, 0.7500, 0.1667, 0.7917, 0.5417])

In [99]:
class Sampler:
    """Base interface for objects that draw a random move-sequence length."""

    def __init__(self) -> None:
        pass

    def sample(self) -> int:
        """Return one sampled length; concrete samplers must override this.

        Raises:
            NotImplementedError: Always, when called on a class that has not
                overridden ``sample``. Failing loudly avoids handing ``None``
                to code that expects an int.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement sample()"
        )


class Uniform_sampler(Sampler):
    """Sampler that draws integers via ``np.random.randint(low, high)``."""

    def __init__(self, low, high) -> None:
        super().__init__()
        self.low, self.high = low, high

    def sample(self) -> int:
        """Return one random integer; ``high`` is an exclusive upper bound."""
        lower, upper = self.low, self.high
        return np.random.randint(lower, upper)

In [100]:
def generate_single_sample(sampler):
    """Build one ``(x, y)`` training pair from a random move sequence.

    Args:
        sampler: Object whose ``sample()`` returns the number of moves to draw
            before reduction.

    Returns:
        A pair ``(x, y)`` where ``x`` is a tensor built from the normalized
        state reached by the reduced sequence, and ``y`` is the length of the
        reduced sequence.
    """
    # TODO: shouldn't use variables from outside scope like this
    # (move_names, puzzle_name, move_dict and final_state are notebook globals)
    raw_length = sampler.sample()
    sampled = list(sample_moves(move_names, raw_length))
    reduced = iterate_reduce_sequence(sampled, puzzle_name)

    scrambled = generate_state_from_moves(reduced, move_dict, final_state)
    features = torch.tensor(normalize_state(scrambled))

    return features, len(reduced)

### create a batch of data

In [101]:
def generate_batch(num_samples, sampler):
    """Generate ``num_samples`` training pairs and stack them into tensors.

    Args:
        num_samples: Number of ``(x, y)`` pairs to generate.
        sampler: Passed through to ``generate_single_sample`` for every pair.

    Returns:
        A pair ``(X, Y)`` of float64 tensors with shapes
        ``(num_samples, num_states)`` and ``(num_samples,)``; ``num_states`` is
        read from the notebook's global scope.
    """
    inputs = torch.empty(num_samples, num_states, dtype=torch.float64)
    targets = torch.empty(num_samples, dtype=torch.float64)

    # tqdm only adds a progress bar; rows are filled in order.
    for row in tqdm(range(num_samples)):
        inputs[row], targets[row] = generate_single_sample(sampler)

    return inputs, targets


# Raw sequence lengths are drawn by np.random.randint(0, 10); 10 is excluded.
sampler = Uniform_sampler(low=0, high=10)
X, Y = generate_batch(num_samples=1000, sampler=sampler)

100%|██████████| 1000/1000 [00:00<00:00, 15105.25it/s]
