In [None]:
import copy
import math
import random
from functools import *

import matplotlib.pyplot as plt
import numpy as np

# Frequency Matrix

In [None]:
def flatten_caps(word):
    """
    Lower-case a word while keeping track of where the capitals were.

    Every upper-case character is emitted as the marker 'A' followed by its
    lower-case form; every other character is emitted lower-cased.
    e.g. 'Hi' -> 'Ahi'

    returns the transformed string
    """
    return ''.join(
        ('A' + ch.lower()) if ch.isupper() else ch.lower()
        for ch in word
    )

In [None]:
def get_freq_matrix(dictionary, v=False):
    """
    Build the letter-to-letter transition matrix of a word list.

    dictionary is a sequence of words (it is iterated twice); every distinct
    character that occurs becomes one row / column, in sorted order
    v turns on two matplotlib figures: a bar chart of how often each letter
    occurs and a heat map of the normalized transition matrix

    returns (transition_frequency_normalized, LETTERS)
    entry [i][j] is the number of times LETTERS[i] is immediately followed by
    LETTERS[j], divided by the total of row i; rows of letters that are never
    followed by anything stay all zero
    """
    letter_set = set()
    for word in dictionary:
        letter_set.update(word)
    LETTERS = sorted(letter_set)
    LETTERS_INDEX = {letter: position for position, letter in enumerate(LETTERS)}

    num_letters = len(LETTERS)
    letter_frequency = np.zeros(num_letters)
    transition_frequency = np.zeros((num_letters, num_letters))

    for word in dictionary:
        indices = [LETTERS_INDEX[c] for c in word]
        # count every letter, including the last one of the word, which has
        # no outgoing transition
        for index in indices:
            letter_frequency[index] += 1
        for current, following in zip(indices, indices[1:]):
            transition_frequency[current, following] += 1

    # normalize each row to sum to 1; rows with no transitions are left at zero
    row_totals = transition_frequency.sum(axis=1, keepdims=True)
    transition_frequency_normalized = np.divide(
        transition_frequency,
        row_totals,
        out=np.zeros_like(transition_frequency),
        where=row_totals > 0,
    )

    if v:
        plt.bar(LETTERS, letter_frequency);
        # matshow opens its own figure, so the bar chart above is kept separate
        plt.matshow(transition_frequency_normalized);
        plt.axis('off')
        # the axes are hidden, so label rows and columns by hand
        for i in range(len(LETTERS)):
            plt.text(i - 0.5, -1, LETTERS[i])
            plt.text(-1.5, i + 0.5, LETTERS[i])

    return transition_frequency_normalized, LETTERS

# Cost Function

In [None]:
@lru_cache(maxsize=None)
def get_distance_matrix(l, w):
    """
    Manhattan distances between every pair of cells of an l-by-w grid.

    Cells are numbered row by row, so cell k sits at row k // w, column k % w.

    returns an (l * w) x (l * w) matrix whose [i][j] entry is
    |row_i - row_j| + |col_i - col_j|

    The result is memoised per (l, w): the same array object is handed back
    on every call, so callers should treat it as read-only.
    """
    cell_count = l * w
    distances = np.zeros((cell_count, cell_count))
    coords = [divmod(k, w) for k in range(cell_count)]
    for a, (row_a, col_a) in enumerate(coords):
        for b, (row_b, col_b) in enumerate(coords):
            distances[a, b] = abs(row_a - row_b) + abs(col_a - col_b)
    return distances

In [None]:
def flatten(board):
    """
    Concatenate the rows of a board into one flat list, row by row.

    returns a new list; the board itself is not modified
    """
    return [cell for row in board for cell in row]

In [None]:
def get_perm_matrix(perm):
    """
    Build the square 0/1 matrix of a permutation given as a list of indices.

    returns a len(perm) x len(perm) matrix with a 1 at [i][perm[i]] for
    every i and 0 everywhere else
    """
    size = len(perm)
    matrix = np.zeros((size, size))
    for row, col in enumerate(perm):
        matrix[row, col] += 1
    return matrix

In [None]:
def get_conjugate_distance_matrix(board):
    """
    board is a grid of letters
    use `get_distance_matrix` to find the matrix of distances between letter pairs
    `get_distance_matrix` is cached, so only the re-indexing is done per call

    returns `get_distance_matrix` with permuted rows and cols, so that
    ret[i][j] is the distance between the ith and jth letters (in sorted
    order) instead of between the [i // w][i % w] and [j // w][j % w] positions

    Mathematically this is the conjugation P @ D @ P^-1, where P is the
    permutation matrix taking sorted(flatten(board)) to flatten(board).
    The inverse of a permutation matrix is its transpose, so the product
    reduces to picking rows and columns of D: ret[i][j] = D[order[i]][order[j]].
    Indexing directly avoids the matrix inverse and the two matrix products.

    The result is a fresh array; the cached distance matrix is not modified.
    """

    dist_mat = get_distance_matrix(len(board), len(board[0]))
    flattened_board = flatten(board)
    # order[i] is the grid position of the i-th letter in sorted order; the
    # sort is stable, so repeated letters still map to distinct positions
    order = sorted(range(len(flattened_board)), key=flattened_board.__getitem__)

    return dist_mat[np.ix_(order, order)]

In [None]:
# Small 2 x 3 demo: the plain position-to-position distances first, then the
# same distances re-indexed by letter in alphabetical order.
board = [list('bfd'), list('cea')]
print(get_distance_matrix(len(board), len(board[0])), end='\n\n')
print(get_conjugate_distance_matrix(board))

In [None]:
def cost(board, freq_matrix):
    """
    Frequency-weighted total distance of a board layout.

    board should be a rectangular grid of letters
    freq_matrix is the letter-transition frequency matrix built from a dictionary
    it should have one row / column per board cell, in sorted letter order,
    so that it lines up with `get_conjugate_distance_matrix(board)`

    returns the sum over all letter pairs (i, j) of
    freq_matrix[i][j] * distance between letter i and letter j on the board
    """
    # a single vectorised reduction instead of nested Python-level sums
    return np.sum(freq_matrix * get_conjugate_distance_matrix(board))

# Optimization

In [None]:
# Read the word list, one word per line, stripping surrounding whitespace and
# encoding capitals with `flatten_caps`.
dictionary = []
with open('words.txt') as f:
    dictionary.extend(flatten_caps(line.strip()) for line in f)

In [None]:
# Transition matrix and sorted alphabet of the word list (plots switched off).
freq_matrix, LETTERS = get_freq_matrix(dictionary, v=False)

In [None]:
# Show the alphabet found in the word list as one unbroken string.
print(*LETTERS, sep="")

# Naive tree search

In [None]:
def random_swaps(permutation, num_swaps):
    """
    Apply `num_swaps` random transpositions to `permutation`, in place.

    permutation is a mutable sequence of any length; each swap exchanges the
    entries at two distinct, uniformly chosen positions
    num_swaps is how many swaps to perform (later swaps may undo earlier ones)

    returns the same `permutation` object that was passed in
    """
    last_index = len(permutation) - 1
    if last_index < 1:
        # fewer than two entries: there is no pair of distinct positions, and
        # the retry loop below could never terminate
        return permutation
    for _ in range(num_swaps):
        r1 = random.randint(0, last_index)
        r2 = random.randint(0, last_index)
        # redraw until the two positions differ
        while r1 == r2:
            r2 = random.randint(0, last_index)
        permutation[r1], permutation[r2] = permutation[r2], permutation[r1]
    return permutation

In [None]:
def optimal_num_children(num_swaps):
    """
    Number of random children to sample for a given number of swaps.

    num_swaps is the number of swaps applied to produce one child

    returns int(perc * total), where total = 350 ** num_swaps and
    perc = ln(0.7 * (total + 2)) + 0.7
    """
    # NOTE(review): 350 looks like the assumed number of distinct children of
    # a single swap; C(26, 2) is 325 - confirm where 350 comes from
    total_number_of_children = 350 ** num_swaps
    # percent of population for half confidence of seeing all children
    # https://www.desmos.com/calculator/d1svxeq9uj
    # stdlib math.log (natural log by default) replaces the `np.math` alias,
    # which no longer exists in NumPy 2.0
    perc = math.log(0.7 * (total_number_of_children + 2)) + 0.7
    return int(perc * total_number_of_children)

In [None]:
# Randomised local search over orderings of 26 indices.
# For each entry of the swap-count schedule below, 100 candidates are made by
# applying that many random swaps to a copy of the best ordering found before
# the round started; a candidate replaces the running best only when its cost
# is strictly lower.
# NOTE(review): the `cost` defined in this notebook takes (board, freq_matrix)
# and works on a grid of letters, while the calls here pass only a flat list of
# indices. Against that definition these calls would raise TypeError - confirm
# which `cost` / board conversion this cell is meant to use.
# NOTE(review): no random seed is set in the visible cells, so results differ
# between runs.
best_permutation = None
new_best_permutation = list(np.random.permutation(26).data)
best_cost = cost(new_best_permutation)

# schedule: many swaps per candidate at first, then progressively fewer
for num_swaps in [10, 10, 9, 9, 8, 8, 7, 6, 5, 4, 4, 4, 4] + ([3] * 10) + ([2] * 20) + ([1] * 10):
    # snapshot of the best ordering; every candidate of this round starts from it
    best_permutation = new_best_permutation.copy()
    for _ in range(100):
        # random_swaps mutates its argument, hence the copy
        new_permutation = random_swaps(best_permutation.copy(), num_swaps)
        c = cost(new_permutation)
        if c < best_cost:
            best_cost = c
            new_best_permutation = new_permutation.copy()
    print(num_swaps, '  best:', best_cost)

In [None]:
# Report the best ordering found, its board rendering and its cost (the bare
# last expression is shown as the cell output).
# NOTE(review): `print_board` is not defined in the visible part of this
# notebook, and `cost` is again called with a single argument although the
# visible definition takes (board, freq_matrix) - confirm both.
print(new_best_permutation)
print_board(new_best_permutation)
cost(new_best_permutation)