In [1]:
import numpy as np
from numpy import array
import tensorflow as tf
from itertools import combinations

In [2]:
def get_inputs_tensor_multiplied_by_transpose_permutations(group_dims_stack, group_size):
    # We have the masked vectorised input rows as a N+1 dimensional tensor
    # Now multiply by N - 1 tensors whose axes have been permuted (on all but 0th axis) 
    cross_multiplied_group_dims_stack = group_dims_stack
    dims_list = list(range(1, group_size + 1))
    perm_list = list(range(0, group_size))
    L = len(dims_list)
    for dim in dims_list[:-1]: #permutations(range(1,current_N + 1)):
        perm = [0] + [((i + dim + L - 1) % L) + 1 for i in dims_list]
#         perm = [0 if i == -1 else perm[i] for i in range(-1, len(perm))]
        group_dims_stack_perm = tf.transpose(group_dims_stack, perm=perm)
        #TODO: Test broadcasting always works when using same number of input rows as unique elements
        cross_multiplied_group_dims_stack = tf.multiply(cross_multiplied_group_dims_stack, group_dims_stack_perm)
    return cross_multiplied_group_dims_stack


In [3]:
from resources.v2.test_resources import expected_occluded_output_3d, input_occluded_output_3d

def get_inputs_filtered_by_possible_combinations(group_dims_stack,
                                                 num_remaining_els, 
                                                 group_size):
    """
    group_dims_stack: the 
    current_els_mask: python binary list with allowed elements as 1.
    """
    # possible_els_idxs -> [0,3,5,10] = 0th, 3rd etc els allowed.
    possible_combinations_idxs = tf.py_func(lambda num_els: 
                                   np.array(list(combinations(range(0,num_els), group_size)), dtype=np.int32),
                                   [num_remaining_els], tf.int32)
    
    mask_shape = tf.map_fn(lambda x: num_remaining_els, tf.Variable(list(range(group_size))))

    values = tf.py_func(lambda perms: np.ones(len(perms), dtype=np.int32),
                        [possible_combinations_idxs], tf.int32)
    
    allowed_combs = tf.sparse_to_dense(sparse_indices=possible_combinations_idxs, 
                                       output_shape=mask_shape,
                                       sparse_values=values,)

    return tf.multiply(allowed_combs, group_dims_stack)
 
    
def _test_get_diagonal_and_upper_zeroed_tensor(expected, group_dims_stack, current_els_mask, group_size):
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        tensor = get_inputs_filtered_by_possible_combinations(group_dims_stack, current_els_mask, group_size)
        result = session.run(tensor)
#     print('Expected:')
#     print(str(np.array(expected)))
#     print('Output:')
#     print(str(result))
    assert np.array_equal(np.array(expected),np.array(result))

_test_get_diagonal_and_upper_zeroed_tensor(expected_occluded_output_3d, input_occluded_output_3d, 5, 3)

In [4]:
def get_sparse_mask_matrix(current_mask):
    def create_indices(mask):
        positive_el_number = 0
        indices = []
        for i, v in enumerate(mask):
            if v == 1:
                indices.append([i,positive_el_number])
                positive_el_number += 1
        return np.array(indices, dtype=np.int32)
    
    indices = tf.py_func(create_indices, [current_mask], tf.int32)
    number_of_remaining_elements = tf.cast(tf.divide(tf.size(indices),2), tf.int32)
    values = tf.py_func(lambda _indices: np.ones(len(_indices), dtype=np.int32),
                        [indices], tf.int32)
    shape = tf.stack([tf.size(current_mask),number_of_remaining_elements])
    mask_matrix = tf.sparse_to_dense(sparse_indices=indices, sparse_values=values, output_shape=shape)
    return mask_matrix


def _test_get_sparse_mask_matrix(curr_mask, expected_mask_matrix):
    mask = get_sparse_mask_matrix(curr_mask)
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        result = session.run(mask)
    assert np.array_equal(result, expected_mask_matrix)
#     return num_remaining

test_current_mask = tf.constant([1,0,1,0,1], dtype=tf.int32)
expected_mask_matrix = np.array([[1,0,0],
                                 [0,0,0],
                                 [0,1,0],
                                 [0,0,0],
                                 [0,0,1]])

_test_get_sparse_mask_matrix(test_current_mask, expected_mask_matrix)

In [5]:
def reduce_input_columns_with_current_mask(input_rows, current_mask):
    """
    First, construct the sparse matrix containing 
    """
    inputs_mask_matrix = get_sparse_mask_matrix(current_mask)
    return tf.matmul(input_rows, inputs_mask_matrix)

def _test_reduce_input_columns_with_current_mask(test_input_rows, test_current_mask, expected_reduced_inputs):
    reduced_inputs = reduce_input_columns_with_current_mask(test_input_rows, test_current_mask)

    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        result = session.run(reduced_inputs)
    assert np.array_equal(result, expected_reduced_inputs)
#     return result

test_inputs = tf.constant([[1,2,1,2,1],
                           [1,2,0,2,1]])
test_current_mask = tf.constant([1,0,1,0,1], dtype=tf.int32)
expected_reduced_inputs = np.array([[1,1,1],
                                    [1,0,1]])
_test_reduce_input_columns_with_current_mask(test_inputs, test_current_mask, expected_reduced_inputs)

In [6]:
from resources.v2.test_resources import gc_2D_3El, input_group_count_t_2D_3El, expected_next_mask_from_2D_3El

def get_next_element_mask(group_counts_t, gc):
    """
    
    TODO: instead, get the actual groups and do with regular python processing since it will be simpler
    to find all subsets and create new mask based on the possible new sets
    
    TODO: rename to generate_next_mask or something
    TODO: remove this and create get_next_mask_and_groups.input_collections_reduced with previous 
    next_frequent_bin_filter i.e. would be better to create next possible from combinations of current groups
    e.g. if A,B,C and B,C,D are groups, but no size 3 group contains A and D, A,B,C,D should not be a possiblity
    """

    group_counts_filter = tf.cast(group_counts_t >= gc["min_support"], tf.int32)
    next_el_occurances = tf.multiply(group_counts_filter, group_counts_t)

    
    ONE = tf.constant(1, dtype=tf.int32)
    ZERO = tf.constant(0, dtype=tf.int32)
    output_mask = tf.zeros(gc["num_remaining_els"], dtype=tf.int32)
    dims_list = list(range(0, gc["current_N"]))
    dim_skips_list = [[d for d in dims_list if d != skip_d] for skip_d in dims_list]
    print(dim_skips_list)
    for dims in dim_skips_list:
        output_mask = tf.add(output_mask, tf.reduce_sum(group_counts_t, dims))
        
    return tf.unstack(tf.map_fn(lambda x: tf.cond(x >= ONE, true_fn=lambda: ONE, false_fn=lambda: ZERO),
                                output_mask, dtype=tf.int32))


def _test_get_next_element_mask(group_count_t, gc, expected_mask):
    next_mask = get_next_element_mask(group_count_t,gc)
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        result = session.run(next_mask)
    
#     assert np.array_equal(result, expected_mask)
    return result

_test_get_next_element_mask(input_group_count_t_2D_3El, gc_2D_3El, expected_next_mask_from_2D_3El)

[[1], [0]]


[1, 1, 1]

In [9]:
def get_next_mask_and_groups(gc):
    """
    Produces the next 'mask' and 'groups'
    mask: the binarised original element vector, 1 representing "element can still be found in groups" and
          0 representing "element no longer found in groups"
    groups: the set of elements (set size = gc["current_N"]) that are found the current group size

    current_N:
    The dimension that is currently being created i.e. the new mask will be this large and the full count 
    representation tensor will be order N+1
    
    TODO: Instead of re-creating the full "possible groups for all inputs" (possible-groups) tensor, instead use 
          the current mask to remove any elements (basis vectors / columns in each dimension) from the previous 
          possible-groups, followed by adding a new dimension 
    """

    print(f"Get_next_mask_and_groups for group size {gc['current_N']}")
    
    input_collections_reduced = reduce_input_columns_with_current_mask(gc["input_rows"], 
                                                                       gc["curr_bin_mask"])
    dim_to_append = input_collections_reduced
    for d in range(2, gc["current_N"]+1):        
        group_dims_stack = tf.stack([dim_to_append for _ in range(0, gc["num_remaining_els"])], axis=-1)
        traspose_multiplied_stack = get_inputs_tensor_multiplied_by_transpose_permutations(group_dims_stack, d)
        dim_to_append = traspose_multiplied_stack
        
    filtered_counts_tensor = get_inputs_filtered_by_possible_combinations(traspose_multiplied_stack,
                                                                          gc["num_remaining_els"],
                                                                          gc["current_N"])
    
    group_counts_t = tf.reduce_sum(filtered_counts_tensor, axis=0)
    
    with tf.Session() as session:
        init = tf.global_variables_initializer()
        session.run(init)
        group_counts_array = session.run(group_counts_t)
    
    
            
#     next_frequent_bin_mask = get_group_count_tensor_reduced_to_1D(next_frequent_bin_filter, 
#                                                                   gc["num_original_els"],
#                                                                   gc["current_N"])
    
    # TODO: return next_frequent_bin_mask for next iter and next_el_occurances and gc["curr_bin_mask"] as output
    return group_counts_array
    

def test_get_next_mask_and_groups(gc):
    
    return get_next_mask_and_groups(gc)
    
original_input_els = ['A','B','C','D','E']
vectorised_inputs_stack_2 = tf.Variable([[1,1,1,1,0],
                                         [1,1,1,1,1], 
                                         [0,1,1,1,0]], tf.int32)
frequent_bin_mask_2 = tf.Variable([0,1,1,1,0], tf.int32)

input_3_5 = {
    "current_N": 3,
    "original_els": original_input_els, 
    "num_original_els": 5,
    "input_rows": vectorised_inputs_stack_2,
    "num_input_rows": 3,
    "curr_bin_mask": frequent_bin_mask_2,
    "num_remaining_els": 3,
    "curr_group_count_totals": [], 
    "min_support": 2
}
results = test_get_next_mask_and_groups(input_3_5)

results

Get_next_mask_and_groups for group size 3


array([[[0, 0, 0],
        [0, 0, 3],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=int32)