In [1]:
import numpy as np

In [2]:
### Helper Functions ###
def generate_all_preds(total_depth, elementary):
  '''Generates (exact) arbitrary depth predicates from an elementary set.

  Each elementary template is a format string: '{}' is the single hole into
  which another elementary template is nested, and '#' is left in place as a
  placeholder in the returned predicates. Starting from every elementary
  template, the hole is filled with every elementary template total_depth
  times; finally any remaining hole is closed with a bare '#'.

  Parameters:
  - total_depth: Tree depth of predicate search tree (non-negative int).
      0 returns the elementary templates themselves with the hole closed.
  - elementary: The elementary operations for ring / field (iterable of
      format strings as described above)
  Returns: Set of all distinct predicates of depth total_depth constructed
     from elementary. Different nesting paths that produce the same string
     are collapsed into one entry.
  Raises: ValueError if total_depth is negative (no predicate has negative
     depth; the check also prevents runaway expansion).
  '''
  if total_depth < 0:
    raise ValueError(
        'total_depth must be non-negative, got {}'.format(total_depth))

  # Materialise once so single-pass iterables can be reused at every level.
  templates = list(elementary)

  # Expand one level at a time. Keeping the frontier as a set removes
  # duplicates at every level rather than only at the end, so identical
  # partial predicates are never expanded more than once.
  frontier = set(templates)
  for _ in range(total_depth):
    frontier = {
        partial.format(template)
        for partial in frontier
        # A partial predicate without a hole cannot grow any deeper, so it
        # can never reach the requested depth and is dropped.
        if '{}' in partial
        for template in templates
    }

  # Close whatever hole is left with a plain '#'.
  return {partial.format('#') for partial in frontier}

def load_predicate(predicate, min, max, num_examples, rng=None):
  '''Load some number of random examples of a predicate
  Parameters:
  - predicate: The predicate to load examples from; every '#' in it is
      replaced by a random integer
  - min: Minimum number for random example (inclusive)
  - max: Maximum number for random example (exclusive)
  - num_examples: Number of examples to generate from the predicate
  - rng: Optional source of randomness for reproducible datasets. Either a
      numpy.random.Generator or anything numpy.random.default_rng accepts
      as a seed (e.g. an int). Passing the same int seed to several calls
      restarts the same stream each time; share one Generator across calls
      for independent draws. When None (default), NumPy's global random
      state is used, so np.random.seed(...) still controls the output.
  Returns: List of num_examples dicts, one per random example, with keys
     "Question" (the predicate with numbers filled in), "Answer" (the value
     the question evaluates to) and "Predicate" (the template it came from).
     Each number is drawn from [min, max).
  '''
  n = predicate.count('#')
  shape = (num_examples, n)
  if rng is None:
    numbers = np.random.randint(min, max, shape)
  else:
    # default_rng returns an existing Generator unchanged and builds a new
    # one from a seed otherwise.
    numbers = np.random.default_rng(rng).integers(min, max, size=shape)

  # Loop-invariant: turn the '#' placeholders into format fields once.
  template = predicate.replace('#', '{}')

  dataset = []
  for row in numbers:
    question = template.format(*row)
    dataset.append({
        "Question": question,
        # NOTE: eval executes the question text as Python. That is fine for
        # arithmetic predicates built from trusted templates; do not pass
        # predicates that come from untrusted input.
        "Answer": eval(question),
        "Predicate": predicate
    })
  return dataset

In [3]:
### Config ###
# Current setup:
# - 2 hop (i.e., 1 depth, 3 terms)
# - Data range: [0, 20) -- load_predicate draws from [min, max), so MAX
#   itself is never generated
# - 150,000 * 7 (number of predicates) dataset size
DEPTH = 1                      # nesting depth passed to generate_all_preds
MIN = 0                        # smallest number in an example (inclusive)
MAX = 20                       # upper bound for numbers (exclusive)
N_EXAMPLES_PER_PRED = 150_000  # random examples drawn per predicate

# Commutative field elementary operations (alternative, currently disabled):
# elementary_preds = ['# * {}', '(# + {})']

# Non-commutative field elementary operations: each operation appears with
# the nested hole on the right and on the left.
elementary_preds = [
    '# * {}',
    '{} * #',
    '(# + {})',
    '({} + #)',
]

In [4]:
# If you want a subsample of predicates, here's the place to do it.
# generate_all_preds provides **every** predicate so that we can subsample from
# the larger dataset.
predicates = generate_all_preds(DEPTH, elementary_preds)

# Iterate in sorted order: `predicates` is a set of strings, and set iteration
# order for strings changes between interpreter runs (string hash
# randomization). Sorting keeps the per-predicate blocks of `data` in the same
# order on every run.
datasets = [
    load_predicate(pred, MIN, MAX, N_EXAMPLES_PER_PRED)
    for pred in sorted(predicates)
]

# Flatten the per-predicate lists into a single list of example dicts.
data = [item for sublist in datasets for item in sublist]
len(data)

1050000