In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Bidirectional, SimpleRNN, LSTM, Embedding, Dropout
from keras import utils as np_utils
import os.path
from os import path

np.random.seed(0)


class DataGenerator:
    def __init__(self):
        self.consonnants = ['B', 'D', 'K', 'L', 'M', 'N', 'P', 'R', 'S', 'T']
        self.vowels = ['A', 'E', 'I', 'O', 'U']
        self.present_features_indices = ['']
        self.letters = dict()

        self.words_ortho = dict()
        self.words_ortho[''] = []

        self.non_words_ortho = dict()
        self.non_words_ortho[''] = []

        self.semantic_categories = dict()
        self.semantic_categories[''] = []
        self.semantic_units = []

        self.X = []
        self.y = []

        self.generate_orth_rep_for_letters()
        self.generate_words(128)
        # self.generate_words(128, non_word=True)
        self.generate_semantics()
        self.generate_inputs_and_outputs()

    def generate_orth_rep_for_letters(self):
        all_letters = self.consonnants + self.vowels
        for letter in all_letters:
            rep = [0] * 6

            present_features = ''
            while(present_features in self.present_features_indices):
                present_features = "".join(
                    map(str, np.random.randint(0, 6, (2,))))

            self.present_features_indices.append(present_features)
            rep[int(present_features[0])] = 1
            rep[int(present_features[1])] = 1

            self.letters[letter] = rep

    def generate_words(self, num_words, non_word=False):
        def get_word(non_word):
            if (non_word):
                co_i = np.random.randint(len(self.consonnants))
                vo_i = np.random.randint(len(self.vowels), size=2)
                l1 = self.vowels[vo_i[0]]
                l2 = self.consonnants[co_i]
                l3 = self.vowels[vo_i[1]]
                word = l1 + l2 + l3
                ortho = self.letters[l1] + self.letters[l2] + self.letters[l3]
            else:
                co_i = np.random.randint(len(self.consonnants), size=2)
                vo_i = np.random.randint(len(self.vowels))
                l1 = self.consonnants[co_i[0]]
                l2 = self.vowels[vo_i]
                l3 = self.consonnants[co_i[1]]
                word = l1 + l2 + l3
                ortho = self.letters[l1] + self.letters[l2] + self.letters[l3]
            return word, ortho

        words_dict = self.words_ortho if not non_word else self.non_words_ortho

        for i in range(num_words):
            word = ''
            ortho = []
            while(word in words_dict):
                word, ortho = get_word(non_word)

            words_dict[word] = ortho

        del words_dict['']

    def similarity(self, input, others):
        min = 1000
        sum = min
        input = list(map(int, list(input)))
        if input == []:
            return 0
        for other in others:
            other = list(map(int, list(other)))
            if other != []:
                inp3 = np.logical_xor(input, other)
                sum = np.sum(inp3)
            else:
                sum = 1000

            if sum < min:
                min = sum
        return sum

    def generate_semantics(self):
        MAX_NCATEGORIES = 500
        MAX_NMEMBERS = 100
        MAX_NFEATURES = 2000

        nFeatures = 100  # number of features per pattern
        nCategories = 8  # number of clusters (prototypes)
        nMembers = 16  # number of exemplars per cluster
        minProbOn = 0.1  # maximum sparcity of prototype
        maxProbOn = 0.1  # minimum sparcity of prototype
        minDiff = 4  # minimum bit-wise difference among exemplars
        minProbDistort = 0.2  # min prob that feature is regenerated
        maxProbDistort = 0.4  # max prob that feature is regenerated
        sparse = 1  # generate output in "sparse" (unit numbers) format
        minOn = 1  # Min Number of units to be on in the exemplar
        maxOn = 100 	# Max number of units to be on in the exemplar
        maxWCatDiff = 100

        def flip(prob):
            if np.random.uniform(0, 1) < prob:
                return True
            return False

        proto = [0] * MAX_NFEATURES
        item = [0] * MAX_NFEATURES
        cats = [[[0] * MAX_NFEATURES] * MAX_NMEMBERS] * MAX_NCATEGORIES
        for c in range(nCategories):
            dominance = ''
            category_num = c
            # print(c, '--------------------------------')
            probDistort = minProbDistort
            if c < nCategories / 2:
                probOn = minProbOn
            else:
                probOn = maxProbOn

            # if c < nCategories / 2:
            #     probDistort = minProbDistort  # high dominance
            #     dominance = 'high_dominance'
            # else:
            #     probDistort = maxProbDistort  # low dominance
            #     dominance = 'low_dominance'

            probOn = minProbOn + c * \
                (maxProbOn-minProbOn) / (float((nCategories-1)))

            #  generate new prototype (with exact correct number of ON features)

            # for f in range(nFeatures):
            #   proto.append(0)

            nOn = int(0.5 + probOn * nFeatures)
            n = 0
            while n < nOn:
                f = int(np.random.uniform(0, 1) * nFeatures)
                if(proto[f] == 0):
                    proto[f] = 1
                    n += 1

            m = 0
            attempts = 0
            # while (m < nMembers and attempts < 1000): # add max attemps = 1000
            while (m < nMembers):  # add max attemps = 1000
                if m < nMembers / 2:
                    probDistort = minProbDistort  # high dominance
                    dominance = 'high-dominance'
                else:
                    probDistort = maxProbDistort  # low dominance
                    dominance = 'low-dominance'
                    attempts += 1
                # generate new potential item
                new = True
                numOn = 0
                for f in range(nFeatures):
                    if flip(probDistort):
                        item[f] = int(flip(probOn))
                    else:
                        item[f] = proto[f]

                    if item[f] == 1:
                        numOn += 1

                if (numOn > maxOn):
                    new = False
                if (numOn < minOn):
                    new = False

                om = 0
                while om < m and new:
                    nDiff = 0
                    for f in range(nFeatures):
                        if item[f] != cats[c][om][f]:
                            nDiff += 1
                    if (nDiff < minDiff):
                        new = False
                    if (nDiff > maxWCatDiff):
                        new = False
                    om += 1

                if not new:
                    print(c, ' Failed diff check 1')
                    continue

                oc = 0
                while oc < c and new:
                    om = 0
                    while om < nMembers and new:
                        nDiff = 0
                        for f in range(nFeatures):
                            if item[f] != cats[oc][om][f]:
                                nDiff += 1
                        if nDiff < minDiff:
                            new = False
                        om += 1
                    oc += 1

                if not new:
                    # print(c, ' Failed diff check 2', ' nDiff ', nDiff < minDiff, new)
                    continue

                for f in range(nFeatures):
                    cats[c][m][f] = item[f]
                m += 1
                semantic_unit = {'category': 'category-' + str(category_num),
                                 'dominance': dominance, 'ex': cats[c][m][:nFeatures]}
                self.semantic_units.append(semantic_unit)

            if attempts == 1000:
                print('Max attempts reached')

            # print(proto[:nFeatures])
            # print('Members')

            # for m in range(nMembers):
            #     semantic_unit = {'category': category_num,
            #                      'dominance': dominance, 'ex': cats[c][m][:nFeatures]}
            #     self.semantic_units.append(semantic_unit)
        # self.semantic_units = np.asarray(self.semantic_units)

    def generate_inputs_and_outputs(self):
        # inputs
        word_ortho = list(self.words_ortho.values())
        # non_word_ortho = list(self.non_words_ortho.values())
        # all_ortho = np.asarray(word_ortho + non_word_ortho)
        all_ortho = np.asarray(word_ortho)

        # outputs
        # word_sem = self.semantic_units
        non_word_sem = [[0] * 100] * (len(self.non_words_ortho))

        def convert(elem):
            return list(map(int, list(elem)))

        # all_sem = np.asarray(list(map(convert, word_sem)) + non_word_sem)
        # all_sem = np.asarray(list(map(convert, word_sem)))
        # print(np.asarray(self.semantic_units))

        [self.X] = self.shuffle([all_ortho])
        self.y = self.semantic_units
        
        idx = 0
        res = ''
        p = np.random.permutation(128)
        for ortho in self.X:
            sem = self.y[p[idx]]
            list_ortho = list(ortho)
            inp = self.write_to_ex(list_ortho)
            out = self.write_to_ex(sem['ex'])
            word = self.get_name(list_ortho)
            res += 'name: ' + '{' + str(idx) + '_' + word + '_' + sem['dominance'] + '_' +str(sem['category']) + '}' + \
                        '\n' + inp + '\n' + out + '\n' + ';\n'
            idx += 1
        self.ex_file_buf = res
        

    def write_to_ex(self, row):
        if len(row) == 100:  # target
            data_type = "T: "
        elif len(row) == 18:  # input
            data_type = "I: "
        return data_type + " ".join(map(str, row))   
            
    
    def get_name(self, inp):
        words = self.words_ortho
        for key in words.keys():
            if words[key] == inp:
                return key
        raise Error
    
    def shuffle(self, arrays):
        prev_len = len(arrays[0])
        p = np.random.permutation(prev_len)
        ret = []
        for array in arrays:
            assert prev_len == len(array)
            prev_len = len(array)
            ret.append(array[p])
        return ret

In [2]:
dg = DataGenerator()

0  Failed diff check 1
name: {0_ROB_high-dominance_category-2}
I: 0 1 1 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1
T: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 1 0 0 1 0 0 1 1 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 1 1 0 0
;
name: {1_LAB_high-dominance_category-1}
I: 0 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 1
T: 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0
;
name: {2_LUL_low-dominance_category-6}
I: 0 1 0 1 0 0 1 0 1 0 0 0 0 1 0 1 0 0
T: 1 1 0 0 0 0 0 1 0 1 0 1 0 0 0 0 1 0 1 1 1 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 1 1 1 1 0 0 0 1 0 1 0 0 0 1 1 1 1 0 0
;
name: {3_BOS_high-dominance_category-0}
I: 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0
T: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 

In [8]:
def write_to_ex(row):
    if len(row) == 100:  # target
        data_type = "T: "
    elif len(row) == 18:  # input
        data_type = "I: "
    return data_type + " ".join(map(str, row))

def get_name(inp):
    words = dg.words_ortho
    for key in words.keys():
        if words[key] == inp:
            return key
    raise Error

idx = 0
res = ''
p = np.random.permutation(128)
for ortho in dg.X:
    sem = dg.y[p[idx]]
    list_ortho = list(ortho)
    inp = write_to_ex(list_ortho)
    out = write_to_ex(sem['ex'])
    word = get_name(list_ortho)
    res += 'name: ' + '{' + str(idx) + '_' + word + '_' + sem['dominance'] + '_' +str(sem['category']) + '}' + \
                '\n' + inp + '\n' + out + '\n' + ';\n'
    idx += 1
print(res)
    # shuffle given arrays in the same order

name: {0_ROB_high-dominance_category-1}
I: 0 1 1 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1
T: 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0
;
name: {1_LAB_low-dominance_category-5}
I: 0 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 1
T: 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 1 0 0 1 1 1 0 1 0 0 1 0 1 1 0 0 0 1 0 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0 1
;
name: {2_LUL_low-dominance_category-6}
I: 0 1 0 1 0 0 1 0 1 0 0 0 0 1 0 1 0 0
T: 0 0 0 1 0 0 0 1 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 1 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 1 1 0 1 1 1 1 0 0 1 0 0 1 1 1 0 0 0 1 1 0 1 0 1 1 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0
;
name: {3_BOS_high-dominance_category-2}
I: 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0
T: 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 

In [20]:
[1,2] == [1,4]

False