In [1]:
import random
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding,Bidirectional
from keras.optimizers import SGD,RMSprop
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

In [2]:
def generate_addition_data(dataset, size):
    """Build a shuffled list of ``size`` pairs of unequal values drawn from ``dataset``.

    Args:
        dataset: Sequence to sample from (anything ``random.choice`` accepts).
        size: Number of pairs to generate.

    Returns:
        A list of ``(first_number, second_number)`` tuples. The two members of
        every pair compare unequal; the list order is randomised.

    Raises:
        ValueError: If at least one pair is requested but every element of a
            non-empty ``dataset`` is equal, because no valid second number
            exists and the resampling loop would never finish.
        IndexError: Propagated from ``random.choice`` when ``dataset`` is empty
            and ``size`` is positive.
    """
    # Fail fast instead of spinning forever in the rejection loop below.
    if size > 0 and len(dataset) > 0 and all(item == dataset[0] for item in dataset):
        raise ValueError("dataset must contain at least two distinct values")

    all_tuples = []
    for _ in range(size):
        first_number = random.choice(dataset)
        second_number = random.choice(dataset)
        # Resample until the pair consists of two different values.
        while second_number == first_number:
            second_number = random.choice(dataset)
        all_tuples.append((first_number, second_number))

    random.shuffle(all_tuples)
    return all_tuples

In [3]:
def generate_max_data(all_numbers, length, num_samples):
    """Generate shuffled lists of distinct numbers for a "find the maximum" task.

    Every sample holds one target value plus ``length - 1`` other values taken
    from positions below the target in the sorted pool, so the target is the
    unique maximum of its sample. For roughly half of the samples the other
    values are drawn uniformly from everything below the target; for the rest
    they are drawn from a Gaussian centred on the target so they lie close to it.

    Args:
        all_numbers: Pool of hashable, orderable numbers. A sorted copy is
            used; the caller's sequence is not modified.
        length: Number of values per sample.
        num_samples: Number of samples to generate.

    Returns:
        A list of ``num_samples`` lists, each shuffled internally, in random order.

    Raises:
        ValueError: If samples are requested but the pool has 10 or fewer
            entries (the target index starts at 10), or if ``length - 1``
            exceeds the number of distinct values among the 10 smallest
            entries, in which case the resampling loops could not terminate.
    """
    def gaussian_sample(max_index):
        """Return an index in ``[0, max_index - 1]`` concentrated just below ``max_index``."""
        if max_index <= 500:  # sample from a small Gaussian
            random_gaussian = np.random.normal(scale=5)
        else:  # sample from a big Gaussian (spread is 1% of the index)
            random_gaussian = np.random.normal(scale=max_index * 0.01)
        new_index = max_index - int(random_gaussian)
        if new_index >= max_index:  # zero/negative offsets collapse onto the nearest lower index
            new_index = max_index - 1
        elif new_index < 0:  # clamp at the start of the pool
            new_index = 0
        return new_index

    all_numbers = sorted(all_numbers)
    lowest_max_index = 10  # the target is never one of the 10 smallest numbers

    if num_samples > 0:
        if len(all_numbers) <= lowest_max_index:
            raise ValueError(
                "all_numbers must contain more than %d values" % lowest_max_index
            )
        # The smallest possible target only has these values below it.
        if length - 1 > len(set(all_numbers[:lowest_max_index])):
            raise ValueError(
                "length is too large for the number of distinct values available"
            )

    all_lists = []
    for _ in range(num_samples):
        # np.random.randint excludes `high`, so passing len(all_numbers) lets the
        # largest number be chosen as the target as well.
        max_index = np.random.randint(low=lowest_max_index, high=len(all_numbers))
        temp_list = [all_numbers[max_index]]
        if np.random.uniform() > 0.5:  # for half the values, we just randomly sample
            for _ in range(length - 1):
                # high=max_index (exclusive) covers every index below the target,
                # including the immediate predecessor.
                new_int = all_numbers[np.random.randint(low=0, high=max_index)]
                while new_int in temp_list:  # resample if it is already in there
                    new_int = all_numbers[np.random.randint(low=0, high=max_index)]
                temp_list.append(new_int)
        else:  # for the other half, sample from a Gaussian to keep the numbers nearby
            for _ in range(length - 1):
                new_int = all_numbers[gaussian_sample(max_index)]
                while new_int in temp_list:  # if already present, resample
                    new_int = all_numbers[gaussian_sample(max_index)]
                temp_list.append(new_int)

        random.shuffle(temp_list)  # shuffle inside the examples
        all_lists.append(temp_list)
    random.shuffle(all_lists)
    return all_lists

In [4]:
# Seed both RNGs used by the data generators (`random` and `np.random`) so that
# Restart & Run All reproduces the same train/test samples.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

SEQUENCE_LENGTH = 5        # numbers per sample; later cells hard-code 5 inputs / 5 classes
NUM_TRAIN_SAMPLES = 100000
NUM_TEST_SAMPLES = 10000

all_numbers = list(range(0, 1000))
# Only matters for the disabled split below: generate_max_data sorts its input.
random.shuffle(all_numbers)
# Disabled alternative: split the number pool 80/20 between train and test.
#train_numbers = all_numbers[:int(0.8 * len(all_numbers))]
#test_numbers = all_numbers[int(0.8 * len(all_numbers)):]

# Train and test samples are drawn independently from the same number pool.
trainset = generate_max_data(all_numbers, SEQUENCE_LENGTH, NUM_TRAIN_SAMPLES)
testset = generate_max_data(all_numbers, SEQUENCE_LENGTH, NUM_TEST_SAMPLES)

In [5]:
# The class label of a sample is the position of its largest element.
train_label = [sample.index(max(sample)) for sample in trainset]
test_label = [sample.index(max(sample)) for sample in testset]

In [6]:
# One-hot encode the label positions (five classes) for the softmax classifier.
trainCate, testCate = (
    to_categorical(labels, num_classes=5)
    for labels in (train_label, test_label)
)

In [7]:
# Array loaded from disk; the next cell copies its rows into the Embedding weights.
# NOTE(review): relative path — assumes the notebook runs from the directory that
# contains this folder.
nekg_path = './1-999 100d step 1/transe 0-999 100d.npy'
nekg = np.load(nekg_path)

In [8]:
# Copy the first 1000 rows of `nekg` into a float (1000, 100) weight matrix.
# Presumably row k is the vector for the number k — verify against the .npy file.
vocab_size, embedding_dim = 1000, 100
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for token_id in range(vocab_size):
    embedding_matrix[token_id, :] = nekg[token_id]

In [12]:
# max
# Classifier for the max-position task:
# embedding (initialised from embedding_matrix, trainable) -> BiLSTM -> softmax over 5 positions.
batch_size = 8
embedder = Embedding(
    1000,
    100,
    input_length=5,
    weights=[embedding_matrix],
    trainable=True,
)
model = Sequential([
    embedder,
    Bidirectional(LSTM(64)),
    Dense(5, activation="softmax", name="FC2"),
])
model.summary()
model.compile(
    loss="categorical_crossentropy",
    optimizer=RMSprop(),
    metrics=["accuracy"],
)


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 5, 100)            100000    
                                                                 
 bidirectional_2 (Bidirectio  (None, 128)              84480     
 nal)                                                            
                                                                 
 FC2 (Dense)                 (None, 5)                 645       
                                                                 
Total params: 185,125
Trainable params: 185,125
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train the classifier; validation_split holds out 20% of the training arrays.
history = model.fit(
    x=np.array(trainset),
    y=trainCate,
    batch_size=batch_size,
    epochs=50,
    validation_split=0.2,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50

In [None]:
# Plot training vs. validation curves, one figure per recorded metric.
for metric_key, figure_title, y_label in (
    ('accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
):
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Valid'], loc='upper left')
    plt.show()

In [None]:
# Evaluate on the separately generated test samples; with the compile settings
# used for the classifier this yields the loss followed by the accuracy.
score = model.evaluate(
    x=np.array(testset),
    y=testCate,
    batch_size=16,
)
print(score)

In [64]:
# decode
# Fully connected regressor: five Dense layers (100 -> 32 -> 16 -> 8 -> 1), MSE loss, SGD.
# NOTE(review): this cell rebinds `model` and `batch_size`, replacing the
# max-position classifier defined earlier in the notebook.
batch_size = 4
# NOTE(review): build() treats the first entry as the batch dimension, which is why
# the summary reports (100, ...) for every layer — confirm whether (None, 100) was intended.
input_shape = (100, 100)
model = Sequential([
    Dense(units, activation="ReLU", name=f"FC{position}")
    for position, units in enumerate((100, 32, 16, 8, 1), start=1)
])
model.build(input_shape)
model.summary()
model.compile(loss="mean_squared_error", optimizer=SGD())


Model: "sequential_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 FC1 (Dense)                 (100, 100)                10100     
                                                                 
 FC2 (Dense)                 (100, 32)                 3232      
                                                                 
 FC3 (Dense)                 (100, 16)                 528       
                                                                 
 FC4 (Dense)                 (100, 8)                  136       
                                                                 
 FC5 (Dense)                 (100, 1)                  9         
                                                                 
Total params: 14,005
Trainable params: 14,005
Non-trainable params: 0
_________________________________________________________________
