This notebook is based on the course project "Neural Machine Translation" from the Deep Learning course series on Coursera.

In [1]:
import random
from keras.layers import Bidirectional, Concatenate, LSTM, Dot, Input, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam, RMSprop
from keras.utils import to_categorical
from keras.models import Model
import keras.backend as K

Using TensorFlow backend.


In [2]:
# Load tab-separated (human-readable date, machine-readable date) pairs and
# collect the set of characters seen on each side.
data = []
hvocab = set()
mvocab = set()
with open('date_data.txt') as inputfile:
    for row in inputfile:
        # Strip only the line terminator; slicing off the last character would
        # eat a real character when the final line has no trailing newline.
        row = row.rstrip('\n')
        cont = row.split('\t')
        if len(cont) < 2:
            # Report the malformed row and skip it rather than crash on cont[1].
            print(cont)
            continue
        human = cont[0].lower()
        data.append((human, cont[1]))
        # Updating a set with a string adds its individual characters.
        hvocab.update(human)
        mvocab.update(cont[1])

In [3]:
# Shuffle the (human, machine) pairs in place. No seed is set in the visible
# cells, so the resulting order is not pinned.
random.shuffle(data)
# Show a small sample of the shuffled pairs.
print(data[:10])

[('1-9-1973', '1973-01-09'), ('tuesday november 8 1988', '1988-11-08'), ('03.01.1998', '1998-01-03'), ('11/9/74', '1974-11-09'), ('apr 25', '0000-04-25'), ('4.13.2002', '2002-04-13'), ('1985-01-24', '1985-01-24'), ('april 11', '0000-04-11'), ('feb. 26th', '0000-02-26'), ('04-22-1987', '1987-04-22')]


In [4]:
# Machine-side lookup tables: index -> character and character -> index.
inv_mdict = {idx: ch for idx, ch in enumerate(sorted(mvocab))}
mdict = {ch: idx for idx, ch in inv_mdict.items()}
# Human-side vocabulary: '<pad>' first (index 0), then the sorted characters,
# then '<unk>' last.
hlist = ['<pad>', *sorted(hvocab), '<unk>']
hdict = {token: idx for idx, token in enumerate(hlist)}
# Fixed length of an encoded human-readable date.
datelen = 30
def prehum(x):
    """One-hot encode a human-readable date string.

    The text is lower-cased, commas are removed, and each character is mapped
    to its index in ``hdict`` (unknown characters map to ``'<unk>'``). The
    index sequence is then truncated or padded to exactly ``datelen`` entries.

    Args:
        x: The human-readable date string.

    Returns:
        A list of ``datelen`` one-hot vectors, each of length ``len(hdict)``.
    """
    x = x.lower().replace(',', '')
    # Truncate over-long inputs so the result always has exactly `datelen`
    # rows; without this, long strings produce an encoding of the wrong length.
    x = x[:datelen]
    unk = hdict['<unk>']
    xnum = [hdict.get(c, unk) for c in x]
    # Pad with index 0, which is where '<pad>' sits in the vocabulary list.
    xnum.extend([0] * (datelen - len(xnum)))
    onehot = [to_categorical(i, num_classes=len(hdict)) for i in xnum]
    return onehot

def premac(x):
    """One-hot encode a machine-readable date string.

    Each character is looked up in ``mdict`` (a ``KeyError`` is raised for a
    character that is not in the table) and turned into a one-hot vector of
    length ``len(mdict)``. No padding or truncation is applied.

    Args:
        x: The machine-readable date string.

    Returns:
        A list with one one-hot vector per character of ``x``.
    """
    n_classes = len(mdict)
    return [to_categorical(mdict[ch], num_classes=n_classes) for ch in x]

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 5% of the pairs as a test set. No random_state is passed, so the
# split is not pinned to a fixed seed.
train_data, test_data = train_test_split(data, test_size=0.05)
# Report the sizes of the two partitions.
print(len(train_data), len(test_data))

47500 2500


In [6]:
import numpy as np

In [7]:
# Encode every (human, machine) pair: the human side goes through prehum to
# form the inputs, the machine side through premac to form the targets.
trainX = np.array([prehum(human) for human, _ in train_data])
trainy = np.array([premac(machine) for _, machine in train_data])
testX = np.array([prehum(human) for human, _ in test_data])
testy = np.array([premac(machine) for _, machine in test_data])

In [8]:
# Input sequence length; same value as `datelen`, the length prehum pads to.
xl = 30
# Output sequence length; the machine dates printed above ('1973-01-09', ...)
# are 10 characters long.
yl = 10

In [9]:
def softmax(x, axis=1):
    """Softmax that can normalise over an arbitrary axis.

    Args:
        x: Tensor to normalise.
        axis: Axis to normalise over when ``x`` has more than two dimensions.
            It is ignored for 2-D input, which is passed to ``K.softmax``.

    Returns:
        A tensor of the same shape as ``x``.

    Raises:
        ValueError: If ``x`` has fewer than two dimensions.
    """
    rank = K.ndim(x)
    if rank < 2:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
    if rank == 2:
        return K.softmax(x)
    # Subtract the per-axis maximum before exponentiating for numerical
    # stability; the shift cancels out in the ratio below.
    exps = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return exps / K.sum(exps, axis=axis, keepdims=True)

In [10]:
def attention(a, s_prev):
    """Build one attention step and return the resulting context tensor.

    New ``Dense`` layers are instantiated on every call, so each call has its
    own attention parameters.

    Args:
        a: Encoder output sequence; assumed to be (batch, steps, features) --
            TODO confirm against the caller.
        s_prev: Previous decoder hidden state, repeated along the step axis
            before being concatenated with ``a``.

    Returns:
        The dot product of the attention weights with ``a`` over axis 1.
    """
    # Take the number of encoder steps from `a` itself so this function agrees
    # with whatever length the caller built the encoder with. Fall back to the
    # notebook-level `xl` only when the static length is unknown.
    n_steps = K.int_shape(a)[1]
    if n_steps is None:
        n_steps = xl
    s_prev = RepeatVector(n_steps)(s_prev)
    concat = Concatenate(axis=-1)([a, s_prev])
    d1 = Dense(10, activation="tanh")(concat)
    d2 = Dense(1, activation="relu")(d1)
    # The custom softmax normalises over axis 1 for inputs with more than two
    # dimensions, i.e. across the repeated steps here.
    alphas = Activation(softmax)(d2)
    return Dot(axes=1)([alphas, a])

In [11]:
def modeling(xl, yl, n_a, n_s, hvocab_size, mvocab_size):
    """Build the attention-based sequence-to-sequence date model.

    Args:
        xl: Length of the input sequence.
        yl: Number of output steps (one softmax output per step).
        n_a: Units per direction of the bidirectional encoder LSTM.
        n_s: Units of the decoder LSTM, and the width of its initial states.
        hvocab_size: Size of the one-hot input vocabulary.
        mvocab_size: Size of the output vocabulary.

    Returns:
        A Keras ``Model`` with inputs ``(X, s0, c0)`` and a list of ``yl``
        outputs.
    """
    X = Input(shape=(xl, hvocab_size))
    s0 = Input(shape=(n_s,))
    c0 = Input(shape=(n_s,))
    s = s0
    c = c0
    outputs = []
    a = Bidirectional(LSTM(units=n_a, return_sequences=True))(X)
    for step in range(yl):
        context = attention(a, s)
        # A fresh LSTM and Dense layer are created for every output step; the
        # hidden and cell states are carried over to the next step.
        s, _, c = LSTM(n_s, return_state=True)(inputs=context, initial_state=[s, c])
        # Size the output layer from the mvocab_size argument rather than the
        # global `mdict`, so the parameter actually controls the model.
        out = Dense(mvocab_size, activation=softmax)(s)
        outputs.append(out)
    return Model(inputs=(X, s0, c0), outputs=outputs)

In [12]:
# Build the model with n_a=32 encoder units per direction and n_s=64 decoder
# units. The zero initial states created in later cells use the same width, 64.
model = modeling(xl, yl, 32, 64, len(hdict), len(mdict))
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 30, 38)       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 64)           0                                            
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 30, 64)       18176       input_1[0][0]                    
__________________________________________________________________________________________________
repeat_vector_1 (RepeatVector)  (None, 30, 64)       0           input_2[0][0]                    
____________________________________________________________________________________________

In [13]:
# RMSprop with explicitly spelled-out hyperparameters.
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
# Categorical cross-entropy is applied to the model's outputs, with accuracy
# tracked as a metric.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

In [14]:
# Zero initial hidden and cell states for the decoder, one row per training
# example; 64 matches the n_s passed to modeling().
s0 = np.zeros((len(train_data), 64))
c0 = np.zeros((len(train_data), 64))
# Swap the first two axes of the targets and split along the new first axis,
# giving a list of arrays. Presumably one array per output step, to match the
# model's list of outputs -- TODO confirm trainy is (examples, steps, classes).
y = list(trainy.swapaxes(0, 1))

In [15]:
# Train on the encoded inputs plus the zero initial states. The recorded run
# below was stopped by a KeyboardInterrupt during epoch 3 of 40.
model.fit([trainX, s0, c0], y, epochs=40, batch_size=512)

Epoch 1/40
Epoch 2/40
Epoch 3/40

KeyboardInterrupt: 

In [112]:
# Zero initial hidden and cell states for the test set, one row per test
# example; the width 64 matches the n_s the model was built with.
s1 = np.zeros((len(test_data), 64))
c1 = np.zeros((len(test_data), 64))

In [113]:
# Test targets, rearranged the same way as the training targets.
valy = list(testy.swapaxes(0, 1))
# NOTE(review): despite its name, `preds` holds the values returned by
# model.evaluate (losses and metrics), not predictions.
preds = model.evaluate([testX, s1, c1], valy)



In [114]:
# The printed list has 21 numbers; presumably the total loss, then one loss per
# output step, then one accuracy per output step -- confirm against Keras docs.
print(preds)

[0.4170119325429201, 0.0009083629789267434, 1.698205397133279e-05, 5.7944499993755016e-05, 0.005237503501260653, 1.469851673618905e-07, 0.05132191489089746, 0.13222089188024402, 1.5535394179551077e-07, 0.06199941515414976, 0.16524861699771137, 0.9996, 1.0, 1.0, 0.9992, 1.0, 0.972, 0.9408, 1.0, 0.972, 0.9396]


In [115]:
def getDate(s, model, n_s=64):
    """Translate one human-readable date string with the trained model.

    Args:
        s: The human-readable date string.
        model: Model whose ``predict`` takes ``[x, initial_h, initial_c]`` and
            returns one array of class scores per output step.
        n_s: Width of the zero initial decoder states. It must match the value
            the model was built with; the default is 64, as used in this
            notebook.

    Returns:
        The predicted machine-readable date as a string.
    """
    x = np.array([prehum(s)])
    # Separate names for the states avoid rebinding the `s` argument.
    state_h = np.zeros((1, n_s))
    state_c = np.zeros((1, n_s))
    d = model.predict([x, state_h, state_c])
    # Each step's output holds a single row (batch of one). Take that row's
    # argmax as a scalar instead of calling int() on a 1-element array.
    return "".join(inv_mdict[int(np.argmax(w[0]))] for w in d)

In [122]:
# Quick check on an input with no year; the recorded output below uses '0000'
# for the year.
print(getDate("12-7",model))

0000-12-07


In [120]:
# save_weights is used here, so reloading presumably requires rebuilding the
# same architecture with modeling() first -- confirm against the Keras docs.
model.save_weights("date_model.h5")

In [121]:
# Write both vocabularies to disk, one "<symbol>\t<index>" entry per line.
for path, table in (("h_dict.txt", hdict), ("m_dict.txt", mdict)):
    with open(path, "w", encoding='utf-8') as outputfile:
        outputfile.writelines(
            symbol + '\t' + str(index) + '\n' for symbol, index in table.items()
        )