In [1]:
import random
from keras.layers import Bidirectional, Concatenate, LSTM, Dot, Input, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam, RMSprop
from keras.utils import to_categorical
from keras.models import Model
import keras.backend as K

Using TensorFlow backend.


In [2]:
# Load (human-readable, machine-readable) time pairs from a tab-separated file
# and collect the set of characters seen on each side.
data = []
hvocab = set()  # characters of the lower-cased human-readable strings
mvocab = set()  # characters of the machine-readable strings
with open('time.txt') as inputfile:
    for row in inputfile:
        # Strip only the line terminator: a blind row[:-1] would eat the last
        # real character of a final line that has no trailing newline.
        row = row.rstrip('\n')
        cont = row.split('\t')
        if len(cont) < 2:
            # Report the malformed row and skip it; indexing cont[1] below
            # would otherwise raise IndexError.
            print(cont)
            continue
        human = cont[0].lower()
        data.append((human, cont[1]))
        hvocab.update(human)
        mvocab.update(cont[1])

In [3]:
# Shuffle the (human, machine) pairs in place and print the first ten as a sanity check.
# NOTE(review): no seed is set before the shuffle, so the order (and this preview)
# differs from run to run.
random.shuffle(data)
print(data[:10])

[('9 am', '09:00-99:99'), ('5am - 4pm', '05:00-16:00'), ('9:21 and 10:11am', '09:21-10:11'), ('12 pm to 9 pm', '12:00-21:00'), ('9:23', '21:23-99:99'), ('8:25', '20:25-99:99'), ('7:56 pm to 11:21 am', '19:56-11:21'), ('8:06am-4:45am', '08:06-04:45'), ('1', '01:00-99:99'), ('11:48pm', '23:48-99:99')]


In [4]:
# Machine-side vocabulary: index -> character, and its inverse character -> index.
inv_mdict = {idx: ch for idx, ch in enumerate(sorted(mvocab))}
mdict = {ch: idx for idx, ch in inv_mdict.items()}

# Human-side vocabulary: '<pad>' sits at index 0 and '<unk>' at the last index,
# with the sorted observed characters in between.
hlist = ['<pad>'] + sorted(hvocab) + ['<unk>']
hdict = {token: idx for idx, token in enumerate(hlist)}

# Fixed length (in characters) that every human-readable string is padded to.
datelen = 30
def prehum(x):
    """One-hot encode a human-readable time string at a fixed length.

    The string is lower-cased, commas are removed, and each remaining
    character is mapped through ``hdict`` (characters missing from ``hdict``
    map to the '<unk>' index). The index sequence is then truncated or padded
    with the '<pad>' index so that it has exactly ``datelen`` entries.

    Returns a list of ``datelen`` one-hot vectors, each of length ``len(hdict)``.
    """
    x = x.lower().replace(',', '')
    unk = hdict['<unk>']
    # Truncate over-long input: without this, strings longer than datelen
    # yield ragged sequences that cannot be stacked into one array.
    xnum = [hdict.get(c, unk) for c in x[:datelen]]
    # Right-pad short input up to the fixed length.
    xnum.extend([hdict['<pad>']] * (datelen - len(xnum)))
    onehot = [to_categorical(i, num_classes=len(hdict)) for i in xnum]
    return onehot

def premac(x):
    """One-hot encode a machine-readable time string.

    Every character of ``x`` is looked up in ``mdict`` (a character that is
    not a key raises KeyError) and expanded to a one-hot vector of length
    ``len(mdict)``.

    Returns a list with one one-hot vector per character of ``x``.
    """
    n_classes = len(mdict)
    return [to_categorical(mdict[ch], num_classes=n_classes) for ch in x]

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 5% of the pairs for evaluation and report both split sizes.
# NOTE(review): no random_state is passed, so the split changes on every run.
train_data, test_data = train_test_split(data, test_size=0.05)
print(len(train_data), len(test_data))

47500 2500


In [6]:
import numpy as np

In [7]:
# Encode both splits: X from the human-readable side of each pair (prehum),
# y from the machine-readable side (premac).
trainX = np.array([prehum(human) for human, _ in train_data])
trainy = np.array([premac(machine) for _, machine in train_data])
testX = np.array([prehum(human) for human, _ in test_data])
testy = np.array([premac(machine) for _, machine in test_data])

In [8]:
# Input sequence length: must equal the padding length used by prehum(), so
# take it from datelen instead of repeating the literal.
xl = datelen
# Output sequence length: the model emits one output per target character, so
# read it from the encoded targets (11 for strings shaped like '09:00-99:99').
yl = trainy.shape[1]

In [9]:
def softmax(x, axis=1):
    """Softmax over ``axis`` for backend tensors with at least two dimensions.

    Two-dimensional tensors are handed to ``K.softmax(x)``; ``axis`` is not
    used in that branch. Tensors with more than two dimensions get an explicit
    exp / sum-of-exp along ``axis``.

    Raises ValueError when the tensor has fewer than two dimensions.
    """
    rank = K.ndim(x)
    if rank < 2:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
    if rank == 2:
        return K.softmax(x)
    # Subtract the per-axis maximum before exponentiating to keep exp() in range.
    shifted = x - K.max(x, axis=axis, keepdims=True)
    exps = K.exp(shifted)
    return exps / K.sum(exps, axis=axis, keepdims=True)

In [10]:
def attention(a, s_prev):
    """Build one attention step and return the resulting context tensor.

    a:      encoder output sequence, a 3-D tensor with time steps on axis 1.
    s_prev: previous decoder hidden state, a 2-D tensor.

    ``s_prev`` is repeated along the time axis and concatenated with ``a``.
    Two Dense layers score every time step, the scores are normalised with
    ``softmax`` over the time axis, and the weights are dotted with ``a``
    along axis 1.

    NOTE(review): every call instantiates fresh Dense layers, so each call has
    its own attention weights rather than sharing one set.

    Raises ValueError if the time dimension of ``a`` is not statically known.
    """
    # Take the number of encoder time steps from `a` itself instead of the
    # notebook-level `xl`, so the repeat length always matches the encoder
    # output that was actually passed in.
    n_steps = K.int_shape(a)[1]
    if n_steps is None:
        raise ValueError('attention() needs a fixed number of time steps in `a`')
    s_prev = RepeatVector(n_steps)(s_prev)
    concat = Concatenate(axis=-1)([a, s_prev])
    d1 = Dense(10, activation="tanh")(concat)
    d2 = Dense(1, activation="relu")(d1)
    # d2 is 3-D, so softmax() normalises over axis 1 (the time steps).
    alphas = Activation(softmax)(d2)
    return Dot(axes=1)([alphas, a])

In [11]:
def modeling(xl, yl, n_a, n_s, hvocab_size, mvocab_size):
    """Build the attention-based encoder/decoder model.

    xl:          length of the one-hot encoded input sequence.
    yl:          number of decoding steps; the model has one output per step.
    n_a:         units of the LSTM wrapped by the Bidirectional encoder.
    n_s:         units of the decoder LSTM, and the width of the s0 / c0 inputs.
    hvocab_size: width of each one-hot input vector.
    mvocab_size: number of classes of each per-step softmax output.

    Returns a Model with inputs [X, s0, c0] and a list of ``yl`` outputs.

    NOTE(review): a new LSTM and a new Dense layer are created at every
    decoding step, so the steps do not share weights.
    """
    X = Input(shape=(xl, hvocab_size))
    s0 = Input(shape=(n_s,))
    c0 = Input(shape=(n_s,))
    s = s0
    c = c0
    outputs = []
    a = Bidirectional(LSTM(units=n_a, return_sequences=True))(X)
    for _step in range(yl):
        context = attention(a, s)
        # With return_state=True the layer returns (output, hidden state, cell
        # state); the output is carried forward as the next step's `s`.
        s, _, c = LSTM(n_s, return_state=True)(inputs=context, initial_state=[s, c])
        # Size the output layer from the mvocab_size argument (previously
        # ignored in favour of the global mdict).
        out = Dense(mvocab_size, activation=softmax)(s)
        outputs.append(out)
    return Model(inputs=[X, s0, c0], outputs=outputs)

In [12]:
# Instantiate the network: 32 encoder units per direction, 64 decoder units,
# vocabulary sizes taken from the two lookup tables.
model = modeling(
    xl=xl,
    yl=yl,
    n_a=32,
    n_s=64,
    hvocab_size=len(hdict),
    mvocab_size=len(mdict),
)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 30, 26)       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 64)           0                                            
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 30, 64)       15104       input_1[0][0]                    
__________________________________________________________________________________________________
repeat_vector_1 (RepeatVector)  (None, 30, 64)       0           input_2[0][0]                    
____________________________________________________________________________________________

In [13]:
# RMSprop optimiser; every output head is trained with categorical
# cross-entropy and reports accuracy.
optimizer = RMSprop(
    lr=0.001,
    rho=0.9,
    epsilon=1e-08,
    decay=0.0,
)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [14]:
# Zero initial hidden / cell states, one row of width 64 per training example.
s0 = np.zeros(shape=(len(train_data), 64))
c0 = np.zeros_like(s0)
# The model has one output per target position, so split the targets into a
# list with one array per position (axis 1 of trainy).
y = [trainy[:, step] for step in range(trainy.shape[1])]

In [16]:
# Train for 20 epochs in mini-batches of 256 examples.
model.fit(x=[trainX, s0, c0], y=y, batch_size=256, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1e7929478c8>

In [17]:
# Zero initial hidden / cell states, one row of width 64 per test example.
s1 = np.zeros(shape=(len(test_data), 64))
c1 = np.zeros_like(s1)

In [18]:
# Split the test targets into one array per output position (axis 1 of testy),
# then score the held-out set.
valy = [testy[:, step] for step in range(testy.shape[1])]
preds = model.evaluate(x=[testX, s1, c1], y=valy)



In [19]:
# Print the evaluation results. The 23 values are the total loss, then the
# loss of each of the 11 outputs, then the accuracy of each of the 11 outputs.
print(preds)

[0.4785049032211304, 0.18562023956775664, 0.19876998839378357, 1.3291839134126347e-07, 0.00038066453519277276, 0.00029898052734788505, 1.5695148579197848e-07, 0.04456747570242735, 0.04886669226377708, 1.1940003935251296e-07, 2.003913172529792e-07, 2.537977285328452e-07, 0.896, 0.8808, 1.0, 1.0, 1.0, 1.0, 0.972, 0.9736, 1.0, 1.0, 1.0]


In [20]:
def getTime(s, model):
    """Translate a human-readable time string with a trained model.

    s:     human-readable time text, e.g. '7-8pm'.
    model: model taking [encoded text, initial hidden state, initial cell
           state] and returning one probability array per output character.

    Returns the decoded string, built by taking the most probable character
    of every output through ``inv_mdict``.
    """
    x = np.array([prehum(s)])
    # Size the zero initial states from the model's own state input rather
    # than hard-coding 64, so any decoder width works.
    n_s = K.int_shape(model.inputs[1])[-1]
    h_init = np.zeros((1, n_s))
    c_init = np.zeros((1, n_s))
    d = model.predict([x, h_init, c_init])
    # The batch holds a single example, so row 0 of each output carries the
    # probabilities; argmax on that row yields a scalar index directly.
    return "".join(inv_mdict[int(np.argmax(w[0]))] for w in d)

In [21]:
# Write the trained weights to time_model.h5.
# NOTE(review): presumably the architecture must be rebuilt with modeling() and the
# same arguments before these weights can be loaded back — confirm.
model.save_weights("time_model.h5")

In [23]:
# Smoke test: translate one hand-written example with the trained model.
print(getTime('7-8pm', model))

19:00-20:00


In [24]:
# Export both character -> index tables as UTF-8 text, one
# "<character>\t<index>" line per entry.
with open("time_h_dict.txt", "w", encoding='utf-8') as outputfile:
    outputfile.writelines(
        token + '\t' + str(index) + '\n' for token, index in hdict.items()
    )

with open("time_m_dict.txt", "w", encoding='utf-8') as outputfile:
    outputfile.writelines(
        token + '\t' + str(index) + '\n' for token, index in mdict.items()
    )