In [18]:
# Keras==1.0.6
import pickle 
import numpy as np 

from sklearn.cross_validation import train_test_split
# from lambdawithmask import Lambda as MaskLambda
from sklearn.metrics import confusion_matrix, accuracy_score

from keras.models import Sequential
from keras.layers.recurrent import LSTM
from keras.layers.core import Activation, Dense
from keras.preprocessing.sequence import pad_sequences
from keras.layers.embeddings import Embedding
from keras.layers.wrappers import TimeDistributed, Bidirectional
from keras.layers import Merge

from keras.backend import tf

## Load the data

In [5]:
with open('conll.pkl', 'rb') as f:
    data = pickle.load(f)

In [7]:
X = data['X']
y = data['y']
word2ind = data['word2ind']
ind2word = data['ind2word']
label2ind = data['label2ind']
ind2label = data['ind2label']

In [8]:
print(len(data))
print(len(X))

6
3640


In [12]:
maxlen = max([len(x) for x in X])
print('Maximum sequence length:', maxlen)

Maximum sequence length: 63


In [10]:
def encode(x, n):
    result = np.zeros(n)
    result[x] = 1
    return result

In [13]:
# problem's here 

X_enc = [[word2ind[c] for c in x] for x in X]
X_enc_reverse = [[c for c in reversed(x)] for x in X_enc]
max_label = max(label2ind.values()) + 1
y_enc = [[0] * (maxlen - len(ey)) + [label2ind[c] for c in ey] for ey in y]
y_enc = [[encode(c, max_label) for c in ey] for ey in y_enc]

X_enc_f = pad_sequences(X_enc, maxlen=maxlen)
X_enc_b = pad_sequences(X_enc_reverse, maxlen=maxlen)
y_enc = pad_sequences(y_enc, maxlen=maxlen)

(X_train_f, X_test_f, X_train_b,
 X_test_b, y_train, y_test) = train_test_split(X_enc_f, X_enc_b, y_enc,
                                               test_size=11*32, train_size=45*32, random_state=42)
print('Training and testing tensor shapes:')
print(X_train_f.shape, X_test_f.shape, X_train_b.shape, X_test_b.shape, y_train.shape, y_test.shape)

Training and testing tensor shapes:
(1440, 63) (352, 63) (1440, 63) (352, 63) (1440, 63, 6) (352, 63, 6)


## Build the model 

In [14]:
max_features = len(word2ind)
embedding_size = 128
hidden_size = 32
out_size = len(label2ind) + 1

In [15]:
def reverse_func(x, mask=None):
    return tf.reverse(x, [False, True, False])

In [19]:
model = Sequential()
model.add(Embedding(input_dim=max_features, output_dim=embedding_size,
                    input_length=maxlen, mask_zero=True))
model.add(Bidirectional(LSTM(hidden_size, return_sequences=True)))
model.add(TimeDistributed(Dense(out_size)))
model.add(Activation('softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 63, 128)           1060480   
_________________________________________________________________
bidirectional_2 (Bidirection (None, 63, 64)            41216     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 63, 6)             390       
_________________________________________________________________
activation_1 (Activation)    (None, 63, 6)             0         
Total params: 1,102,086.0
Trainable params: 1,102,086.0
Non-trainable params: 0.0
_________________________________________________________________


## Train the model 

In [21]:
model.compile(loss='categorical_crossentropy', optimizer='adam')

batch_size = 32
model.fit(X_train_f, y_train, batch_size=batch_size, nb_epoch=40,
          validation_data=([X_test_f, X_test_b], y_test))
score = model.evaluate([X_test_f, X_test_b], y_test, batch_size=batch_size)
print('Raw test score:', score)



ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 arrays but instead got the following list of 2 arrays: [array([[   0,    0,    0, ..., 4151, 5462, 2666],
       [   0,    0,    0, ..., 3925,  748, 8024],
       [   0,    0,    0, ..., 8234,  448, 8024],
       ..., 
       [   0,    0,    0, ..., 3925,...

In [None]:
def score(yh, pr):
    coords = [np.where(yhh > 0)[0][0] for yhh in yh]
    yh = [yhh[co:] for yhh, co in zip(yh, coords)]
    ypr = [prr[co:] for prr, co in zip(pr, coords)]
    fyh = [c for row in yh for c in row]
    fpr = [c for row in ypr for c in row]
    return fyh, fpr

pr = model.predict_classes([X_train_f, X_train_b])
yh = y_train.argmax(2)
fyh, fpr = score(yh, pr)
print('Training accuracy:', accuracy_score(fyh, fpr))
print('Training confusion matrix:')
print(confusion_matrix(fyh, fpr))

pr = model.predict_classes([X_test_f, X_test_b])
yh = y_test.argmax(2)
fyh, fpr = score(yh, pr)
print('Testing accuracy:', accuracy_score(fyh, fpr))
print('Testing confusion matrix:')
print(confusion_matrix(fyh, fpr))