##Data

In [1]:
!pip install keras-crf
!pip install tensorflow==1.15.0 keras==2.2.4
!pip install sklearn_crfsuite
!pip install git+https://www.github.com/keras-team/keras-contrib.git

Collecting keras-crf
  Downloading https://files.pythonhosted.org/packages/64/a8/e8b11718872f1564787e27375b116288bf66abc2acb2bbdd36539f568238/keras_crf-0.2.0-py3-none-any.whl
Collecting seqeval
[?25l  Downloading https://files.pythonhosted.org/packages/9d/2d/233c79d5b4e5ab1dbf111242299153f3caddddbb691219f363ad55ce783d/seqeval-1.2.2.tar.gz (43kB)
[K     |████████████████████████████████| 51kB 4.7MB/s 
[?25hCollecting tensorflow-addons
[?25l  Downloading https://files.pythonhosted.org/packages/66/4b/e893d194e626c24b3df2253066aa418f46a432fdb68250cde14bf9bb0700/tensorflow_addons-0.13.0-cp37-cp37m-manylinux2010_x86_64.whl (679kB)
[K     |████████████████████████████████| 686kB 18.9MB/s 
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-cp37-none-any.whl size=16172 sha256=e13007e6ee12b8127c135e95fa66dfe547c5ef26eceef8f5b41ef54e57363f2e
  Stored in directory: /root/.cache/pip/wh

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import *
from keras.optimizers import Adam
from sklearn_crfsuite import metrics
from keras_contrib.layers import CRF
from keras.utils import to_categorical
from keras_contrib.losses import crf_loss
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras_contrib.metrics import crf_viterbi_accuracy, crf_marginal_accuracy
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional, Input

Using TensorFlow backend.


In [3]:
!gdown --id 1L6dd0FnYqgn-eoQ-gFnBiNji7R1ul9n_
!gdown --id 1-5mE9XjocmyCKGlkpW1YGuCnNsGwQioD
!gdown --id 1d4er4I7x4VIwy7BsWFpsPuC2z6aZ3P6s

Downloading...
From: https://drive.google.com/uc?id=1L6dd0FnYqgn-eoQ-gFnBiNji7R1ul9n_
To: /content/NER_RFQ_agg.csv
4.39MB [00:00, 29.0MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-5mE9XjocmyCKGlkpW1YGuCnNsGwQioD
To: /content/NER_RFQ_agg_train.csv
3.48MB [00:00, 112MB/s]
Downloading...
From: https://drive.google.com/uc?id=1d4er4I7x4VIwy7BsWFpsPuC2z6aZ3P6s
To: /content/NER_RFQ_agg_test.csv
100% 909k/909k [00:00<00:00, 60.3MB/s]


In [4]:
df = pd.read_csv('NER_RFQ_agg.csv', converters={'tokens': eval, 'ner_tags': eval})
train = pd.read_csv('NER_RFQ_agg_train.csv', converters={'tokens': eval, 'ner_tags': eval})
test = pd.read_csv('NER_RFQ_agg_test.csv', converters={'tokens': eval, 'ner_tags': eval})

all_dfs = [df, train, test]

In [5]:
all_tags = set()
word_to_ix = {}
for _, row in df.iterrows():
    for tag in row.ner_tags:
        all_tags.add(tag)
    for word in row.tokens:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
all_tags = sorted(list(all_tags))
tag_to_idx = {t: i for i, t in enumerate(all_tags)}
label_list = {i: t for i, t in enumerate(all_tags)}
labels = list(tag_to_idx.keys())
labels.remove('O')
labels = sorted(labels, key=lambda name: (name[1:], name[0]))

In [6]:
def find_token(x):
    res = []
    for w in x['tokens']:
        if w in word_to_ix.keys():
            res.append(word_to_ix[w])
        else:
            res.append(0)
    return res

for d in all_dfs:
    d['encoded_ner_tags'] = d.apply(lambda x: [tag_to_idx[t] for t in x['ner_tags']], axis=1)
    d['int_tokens'] = d.apply(find_token, axis=1)

In [7]:
df

Unnamed: 0,id,tokens,ner_tags,encoded_ner_tags,int_tokens
0,119287R.msg,"[name, 119287r.msg, <aogdesk@nordstar.ru>, <mv...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9, 11, 12, ..."
1,119735R.msg,"[name, 119735r.msg, <aogdesk@nordstar.ru>, <mv...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[0, 72, 2, 3, 4, 5, 6, 73, 74, 75, 8, 9, 10, 9..."
2,120421R.msg,"[name, 120421r.msg, d.klebcha@s7.ru, <d.klebch...","[O, O, O, O, O, O, O, O, B-GoodsString, I-Good...","[2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 2, 2, 2, ...","[0, 90, 91, 92, 93, 94, 5, 95, 96, 97, 98, 8, ..."
3,120660R.msg,"[name, 120660r.msg, yuliya, a., kondratova, <y...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[0, 134, 135, 136, 137, 138, 70, 5, 95, 139, 1..."
4,120660R.msg,"[name, 120660r.msg, yuliya, a., kondratova, <y...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[0, 134, 135, 136, 137, 138, 70, 5, 95, 139, 1..."
...,...,...,...,...,...
1187,123527R.msg,"[name, 123527r.msg, =?utf-8?b?0kprincw0lrqvtcy...","[O, O, O, O, O, O, O, O, B-GoodsString, O, O, ...","[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, ...","[0, 12334, 1662, 1663, 1664, 70, 5, 1665, 1233..."
1188,123088R.msg,"[name, 123088r.msg, lakshmi, suresh, <lakshmi@...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[0, 8701, 8702, 8703, 8704, 94, 5, 439, 8705, ..."
1189,123508R.msg,"[name, 123508r.msg, <aogdesk@nordstar.ru>, <mv...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[0, 9930, 2, 3, 4, 5, 6, 9931, 8, 9, 10, 9, 11..."
1190,124894R.msg,"[name, 124894r.msg, <anton.peshko@utair.ru>, a...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...","[0, 12341, 1557, 382, 1558, 1557, 5, 271, 1234..."


In [8]:
def focal_loss(gamma=2., alpha=4.):

    gamma = float(gamma)
    alpha = float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Focal loss for multi-classification
        FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t)
        Notice: y_pred is probability after softmax
        gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper
        d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x)
        Focal Loss for Dense Object Detection
        https://arxiv.org/abs/1708.02002

        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls]

        Keyword Arguments:
            gamma {float} -- (default: {2.0})
            alpha {float} -- (default: {4.0})

        Returns:
            [tensor] -- loss.
        """
        epsilon = 1.e-9
        y_true = tf.convert_to_tensor(y_true, tf.float32)
        y_pred = tf.convert_to_tensor(y_pred, tf.float32)

        model_out = tf.add(y_pred, epsilon)
        ce = tf.multiply(y_true, -tf.log(model_out))
        weight = tf.multiply(y_true, tf.pow(tf.subtract(1., model_out), gamma))
        fl = tf.multiply(alpha, tf.multiply(weight, ce))
        reduced_fl = tf.reduce_max(fl, axis=1)
        return tf.reduce_mean(reduced_fl)
    return focal_loss_fixed

##CRF

In [9]:
MAX_WORDS = len(word_to_ix)
EMBEDDING_LENGTH = 500
MAX_SEQUENCE_LENGTH = np.max(df['int_tokens'].apply(len))
HIDDEN_SIZE = 100

model = Sequential()
model.add(Embedding(MAX_WORDS, EMBEDDING_LENGTH, input_length=MAX_SEQUENCE_LENGTH))
# model.add(LSTM(HIDDEN_SIZE, return_sequences=True))
# model.add(Dense(50))
model.add(TimeDistributed(Dense(len(label_list), activation="relu")))
crf = CRF(len(label_list), learn_mode='marginal')
model.add(crf)

model.compile(Adam(lr=0.005), loss=focal_loss(), metrics=[crf_marginal_accuracy])
model.summary()




Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 3004, 500)         6180500   
_________________________________________________________________
time_distributed_1 (TimeDist (None, 3004, 3)           1503      
_________________________________________________________________
crf_1 (CRF)                  (None, 3004, 3)           27        
Total params: 6,182,030
Trainable params: 6,182,030
Non-trainable params: 0
_________________________________________________________________


In [10]:
callbacks = [EarlyStopping(monitor='val_crf_marginal_accuracy', patience=4),
         ModelCheckpoint(filepath='best_model.h5', monitor='val_crf_marginal_accuracy', save_best_only=True)]

x_train = pad_sequences(train['int_tokens'], padding='post', maxlen=MAX_SEQUENCE_LENGTH)
y_train = pad_sequences(train['encoded_ner_tags'], padding='post', maxlen=MAX_SEQUENCE_LENGTH, value=tag_to_idx['O'])
y_train = to_categorical(y_train, num_classes=len(label_list))
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=50,
    batch_size=128,
    callbacks=callbacks,
    validation_split=0.1,
)

model.load_weights('best_model.h5')





Train on 857 samples, validate on 96 samples
Epoch 1/50





Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50


In [11]:
preds = model.predict(pad_sequences(test['int_tokens'], padding='post', maxlen=MAX_SEQUENCE_LENGTH))
tag_preds = [np.argmax(pred, axis=1) for pred in preds]
truncated_preds = []
for i in range(len(tag_preds)):
    truncated_preds.append(tag_preds[i][:len(test['encoded_ner_tags'].iloc[i])])
named_preds = []
for i in truncated_preds:
    named_preds.append([])
    for j in i:
        named_preds[-1].append(label_list[j])
print(metrics.flat_classification_report(test['ner_tags'], named_preds, digits=4, labels=labels))

               precision    recall  f1-score   support

B-GoodsString     0.8277    0.5762    0.6794      1109
I-GoodsString     0.7722    0.6414    0.7008      1718

    micro avg     0.7917    0.6158    0.6928      2827
    macro avg     0.8000    0.6088    0.6901      2827
 weighted avg     0.7940    0.6158    0.6924      2827



##LSTM

In [None]:
MAX_WORDS = len(word_to_ix)
EMBEDDING_LENGTH = 500
MAX_SEQUENCE_LENGTH = np.max(df['int_tokens'].apply(len))
HIDDEN_SIZE = 100

model = Sequential()
model.add(Embedding(MAX_WORDS, EMBEDDING_LENGTH, input_length=MAX_SEQUENCE_LENGTH))
model.add(LSTM(HIDDEN_SIZE, return_sequences=True))
# model.add(Dense(50))
model.add(TimeDistributed(Dense(len(label_list), activation="softmax")))

model.compile(Adam(lr=0.005), loss=focal_loss(), metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 3004, 500)         6180500   
_________________________________________________________________
lstm_2 (LSTM)                (None, 3004, 100)         240400    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 3004, 3)           303       
Total params: 6,421,203
Trainable params: 6,421,203
Non-trainable params: 0
_________________________________________________________________


In [None]:
callbacks = [EarlyStopping(monitor='val_acc', patience=4),
         ModelCheckpoint(filepath='best_model.h5', monitor='val_acc', save_best_only=True)]

x_train = pad_sequences(train['int_tokens'], padding='post', maxlen=MAX_SEQUENCE_LENGTH)
y_train = pad_sequences(train['encoded_ner_tags'], padding='post', maxlen=MAX_SEQUENCE_LENGTH, value=tag_to_idx['O'])
y_train = to_categorical(y_train, num_classes=len(label_list))
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=50,
    batch_size=128,
    callbacks=callbacks,
    validation_split=0.1,
)

model.load_weights('best_model.h5')

Train on 857 samples, validate on 96 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50


In [None]:
preds = model.predict(pad_sequences(test['int_tokens'], padding='post', maxlen=MAX_SEQUENCE_LENGTH))
tag_preds = [np.argmax(pred, axis=1) for pred in preds]
truncated_preds = []
for i in range(len(tag_preds)):
    truncated_preds.append(tag_preds[i][:len(test['encoded_ner_tags'].iloc[i])])
named_preds = []
for i in truncated_preds:
    named_preds.append([])
    for j in i:
        named_preds[-1].append(label_list[j])
print(metrics.flat_classification_report(test['ner_tags'], named_preds, digits=4, labels=labels))

               precision    recall  f1-score   support

B-GoodsString     0.9015    0.8332    0.8660      1109
I-GoodsString     0.9197    0.7998    0.8555      1718

    micro avg     0.9123    0.8129    0.8597      2827
    macro avg     0.9106    0.8165    0.8608      2827
 weighted avg     0.9125    0.8129    0.8596      2827



##BiLSTM

In [None]:
MAX_WORDS = len(word_to_ix)
EMBEDDING_LENGTH = 500
MAX_SEQUENCE_LENGTH = np.max(df['int_tokens'].apply(len))
HIDDEN_SIZE = 100

model = Sequential()
model.add(Embedding(MAX_WORDS, EMBEDDING_LENGTH, input_length=MAX_SEQUENCE_LENGTH))
model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True)))
# model.add(Dense(50))
model.add(TimeDistributed(Dense(len(label_list), activation="softmax")))

model.compile(Adam(lr=0.005), loss=focal_loss(), metrics=['accuracy'])
model.summary()





_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 3004, 500)         6180500   
_________________________________________________________________
bidirectional_1 (Bidirection (None, 3004, 200)         480800    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 3004, 3)           603       
Total params: 6,661,903
Trainable params: 6,661,903
Non-trainable params: 0
_________________________________________________________________


In [None]:
callbacks = [EarlyStopping(monitor='val_acc', patience=4),
         ModelCheckpoint(filepath='best_model.h5', monitor='val_acc', save_best_only=True)]

x_train = pad_sequences(train['int_tokens'], padding='post', maxlen=MAX_SEQUENCE_LENGTH)
y_train = pad_sequences(train['encoded_ner_tags'], padding='post', maxlen=MAX_SEQUENCE_LENGTH, value=tag_to_idx['O'])
y_train = to_categorical(y_train, num_classes=len(label_list))
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=50,
    batch_size=128,
    callbacks=callbacks,
    validation_split=0.1,
)

model.load_weights('best_model.h5')

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 857 samples, validate on 96 samples
Epoch 1/50





Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50


In [None]:
preds = model.predict(pad_sequences(test['int_tokens'], padding='post', maxlen=MAX_SEQUENCE_LENGTH))
tag_preds = [np.argmax(pred, axis=1) for pred in preds]
truncated_preds = []
for i in range(len(tag_preds)):
    truncated_preds.append(tag_preds[i][:len(test['encoded_ner_tags'].iloc[i])])
named_preds = []
for i in truncated_preds:
    named_preds.append([])
    for j in i:
        named_preds[-1].append(label_list[j])
print(metrics.flat_classification_report(test['ner_tags'], named_preds, digits=4, labels=labels))

               precision    recall  f1-score   support

B-GoodsString     0.9177    0.8449    0.8798      1109
I-GoodsString     0.9289    0.8440    0.8844      1718

    micro avg     0.9245    0.8444    0.8826      2827
    macro avg     0.9233    0.8445    0.8821      2827
 weighted avg     0.9245    0.8444    0.8826      2827



##LSTM CRF

In [None]:
MAX_WORDS = len(word_to_ix)
EMBEDDING_LENGTH = 500
MAX_SEQUENCE_LENGTH = np.max(df['int_tokens'].apply(len))
HIDDEN_SIZE = 100

model = Sequential()
model.add(Embedding(MAX_WORDS, EMBEDDING_LENGTH, input_length=MAX_SEQUENCE_LENGTH))
model.add(LSTM(HIDDEN_SIZE, return_sequences=True))
# model.add(Dense(50))
model.add(TimeDistributed(Dense(len(label_list), activation="relu")))
crf = CRF(len(label_list), learn_mode='marginal')
model.add(crf)

model.compile(Adam(lr=0.005), loss=focal_loss(), metrics=[crf_marginal_accuracy])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, 3004, 500)         6180500   
_________________________________________________________________
lstm_6 (LSTM)                (None, 3004, 100)         240400    
_________________________________________________________________
time_distributed_6 (TimeDist (None, 3004, 3)           303       
_________________________________________________________________
crf_4 (CRF)                  (None, 3004, 3)           27        
Total params: 6,421,230
Trainable params: 6,421,230
Non-trainable params: 0
_________________________________________________________________


In [None]:
callbacks = [EarlyStopping(monitor='val_crf_marginal_accuracy', patience=4),
         ModelCheckpoint(filepath='best_model.h5', monitor='val_crf_marginal_accuracy', save_best_only=True)]

x_train = pad_sequences(train['int_tokens'], padding='post', maxlen=MAX_SEQUENCE_LENGTH)
y_train = pad_sequences(train['encoded_ner_tags'], padding='post', maxlen=MAX_SEQUENCE_LENGTH, value=tag_to_idx['O'])
y_train = to_categorical(y_train, num_classes=len(label_list))
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=50,
    batch_size=128,
    callbacks=callbacks,
    validation_split=0.1,
)

model.load_weights('best_model.h5')

Train on 857 samples, validate on 96 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50


In [None]:
preds = model.predict(pad_sequences(test['int_tokens'], padding='post', maxlen=MAX_SEQUENCE_LENGTH))
tag_preds = [np.argmax(pred, axis=1) for pred in preds]
truncated_preds = []
for i in range(len(tag_preds)):
    truncated_preds.append(tag_preds[i][:len(test['encoded_ner_tags'].iloc[i])])
named_preds = []
for i in truncated_preds:
    named_preds.append([])
    for j in i:
        named_preds[-1].append(label_list[j])
print(metrics.flat_classification_report(test['ner_tags'], named_preds, digits=4, labels=labels))

               precision    recall  f1-score   support

B-GoodsString     0.9048    0.8395    0.8709      1109
I-GoodsString     0.9324    0.7625    0.8389      1718

    micro avg     0.9207    0.7927    0.8519      2827
    macro avg     0.9186    0.8010    0.8549      2827
 weighted avg     0.9215    0.7927    0.8515      2827



##BiLSTM CRF

In [None]:
MAX_WORDS = len(word_to_ix)
EMBEDDING_LENGTH = 500
MAX_SEQUENCE_LENGTH = np.max(df['int_tokens'].apply(len))
HIDDEN_SIZE = 100

model = Sequential()
model.add(Embedding(MAX_WORDS, EMBEDDING_LENGTH, input_length=MAX_SEQUENCE_LENGTH))
model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True)))
# model.add(Dense(50))
model.add(TimeDistributed(Dense(len(label_list), activation="relu")))
crf = CRF(len(label_list), learn_mode='marginal')
model.add(crf)

model.compile(Adam(lr=0.005), loss=focal_loss(), metrics=[crf_marginal_accuracy])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (None, 3004, 500)         6180500   
_________________________________________________________________
bidirectional_3 (Bidirection (None, 3004, 200)         480800    
_________________________________________________________________
time_distributed_7 (TimeDist (None, 3004, 3)           603       
_________________________________________________________________
crf_5 (CRF)                  (None, 3004, 3)           27        
Total params: 6,661,930
Trainable params: 6,661,930
Non-trainable params: 0
_________________________________________________________________


In [None]:
callbacks = [EarlyStopping(monitor='val_crf_marginal_accuracy', patience=4),
         ModelCheckpoint(filepath='best_model.h5', monitor='val_crf_marginal_accuracy', save_best_only=True)]

x_train = pad_sequences(train['int_tokens'], padding='post', maxlen=MAX_SEQUENCE_LENGTH)
y_train = pad_sequences(train['encoded_ner_tags'], padding='post', maxlen=MAX_SEQUENCE_LENGTH, value=tag_to_idx['O'])
y_train = to_categorical(y_train, num_classes=len(label_list))
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=50,
    batch_size=128,
    callbacks=callbacks,
    validation_split=0.1,
)

model.load_weights('best_model.h5')

Train on 857 samples, validate on 96 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50


In [None]:
preds = model.predict(pad_sequences(test['int_tokens'], padding='post', maxlen=MAX_SEQUENCE_LENGTH))
tag_preds = [np.argmax(pred, axis=1) for pred in preds]
truncated_preds = []
for i in range(len(tag_preds)):
    truncated_preds.append(tag_preds[i][:len(test['encoded_ner_tags'].iloc[i])])
named_preds = []
for i in truncated_preds:
    named_preds.append([])
    for j in i:
        named_preds[-1].append(label_list[j])
print(metrics.flat_classification_report(test['ner_tags'], named_preds, digits=4, labels=labels))

               precision    recall  f1-score   support

B-GoodsString     0.8891    0.8097    0.8476      1109
I-GoodsString     0.9220    0.8190    0.8674      1718

    micro avg     0.9089    0.8154    0.8596      2827
    macro avg     0.9056    0.8144    0.8575      2827
 weighted avg     0.9091    0.8154    0.8596      2827

