In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import gc
from gru_crf import BiGRUCRF
# 使用GPU
gpus = tf.config.experimental.list_physical_devices(device_type="GPU")
tf.config.experimental.set_visible_devices(devices=gpus[0], device_type="GPU")

 The versions of TensorFlow you are currently using is 2.5.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
# 加载数据
X = []
feaX = []
y = []

def data_read(line:str):
    lx, lfea, ly = eval(line)
    X.append(lx)
    feaX.append(lfea)
    y.append(ly)

In [3]:
with open("label_old_only_work_project.txt", encoding='utf-8') as f:
    for line in f.readlines():
        data_read(line)

In [4]:
X_new = []
feaX_new = []
y_new = []
for i in range(len(y)):
    if 'project-B' in y[i] or 'work-B' in y[i]:
        X_new.append([X[i][j] for j in range(len(y[i])) if y[i][j] != 'O'])
        feaX_new.append([feaX[i][j] for j in range(len(y[i])) if y[i][j] != 'O'])
        y_new.append([y[i][j] for j in range(len(y[i])) if y[i][j] != 'O'])

In [5]:
# 构建数据标签字典
tag2id = set()
for i in y_new:
    tag2id.update(i)
tag2id = list(tag2id)
tag2id.sort()
tag2id = {k:v for v, k in enumerate(tag2id)}
# 添加特征字符标签
# tag2id['<start>'] = len(tag2id)
tag2id['<PAD>'] = len(tag2id)

# label encoding
y_new = [[tag2id[token] for token in sequence] for sequence in y_new]

tag2id #, y[0]

{'project-B': 0, 'project-I': 1, 'work-B': 2, 'work-I': 3, '<PAD>': 4}

In [6]:
# seqence padding
max_len = max([len(i) for i in y_new])
fea_dim = len(feaX_new[0][0])

feaX_new = np.array(
    [i + [[0] * fea_dim] * (max_len - len(i)) for i in feaX_new]
)
y_new = np.array(
    [i + [tag2id['<PAD>']] * (max_len - len(i)) for i in y_new]
)

feaX_new.shape, y_new.shape

((891, 612, 24), (891, 612))

In [31]:
# feaX_new

In [34]:
# 数据集生成
def data_generater(X, y, batch_size, tag2id, is_mask=True):
    sample_num = X.shape[0]
    data = []
    if is_mask:
        for i in range(int(sample_num/batch_size) + 1):
            if i * batch_size < sample_num:
                data.append([
                    tf.constant(X[i * batch_size : (i + 1) * batch_size])
                    , tf.constant(y[i * batch_size : (i + 1) * batch_size])
                    , tf.constant(np.where(y[i * batch_size : (i + 1) * batch_size] == tag2id['<PAD>'], 0., 1.))
                ])
    else:
        for i in range(int(sample_num/batch_size) + 1):
            if i * batch_size < sample_num:
                data.append([
                    tf.constant(X[i * batch_size : (i + 1) * batch_size])
                    , tf.constant(y[i * batch_size : (i + 1) * batch_size])
                ])
    return data

In [35]:
batch_size = 40
train_data = data_generater(feaX_new, y_new, batch_size, tag2id)

In [37]:
np.shape(train_data)

  result = asarray(a).shape


(23, 3)

In [32]:
from gru_crf import *
model = BiGRUCRF(len(tag2id) - 1)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=[tf.keras.metrics.AUC(),
                       tf.keras.metrics.CategoricalAccuracy(),
                       tf.keras.metrics.Precision(),
                       tf.keras.metrics.Recall()])

In [33]:
model.fit(x = feaX_new, 
          y = y_new,
          batch_size = 40,
          epochs = 300,
          validation_split = 0.2,
          validation_freq = 20
         )

Epoch 1/300


OperatorNotAllowedInGraphError: in user code:

    C:\Users\86183\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py:855 train_function  *
        return step_function(self, iterator)
    D:\common_tools\common_tools\nlp\model\gru_crf_model\gru_crf.py:34 call  *
        X, mask = inputs
    C:\Users\86183\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\ops.py:520 __iter__
        self._disallow_iteration()
    C:\Users\86183\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\ops.py:513 _disallow_iteration
        self._disallow_when_autograph_enabled("iterating over `tf.Tensor`")
    C:\Users\86183\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\ops.py:489 _disallow_when_autograph_enabled
        raise errors.OperatorNotAllowedInGraphError(

    OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.


In [None]:
model.summary()

In [7]:
# !pip install pytorch

In [8]:
import torch 
from torchcrf import CRF
model = CRF(num_tags, batch_first=True)

ModuleNotFoundError: No module named 'torch'

In [3]:
import tensorflow as tf
from tf2crf import CRF
from tensorflow.keras.layers import Input, Embedding, Bidirectional, GRU, Dense
from tensorflow.keras.models import Model
from tf2crf import CRF, ModelWithCRFLoss

In [4]:
inputs = Input(shape=(None,), dtype='int32')
output = Embedding(100, 40, trainable=True, mask_zero=True)(inputs)
output = Bidirectional(GRU(64, return_sequences=True))(output)
crf = CRF(units=9, type='float32')
output = crf(output)
base_model = Model(inputs, output)
model = ModelWithCRFLoss(base_model, sparse_target=True)
model.compile(optimizer='adam')

NotImplementedError: Cannot convert a symbolic Tensor (bidirectional_1/forward_gru_1/strided_slice:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported

In [5]:
import tensorflow as tf
from tf2crf import CRF
from tensorflow.keras.layers import Input, Embedding, Bidirectional, GRU, Dense
from tensorflow.keras.models import Model
from tf2crf import CRF, ModelWithCRFLoss

inputs = Input(shape=(None,), dtype='int32')
output = Embedding(100, 40, trainable=True, mask_zero=True)(inputs)
output = Bidirectional(GRU(64, return_sequences=True))(output)
crf = CRF(units=9, dtype='float32')
output = crf(output)
base_model = Model(inputs, output)
model = ModelWithCRFLoss(base_model, sparse_target=True)
model.compile(optimizer='adam')

x = [[5, 2, 3] * 3] * 10
y = [[1, 2, 3] * 3] * 10

model.fit(x=x, y=y, epochs=2, batch_size=2)
model.save('tests/1')


NotImplementedError: Cannot convert a symbolic Tensor (bidirectional_2/forward_gru_2/strided_slice:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported