In [1]:
import tensorflow as tf

In [7]:
# Fixed vocabulary handed directly to the lookup layer.
vocab = ["a", "b", "c", "d"]
layer = tf.keras.layers.StringLookup(vocabulary=vocab)

# Two rows of string tokens; "z" is not part of `vocab`.
data = tf.constant([
    ["a", "c", "d"],
    ["d", "z", "b"],
])

# Calling the layer maps every token to its integer index.
vectorized_data = layer(data)
print(vectorized_data)
# The layer's own vocabulary listing, for comparison with the indices above.
print(layer.get_vocabulary())

tf.Tensor(
[[1 3 4]
 [4 0 2]], shape=(2, 3), dtype=int64)
['[UNK]', 'a', 'b', 'c', 'd']


In [9]:
text_dataset = tf.data.Dataset.from_tensor_slices(["foo", "bar", "baz"])
max_features = 5000  # Maximum vocab size.
max_len = 4  # Sequence length to pad the outputs to.

# Create the layer.
vectorize_layer = tf.keras.layers.TextVectorization(
    max_tokens=max_features,
    output_mode='int',
    output_sequence_length=max_len)

# Now that the vocab layer has been created, call `adapt` on the
# text-only dataset to create the vocabulary. You don't have to batch,
# but for large datasets this means we're not keeping spare copies of
# the dataset.
vectorize_layer.adapt(text_dataset.batch(64))

# Create the model that uses the vectorize text layer.
model = tf.keras.models.Sequential()

# Start by creating an explicit input layer. It needs to have a shape of
# (1,) (so that every sample carries exactly one string), and the dtype
# needs to be 'string'.
model.add(tf.keras.Input(shape=(1,), dtype=tf.string))

# The first layer in our model is the vectorization layer. After this
# layer, we have a tensor of shape (batch_size, max_len) containing
# vocab indices.
# The layer must be added exactly once: adding the same instance a second
# time would stack it on its own integer output instead of on the strings.
model.add(vectorize_layer)

# Now, the model can map strings to integers, and you can add an
# embedding layer to map these integers to learned embeddings.
input_data = [["foo qux bar"], ["qux baz"]]
model.predict(input_data)
print(vectorize_layer.get_vocabulary())

['', '[UNK]', 'foo', 'baz', 'bar']


In [1]:
import os
import json
import tensorflow as tf
import numpy as np

In [7]:
def load_vocabs(save_dir, filename='vocabs.json'):
    """Load the text and label token lists from a JSON vocabulary file.

    Args:
        save_dir: Directory that contains the vocabulary file.
        filename: Name of the JSON file inside ``save_dir``.

    Returns:
        A ``(text_vocab, label_vocab)`` tuple holding the values stored under
        ``["word_vocab"]["idx_to_token"]`` and ``["tag_vocab"]["idx_to_token"]``.
    """
    vocab_path = os.path.join(save_dir, filename)
    with open(vocab_path, 'r', encoding="utf-8") as handle:
        payload = json.load(handle)
    # The JSON is fully parsed at this point, so the lookups need no open file.
    words = payload["word_vocab"]["idx_to_token"]
    tags = payload["tag_vocab"]["idx_to_token"]
    return words, tags
# Directory holding the saved vocabulary JSON.
save_dir = "../data/tianchi/"
text_vocab, label_vocab = load_vocabs(save_dir, filename='vocabs.json')

# One vectorizer for the input text and one for the labels. Each receives
# its vocabulary up front, so no `adapt` call is needed, and
# `standardize=None` leaves the incoming strings untouched.
vectorize_layer = tf.keras.layers.TextVectorization(
    vocabulary=text_vocab,
    standardize=None,
    output_mode='int',
)
vectorize_layer_2 = tf.keras.layers.TextVectorization(
    vocabulary=label_vocab,
    standardize=None,
    output_mode='int',
)

# Show the vocabulary each layer ended up with; besides the supplied
# tokens it includes the padding token ('') and the OOV token ('[UNK]').
print(vectorize_layer.get_vocabulary())
print(vectorize_layer_2.get_vocabulary())

['', '[UNK]', '浙', '江', '杭', '州', '市', '干', '区', '九', '堡', '镇', '三', '村', '一', '省', '温', '平', '阳', '县', '海', '西', '宋', '埠', '公', '园', '南', '路', '0', '号', '余', '姚', '模', '具', '城', '金', '型', '_', '样', '红', 'A', '打', '印', '白', '杨', '街', '道', '下', '沙', '开', '发', '世', '茂', '滨', '花', '峻', '景', '湾', '幢', '秋', '菱', '兰', '溪', '立', '达', '框', '业', '有', '限', '司', '湖', '中', '环', '和', '交', '叉', '口', '嘉', '兴', '乡', '规', '划', '建', '设', '管', '理', '委', '员', '会', '关', '士', '伯', '大', '龙', '泉', '组', '团', '物', '处', '火', '车', '站', '旧', '家', '属', '石', '转', '单', '身', '宿', '舍', '对', '面', '安', '宜', '田', '百', '合', '栋', '义', '乌', '长', '春', '二', '楼', '教', '务', '科', '华', '永', '康', '方', '岩', '文', '万', '里', '绍', '袍', '工', '纪', '跟', '汤', '苏', '泊', '尔', '厂', '房', '仓', '库', '上', '徐', '汇', '宛', '弄', '佳', '寓', '港', '清', '远', '-', '丹', '临', '山', '当', '色', '宁', '波', '东', '丁', '浦', '周', '邓', '振', '莱', '斜', '回', '巴', '黎', '附', '近', '武', '太', '洋', '国', '际', '货', '运', '代', '徽', '宣', '广', '德', '巨', '朝', '办', '元', '桐', '门', '育', '

In [8]:
# Report the vocabulary size of the text layer, then of the label layer,
# one value per line.
print(
    vectorize_layer.vocabulary_size(),
    vectorize_layer_2.vocabulary_size(),
    sep="\n",
)

2438
59


In [9]:
# Each sample is a single whitespace-joined string; the trailing axis added
# by `expand_dims` turns the batch into shape (batch, 1).
text = tf.expand_dims(
    tf.constant([
        " ".join(['als', '浙', '江', '杭', "skdk", '区', '九']),
        " ".join(['九', '堡', '镇', '三', '村']),
    ]),
    axis=-1,
)
label = tf.expand_dims(
    tf.constant([
        " ".join(['I', '']),
        " ".join(['I-poi', 'S-poi']),
    ]),
    axis=-1,
)

# Run both batches through their respective vectorizers.
print(vectorize_layer(text))
print(vectorize_layer_2(label))

tf.Tensor(
[[ 1  2  3  4  1  8  9]
 [ 9 10 11 12 13  0  0]], shape=(2, 7), dtype=int64)
tf.Tensor(
[[1 0]
 [2 3]], shape=(2, 2), dtype=int64)


In [4]:
# Random array of shape (2, 4, 5) with values drawn from [0, 1).
# No seed is set, so the printed numbers differ on every run.
pred = np.random.random((2, 4, 5))
print(pred)

[[[0.29593048 0.75991131 0.66691184 0.0442313  0.0660399 ]
  [0.31933673 0.83671252 0.93205941 0.69007656 0.84742818]
  [0.44058372 0.1588558  0.40864148 0.77324524 0.15715588]
  [0.88978617 0.85390921 0.44435366 0.93024436 0.1051121 ]]

 [[0.74604873 0.00524921 0.99680777 0.58359923 0.29062825]
  [0.1658304  0.52366239 0.74503509 0.32150467 0.68321395]
  [0.76674803 0.01045743 0.84811724 0.12360591 0.86196348]
  [0.3819387  0.64659708 0.7773225  0.10516274 0.95523952]]]


In [5]:
# Convert `pred` to a float32 tensor; re-running this cell is harmless
# because casting a float32 tensor to float32 leaves it unchanged.
pred = tf.cast(pred, dtype=tf.float32)

In [7]:
# Scalar int32 tensor: print the tensor itself, then the plain value
# obtained through `.numpy()`.
a = tf.constant(12, tf.int32)
print(a, a.numpy(), sep="\n")

tf.Tensor(12, shape=(), dtype=int32)
12


In [11]:
# Dictionary of scalar int32 tensors keyed by "1", "2" and "3".
tp = {key: tf.constant(1, dtype=tf.int32) for key in ("1", "2", "3")}
# Pull the NumPy value out of every tensor, preserving dictionary order.
out = [tensor.numpy() for tensor in tp.values()]

In [13]:
# Display the values extracted from the tensors in `tp`.
out

[1, 1, 1]