In [1]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

In [5]:
# Toy batch holding one sentence of ten word indices.
# Author's note (translated): "the second and the fifth".
words = [[2, 3, 3, 4, 5, 3, 6, 2, 1, 0]]

# Seed TensorFlow's global RNG before the layer is created and first called.
tf.random.set_seed(520)
# Lookup table with 9 rows (word ids 0..8), each a 5-dimensional vector.
embeddings = layers.Embedding(input_dim=9, output_dim=5)

# Bare expression so the (1, 10, 5) embedded batch is shown as the cell output.
embeddings(tf.Variable(words))

<tf.Tensor: shape=(1, 10, 5), dtype=float32, numpy=
array([[[-0.01862597, -0.00334956,  0.04830854,  0.03242388,
         -0.01192739],
        [-0.0233111 ,  0.01983568,  0.0189561 , -0.01962071,
          0.02615999],
        [-0.0233111 ,  0.01983568,  0.0189561 , -0.01962071,
          0.02615999],
        [ 0.03444393, -0.01531786, -0.0033838 ,  0.03225869,
         -0.018484  ],
        [-0.04602621, -0.04833745, -0.01547657, -0.00369142,
         -0.0313596 ],
        [-0.0233111 ,  0.01983568,  0.0189561 , -0.01962071,
          0.02615999],
        [ 0.00286833, -0.02307302,  0.04050693,  0.04634685,
         -0.0186964 ],
        [-0.01862597, -0.00334956,  0.04830854,  0.03242388,
         -0.01192739],
        [ 0.04170139, -0.0338135 ,  0.04312653, -0.01489342,
          0.02225891],
        [ 0.04382542,  0.02465444, -0.00269968, -0.04287535,
         -0.02969116]]], dtype=float32)>

In [7]:
# 1-D convolution: 4 filters sliding a window of 3 positions over the sequence.
conv = layers.Conv1D(filters=4, kernel_size=3)

# The 10 embedded positions become 8 windows, each mapped to 4 channels.
conv(embeddings(tf.Variable(words)))


<tf.Tensor: shape=(1, 8, 4), dtype=float32, numpy=
array([[[ 2.1573531e-03, -2.2026023e-02,  6.2333042e-05, -3.7967772e-03],
        [ 5.2825692e-03,  3.0556269e-02,  1.0702302e-02, -3.3799879e-02],
        [-4.1705664e-02,  1.3093116e-02, -2.9750289e-02,  3.5943914e-02],
        [ 4.5824167e-04, -8.0889901e-03, -1.4216326e-03,  2.5043406e-02],
        [-3.9527997e-02, -4.2051878e-02,  5.8549918e-02, -2.5723914e-02],
        [-2.6600368e-02,  5.7612609e-02, -2.5477795e-02,  1.6935151e-02],
        [-4.4883611e-03, -7.7862376e-03,  2.1512130e-02,  1.6298648e-02],
        [ 3.1133471e-02,  8.2555590e-03,  2.3945654e-02, -4.4470350e-03]]],
      dtype=float32)>

In [8]:
# Max pooling over groups of 3 positions; 'same' padding keeps the partial last group.
pooling = layers.MaxPool1D(pool_size=3, padding='same')

# The 8 convolved positions are reduced to 3 pooled positions (see output shape).
pooling(conv(embeddings(tf.Variable(words))))

<tf.Tensor: shape=(1, 3, 4), dtype=float32, numpy=
array([[[0.00528257, 0.03055627, 0.0107023 , 0.03594391],
        [0.00045824, 0.05761261, 0.05854992, 0.02504341],
        [0.03113347, 0.00825556, 0.02394565, 0.01629865]]], dtype=float32)>

In [17]:
# Collapse every non-batch axis into a single feature axis.
flatten = layers.Flatten()

# Re-run the pipeline up to pooling, then flatten (1, 3, 4) into (1, 12).
pooled_features = pooling(conv(embeddings(tf.Variable(words))))
flatten(pooled_features)

<tf.Tensor: shape=(1, 12), dtype=float32, numpy=
array([[0.00528257, 0.03055627, 0.0107023 , 0.03594391, 0.00045824,
        0.05761261, 0.05854992, 0.02504341, 0.03113347, 0.00825556,
        0.02394565, 0.01629865]], dtype=float32)>

In [18]:
# Dropout with a 20% drop rate.
dropout = layers.Dropout(rate=0.2)

# Called without a training flag, the recorded output equals the flattened
# input unchanged, i.e. dropout is inactive here.
flat_features = flatten(pooling(conv(embeddings(tf.Variable(words)))))
dropout(flat_features)

<tf.Tensor: shape=(1, 12), dtype=float32, numpy=
array([[0.00528257, 0.03055627, 0.0107023 , 0.03594391, 0.00045824,
        0.05761261, 0.05854992, 0.02504341, 0.03113347, 0.00825556,
        0.02394565, 0.01629865]], dtype=float32)>

In [19]:
# Two-way classifier head: softmax turns the two logits into probabilities.
dense = layers.Dense(units=2, activation='softmax')

# Full forward pass of the toy pipeline; the result has shape (1, 2).
regularised_features = dropout(flatten(pooling(conv(embeddings(tf.Variable(words))))))
dense(regularised_features)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0.5052252 , 0.49477482]], dtype=float32)>

## Loss

In [1]:
# Scratch list of the integers 0..9.
temp = list(range(10))
# Lists have no ``split`` method (calling it raises AttributeError); display
# the list itself, which is what the recorded output of this cell shows.
temp

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [12]:
class Data:
    """Iterate over a sliceable sequence in consecutive mini-batches.

    Each step yields ``data[start:start + batch_size]``; the final batch may be
    shorter when ``len(data)`` is not a multiple of ``batch_size``.

    Args:
        data: any object supporting ``len()`` and slicing (list, str, ...).
        batch_size: number of items per batch; must be positive.

    Raises:
        ValueError: if ``batch_size`` is not positive (the cursor would never
            advance past the end and iteration would not terminate).
    """

    def __init__(self, data, batch_size):
        if batch_size <= 0:
            raise ValueError(f'batch_size must be positive, got {batch_size!r}')

        self.data = data
        self.batch_size = batch_size

        self.start = 0  # cursor: index of the first item of the next batch
        self.end = len(data)

    def __iter__(self):
        # Rewind the cursor so the same object can be looped over repeatedly
        # (e.g. once per epoch) instead of being empty after the first pass.
        self.start = 0
        return self

    def __next__(self):
        start = self.start
        if start >= self.end:
            raise StopIteration

        self.start = start + self.batch_size
        return self.data[start:self.start]
        



In [38]:
class TextCNN(keras.Model):
    """Small text classifier: embedding -> Conv1D -> max-pool -> dropout -> softmax.

    All hyper-parameters are fixed in ``__init__``; the constructor takes no
    arguments.
    """

    def __init__(self):
        super().__init__()

        self.max_len = 20  # maximum number of words per sentence (not used in this class)
        self.word_dim = 128  # word-vector (embedding) dimension
        self.word_num = 1670  # maximum number of words; used as the embedding vocabulary size
        self.class_num = 10  # total number of classes
        self.dropout = 0.2  # dropout rate (a float; the layer itself is ``dropout_layer``)

        # Seed the global TensorFlow RNG before the layers are created.
        tf.random.set_seed(520)
        self.embeddings_layer = layers.Embedding(self.word_num, self.word_dim)

        self.conv_layer = layers.Conv1D(128, 3)
        self.pooling_layer = layers.MaxPool1D(8, padding='same')

        self.flatten = layers.Flatten()

        self.dropout_layer = layers.Dropout(self.dropout)
        self.dense_layer = layers.Dense(self.class_num, activation='softmax')

    def call(self, batch_word_index, training=None):
        """Run the forward pass.

        Args:
            batch_word_index: batch of word-index sequences, laid out as
                batch * sentence_length (nested list or tensor).
            training: optional flag forwarded to the dropout layer; ``None``
                lets Keras decide from the calling context.

        Returns:
            Output of the softmax dense layer, with ``class_num`` values per
            sentence.
        """
        # Convert to a tensor rather than creating a new tf.Variable on every
        # forward pass; variable creation inside call() fails once the model is
        # traced (e.g. by fit()).
        word_indices = tf.convert_to_tensor(batch_word_index)
        vector_x = self.embeddings_layer(word_indices)
        x_1 = self.pooling_layer(self.conv_layer(vector_x))
        # Use the Dropout *layer*; ``self.dropout`` is only the float rate.
        x = self.dense_layer(self.dropout_layer(self.flatten(x_1), training=training))
        return x

In [None]:
# Scratch check of tf.reduce_mean on a plain Python list of integers.
# NOTE(review): the recorded output below is a kernel/session error rather
# than a result — re-run on a live kernel to see the actual value.
tf.reduce_mean([2, 3, 4])

Error: Session cannot generate requests

In [45]:
# One-hot target (class 1) against a predicted distribution over 4 classes.
y_true = [0, 1, 0, 0]
# These predictions sum to 0.9, not 1.
# NOTE(review): the recorded loss equals -log(0.5 / 0.9), which suggests the
# predictions are renormalised before the log — confirm against the API docs.
y_pred = [0.2, 0.5, 0.1, 0.1]

tf.losses.categorical_crossentropy(y_true, y_pred)

0.5877866

In [40]:
import time


def lazy_property(func):
    """Turn a zero-argument method into a property computed once per instance.

    The first access calls ``func(self)`` and stores the result on the instance
    under ``_lazy_<func name>``; later accesses return the stored value.
    """
    cache_attr = f'_lazy_{func.__name__}'

    def _cached_getter(self):
        # Compute and stash the value only when the instance has none yet.
        already_cached = hasattr(self, cache_attr)
        if not already_cached:
            setattr(self, cache_attr, func(self))
        return getattr(self, cache_attr)

    return property(_cached_getter)


class Test:
    """Demo class exposing the current Unix time (whole seconds) as a property."""

    # Plain ``property``: the clock is read again on every access.
    # (The ``lazy_property`` decorator was tried here as an alternative.)
    @property
    def time(self):
        current = time.time()
        return int(current)


t = Test()
        

1635392765

In [2]:
import tensorflow as tf
import tensorflow.keras as keras

from bieshu_ai.text_cnn import TextCNN
from bieshu_ai.data_processing import DataLoader

# Look up the CPU device(s) and make them the only devices visible to
# TensorFlow for this session.
physical_devices = tf.config.list_physical_devices(device_type='CPU')
tf.config.set_visible_devices(devices=physical_devices)

In [3]:
# Project helpers imported from the bieshu_ai package; both are constructed
# with no arguments here.
dataloader = DataLoader()

model = TextCNN()

# Training is disabled in this cell: the compile/fit code below is commented
# out and previously saved weights are loaded instead.
# model.compile(
#         optimizer=keras.optimizers.Adam(),
#         loss=keras.losses.CategoricalCrossentropy()
#     )

# train_x, train_y = dataloader.get_data(mode='train')
# train_x_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).batch(500)
# model.fit(train_x_dataset, epochs=0)

# Call the model once on a sample sentence before loading the weights.
# NOTE(review): presumably this forward pass builds the layer variables so that
# load_weights has something to fill — confirm.
model([dataloader.word_to_index('我要看案例')[0]])

# Path is relative to the notebook's working directory.
model.load_weights('./models/model_weights.h5')

# Bare expression: shows the model object's repr as the cell output.
model

2021-10-28 17:34:59.214796: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.789 seconds.
Prefix dict has been built successfully.


<bieshu_ai.text_cnn.TextCNN at 0x7f99d5c09d30>

In [8]:
# Encode one sentence, wrap it in a batch of size one and run the classifier.
# The recorded output is a single row of 10 scores, with index 8 close to 1.
sentence_indices = dataloader.word_to_index('我怕你骚扰我')[0]
model([sentence_indices]).numpy()

array([[1.4903464e-09, 1.6335100e-09, 1.0596084e-07, 2.4086501e-07,
        1.9564792e-14, 3.9162978e-14, 8.4697171e-15, 4.7496951e-11,
        9.9999964e-01, 8.6111768e-14]], dtype=float32)

tuple

In [2]:
import tensorflow as tf

In [7]:
# Describe a (10, 20) tensor; float32 is the dtype shown in the recorded repr.
shape = tf.TensorSpec(shape=(10, 20), dtype=tf.float32)
shape


TensorSpec(shape=(10, 20), dtype=tf.float32, name=None)