In [1]:
import timm

In [2]:
# timm.list_models()

In [3]:
# Toy sentence used to show how a vocabulary is built by hand.
sentence = "The wide road shimmered in the hot sun"
# Lower-case, then split on whitespace; str.split() already returns a list.
tokens = sentence.lower().split()
print(len(tokens))

8


In [4]:
# Token -> integer id table. Id 0 is reserved for the padding token, so real
# tokens are numbered from 1 in order of first appearance.
vocab = {'<pad>': 0}
index = 1
for token in tokens:
    if token in vocab:
        continue  # repeated token keeps the id it was first given
    vocab[token] = index
    index += 1
vocab_size = len(vocab)
print(vocab)

{'<pad>': 0, 'the': 1, 'wide': 2, 'road': 3, 'shimmered': 4, 'in': 5, 'hot': 6, 'sun': 7}


In [5]:
inverse_vocab = {index: token for token, index in vocab.items()}
print(inverse_vocab)

{0: '<pad>', 1: 'the', 2: 'wide', 3: 'road', 4: 'shimmered', 5: 'in', 6: 'hot', 7: 'sun'}


In [6]:
example_sequence = [vocab[word] for word in tokens]
print(example_sequence)

[1, 2, 3, 4, 5, 1, 6, 7]


# 循环神经网络（RNN）文本生成

In [7]:
import tensorflow as tf

import numpy as np
import os
import time

2024-09-20 11:03:22.562631: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-20 11:03:22.563669: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-20 11:03:22.566746: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-20 11:03:22.575837: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-20 11:03:22.590542: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been 

In [8]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [9]:
path_to_file # '/root/.keras/datasets/shakespeare.txt'

'/root/.keras/datasets/shakespeare.txt'

In [10]:
# Read the file as raw bytes and decode it as UTF-8.
# The context manager closes the file handle as soon as the read finishes,
# instead of leaving it open until the garbage collector reclaims it.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

# The length of the text is the number of characters it contains.
print('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [11]:
# 看一看文本中的前 250 个字符
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [12]:
# 文本中的非重复字符
vocab = sorted(set(text))
print ('{} unique characters'.format(len(vocab)))

65 unique characters


In [13]:
vocab[:15]

['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B']

## 处理文本

### 向量化文本

在训练之前，我们需要将字符串映射到数字表示值。创建两个查找表格：一个将字符映射到数字，另一个将数字映射到字符。

In [14]:
# Two lookup tables over the unique characters:
#   char2idx: character -> integer id (its position in `vocab`)
#   idx2char: integer id -> character (array indexable by id)
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

# Encode the entire corpus as an array of integer ids, one per character.
text_as_int = np.array(list(map(char2idx.__getitem__, text)))

In [15]:
print('{')
for char,_ in zip(char2idx, range(20)):
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '$' :   3,
  '&' :   4,
  "'" :   5,
  ',' :   6,
  '-' :   7,
  '.' :   8,
  '3' :   9,
  ':' :  10,
  ';' :  11,
  '?' :  12,
  'A' :  13,
  'B' :  14,
  'C' :  15,
  'D' :  16,
  'E' :  17,
  'F' :  18,
  'G' :  19,
  ...
}


In [16]:
# 显示文本首 13 个字符的整数映射
print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))

'First Citizen' ---- characters mapped to int ---- > [18 47 56 57 58  1 15 47 58 47 64 43 52]


## 预测任务
给定一个字符或者一个字符序列，下一个最可能出现的字符是什么？这就是我们训练模型要执行的任务。输入进模型的是一个字符序列，我们训练这个模型来预测输出 -- 每个时间步（time step）预测下一个字符是什么。

将文本拆分为长度为 seq_length+1 的文本块。例如，假设 seq_length 为 4 而且文本为 “Hello”， 那么输入序列将为 “Hell”，目标序列将为 “ello”。

### 创建训练样本和目标

In [17]:
# Maximum length, in characters, of a single input sequence.
seq_length = 100
# Each training example is cut from a chunk of seq_length + 1 characters
# (the input plus the one-step-shifted target), so divide by the chunk size
# rather than by seq_length to count examples per epoch.
examples_per_epoch = len(text) // (seq_length + 1)

# Build a dataset that yields the corpus one character id at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Sanity check: decode and show the first five characters.
for i in char_dataset.take(5):
    print(idx2char[i.numpy()])

F
i
r
s
t


2024-09-20 11:03:24.441152: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2024-09-20 11:03:24.487685: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [18]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

for item in sequences.take(5):
  print(repr(''.join(idx2char[item.numpy()])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


2024-09-20 11:03:24.518834: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [19]:
def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].
    For example, "Hello" becomes ("Hell", "ello").

    Args:
        chunk: Any sliceable sequence (the notebook maps this over dataset
            elements produced by `sequences`).

    Returns:
        A tuple `(input_text, target_text)`.
    """
    return chunk[:-1], chunk[1:]

dataset = sequences.map(split_input_target)

In [20]:
# 打印第一批样本的输入与目标值：
for input_example, target_example in  dataset.take(1):
  print ('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
  print ('Target data:', repr(''.join(idx2char[target_example.numpy()])))

Input data:  'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target data: 'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [21]:
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print("Step {:4d}".format(i))
    print("  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print("  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

Step    0
  input: 18 ('F')
  expected output: 47 ('i')
Step    1
  input: 47 ('i')
  expected output: 56 ('r')
Step    2
  input: 56 ('r')
  expected output: 57 ('s')
Step    3
  input: 57 ('s')
  expected output: 58 ('t')
Step    4
  input: 58 ('t')
  expected output: 1 (' ')


### 创建训练批次

In [22]:
# 批大小
BATCH_SIZE = 64

# 设定缓冲区大小，以重新排列数据集
# （TF 数据被设计为可以处理可能是无限的序列，
# 所以它不会试图在内存中重新排列整个序列。相反，
# 它维持一个缓冲区，在缓冲区重新排列元素。） 
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [23]:
dataset

<_BatchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

### 创建模型

In [24]:
# 词集的长度
vocab_size = len(vocab)

# 嵌入的维度
embedding_dim = 256

# RNN 的单元数量
rnn_units = 1024

In [25]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Build the character-level model: Embedding -> stateful GRU -> Dense.

    Args:
        vocab_size: Number of distinct ids; used as the embedding input size
            and as the number of output units per time step.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in the GRU layer.
        batch_size: Fixed batch dimension of the input. It is baked into the
            input shape because the GRU is created with `stateful=True`.

    Returns:
        An uncompiled `tf.keras.Model`. The final Dense layer has no
        activation, so the outputs are unnormalized scores (logits).
    """
    layers = tf.keras.layers

    # Batch size is fixed; the sequence length is left variable (None).
    token_ids = tf.keras.Input(batch_shape=[batch_size, None])
    embedded = layers.Embedding(vocab_size, embedding_dim)(token_ids)

    # return_sequences=True yields one output per time step rather than only
    # the last one.
    recurrent = layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    hidden_states = recurrent(embedded)

    logits = layers.Dense(vocab_size)(hidden_states)
    return tf.keras.Model(token_ids, logits)

In [26]:
model = build_model(
  vocab_size = len(vocab),
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

In [27]:
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


2024-09-20 11:03:26.306663: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [28]:
model.summary()

In [29]:
input_example_batch.shape

TensorShape([64, 100])

In [30]:
example_batch_predictions.shape

TensorShape([64, 100, 65])

In [31]:
# 这个批次中的第一个样本
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()

In [32]:
sampled_indices

array([60, 37, 37, 52, 47,  7, 49,  1, 45, 56, 44, 24,  2, 24, 36, 53, 19,
       58, 45, 42, 45, 57, 61, 16, 53, 56, 57, 45, 28, 22, 34, 58, 42, 47,
       14, 18, 57, 39, 61, 47, 16, 54, 33, 42,  4, 34, 58, 24,  8,  4, 28,
       31, 27, 24, 44, 48, 46, 63, 36,  7,  5, 14, 17, 47, 43, 25, 29, 30,
       35,  2, 39, 24, 36, 32, 25, 58,  1, 37,  7, 40, 64,  2,  4, 31, 42,
       64, 46, 59, 57, 60, 13,  8, 39, 35, 57, 28, 24, 12, 48, 15])

In [33]:
example_batch_predictions[0].shape

TensorShape([100, 65])

In [34]:
print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ])))

Input: 
 'e which first we do deserve.\nWhy in this woolvish toge should I stand here,\nTo beg of Hob and Dick, '

Next Char Predictions: 
 "vYYni-k grfL!LXoGtgdgswDorsgPJVtdiBFsawiDpUd&VtL.&PSOLfjhyX-'BEieMQRW!aLXTMt Y-bz!&SdzhusvA.aWsPL?jC"


### 训练模型 
-- 当前环境无法使用 GPU：之前是在 Google Colab 上运行的，本机环境暂时不兼容 TensorFlow 的 GPU 版本。

In [35]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
print(gpus)

[]


In [36]:
import tensorflow as tf

# Restrict TensorFlow to a single GPU: the first one (index 0).
gpu_to_use = 0
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    # No physical GPU was detected.
    print("error")
else:
    try:
        tf.config.set_visible_devices(gpus[gpu_to_use], 'GPU')
        logical_gpus = tf.config.list_logical_devices('GPU')
        print("Using GPU: {}".format(logical_gpus[0].name))
    except RuntimeError as err:
        # Report the failure instead of aborting the notebook run.
        print(err)

error


In [37]:
import tensorflow as tf

print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.17.0


In [38]:
from tensorflow.python.client import device_lib

print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 181563271034973567
xla_global_id: -1
]


2024-09-20 11:03:26.919688: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [39]:
import sys
print(sys.executable)

/root/conda/envs/cv_env/bin/python


In [40]:
def loss(labels, logits):
    """Sparse categorical cross-entropy computed on raw logits.

    Args:
        labels: Integer class ids (the target character ids).
        logits: Unnormalized model outputs; `from_logits=True` tells Keras
            that no softmax has been applied to them.

    Returns:
        The value returned by
        `tf.keras.losses.sparse_categorical_crossentropy` for these inputs.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True
    )
    return crossentropy

example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 100, 65)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.175299


In [41]:
model.compile(optimizer='adam', loss=loss)

In [42]:
# 检查点保存至的目录
checkpoint_dir = './RNN_training_checkpoints'

# 检查点的文件名
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [43]:
EPOCHS=10

In [45]:
# history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

# 基于注意力的神经机器翻译模型架构

In [46]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, LSTM, Embedding, Dense, Input
from tensorflow.keras.models import Model

In [47]:

class AttentionLayer(Layer):
    """Additive attention over a sequence of encoder outputs.

    Scores every encoder time step against a single query vector with
    `V(tanh(W1(enc_output) + W2(hidden_state)))`, normalizes the scores with a
    softmax over the time axis, and returns the weighted sum of the encoder
    outputs together with the weights.

    Args:
        units: Width of the two projection layers `W1` and `W2`.
        **kwargs: Standard Keras layer arguments (e.g. `name`, `dtype`),
            forwarded to the base `Layer`.
    """

    def __init__(self, units, **kwargs):
        # Forward base-layer kwargs so callers can name the layer and so it
        # can be rebuilt from its config.
        super().__init__(**kwargs)
        self.units = units
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, hidden_state, enc_output):
        """Compute the context vector and the attention weights.

        Args:
            hidden_state: Query tensor, expected shape (batch_size, hidden_size).
            enc_output: Encoder outputs, expected shape
                (batch_size, max_length, enc_size).

        Returns:
            A tuple `(context_vector, attention_weights)` with expected shapes
            (batch_size, enc_size) and (batch_size, max_length, 1).
        """
        # Insert a time axis so the query broadcasts against every encoder
        # step: (batch_size, hidden_size) -> (batch_size, 1, hidden_size).
        hidden_with_time_axis = tf.expand_dims(hidden_state, 1)

        # score shape == (batch_size, max_length, units)
        score = tf.nn.tanh(self.W1(enc_output) + self.W2(hidden_with_time_axis))

        # attention_weights shape == (batch_size, max_length, 1);
        # the softmax runs over axis 1, the time axis.
        attention_weights = tf.nn.softmax(self.V(score), axis=1)

        # Weight each encoder step, then sum over time:
        # (batch_size, max_length, enc_size) -> (batch_size, enc_size)
        context_vector = attention_weights * enc_output
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

    def get_config(self):
        """Return the layer config, including `units`, for serialization."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

In [66]:
from tensorflow.keras.layers import Layer
class ConcatLayer(Layer):
    """Append a per-example context vector to every decoder time step.

    Expects `inputs` to be a pair `(context_vector, dec_embedding)` where
    `context_vector` is expected to be (batch_size, context_dim) and
    `dec_embedding` is expected to be (batch_size, dec_steps, embedding_dim).
    Returns a tensor of shape (batch_size, dec_steps, context_dim + embedding_dim).
    """

    def call(self, inputs):
        context_vector, dec_embedding = inputs
        # (batch_size, context_dim) -> (batch_size, 1, context_dim)
        context_vector_expanded = tf.expand_dims(context_vector, 1)
        # tf.concat needs every dimension except the concat axis to match, so
        # a length-1 time axis only works when the decoder input has exactly
        # one step. Repeat the context along the time axis so that any
        # decoder length is accepted; for one step this is a no-op.
        dec_steps = tf.shape(dec_embedding)[1]
        context_per_step = tf.tile(context_vector_expanded, [1, dec_steps, 1])
        return tf.concat([context_per_step, dec_embedding], axis=-1)
                         
def build_model(vocab_size, embedding_dim, units, batch_size):
    """Build an LSTM encoder-decoder model with an attention context.

    The encoder embeds the source ids and runs them through an LSTM. An
    `AttentionLayer` turns the final encoder hidden state and the encoder
    output sequence into one context vector. `ConcatLayer` joins that context
    with the decoder embeddings, and the result feeds a decoder LSTM
    initialized with the encoder's final states. A softmax Dense layer maps
    each decoder output to a distribution over the vocabulary.

    Args:
        vocab_size: Vocabulary size, used for both embeddings and the output
            layer.
        embedding_dim: Size of each embedding vector.
        units: Number of units in both LSTMs and in the attention projections.
        batch_size: Accepted for interface compatibility; not used in the body.

    Returns:
        An uncompiled Keras `Model` taking `[encoder_inputs, decoder_inputs]`
        and producing per-step softmax probabilities.
    """
    # --- Encoder ---
    source_ids = Input(shape=(None,))
    source_embedded = Embedding(vocab_size, embedding_dim)(source_ids)
    encoder_lstm = LSTM(units, return_sequences=True, return_state=True)
    encoder_seq, final_h, final_c = encoder_lstm(source_embedded)

    # --- Decoder inputs and recurrent layer ---
    target_ids = Input(shape=(None,))
    target_embedded = Embedding(vocab_size, embedding_dim)(target_ids)
    decoder_lstm = LSTM(units, return_sequences=True, return_state=True)

    # --- Attention: one context vector, queried with the final encoder state ---
    attention_layer = AttentionLayer(units)
    context, _ = attention_layer(final_h, encoder_seq)

    # Combine the context with the decoder embeddings before the decoder LSTM.
    decoder_features = ConcatLayer()([context, target_embedded])

    # The decoder starts from the encoder's final hidden and cell states.
    decoder_seq, _, _ = decoder_lstm(
        decoder_features, initial_state=[final_h, final_c]
    )
    probabilities = Dense(vocab_size, activation='softmax')(decoder_seq)

    return Model([source_ids, target_ids], probabilities)

In [67]:
# Hyperparameters
vocab_size = 10000  # Example vocabulary size
embedding_dim = 256
units = 512
batch_size = 64

In [68]:
# Build and compile model
model = build_model(vocab_size, embedding_dim, units, batch_size)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()
