In [1]:
import numpy as np
import random
import time
import cllm_utils

In [2]:
#获取名称
data = open("dinos.txt","r").read()

#转化为小写
data = data.lower()

#转化为不重复的元素列表
chars = list(set(data))

#获取大小信息
data_size, vocab_size = len(data), len(chars)

print(chars)
print("共计有%d个字符，唯一字符有%d个"%(data_size,vocab_size))

['w', 'm', 'q', 't', 'b', 'x', 'u', 'c', 'i', 's', 'z', 'o', 'p', 'y', 'd', 'v', 'l', 'r', 'h', 'f', 'k', 'e', 'g', 'a', '\n', 'j', 'n']
共计有19909个字符，唯一字符有27个


In [3]:
#构建字典
char_to_ix = {ch:i for i, ch in enumerate(sorted(chars))}
ix_to_char= {i:ch for i, ch in enumerate(sorted(chars))}

print(char_to_ix,"\n")
print(ix_to_char)

{'\n': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26} 

{0: '\n', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}


In [7]:
def clip(gradients, maxValue):
    """
    使用maxValue来修剪梯度
    
    参数：
        gradients -- 字典类型，包含了以下参数："dWaa", "dWax", "dWya", "db", "dby"
        maxValue -- 阈值，把梯度值限制在[-maxValue, maxValue]内
        
    返回：
        gradients -- 修剪后的梯度
    """
    #获取参数
    dWaa, dWax, dWya, db, dby = gradients["dWaa"], gradients["dWax"], gradients["dWya"], gradients["db"], gradients["dby"]
    
    #梯度修剪
    for gradient in [dWaa, dWax, dWya, db, dby]:
        np.clip(gradient, -maxValue, maxValue, out=gradient)
    
    gradients = {"dWaa": dWaa, "dWax": dWax, "dWya": dWya, "db": db, "dby": dby}
    
    return gradients

#test
# np.random.seed(3)
# dWax = np.random.randn(5,3)*10
# dWaa = np.random.randn(5,5)*10
# dWya = np.random.randn(2,5)*10
# db = np.random.randn(5,1)*10
# dby = np.random.randn(2,1)*10
# gradients = {"dWax": dWax, "dWaa": dWaa, "dWya": dWya, "db": db, "dby": dby}
# gradients = clip(gradients, 10)
# print("gradients[\"dWaa\"][1][2] =", gradients["dWaa"][1][2])
# print("gradients[\"dWax\"][3][1] =", gradients["dWax"][3][1])
# print("gradients[\"dWya\"][1][2] =", gradients["dWya"][1][2])
# print("gradients[\"db\"][4] =", gradients["db"][4])
# print("gradients[\"dby\"][1] =", gradients["dby"][1])

In [12]:
def sample(parameters, char_to_ix, seed):
    """
    根据RNN输出的概率分布序列对字符序列进行采样
    
    参数：
        parameters -- 包含了Waa, Wax, Wya, by, b的字典
        char_to_ix -- 字符映射到索引的字典
        seed -- 随机种子
        
    返回：
        indices -- 包含采样字符索引的长度为n的列表。
    """
    
    #从parameters中获取参数
    Waa, Wax, Wya, by, b = parameters["Waa"], parameters["Wax"], parameters["Wya"], parameters["by"], parameters["b"]
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]
    
    #1st step:创建one-hot vector
    x = np.zeros((vocab_size,1))
    
    #初始化a_prev
    a_prev = np.zeros((n_a,1))
    
    #创建索引的空列表，用来储存生成的字符的索引
    indices = []
    
    #用IDX来检测是否遇到换行符
    idx = -1
    
    #循环时间步t，每个时间步选择一个字符，讲字符添加到"indices"上，如果我们达到50字符就结束
    counter = 0
    newline_character = char_to_ix["\n"]
    
    while (idx != newline_character and counter < 50):
        #2nd step: forward propagation
        a = np.tanh(np.dot(Waa, a_prev) + np.dot(Wax, x) + b)
        z = np.dot(Wya,a) + by
        y = cllm_utils.softmax(z)
        
        #设定随机种子
        np.random.seed(counter + seed)
        
        #3rd step: 从y中随机选取字符，并储存索引,p为对应索引取到的概率（算出来的y）
        idx = np.random.choice(vocab_size, p=y.ravel())
        indices.append(idx)
        
        #4th step: 将输入的字符重写为与采样索引对应的字符,namely对应的one-hot向量
        x = np.zeros((vocab_size,1))
        x[idx] = 1
        
        #更新a_prev为a
        a_prev = a
        
        #累加器
        seed +=1
        counter +=1
    
    if(counter == 50):
        indices.append(newline_character)
        
    return indices

#test
# np.random.seed(2)
# _, n_a = 20, 100
# Wax, Waa, Wya = np.random.randn(n_a, vocab_size), np.random.randn(n_a, n_a), np.random.randn(vocab_size, n_a)
# b, by = np.random.randn(n_a, 1), np.random.randn(vocab_size, 1)
# parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "b": b, "by": by}


# indices = sample(parameters, char_to_ix, 0)
# print("Sampling:")
# print("list of sampled indices:", indices)
# print("list of sampled characters:", [ix_to_char[i] for i in indices])

In [14]:
def optimize(X, Y, a_prev, parameters, learning_rate = 0.01):
    """
    执行训练模型的单步优化。
    
    参数：
        X -- 整数列表，其中每个整数映射到词汇表中的字符。
        Y -- 整数列表，与X完全相同，但向左移动了一个索引。
        a_prev -- 上一个隐藏状态
        parameters -- 字典，包含了以下参数：
                        Wax -- 权重矩阵乘以输入，维度为(n_a, n_x)
                        Waa -- 权重矩阵乘以隐藏状态，维度为(n_a, n_a)
                        Wya -- 隐藏状态与输出相关的权重矩阵，维度为(n_y, n_a)
                        b -- 偏置，维度为(n_a, 1)
                        by -- 隐藏状态与输出相关的权重偏置，维度为(n_y, 1)
        learning_rate -- 模型学习的速率
    
    返回：
        loss -- 损失函数的值（交叉熵损失）
        gradients -- 字典，包含了以下参数：
                        dWax -- 输入到隐藏的权值的梯度，维度为(n_a, n_x)
                        dWaa -- 隐藏到隐藏的权值的梯度，维度为(n_a, n_a)
                        dWya -- 隐藏到输出的权值的梯度，维度为(n_y, n_a)
                        db -- 偏置的梯度，维度为(n_a, 1)
                        dby -- 输出偏置向量的梯度，维度为(n_y, 1)
        a[len(X)-1] -- 最后的隐藏状态，维度为(n_a, 1)
    """
    #前向传播
    loss, cache = cllm_utils.rnn_forward(X, Y, a_prev, parameters)
    
    #反向传播
    gradients, a = cllm_utils.rnn_backward(X, Y, parameters, cache)
    
    #梯度修剪
    gradients = clip(gradients,5)
    
    #更新参数
    parameters = cllm_utils.update_parameters(parameters,gradients,learning_rate)
    
    return loss, gradients, a[len(X)-1]

#test
# np.random.seed(1)
# vocab_size, n_a = 27, 100
# a_prev = np.random.randn(n_a, 1)
# Wax, Waa, Wya = np.random.randn(n_a, vocab_size), np.random.randn(n_a, n_a), np.random.randn(vocab_size, n_a)
# b, by = np.random.randn(n_a, 1), np.random.randn(vocab_size, 1)
# parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "b": b, "by": by}
# X = [12,3,5,11,22,3]
# Y = [4,14,11,22,25, 26]

# loss, gradients, a_last = optimize(X, Y, a_prev, parameters, learning_rate = 0.01)
# print("Loss =", loss)
# print("gradients[\"dWaa\"][1][2] =", gradients["dWaa"][1][2])
# print("np.argmax(gradients[\"dWax\"]) =", np.argmax(gradients["dWax"]))
# print("gradients[\"dWya\"][1][2] =", gradients["dWya"][1][2])
# print("gradients[\"db\"][4] =", gradients["db"][4])
# print("gradients[\"dby\"][1] =", gradients["dby"][1])
# print("a_last[4] =", a_last[4])

In [31]:
def model(data, ix_to_char, char_to_ix, num_iterations=3500, 
          n_a=50, dino_names=7,vocab_size=27):
    """
    训练模型并生成恐龙名字
    
    参数：
        data -- 语料库
        ix_to_char -- 索引映射字符字典
        char_to_ix -- 字符映射索引字典
        num_iterations -- 迭代次数
        n_a -- RNN单元数量
        dino_names -- 每次迭代中采样的数量
        vocab_size -- 在文本中的唯一字符的数量
    
    返回：
        parameters -- 学习后了的参数
    """
    #从vocab_size中获取n_x, n_y
    n_x, n_y = vocab_size, vocab_size
    
    #初始化参数,输入和输出的vector长度应该一样
    parameters = cllm_utils.initialize_parameters(n_a, n_x, n_y)
    
    #初始化损失
    loss = cllm_utils.get_initial_loss(vocab_size, dino_names)
    
    #构建全部的恐龙名称
    with open("dinos.txt") as f:
        examples = f.readlines()
    #小写恐龙的名字，并且去除换行符"\n"
    examples = [x.lower().strip() for x in examples]
    
    #打乱全部恐龙的名称
    np.random.seed(0)
    np.random.shuffle(examples)
    
    #初始化LSTM的隐藏状态
    a_prev = np.zeros((n_a,1))
    
    #循环训练
    for j in range(num_iterations):
        #定义一个训练样本
        index = j % len(examples)   #防止循环数大于example的个数
        X = [None] + [char_to_ix[ch] for ch in examples[index]]  #构建出每个字符对应的one-hot编码，第一个None对应0向量
        Y = X[1:]  + [char_to_ix["\n"]]  #Y(t)对应X(t+1), 加上每个字符串结尾的\n转行符
        
        #执行单步优化：前向传播->反向传播->梯度修剪->更新参数
        #选择学习率为0.01
        curr_loss, gradients, a_prev = optimize(X, Y, a_prev, parameters)
        
        #使用延迟来保持损失平滑，加速训练
        loss = cllm_utils.smooth(loss, curr_loss)
        
        #每2000代，通过sample()生成“\n”字符,检查模型是否学习正确
        if j % 2000== 0:
            print("第" + str(j+1) + "次迭代，损失值为:" + str(loss))
            
            seed = 0
            for name in range(dino_names):
                #采样
                sampled_indices = sample(parameters, char_to_ix, seed)
                cllm_utils.print_sample(sampled_indices, ix_to_char)
                
                #为了得到相同的效果，种子+1
                seed += 1
                
            print("\n")
            
    return parameters

#test
# #开始时间
# start_time = time.clock()

# #开始训练
# parameters = model(data, ix_to_char, char_to_ix, num_iterations=3500)

# #结束时间
# end_time = time.clock()

# #计算时差
# minium = end_time - start_time

# print("执行了：" + str(int(minium / 60)) + "分" + str(int(minium%60)) + "秒")

In [32]:
#开始时间
start_time = time.clock()

from keras.callbacks import LambdaCallback
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking
from keras.layers import LSTM
from keras.utils.data_utils import get_file
from keras.preprocessing.sequence import pad_sequences
from shakespeare_utils import *
import sys
import io

#结束时间
end_time = time.clock()

#计算时差
minium = end_time - start_time

print("执行了：" + str(int(minium / 60)) + "分" + str(int(minium%60)) + "秒")

  
Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Loading text data...
Creating training set...
number of training examples: 31412
Vectorizing training set...
Loading model...
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

执行了：0分16秒


  from ipykernel import kernelapp as app
