In [1]:
import tensorflow as tf

# Configure GPU memory behaviour up front: list the physical GPUs TensorFlow
# can see and, if there are any, switch on the memory-growth option for each.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    # Listing logical devices comes after the memory-growth calls; the counts
    # are printed as a quick sanity check of what TensorFlow will use.
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    # The error is only printed (not re-raised), so the notebook keeps running
    # with whatever GPU configuration is already in effect.
    print(e)

1 Physical GPUs, 1 Logical GPUs


In [2]:
# 코드 8-1 다른 온도 값을 사용하여 확률 분포의 가중치 바꾸기
import numpy as np

def reweight_distribution(original_distribution, temperature = 0.5):
    """Reweight a probability distribution with a softmax temperature.

    Computes ``p ** (1 / temperature)`` and renormalises the result so it sums
    to 1. Temperatures below 1 sharpen the distribution (lower entropy),
    temperatures above 1 flatten it (higher entropy).

    Args:
        original_distribution: Array-like of probabilities; the caller is
            expected to pass values that sum to 1.
        temperature: Scaling factor applied to the log-probabilities. Expected
            to be positive; a value of 0 divides by zero.

    Returns:
        A float64 NumPy array of the same shape holding the reweighted
        distribution.
    """
    probabilities = np.asarray(original_distribution, dtype="float64")

    # log(0) is -inf, which correctly maps back to probability 0 after exp();
    # only the RuntimeWarning it would emit is suppressed here.
    with np.errstate(divide="ignore"):
        scaled_logits = np.log(probabilities) / temperature

    # Shift by the maximum before exponentiating. The shift cancels out in the
    # normalisation below, but without it every term underflows to 0 at small
    # temperatures and the result becomes 0 / 0 = NaN.
    if scaled_logits.size:
        scaled_logits = scaled_logits - np.max(scaled_logits)
    distribution = np.exp(scaled_logits)

    # The reweighted values no longer sum to 1, so divide by their new sum.
    return distribution / np.sum(distribution)

In [3]:
# 코드 8-2 원본 텍스트 파일을 내려받아 파싱하기
import keras
import numpy as np

# Download the corpus (keras.utils.get_file returns the local file path).
path = keras.utils.get_file(
    "nietzsche.txt",
    origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt",
)

# Read inside a context manager so the file handle is closed promptly, and
# decode explicitly as UTF-8 so the character count and vocabulary do not
# depend on the platform's default encoding. The text is lower-cased.
with open(path, encoding="utf-8") as corpus_file:
    text = corpus_file.read().lower()
print("말뭉치 크기 :", len(text))

말뭉치 크기 : 600893


In [4]:
# Listing 8-3: vectorizing sequences of characters
maxlen = 60  # extract sequences of 60 characters
step = 3  # start a new sequence every 3 characters

sentences = []  # extracted input sequences

next_chars = []  # targets: the character that follows each sequence

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])

print("시퀀스 개수 :", len(sentences))

chars = sorted(set(text))  # sorted list of the unique characters in the corpus
print("고유한 글자 :", len(chars))
# Map each character to its index in `chars`. enumerate() builds the mapping in
# one pass instead of calling chars.index() (a linear scan) for every character.
char_indices = {char: index for index, char in enumerate(chars)}

print("벡터화...")

# One-hot encode the characters into binary arrays:
#   x[i, t, c] is True when character t of sequence i is chars[c]
#   y[i, c]    is True when the character following sequence i is chars[c]
# The builtin `bool` is used as dtype because the `np.bool` alias was removed
# from NumPy (1.24) and raises AttributeError there.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

시퀀스 개수 : 200278
고유한 글자 : 57
벡터화...


In [5]:
# 코드 8-4 다음 글자를 예측하기 위한 단일 LSTM 모델
from keras import layers

# Single LSTM layer (128 units) reading one-hot sequences of shape
# (maxlen, len(chars)), followed by a softmax over the character vocabulary
# that gives the predicted distribution of the next character.
model = keras.models.Sequential([
    layers.LSTM(128, input_shape=(maxlen, len(chars))),
    layers.Dense(len(chars), activation="softmax"),
])

In [6]:
# Listing 8-5: model compilation configuration
# The targets are one-hot encoded, so categorical cross-entropy is the loss.
# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss="categorical_crossentropy",
              optimizer=optimizer)

In [7]:
# Listing 8-6: sample the next character index from the model's predictions
def sample(preds, temperature=1.0):
    """Draw one index from `preds` after reweighting it by `temperature`.

    Args:
        preds: Array-like of probabilities (e.g. a softmax output).
        temperature: Softmax temperature. Values below 1 make the choice more
            deterministic, values above 1 make it more random. Expected to be
            positive; a value of 0 divides by zero.

    Returns:
        The sampled index into `preds` (NumPy integer).
    """
    preds = np.asarray(preds).astype("float64")

    # log(0) is -inf, which maps back to probability 0 after exp(); only the
    # RuntimeWarning it would emit is suppressed.
    with np.errstate(divide="ignore"):
        scaled_logits = np.log(preds) / temperature

    # Subtract the maximum before exponentiating. It cancels out in the
    # normalisation but prevents every term from underflowing to 0 at low
    # temperatures, which would produce NaN probabilities and make
    # np.random.multinomial fail.
    exp_preds = np.exp(scaled_logits - np.max(scaled_logits))
    preds = exp_preds / np.sum(exp_preds)

    # A single multinomial draw yields a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
# 코드 8-7 텍스트 생성 루프
import random
import sys

random.seed(42)
# The seed position is drawn once, before the loop, so every epoch generates
# from the same seed text and the outputs can be compared across epochs.
start_index = random.randint(0, len(text) - maxlen - 1)

num_epochs = 60
# range() excludes its end value, so the upper bound is num_epochs + 1 to
# train for the full 60 epochs (range(1, 60) stopped after 59).
for epoch in range(1, num_epochs + 1):
    print("에포크 :", epoch, "\n\n")
    model.fit(x, y, batch_size=128, epochs = 1) # one pass over the data per outer iteration
    
    seed_text = text[start_index: start_index + maxlen] # fixed-position seed text of maxlen characters
    print('--시드 텍스트: "' + seed_text + '"')
    
    for temperature in [0.2, 0.5, 1.0, 1.2]: # try a range of sampling temperatures
        print("------ 온도 :", temperature)
        generated_text = seed_text
        sys.stdout.write(generated_text)
        
        for i in range(400): # generate 400 characters, starting from the seed text
            
            # One-hot encode the current window of maxlen characters
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.
            
            # Sample the next character from the model's predicted distribution
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            
            # Slide the window: append the new character and drop the oldest
            # one so the model input stays maxlen characters long
            generated_text += next_char
            generated_text = generated_text[1:]
            
            sys.stdout.write(next_char)
            sys.stdout.flush()

        print()

에포크 : 1 


--시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도 : 0.2
the slowly ascending ranks and classes, in which,
through for the such a such a such a such a such a still to the superfice of the such a such and the such any such a such a such any such a such to the such a such a more and in the such a such a perion, and the such a pertain and the such a self and the such a such a most of the such a suct to the such a such and the such a such a perhaps of the such a such a such a pertain the such a such and possible to 
------ 온도 : 0.5
the slowly ascending ranks and classes, in which,
through for the soul and such a the soul and allorican of the sume to of the will as the superation, the farule as it is the still of grature of the later the stronger, the make of the such a posiation, the such a still such and in the most continess, to the some the soul to the world of the well contain to the take however of the farm to the world to the spection, and p

uneno be on mere wides which schezerations in mutwion those-frublite, that profting for, that it is, are to
muty willied could per methoh afoverxernes--inintions
and if-work ownjoriet phedment, of meriable
에포크 : 9 


--시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도 : 0.2
the slowly ascending ranks and classes, in which,
through for the same the strength and the same the world and the strength and souls to the same the strength to the same the subject and so the stands to the same the same the strength and the such a soul the strengther and actiness and the saint and the man and the present and pain, and the strength and present and passions and the man and the contempt to the believe of the self-mears of the strength and t
------ 온도 : 0.5
the slowly ascending ranks and classes, in which,
through for the sense of a something man of the one another another, and a power and presents, as the existence of the deviluaring books, to say, and so such its presu