In [1]:
import tensorflow as tf

In [2]:
# tf.debugging.set_log_device_placement(True)
# tf.ones([])

In [3]:
import numpy as np
import os
import time

In [4]:
import re
import shutil
import zipfile

import numpy as np
import pandas as pd
import unicodedata
import urllib3
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

import pathlib
import random
import string
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from pickle import dump
from unicodedata import normalize

# 데이터 전처리

## 데이터 로드

In [5]:
f = open("C:/Users/ST-USER/Desktop/project_4/les_miserables.txt", 'r', encoding='utf-8')
data = f.read()

In [6]:
print(data[:250])

Chapitre I

Monsieur Myriel


En 1815, M. Charles-François-Bienvenu Myriel était évêque de Digne.
C'était un vieillard d'environ soixante-quinze ans; il occupait le siège
de Digne depuis 1806.

Quoique ce détail ne touche en aucune manière au fond mê


In [7]:
print ('텍스트의 길이: {}자'.format(len(data)))

텍스트의 길이: 3089743자


## Vocabulary 생성

In [8]:
vocab = sorted(set(data))
print ('고유 문자수 {}개'.format(len(vocab)))

고유 문자수 106개


## 문자 별 인덱스, 인덱스 별 문자 맵핑 생성

In [9]:
char2idx = {u:i for i, u in enumerate(vocab)}

# 인덱스에서 문자로 매핑
idx2char = np.array(vocab)

## 생성된 맵핑 확인 : char2idx 20개 항목 확인

In [10]:
print('{')
for char,_ in zip(char2idx, range(20)):
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '"' :   3,
  "'" :   4,
  '(' :   5,
  ')' :   6,
  '*' :   7,
  '+' :   8,
  ',' :   9,
  '-' :  10,
  '.' :  11,
  '/' :  12,
  '0' :  13,
  '1' :  14,
  '2' :  15,
  '3' :  16,
  '4' :  17,
  '5' :  18,
  '6' :  19,
  ...
}


## 문자열 데이터를 숫자열 데이터로 변환

In [11]:
text_as_int = np.array([char2idx[c] for c in data])

## 문자열에서 숫자열로 맵핑 확인

In [12]:
# 텍스트에서 처음 13개의 문자가 숫자로 어떻게 매핑되었는지를 보여줍니다
print ('{} ---- 문자들이 다음의 정수로 매핑되었습니다 ---- > {}'.format(repr(data[:13]), text_as_int[:13]))

'Chapitre I\n\nM' ---- 문자들이 다음의 정수로 매핑되었습니다 ---- > [28 60 53 68 61 72 70 57  1 34  0  0 38]


# 데이터 세트 생성

## 1. 문자 단위 데이터 세트 생성 

In [13]:
# RNN 입력 sequence 길이
seq_length = 100

# 데이터셋 만들기
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# 처음 5개 문자 확인
for i in char_dataset.take(5):
  print(idx2char[i.numpy()])

C
h
a
p
i


## 2. 청크 단위 데이터 세트 생성

In [14]:
# label 생성을 위해 배치 길이를 seq_length+1 설정
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

# 처음 5개 sequence 확인
for item in sequences.take(5):
  print(repr(''.join(idx2char[item.numpy()])))

"Chapitre I\n\nMonsieur Myriel\n\n\nEn 1815, M. Charles-François-Bienvenu Myriel était évêque de Digne.\nC'é"
"tait un vieillard d'environ soixante-quinze ans; il occupait le siège\nde Digne depuis 1806.\n\nQuoique "
"ce détail ne touche en aucune manière au fond même de ce que\nnous avons à raconter, il n'est peut-êtr"
"e pas inutile, ne fût-ce que\npour être exact en tout, d'indiquer ici les bruits et les propos qui\nava"
"ient couru sur son compte au moment où il était arrivé dans le\ndiocèse. Vrai ou faux, ce qu'on dit de"


## 3. 입력과 타겟이 분리된 데이터 세트 생성

In [15]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)

## 입력 & 타겟 확인

In [16]:
for input_example, target_example in  dataset.take(1):
  print ('입력 데이터: ', repr(''.join(idx2char[input_example.numpy()])))
  print ('타깃 데이터: ', repr(''.join(idx2char[target_example.numpy()])))

입력 데이터:  "Chapitre I\n\nMonsieur Myriel\n\n\nEn 1815, M. Charles-François-Bienvenu Myriel était évêque de Digne.\nC'"
타깃 데이터:  "hapitre I\n\nMonsieur Myriel\n\n\nEn 1815, M. Charles-François-Bienvenu Myriel était évêque de Digne.\nC'é"


## 4. 배치 단위의 데이터 세트 생성

In [17]:
BATCH_SIZE = 64 # 배치 크기
BUFFER_SIZE = 10000 # 데이터셋을 섞을 버퍼 크기

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [18]:
dataset

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int32, tf.int32)>

# 모델 정의 - LSTM

## 모델 정의

In [19]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim,
                              batch_input_shape=[batch_size, None]),
    tf.keras.layers.LSTM(rnn_units,
                        return_sequences=True,
                        stateful=True,
                        recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
  ])
  return model

## 모델 생성

In [20]:
vocab_size = len(vocab) # 어휘 사전의 크기
embedding_dim = 256     # 임베딩 차원
rnn_units = 1024        # RNN 유닛(unit) 개수

model1 = build_model(
  vocab_size = len(vocab),
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

## 출력 Tensor Shape 확인

In [21]:
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model1(input_example_batch)
  print(example_batch_predictions.shape, "# (배치 크기, 시퀀스 길이, 어휘 사전 크기)")

(64, 100, 106) # (배치 크기, 시퀀스 길이, 어휘 사전 크기)


## 모델 구조 확인

In [22]:
model1.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           27136     
_________________________________________________________________
lstm (LSTM)                  (64, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (64, None, 106)           108650    
Total params: 5,382,762
Trainable params: 5,382,762
Non-trainable params: 0
_________________________________________________________________


## 예측 분포에서 샘플링 테스트

### Categorical Distribution 샘플링

In [23]:
# (bach size, number of class)형태의 2D Tensor로 입력
# 따라서, 100개의 timestep이 batch인 것으로 처리됨
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)

# 출력이 (100,1)이므로 (100)으로 변경
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()

### 100개 timestep에 대한 샘플

In [24]:
sampled_indices

array([ 74,  67,  78,  15, 102,  46,  80,  60,  52,  17,  42,  33,  27,
        45,  41,  55,  24,  16,  35,  93,  65,  15,   1,  36, 102,  22,
        67,   3,  82,  54,  30,  69,  81,  75, 105,  20,   9,  24,  52,
        24,  84,  47,   5,   2,  83,  14,  40,  99,  93,  32,   0,  37,
         8,  35,  62, 102, 105,  72,  97,  90,  48,  12, 105,  38,  47,
        83,  22, 101,  80,   0,  86,  30,  15,  42,   6,  14,  10,  60,
         9,  72,  28,  13,  11,  37,  46,  52,   8,  98,  80,  27,  82,
        28,  63,  92,  17,  94,  46,  85,   0,  55], dtype=int64)

### 예측된 텍스트 복호화

In [25]:
print("입력: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("예측된 다음 문자: \n", repr("".join(idx2char[sampled_indices])))

입력: 
 " c'est-à-dire à la nuit close, il\npassait devant le théâtre de la Porte-Saint-Martin où l'on donnait"

예측된 다음 문자: 
 'voz2öU°h_4QHBTPc;3Jçm2 Kö9o"»bEqºwü7,;_;ÂV(!À1OïçG\nL+JjöütëàW/üMVÀ9ô°\nÈE2Q)1-h,tC0.LU_+î°B»Ckæ4èUÇ\nc'


# 모델 훈련

## 모델 컴파일

In [26]:
model1.compile(optimizer='adam', 
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

## 체크포인트 콜백 정의

In [27]:
# 체크포인트가 저장될 디렉토리
checkpoint_dir1 = './training_checkpoints'
# 체크포인트 파일 이름
checkpoint_prefix1 = os.path.join(checkpoint_dir1, "ckpt_{epoch}")

checkpoint_callback1=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix1,
    save_weights_only=True)

## 모델 훈련

In [29]:
# EPOCH = 50

# # 체크포인트 콜백 설정
# model1.fit(dataset, epochs=EPOCH, callbacks=[checkpoint_callback1])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x233d18763a0>

In [30]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 8295201077785277440,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 5736300544
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 17033106111397212744
 physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3070, pci bus id: 0000:2b:00.0, compute capability: 8.6"]

# 모델 테스트

### 모델을 테스트 하기 위해 훈련된 가중치를   갖고 배치 크기가 1인 입력을 받는 모델을 새로 만들어야 함

## 테스트 용 모델을 새로 생성 (단, batch_size=1로 설정)

In [31]:
model1 = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

## 모델에 마지막 저장 체크포인트 복구

In [32]:
last_checkpoint1 = tf.train.latest_checkpoint(checkpoint_dir1)
print(last_checkpoint1)
model1.load_weights(last_checkpoint1)

./training_checkpoints\ckpt_50


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x233f1b1a850>

## 모델의 input shape을 [1, None]으로 변경

In [33]:
# 배치 크기 1로 모델을 새로 빌드
model1.build(tf.TensorShape([1, None]))

In [34]:
model1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            27136     
_________________________________________________________________
lstm_1 (LSTM)                (1, None, 1024)           5246976   
_________________________________________________________________
dense_1 (Dense)              (1, None, 106)            108650    
Total params: 5,382,762
Trainable params: 5,382,762
Non-trainable params: 0
_________________________________________________________________


## 모델 테스트

In [301]:
# 학습된 모델을 사용하여 텍스트 생성
def generate_text(model1, start_string, num_generate):
  
  num_generate = 50 # 생성할 문자의 수

  # 시작 문자열을 숫자열로 변환
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0) # 2차원 배열로 변환

  text_generated = [] # 생성된 결과를 저장할 빈 문자열

  # temperature로 확률 값 조정 – 크면 균등분포, 낮으면 argmax와 같이 됨
  temperature = 1.0

  model1.reset_states()
  for i in range(num_generate):
      predictions = model1(input_eval) # 배치 크기 = 1
      predictions = tf.squeeze(predictions, 0) # 배치 차원 제거
      predictions = predictions / temperature # temperature 적용
    
    
      # 범주형 분포를 사용하여 모델에서 리턴한 단어 예측
      # input   : [batch_size, num_classes]  RNN sequence를 batch 형태로 입력
      # output : [batch_size, num_samples] [-1,0]는 마지막 batch 항목에서 첫번째로 sampling한 값을 의미
      predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

      # 예측된 단어를 다음 입력으로 모델에 전달
      input_eval = tf.expand_dims([predicted_id], 0) # 2차원 배열로 변환

      text_generated.append(idx2char[predicted_id]) # 생성된 문자열에 추가

  return (start_string + ''.join(text_generated))

In [387]:
lstm_text1 = generate_text(model1, start_string=u"Je suis ", num_generate=5)
print(lstm_text1)

Je suis tonnante:

--Venez les êtres d'une patodilition de


In [388]:
lstm_text2 = generate_text(model1, start_string=u"Il y a ", num_generate=5)
print(lstm_text2)

Il y a tir dans les bois. Au peuple, était de la
fumée et


In [37]:
f2 = open("C:/Users/ST-USER/Desktop/project_4/les_miserables.txt", 'r', encoding='utf-8')
data2 = f2.read()

In [38]:
print ('텍스트의 길이: {}자'.format(len(data2)))

텍스트의 길이: 3089743자


In [39]:
import string

In [40]:
def clean_doc(doc):
    # replace '--' with a space ' '
    doc = doc.replace('--', ' ')
    doc = doc.lower()
    return doc

In [41]:
data2 = clean_doc(data2)

# 좀 더 다듬은 GRU 모델

In [42]:
"".join(sorted(set(data2.lower())))

'\n !"\'()*+,-./0123456789:;?_abcdefghijklmnopqrstuvwxyz«°º»àâæçèéêëîïñôöùûü'

In [43]:
tokenizer = tf.keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts(data2)

In [44]:
tokenizer.texts_to_sequences(["miserables"])

[[14, 6, 5, 2, 8, 3, 24, 10, 2, 5]]

In [45]:
tokenizer.sequences_to_texts([[14, 4, 6, 2, 9, 3, 25, 10, 2, 6]])

['m t i e u a h l e i']

In [46]:
max_id = len(tokenizer.word_index) # number of distinct characters
dataset_size = tokenizer.document_count # total number of characters

In [47]:
[encoded] = np.array(tokenizer.texts_to_sequences([data2])) - 1
train_size = dataset_size * 90 // 100
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])

In [48]:
n_steps = 100
window_length = n_steps + 1 # target = input shifted 1 character ahead
dataset = dataset.window(window_length, shift=1, drop_remainder=True)

In [49]:
dataset = dataset.flat_map(lambda window: window.batch(window_length))

In [50]:
np.random.seed(42)
tf.random.set_seed(42)

In [51]:
batch_size = 32
dataset = dataset.shuffle(10000).batch(batch_size)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))

In [52]:
dataset = dataset.map(
    lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))

In [53]:
dataset = dataset.prefetch(1)

In [54]:
for X_batch, Y_batch in dataset.take(1):
    print(X_batch.shape, Y_batch.shape)

(32, 100, 73) (32, 100)


In [55]:
model2 = tf.keras.models.Sequential([
    tf.keras.layers.GRU(128, return_sequences=True, input_shape=[None, max_id],
                     #dropout=0.2, recurrent_dropout=0.2),
                     dropout=0.2),
    tf.keras.layers.GRU(128, return_sequences=True,
                     #dropout=0.2, recurrent_dropout=0.2),
                     dropout=0.2),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(max_id,
                                                    activation="softmax"))
])


In [56]:
model2.compile(loss="sparse_categorical_crossentropy", optimizer="adam")

In [59]:
checkpoint_dir2 = './training_checkpoints_full_text'
# 체크포인트 파일 이름
checkpoint_prefix2 = os.path.join(checkpoint_dir2, "ckpt_{epoch}")

checkpoint_callback2=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix2,
    save_weights_only=True)

In [463]:
model2.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, None, 128)         77952     
_________________________________________________________________
gru_1 (GRU)                  (None, None, 128)         99072     
_________________________________________________________________
time_distributed (TimeDistri (None, None, 73)          9417      
Total params: 186,441
Trainable params: 186,441
Non-trainable params: 0
_________________________________________________________________


In [60]:
last_checkpoint2 = tf.train.latest_checkpoint(checkpoint_dir2)
print(last_checkpoint2)
model2.load_weights(last_checkpoint2)

./training_checkpoints_full_text\ckpt_10


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x233f1ba2c40>

In [61]:
# EPOCH = 10

# # 체크포인트 콜백 설정
# model.fit(dataset, epochs=EPOCH, callbacks=[checkpoint_callback])

In [62]:
def preprocess(texts):
    X = np.array(tokenizer.texts_to_sequences(texts)) - 1
    return tf.one_hot(X, max_id)

In [63]:
X_new = preprocess(["Je sui"])
#Y_pred = model.predict_classes(X_new)
Y_pred = np.argmax(model2(X_new), axis=-1)
tokenizer.sequences_to_texts(Y_pred + 1)[0][-1] # 1st sentence, last char

'v'

In [64]:
tf.random.set_seed(42)

tf.random.categorical([[np.log(0.5), np.log(0.4), np.log(0.1)]], num_samples=40).numpy()

array([[0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
        2, 0, 0, 1, 1, 1, 0, 0, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0]],
      dtype=int64)

In [304]:
def next_char2(text, temperature=1):
    X_new = preprocess([text])
    y_proba = model2(X_new)[0, -1:, :]
    rescaled_logits = tf.math.log(y_proba) / temperature
    char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
    return tokenizer.sequences_to_texts(char_id.numpy())[0]

In [305]:
tf.random.set_seed(42)

next_char2("Je sui", temperature=1)

'v'

In [306]:
def complete_text2(text, n_chars=50, temperature=1):
    for _ in range(n_chars):
        text += next_char2(text, temperature)
    return text

In [307]:
tf.random.set_seed(42)

print(complete_text2("J", temperature=0.2))

Jean valjean se trouva le corps en percé de la rue.


In [308]:
tf.random.set_seed(42)

print(complete_text2("e", temperature=1))

e ne poccergent teus pied à la porte,
c'est terribl


In [389]:
tf.random.set_seed(42)

gru_text1 = complete_text2("Je suis", temperature=1)
print(gru_text1)

Je suis bongé, il est teusonne glande
par bout à la raiso


In [390]:
tf.random.set_seed(42)

gru_text2 = complete_text2("Il y a ", temperature=1)
print(gru_text2)

Il y a pas toccourement un signe d'un cachet. jean valjea


In [353]:
### new_model = tf.keras.models.Sequential([
#     tf.keras.layers.GRU(128, return_sequences=True, input_shape=[None, max_id],
#                      #dropout=0.2, recurrent_dropout=0.2),
#                      dropout=0.2),
#     tf.keras.layers.GRU(128, return_sequences=True,
#                      #dropout=0.2, recurrent_dropout=0.2),
#                      dropout=0.2),
#     tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(max_id,
#                                                     activation="softmax"))
# ])


In [77]:
# last_checkpoint2 = tf.train.latest_checkpoint(checkpoint_dir)
# print(last_checkpoint2)
# new_model.load_weights(last_checkpoint2)

In [78]:
# def next_char2(text, temperature=1):
#     X_new = preprocess([text])
#     y_proba = new_model(X_new)[0, -1:, :]
#     rescaled_logits = tf.math.log(y_proba) / temperature
#     char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
#     return tokenizer.sequences_to_texts(char_id.numpy())[0]

In [79]:
# complete_text('Je suis')

# Stateful RNN

In [80]:
tf.random.set_seed(42)

In [81]:
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])
dataset = dataset.window(window_length, shift=n_steps, drop_remainder=True)
dataset = dataset.flat_map(lambda window: window.batch(window_length))
dataset = dataset.batch(1)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))
dataset = dataset.map(
    lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))
dataset = dataset.prefetch(1)

In [82]:
batch_size = 32
encoded_parts = np.array_split(encoded[:train_size], batch_size)
datasets = []
for encoded_part in encoded_parts:
    dataset = tf.data.Dataset.from_tensor_slices(encoded_part)
    dataset = dataset.window(window_length, shift=n_steps, drop_remainder=True)
    dataset = dataset.flat_map(lambda window: window.batch(window_length))
    datasets.append(dataset)
dataset = tf.data.Dataset.zip(tuple(datasets)).map(lambda *windows: tf.stack(windows))
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))
dataset = dataset.map(
    lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))
dataset = dataset.prefetch(1)

In [83]:
model3 = tf.keras.models.Sequential([
    tf.keras.layers.GRU(128, return_sequences=True, stateful=True,
                     #dropout=0.2, recurrent_dropout=0.2,
                     dropout=0.2,
                     batch_input_shape=[batch_size, None, max_id]),
    tf.keras.layers.GRU(128, return_sequences=True, stateful=True,
                     #dropout=0.2, recurrent_dropout=0.2),
                     dropout=0.2),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(max_id,
                                                    activation="softmax"))
])

In [84]:
class ResetStatesCallback(tf.keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs):
        self.model.reset_states()

In [None]:
# model3.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
# history = model3.fit(dataset, epochs=50,
#                     callbacks=[ResetStatesCallback()])

In [464]:
model3.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_4 (GRU)                  (None, None, 128)         77952     
_________________________________________________________________
gru_5 (GRU)                  (None, None, 128)         99072     
_________________________________________________________________
time_distributed_2 (TimeDist (None, None, 73)          9417      
Total params: 186,441
Trainable params: 186,441
Non-trainable params: 0
_________________________________________________________________


In [86]:
# diff batch size
stateless_model = tf.keras.models.Sequential([
    tf.keras.layers.GRU(128, return_sequences=True, input_shape=[None, max_id]),
    tf.keras.layers.GRU(128, return_sequences=True),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(max_id,
                                                    activation="softmax"))
])

In [87]:
stateless_model.build(tf.TensorShape([None, None, max_id]))

In [88]:
stateless_model.load_weights('stateless_model.h5')

In [89]:
# stateless_model.set_weights(model3.get_weights())
model3 = stateless_model

In [311]:
def next_char3(text, temperature=1):
    X_new = preprocess([text])
    y_proba = model3(X_new)[0, -1:, :]
    rescaled_logits = tf.math.log(y_proba) / temperature
    char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
    return tokenizer.sequences_to_texts(char_id.numpy())[0]

In [312]:
def complete_text3(text, n_chars=50, temperature=1):
    for _ in range(n_chars):
        text += next_char3(text, temperature)
    return text

In [313]:
tf.random.set_seed(42)

print(complete_text3("t"))

t-il pocricle _pour ayant l'autre
chose. moite avai


In [391]:
tf.random.set_seed(42)

state_text1 = complete_text3("Je suis ")
print(state_text1)

Je suis pas locricle de
cours _dans les jours.

   t il y 


In [392]:
tf.random.set_seed(42)

state_text2 = complete_text3("Il y a ")
print(state_text2)

Il y a pas pocricle de
cosette grande la porte qui lua so


In [345]:
# model_state.save_weights('stateless_model.h5')

------------------------

# 기계번역

# 학습 가능한 형태로 코퍼스를 변형

In [157]:
num_samples = 190000 # 20000

In [158]:
with open('fra.txt', 'r', encoding='utf-8') as f:
    lines = f.read().split("\n")[:-1]
text_pairs = []
for line in lines:
    eng, fra = line.split("\t")
    eng = "[start] " + eng + " [end]"
    text_pairs.append((eng, fra))

In [159]:
for _ in range(5):
    print(random.choice(text_pairs))

("[start] Tom's new girlfriend is quite attractive. [end]", 'La nouvelle copine de Tom est plutôt séduisante.')
("[start] I haven't seen you around before. [end]", "Je ne t'ai pas vu auparavant dans les environs.")
('[start] They were afraid of being overheard. [end]', "Elles ont craint d'être écoutées.")
("[start] I'd like you to mail this letter. [end]", "J'aimerais que tu postes cette lettre.")
("[start] I'm not sure that that's true. [end]", 'Je ne suis pas sûr que ça soit vrai.')


In [160]:
random.shuffle(text_pairs)
num_val_samples = int(0.15 * len(text_pairs))
num_train_samples = len(text_pairs) - 2 * num_val_samples
train_pairs = text_pairs[:num_train_samples]
val_pairs = text_pairs[num_train_samples : num_train_samples + num_val_samples]
test_pairs = text_pairs[num_train_samples + num_val_samples :]

print(f"{len(text_pairs)} total pairs")
print(f"{len(train_pairs)} training pairs")
print(f"{len(val_pairs)} validation pairs")
print(f"{len(test_pairs)} test pairs")

167130 total pairs
116992 training pairs
25069 validation pairs
25069 test pairs


In [161]:
strip_chars = string.punctuation + "?"
strip_chars = strip_chars.replace("[", "")
strip_chars = strip_chars.replace("]", "")

vocab_size = 15000
sequence_length = 20
batch_size = 64


def custom_standardization(input_string):
    lowercase = tf.strings.lower(input_string)
    return tf.strings.regex_replace(lowercase, "[%s]" % re.escape(strip_chars), "")


fra_vectorization = TextVectorization(
    max_tokens=vocab_size, output_mode="int", output_sequence_length=sequence_length,
)
eng_vectorization = TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length + 1,
    standardize=custom_standardization,
)
train_eng_texts = [pair[0] for pair in train_pairs]
train_fra_texts = [pair[1] for pair in train_pairs]
eng_vectorization.adapt(train_eng_texts)
fra_vectorization.adapt(train_fra_texts)

In [162]:
def format_dataset(eng, fra):
    eng = eng_vectorization(eng)
    fra = fra_vectorization(fra)
    return ({"encoder_inputs": fra, "decoder_inputs": eng[:, :-1],}, eng[:, 1:])


def make_dataset(pairs):
    eng_texts, fra_texts = zip(*pairs)
    eng_texts = list(eng_texts)
    fra_texts = list(fra_texts)
    dataset = tf.data.Dataset.from_tensor_slices((eng_texts, fra_texts))
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(format_dataset)
    return dataset.shuffle(2048).prefetch(16).cache()


train_ds = make_dataset(train_pairs)
val_ds = make_dataset(val_pairs)

In [163]:
for inputs, targets in train_ds.take(1):
    print(f'inputs["encoder_inputs"].shape: {inputs["encoder_inputs"].shape}')
    print(f'inputs["decoder_inputs"].shape: {inputs["decoder_inputs"].shape}')
    print(f"targets.shape: {targets.shape}")

inputs["encoder_inputs"].shape: (64, 20)
inputs["decoder_inputs"].shape: (64, 20)
targets.shape: (64, 20)


# 트랜스포머

In [164]:
class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super(TransformerEncoder, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.supports_masking = True
        
    def get_config(self):

        config = super().get_config().copy()
        config.update({
            'embed_dim': self.embed_dim,
            'dense_dim': self.dense_dim,
            'num_heads': self.num_heads,
        })
        return config

    def call(self, inputs, mask=None):
        if mask is not None:
            padding_mask = tf.cast(mask[:, tf.newaxis, tf.newaxis, :], dtype="int32")
        attention_output = self.attention(
            query=inputs, value=inputs, key=inputs, attention_mask=padding_mask
        )
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)


class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):
        super(PositionalEmbedding, self).__init__(**kwargs)
        self.token_embeddings = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions
    
    def get_config(self):

        config = super().get_config().copy()
        config.update({
            'vocab_size': self.vocab_size,
            'sequence_length': self.sequence_length,
            'embed_dim': self.embed_dim,
        })
        return config

    def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)


class TransformerDecoder(layers.Layer):
    def __init__(self, embed_dim, latent_dim, num_heads, **kwargs):
        super(TransformerDecoder, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.latent_dim = latent_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.dense_proj = keras.Sequential(
            [layers.Dense(latent_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True
    
    def get_config(self):

        config = super().get_config().copy()
        config.update({
            'embed_dim': self.embed_dim,
            'latent_dim': self.latent_dim,
            'num_heads': self.num_heads,
        })
        return config

    
    def call(self, inputs, encoder_outputs, mask=None):
        causal_mask = self.get_causal_attention_mask(inputs)
        if mask is not None:
            padding_mask = tf.cast(mask[:, tf.newaxis, :], dtype="int32")
            padding_mask = tf.minimum(padding_mask, causal_mask)

        attention_output_1 = self.attention_1(
            query=inputs, value=inputs, key=inputs, attention_mask=causal_mask
        )
        out_1 = self.layernorm_1(inputs + attention_output_1)

        attention_output_2 = self.attention_2(
            query=out_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,
        )
        out_2 = self.layernorm_2(out_1 + attention_output_2)

        proj_output = self.dense_proj(out_2)
        return self.layernorm_3(out_2 + proj_output)

    def get_causal_attention_mask(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i = tf.range(sequence_length)[:, tf.newaxis]
        j = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
        mult = tf.concat(
            [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
            axis=0,
        )
        return tf.tile(mask, mult)

In [165]:
embed_dim = 256
latent_dim = 2048
num_heads = 8

encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="encoder_inputs")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
encoder_outputs = TransformerEncoder(embed_dim, latent_dim, num_heads)(x)
encoder = keras.Model(encoder_inputs, encoder_outputs)

decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="decoder_inputs")
encoded_seq_inputs = keras.Input(shape=(None, embed_dim), name="decoder_state_inputs")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
x = TransformerDecoder(embed_dim, latent_dim, num_heads)(x, encoded_seq_inputs)
x = layers.Dropout(0.5)(x)
decoder_outputs = layers.Dense(vocab_size, activation="softmax")(x)
decoder = keras.Model([decoder_inputs, encoded_seq_inputs], decoder_outputs)

decoder_outputs = decoder([decoder_inputs, encoder_outputs])
transformer = keras.Model(
    [encoder_inputs, decoder_inputs], decoder_outputs, name="transformer"
)

# 학습

In [166]:
epochs = 30  # This should be at least 30 for convergence

transformer.summary()
transformer.compile(
    "rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)

Model: "transformer"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_inputs (InputLayer)     [(None, None)]       0                                            
__________________________________________________________________________________________________
positional_embedding_4 (Positio (None, None, 256)    3845120     encoder_inputs[0][0]             
__________________________________________________________________________________________________
decoder_inputs (InputLayer)     [(None, None)]       0                                            
__________________________________________________________________________________________________
transformer_encoder_2 (Transfor (None, None, 256)    3155456     positional_embedding_4[0][0]     
________________________________________________________________________________________

In [167]:
checkpoint_dir4 = './translator_checkpoints2'
# 체크포인트 파일 이름
checkpoint_prefix4 = os.path.join(checkpoint_dir4, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix4,
    save_weights_only=True,
    save_freq = 10)


transformer.fit(train_ds, epochs=epochs, validation_data=val_ds, callbacks=[checkpoint_callback])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x233f1dd7760>

In [168]:
last_checkpoint4 = tf.train.latest_checkpoint(checkpoint_dir4)
print(last_checkpoint4)
transformer.load_weights(last_checkpoint4)

./translator_checkpoints2\ckpt_30


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x233f1dd7460>

# 문장 디코딩

In [169]:
eng_vocab = eng_vectorization.get_vocabulary()
eng_index_lookup = dict(zip(range(len(eng_vocab)), eng_vocab))
max_decoded_sentence_length = 20


def decode_sequence(input_sentence):
    tokenized_input_sentence = fra_vectorization([input_sentence])
    decoded_sentence = "[start]"
    for i in range(max_decoded_sentence_length):
        tokenized_target_sentence = eng_vectorization([decoded_sentence])[:, :-1]
        predictions = transformer([tokenized_input_sentence, tokenized_target_sentence])

        sampled_token_index = np.argmax(predictions[0, i, :])
        sampled_token = eng_index_lookup[sampled_token_index]
        decoded_sentence += " " + sampled_token

        if sampled_token == "[end]":
            break
    return decoded_sentence


test_fra_texts = [pair[1] for pair in test_pairs]
for _ in range(5):
    input_sentence = random.choice(test_fra_texts)
    translated = decode_sequence(input_sentence)

In [170]:
decode_sequence("trois poem")

'[start] three [end]'

In [171]:
from tensorflow.keras.models import load_model
transformer.save_weights('translator_1')
# new_model = tf.keras.models.load_model('iris.h5')

In [172]:
transformer_last = keras.Model(
    [encoder_inputs, decoder_inputs], decoder_outputs, name="transformer_last"
)

In [173]:
transformer_last.load_weights("translator_1")

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2359d4236a0>

In [174]:
transformer_last.summary()

Model: "transformer_last"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_inputs (InputLayer)     [(None, None)]       0                                            
__________________________________________________________________________________________________
positional_embedding_4 (Positio (None, None, 256)    3845120     encoder_inputs[0][0]             
__________________________________________________________________________________________________
decoder_inputs (InputLayer)     [(None, None)]       0                                            
__________________________________________________________________________________________________
transformer_encoder_2 (Transfor (None, None, 256)    3155456     positional_embedding_4[0][0]     
___________________________________________________________________________________

In [175]:
def decode_sequence2(input_sentence):
    tokenized_input_sentence = fra_vectorization([input_sentence])
    decoded_sentence = "[start]"
    for i in range(max_decoded_sentence_length):
        tokenized_target_sentence = eng_vectorization([decoded_sentence])[:, :-1]
        predictions = transformer_last([tokenized_input_sentence, tokenized_target_sentence])

        sampled_token_index = np.argmax(predictions[0, i, :])
        sampled_token = eng_index_lookup[sampled_token_index]
        decoded_sentence += " " + sampled_token

        if sampled_token == "[end]":
            break
    return decoded_sentence


test_fra_texts = [pair[1] for pair in test_pairs]
for _ in range(5):
    input_sentence = random.choice(test_fra_texts)
    translated = decode_sequence2(input_sentence)

In [176]:
decode_sequence2("cherche la femme")

'[start] thank you the woman [end]'

In [197]:
decode_sequence2("comment te dire adieu")

'[start] how do you say [end]'

In [222]:
decode_sequence2("c'est a vie")

'[start] its life [end]'

# 문장 생성과 번역

In [393]:
lstm_text1

"Je suis tonnante:\n\n--Venez les êtres d'une patodilition de"

In [410]:
lstm_text1 = lstm_text1.replace("\n", ' ')
lstm_text1 = lstm_text1.replace(":", '')
lstm_text1 = lstm_text1.replace(",", '')
lstm_text1 = lstm_text1.replace(";", '')
lstm_text1 = lstm_text1.replace("?", ' ')
lstm_text1 = lstm_text1.replace("-", ' ')
lstm_text1

"Je suis tonnante    Venez les êtres d'une patodilition de"

In [425]:
lstm_trans1 = "I'm amazing Come the beings of a patodilition of"

In [441]:
lstm_result1 = decode_sequence2(lstm_text1)
lstm_result1

'[start] i am a [end]'

In [396]:
lstm_text2

'Il y a tir dans les bois. Au peuple, était de la\nfumée et'

In [397]:
lstm_text2 = lstm_text2.replace("\n", ' ')
lstm_text2 = lstm_text2.replace(":", '')
lstm_text2 = lstm_text2.replace(",", '')
lstm_text2 = lstm_text2.replace(";", '')
lstm_text2

'Il y a tir dans les bois. Au peuple était de la fumée et'

In [426]:
lstm_trans2 = "There is shooting in the woods. To the people was smoke and"

In [442]:
lstm_result2 = decode_sequence2(lstm_text2)
lstm_result2

'[start] there are have have have a good time at the to get used to the people [end]'

In [399]:
gru_text1

'Je suis bongé, il est teusonne glande\npar bout à la raiso'

In [400]:
gru_text1 = gru_text1.replace("\n", ' ')
gru_text1 = gru_text1.replace(":", '')
gru_text1 = gru_text1.replace(",", '')
gru_text1 = gru_text1.replace(";", '')
gru_text1

'Je suis bongé il est teusonne glande par bout à la raiso'

In [427]:
gru_trans1 = "I'm good, he's a bit of a pain at the end of the day"

In [443]:
gru_result1 = decode_sequence2(gru_text1)
gru_result1

'[start] i was a [end]'

In [402]:
gru_text2 = gru_text2.replace("\n", ' ')
gru_text2 = gru_text2.replace(":", '')
gru_text2 = gru_text2.replace(",", '')
gru_text2 = gru_text2.replace(";", '')
gru_text2

"Il y a pas toccourement un signe d'un cachet. jean valjea"

In [428]:
gru_trans2 = "There is not toccourement a sign of a cachet. jean valjea" 

In [444]:
gru_result2 = decode_sequence2(gru_text2)
gru_result2

'[start] there is no the [end]'

In [404]:
state_text1

'Je suis pas locricle de\ncours _dans les jours.\n\n   t il y '

In [405]:
state_text1 = state_text1.replace("\n", ' ')
state_text1 = state_text1.replace(":", '')
state_text1 = state_text1.replace(",", '')
state_text1 = state_text1.replace(";", '')
state_text1 = state_text1.replace("_", '')
state_text1

'Je suis pas locricle de cours dans les jours.     t il y '

In [429]:
state_trans1 = "I don't follow the course during the day. t there"

In [445]:
state_result1 = decode_sequence2(state_text1)
state_result1

'[start] i dont go to class at the days there are [end]'

In [407]:
state_text2

'Il y a pas pocricle de\ncosette grande la porte qui lua so'

In [408]:
state_text2 = state_text2.replace("\n", ' ')
state_text2 = state_text2.replace(":", ' ')
state_text2 = state_text2.replace(",", ' ')
state_text2 = state_text2.replace(";", ' ')
state_text2

'Il y a pas pocricle de cosette grande la porte qui lua so'

In [430]:
state_trans2 = "There is no grand cosette pocricle the door that lua so"

In [446]:
state_result2 = decode_sequence2(state_text2)
state_result2

'[start] there is no [end]'

In [447]:
lstm_text3 = generate_text(model1, start_string=u"Je", num_generate=5)
#print(lstm_text3)

lstm_text3 = lstm_text3.replace("\n", ' ')
lstm_text3 = lstm_text3.replace(":", ' ')
lstm_text3 = lstm_text3.replace(",", ' ')
lstm_text3 = lstm_text3.replace(";", ' ')
print(lstm_text3)
lstm_result3 = decode_sequence2(lstm_text3)
print(lstm_result3)

Je per qu'il avait que l'idée de génie à convoit du 
[start] i had that he a blue idea down from the [end]


In [438]:
lstm_trans3 = "I perceive that he had only the idea of genius to covet the"

In [448]:
tf.random.set_seed(42)

gru_text3 = complete_text2("Je", temperature=1)
#print(gru_text3)
gru_text3 = gru_text3.replace("\n", ' ')
gru_text3 = gru_text3.replace(":", ' ')
gru_text3 = gru_text3.replace(",", ' ')
gru_text3 = gru_text3.replace(";", ' ')
print(gru_text3)
gru_result3 = decode_sequence2(gru_text3)
print(gru_result3)

Je ne poccienne pas son ligne de la porte et terribl
[start] i wont get his line door and closed [end]


In [432]:
gru_trans3 = 'I do not poccienne its line of the door and terribl'

In [449]:
tf.random.set_seed(42)

state_text3 = complete_text3("Je")
#print(state_text3)
state_text3 = state_text3.replace("\n", '')
state_text3 = state_text3.replace(":", ' ')
state_text3 = state_text3.replace(",", ' ')
state_text3 = state_text3.replace(";", ' ')
print(state_text3)
state_result3 = decode_sequence2(state_text3)
print(state_result3)

Je n'avocaile dite que la larre na poussa dit  armo
[start] i gave the the the the the [end]


In [434]:
state_text3

"Je n'avocaile dite que la larre na poussa dit  armo"

In [439]:
state_trans3 = "I only avocado said that the larre na pushed said armo"

In [423]:
import nltk.translate.bleu_score as bleu

In [450]:
bleu.sentence_bleu(lstm_trans1.split(), lstm_result1.split())

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


1.4488496539373276e-231

In [468]:
bleu.sentence_bleu(lstm_trans2.split(), lstm_result2.split())

8.844844403089352e-232

In [452]:
bleu.sentence_bleu(lstm_trans3.split(), lstm_result3.split())

1.1640469867513693e-231

In [453]:
bleu.sentence_bleu(gru_trans1.split(), gru_result1.split())

1.4488496539373276e-231

In [466]:
bleu.sentence_bleu(gru_result2.split(),gru_trans2.split())

1.0003688322288243e-231

In [455]:
bleu.sentence_bleu(gru_trans3.split(), gru_result3.split())

1.0244914152188952e-231

In [469]:
bleu.sentence_bleu(state_trans1.split(), state_result1.split())

1.0244914152188952e-231

In [457]:
bleu.sentence_bleu(state_trans2.split(), state_result2.split())

0

In [458]:
bleu.sentence_bleu(state_trans3.split(), state_result3.split())

1.0518351895246305e-231

In [459]:
tf.__version__

'2.6.0'

In [461]:
import sys

print("-sys.version—")
print(sys.version)

-sys.version—
3.8.12 (default, Oct 12 2021, 03:01:40) [MSC v.1916 64 bit (AMD64)]


In [462]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Mon_Sep_13_20:11:50_Pacific_Daylight_Time_2021
Cuda compilation tools, release 11.5, V11.5.50
Build cuda_11.5.r11.5/compiler.30411180_0
