In [119]:
import tensorflow as tf
print(tf.__version__)
tf.random.set_seed(1234)

!pip install tensorflow-datasets==1.2.0

import tensorflow_datasets as tfds
import os
import re
import numpy as np
import matplotlib.pyplot as plt

2.10.0


In [120]:
path_to_zip = tf.keras.utils.get_file(
    "cornell_movie_dialogs.zip",
    origin='https://www.kaggle.com/api/v1/datasets/download/soumikrakshit/cornell-movie-dialogs-corpus',
    extract=True
)
path_to_data = os.path.join(os.path.dirname(path_to_zip), "cornell movie-dialogs corpus")

path_to_movie_lines = os.path.join(path_to_data, "movie_lines.txt")
path_to_movie_conversations = os.path.join(path_to_data, "movie_conversations.txt")

print("Dataset downloaded and extracted.")

Dataset downloaded and extracted.


In [121]:
path_to_data

'C:\\Users\\erand\\.keras\\datasets\\cornell movie-dialogs corpus'

In [122]:
MAX_SAMPLE = 50000

def preprocess_sentence(sentence):
    sentence = sentence.lower().strip()
    sentence = re.sub(r"([?.!,])", r" \1", sentence)
    sentence = re.sub(r'[" "]+', " ", sentence)
    sentence = re.sub(r"[^a-zA-Z?.,!]+", " ", sentence)
    return sentence.strip()

def load_conversations():
    id2line = {}
    with open(path_to_movie_lines, errors="ignore") as file:
        lines = file.readlines()
    for line in lines:
        parts = line.replace("\n", "").split(" +++$+++ ")
        id2line[parts[0]] = parts[4]

    inputs, outputs = [], []
    with open(path_to_movie_conversations, "r") as file:
        lines = file.readlines()
    for line in lines:
        parts = line.replace("\n", "").split(" +++$+++ ")
        conversation = [line[1:-1] for line in parts[3][1:-1].split(", ")]
        for i in range(len(conversation) - 1):
            inputs.append(preprocess_sentence(id2line[conversation[i]]))
            outputs.append(preprocess_sentence(id2line[conversation[i + 1]]))
            if len(inputs) >= MAX_SAMPLE:
                return inputs, outputs
    return inputs, outputs



In [123]:
questions, answers = load_conversations()

In [124]:
len(questions)


50000

In [125]:
len(answers)

50000

In [126]:
print("Q : {}".format(questions[10]))
print("A : {}".format(answers[10]))

Q : c esc ma tete . this is my head
A : right . see ? you re ready for the quiz .


In [127]:
tokenizer = tfds.features.text.SubwordTextEncoder.build_from_corpus(
    questions+answers,target_vocab_size=2**13
)

START_TOKEN, END_TOKEN = [tokenizer.vocab_size],[tokenizer.vocab_size+1]
VOCAB_SIZE = tokenizer.vocab_size + 2

In [128]:
print("Tokenized Q : {}".format(tokenizer.encode(questions[30])))

Tokenized Q : [114, 15, 8, 54, 5, 112, 9, 210, 1]


In [129]:
def tokenize_and_filter(inputs,outputs):
    MAX_LENGTH = 40  # Example value
    tokenized_inputs,tokenized_outputs = [],[]

    for (sentence1,sentence2) in zip(inputs,outputs):
        sentence1 = START_TOKEN+tokenizer.encode(sentence1)+END_TOKEN
        sentence2 = START_TOKEN+tokenizer.encode(sentence2)+END_TOKEN
        if len(sentence1) <= MAX_LENGTH and len(sentence2) <=MAX_LENGTH:
            tokenized_inputs.append(sentence1)
            tokenized_outputs.append(sentence2)

    tokenized_inputs = tf.keras.preprocessing.sequence.pad_sequences(tokenized_inputs,maxlen=MAX_LENGTH,padding="post")
    tokenized_outputs = tf.keras.preprocessing.sequence.pad_sequences(tokenized_outputs,maxlen=MAX_LENGTH,padding="post")

    return tokenized_inputs,tokenized_outputs

questions,answers = tokenize_and_filter(questions,answers)


In [130]:
len(questions)

44098

In [131]:
VOCAB_SIZE

8334

In [132]:
BATCH_SIZE = 64
BUFFER_SIZE = 20000

dataset = tf.data.Dataset.from_tensor_slices((
    {
        "inputs": questions,
        "dec_inputs": answers[:,:-1]
    },
    {
        "outputs:": answers[:,1:]
    }
))
dataset = dataset.cache()
dataset = dataset.shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

In [133]:
def scaled_dot_product_attention(query, key, value, mask=None):
    matmul_qk = tf.matmul(query, key, transpose_b=True)  # Q * K^T
    dk = tf.cast(tf.shape(key)[-1], tf.float32)  # Dimension of key
    scaled_attention_logits = matmul_qk / tf.sqrt(dk)  # Scale the attention logits

    if mask is not None:
        scaled_attention_logits += (mask * -1e9)  # Apply mask

    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)  # Softmax along the last axis

    output = tf.matmul(attention_weights, value)  # Apply attention weights to the values
    return output, attention_weights


In [139]:
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, name="multi_head_attention"):
        super(MultiHeadAttention, self).__init__(name=name)
        self.num_heads = num_heads
        self.d_model = d_model
        assert d_model % self.num_heads == 0
        self.depth = d_model // self.num_heads
        self.query_dense = tf.keras.layers.Dense(units=d_model)
        self.key_dense = tf.keras.layers.Dense(units=d_model)
        self.value_dense = tf.keras.layers.Dense(units=d_model)
        self.dense = tf.keras.layers.Dense(units=d_model)

    def split_heads(self, inputs, batch_size):
        inputs = tf.reshape(inputs, shape=(batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(inputs, perm=[0, 2, 1, 3])

    def call(self, inputs):
        query, key, value, mask = inputs["query"], inputs["key"], inputs["value"], inputs["mask"]
        batch_size = tf.shape(query)[0]
        query = self.query_dense(query)
        key = self.key_dense(key)
        value = self.value_dense(value)
        query = self.split_heads(query, batch_size)
        key = self.split_heads(key, batch_size)
        value = self.split_heads(value, batch_size)
        scaled_attention = scaled_dot_product_attention(query, key, value, mask)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])

        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))
        output = self.dense(concat_attention)
        return output


In [140]:
class Transformer(tf.keras.Model):
    def __init__(self, vocab_size, d_model, num_heads, num_layers, hidden_units, dropout_rate):
        super(Transformer, self).__init__()
        self.d_model = d_model
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.hidden_units = hidden_units

        self.embedding = tf.keras.layers.Embedding(vocab_size, d_model)
        self.encoder_layers = [MultiHeadAttention(d_model, num_heads) for _ in range(num_layers)]
        self.decoder_layers = [MultiHeadAttention(d_model, num_heads) for _ in range(num_layers)]
        self.dense = tf.keras.layers.Dense(hidden_units, activation='relu')
        self.final_layer = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, mask=None):
        x = self.embedding(inputs["inputs"])

        for layer in self.encoder_layers:
            x = layer({"query": x, "key": x, "value": x, "mask": mask})

        decoder_input = self.embedding(inputs["dec_inputs"])

        for layer in self.decoder_layers:
            decoder_input = layer({"query": decoder_input, "key": x, "value": x, "mask": mask})

        output = self.dense(decoder_input)
        output = self.final_layer(output)

        return output

In [142]:
model = Transformer(vocab_size=VOCAB_SIZE, d_model=256, num_heads=8, num_layers=4, hidden_units=1024, dropout_rate=0.1)

In [143]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [144]:
EPOCHS = 20
model.fit(dataset , epochs=EPOCHS)

Epoch 1/20


ValueError: in user code:

    File "C:\Users\erand\anaconda3\envs\tf_env\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\erand\anaconda3\envs\tf_env\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\erand\anaconda3\envs\tf_env\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\erand\anaconda3\envs\tf_env\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\erand\anaconda3\envs\tf_env\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\erand\AppData\Local\Temp\__autograph_generated_filersye9ze1.py", line 24, in tf__call
        ag__.for_stmt(ag__.ld(self).encoder_layers, None, loop_body, get_state, set_state, ('x',), {'iterate_names': 'layer'})
    File "C:\Users\erand\AppData\Local\Temp\__autograph_generated_filersye9ze1.py", line 22, in loop_body
        x = ag__.converted_call(ag__.ld(layer), ({'query': ag__.ld(x), 'key': ag__.ld(x), 'value': ag__.ld(x), 'mask': ag__.ld(mask)},), None, fscope)
    File "C:\Users\erand\AppData\Local\Temp\__autograph_generated_file06cu0vok.py", line 19, in tf__call
        scaled_attention = ag__.converted_call(ag__.ld(tf).transpose, (ag__.ld(scaled_attention),), dict(perm=[0, 2, 1, 3]), fscope)

    ValueError: Exception encountered when calling layer "transformer_3" "                 f"(type Transformer).
    
    in user code:
    
        File "C:\Users\erand\AppData\Local\Temp\ipykernel_15548\3606762752.py", line 19, in call  *
            x = layer({"query": x, "key": x, "value": x, "mask": mask})
        File "C:\Users\erand\anaconda3\envs\tf_env\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "C:\Users\erand\AppData\Local\Temp\__autograph_generated_file06cu0vok.py", line 19, in tf__call
            scaled_attention = ag__.converted_call(ag__.ld(tf).transpose, (ag__.ld(scaled_attention),), dict(perm=[0, 2, 1, 3]), fscope)
    
        ValueError: Exception encountered when calling layer "multi_head_attention" "                 f"(type MultiHeadAttention).
        
        in user code:
        
            File "C:\Users\erand\AppData\Local\Temp\ipykernel_15548\2767108848.py", line 27, in call  *
                scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        
            ValueError: Dimension must be 5 but is 4 for '{{node transformer_3/multi_head_attention/transpose_3}} = Transpose[T=DT_FLOAT, Tperm=DT_INT32](transformer_3/multi_head_attention/transpose_3/a, transformer_3/multi_head_attention/transpose_3/perm)' with input shapes: [2,?,8,?,32], [4].
        
        
        Call arguments received by layer "multi_head_attention" "                 f"(type MultiHeadAttention):
          • inputs={'query': 'tf.Tensor(shape=(None, 40, 256), dtype=float32)', 'key': 'tf.Tensor(shape=(None, 40, 256), dtype=float32)', 'value': 'tf.Tensor(shape=(None, 40, 256), dtype=float32)', 'mask': 'None'}
    
    
    Call arguments received by layer "transformer_3" "                 f"(type Transformer):
      • inputs={'inputs': 'tf.Tensor(shape=(None, 40), dtype=int32)', 'dec_inputs': 'tf.Tensor(shape=(None, 39), dtype=int32)'}
      • mask=None
