In [14]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [15]:
# Every tokenised transcript is padded / truncated to this many tokens.
MAX_SEQ_LEN = 30

# metadata.csv is read as a headerless, pipe-separated table with three columns.
data = pd.read_csv(
    "../data/LJSpeech-1.1/metadata.csv",
    sep="|",
    header=None,
    names=["ID", "Text1", "Text2"],
    # 3 == csv.QUOTE_NONE. The transcripts contain bare double-quote characters;
    # with the default quoting rules the parser treats them as field delimiters
    # and can silently merge several rows into one.
    quoting=3,
)
texts = data["Text1"].to_list()
ID = data["ID"].to_list()

# Word-level vocabulary built from the transcripts.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
num_classes = len(tokenizer.word_index) + 1  # Add 1 for the padding token

# Integer-encode each transcript and bring all of them to a common length.
# NOTE(review): padding is "post" but truncation is left at the pad_sequences
# default ("pre"), so over-long transcripts lose their first words -- confirm
# this is intended.
sequences = tokenizer.texts_to_sequences(texts)
Y_data = pad_sequences(sequences, padding="post", maxlen=MAX_SEQ_LEN)

In [16]:
def scaled_dot_product(q, k, v, mask):
    """Scaled dot-product attention.

    Computes ``softmax(q @ k^T / sqrt(d_k) + mask) @ v`` where ``d_k`` is the
    size of the last axis of ``k``.

    Args:
        q: Query tensor.
        k: Key tensor; its last axis must match the last axis of ``q``.
        v: Value tensor.
        mask: Optional tensor that is *added* to the scaled scores before the
            softmax, or ``None`` to skip masking. (Additive mask: presumably
            large negative values at positions to hide -- confirm with caller.)

    Returns:
        A ``(output, attention_weights)`` tuple: the weighted sum of ``v`` and
        the softmax-normalised scores used to compute it.
    """
    # Scale by sqrt(d_k) so the logits do not grow with the key dimension.
    key_dim = tf.cast(tf.shape(k)[-1], tf.float32)
    logits = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(key_dim)

    if mask is not None:
        logits = logits + mask

    # Normalise over the last axis (the key positions).
    attention_weights = tf.nn.softmax(logits, axis=-1)
    return tf.matmul(attention_weights, v), attention_weights

In [17]:
class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head self-attention over a single input tensor.

    One bias-free dense layer produces queries, keys and values in a single
    projection of width ``3 * d_model``; the result is split into ``num_heads``
    heads of width ``d_model // num_heads``, attended with
    ``scaled_dot_product``, merged again and passed through an output dense
    layer.

    Args:
        d_model: Width of the merged attention output.
        num_heads: Number of attention heads; must divide ``d_model`` evenly.
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).

    Raises:
        ValueError: If ``d_model`` is not divisible by ``num_heads``.
    """

    def __init__(self, d_model, num_heads, **kwargs):
        super().__init__(**kwargs)
        # Without this check a mismatched pair would make the head reshape
        # fail, or silently fold sequence positions into the head axis.
        if d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        self.num_heads = num_heads
        self.head_dim = d_model // num_heads
        self.qkv_layer = tf.keras.layers.Dense(3 * d_model, use_bias=False)
        # NOTE(review): the output projection applies ReLU; transformer output
        # projections are usually linear. Kept as-is to preserve behaviour --
        # confirm this is intended.
        self.linear_layer = tf.keras.layers.Dense(d_model, activation='relu')

    def split_heads(self, x, batch_size):
        """Reshape ``x`` to ``(batch_size, num_heads, -1, head_dim)``.

        The reshape depends only on the number of elements in ``x``, so no
        special handling of lower-rank input is needed.
        """
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.head_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, x, mask=None):
        """Apply self-attention to ``x``.

        Args:
            x: Input tensor whose leading axis is the batch axis.
            mask: Optional additive mask handed to ``scaled_dot_product``.

        Returns:
            Tensor reshaped to ``(batch_size, -1, d_model)`` and passed through
            the output dense layer.
        """
        # Dynamic batch size: the static ``x.shape[0]`` is None when the layer
        # is traced with an unknown batch dimension, which breaks the reshapes.
        batch_size = tf.shape(x)[0]

        qkv = self.qkv_layer(x)
        q, k, v = (
            self.split_heads(part, batch_size)
            for part in tf.split(qkv, 3, axis=-1)
        )
        values, _ = scaled_dot_product(q, k, v, mask)

        # Undo the head split: move the head axis back next to head_dim, merge.
        values = tf.transpose(values, perm=[0, 2, 1, 3])
        values = tf.reshape(values, (batch_size, -1, self.num_heads * self.head_dim))
        return self.linear_layer(values)

In [18]:
class PositionwiseFeedForward(tf.keras.layers.Layer):
    """Two dense layers with a ReLU and dropout in between.

    Args:
        d_model: Number of units of the second (output) dense layer.
        hidden: Number of units of the first dense layer.
        drop_prob: Dropout rate applied after the ReLU.
    """

    def __init__(self, d_model, hidden, drop_prob=0.1):
        super().__init__()
        self.linear1 = tf.keras.layers.Dense(units=hidden)
        self.linear2 = tf.keras.layers.Dense(units=d_model)
        self.relu = tf.keras.layers.ReLU()
        self.dropout = tf.keras.layers.Dropout(rate=drop_prob)

    def call(self, x):
        """Return ``linear2(dropout(relu(linear1(x))))``."""
        activated = self.relu(self.linear1(x))
        return self.linear2(self.dropout(activated))

In [19]:
class PostionalEmbedding(tf.keras.Model):
    """Token embedding plus fixed sinusoidal positional encoding.

    Token ids are looked up in a trainable embedding table, scaled by
    ``sqrt(embedding_dim)`` and summed with a sinusoidal position signal, so
    that downstream attention (which is otherwise order-agnostic) can tell
    positions apart. The encoding is computed from the runtime sequence length,
    so no maximum length has to be fixed in advance.

    Args:
        vocab_size: Number of rows of the embedding table. The default is the
            value of the module-level ``num_classes`` at class-definition time.
        embedding_dim: Width of each embedding vector.
    """

    def __init__(self,vocab_size=num_classes,embedding_dim=64):
        super().__init__()
        self.embedding_dim = embedding_dim
        # ``input_length`` is intentionally not passed: it is deprecated in
        # current Keras and would pin the layer to one sequence length.
        self.embedding = layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim)

    @staticmethod
    def _sinusoidal_encoding(length, depth):
        """Return a ``(length, depth)`` float32 table of sin/cos position codes.

        Even channels hold ``sin(pos / 10000^(2i/depth))`` and odd channels the
        matching ``cos``, where ``i = channel // 2``.
        """
        positions = tf.cast(tf.range(length), tf.float32)[:, tf.newaxis]
        channels = tf.range(depth)
        pair_index = tf.cast(channels // 2, tf.float32)
        inv_freq = tf.pow(10000.0, -2.0 * pair_index / float(depth))
        angles = positions * inv_freq[tf.newaxis, :]
        is_even = tf.equal(channels % 2, 0)
        return tf.where(is_even, tf.sin(angles), tf.cos(angles))

    def call(self,input,training=None):
        """Embed token ids and add position information.

        Args:
            input: Integer tensor of token ids whose last axis is the sequence
                axis.
            training: Unused; accepted for Keras API compatibility.

        Returns:
            Tensor with a trailing axis of size ``embedding_dim`` appended to
            the shape of ``input``.
        """
        output = self.embedding(input)
        # Scale so the learned embeddings are not swamped by the unit-amplitude
        # position signal.
        output = output * (self.embedding_dim ** 0.5)
        seq_len = tf.shape(input)[-1]
        encoding = self._sinusoidal_encoding(seq_len, self.embedding_dim)
        # (seq_len, dim) broadcasts over the leading batch axis.
        return output + tf.cast(encoding, output.dtype)



In [20]:
class DecoderLayer(tf.keras.Model):
    """Embedding followed by a residual self-attention block and dropout.

    Args:
        vocab_size: Vocabulary size handed to the embedding.
        d_model: Width of the attention block.
        num_heads: Number of attention heads.
        embedding_dim: Width of the embedding vectors; must equal ``d_model``
            because the embedding output and the attention output are summed.
        dropout_rate: Dropout rate applied to the residual sum.

    Raises:
        ValueError: If ``embedding_dim`` differs from ``d_model``.
    """

    def __init__(self, vocab_size,d_model,num_heads, embedding_dim,dropout_rate=0.1):
        super().__init__()
        # The residual connection in ``call`` adds tensors of width
        # ``embedding_dim`` and ``d_model``; fail early with a clear message.
        if embedding_dim != d_model:
            raise ValueError(
                f"embedding_dim ({embedding_dim}) must equal d_model ({d_model}) "
                "for the residual connection"
            )
        # Use the constructor arguments rather than the notebook-global
        # ``num_classes`` and a hard-coded width.
        self.embedding = PostionalEmbedding(vocab_size=vocab_size, embedding_dim=embedding_dim)
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.multihead_attention = MultiHeadAttention(d_model, num_heads)

    def call(self, inputs,training=None,mask=None):
        """Run the block on a batch of token ids.

        Args:
            inputs: Integer tensor of token ids.
            training: Whether dropout is active; ``None`` defers to Keras.
            mask: Optional additive attention mask forwarded to the attention
                layer.

        Returns:
            ``dropout(embedding(inputs) + attention(embedding(inputs)))``.
        """
        x = self.embedding(inputs)
        x_att = self.multihead_attention(x, mask)
        # NOTE(review): dropout is applied to the residual sum rather than to
        # the attention output alone, and there is no layer normalisation --
        # kept as in the original; confirm intent.
        x = x + x_att
        return self.dropout(x, training=training)

In [21]:
BATCH_SIZE = 100

# Slice the padded token matrix row by row and group the rows into batches of
# BATCH_SIZE (the final batch may be smaller).
dataset = tf.data.Dataset.from_tensor_slices(Y_data).batch(BATCH_SIZE)

In [22]:
# Smoke test: push every batch through a freshly initialised decoder block and
# report the shape of what comes out.
decoder = DecoderLayer(
    vocab_size=num_classes,
    d_model=64,
    num_heads=8,
    embedding_dim=64,
)

for batch_X in dataset:
    # training=True is passed so the forward pass runs in training mode.
    output = decoder(batch_X, training=True)
    print("Decoder output shape for a batch:", output.shape)

Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 30, 64)
Decoder output shape for a batch: (100, 