In [43]:
# Import necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import pickle
import re
import json
import gc
from langdetect import detect
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.utils import Sequence
from keras import mixed_precision
from keras.optimizers import Adam
from keras.losses import SparseCategoricalCrossentropy
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding


# Enable eager execution in TensorFlow
tf.compat.v1.enable_eager_execution()


In [44]:
# Configure TensorFlow GPU settings
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            print(f"Enabled memory growth for GPU: {gpu}")
    except RuntimeError as e:
        print(f"Failed to set memory growth: {e}")  # Memory growth must be set before GPUs have been initialized
else:
    print("No GPUs found.")

In [45]:
# Import the dataset
data_path = "E:/archive (9)/all_songs_data.csv"
data = pd.read_csv(data_path, sep=',')

# Display the first few rows of the dataframe
data.head()

Unnamed: 0,Album,Album URL,Artist,Featured Artists,Lyrics,Media,Rank,Release Date,Song Title,Song URL,Writers,Year
0,Battle of New Orleans,https://genius.com/albums/Johnny-horton/Battle...,Johnny Horton,[],[Verse 1] In 1814 we took a little trip Along ...,[{'native_uri': 'spotify:track:0dwpdcQkeZqpuoA...,1,1959-04-01,The Battle Of New Orleans,https://genius.com/Johnny-horton-the-battle-of...,"[{'api_path': '/artists/561913', 'header_image...",1959.0
1,That’s All,https://genius.com/albums/Bobby-darin/That-s-all,Bobby Darin,[],"Oh the shark, babe Has such teeth, dear And he...",[{'native_uri': 'spotify:track:3E5ndyOfO6vFDEI...,2,,Mack The Knife,https://genius.com/Bobby-darin-mack-the-knife-...,"[{'api_path': '/artists/218851', 'header_image...",1959.0
2,“Mr Personality’s” 15 Big Hits,https://genius.com/albums/Lloyd-price/Mr-perso...,Lloyd Price,[],Over and over I tried to prove my love to you ...,"[{'provider': 'youtube', 'start': 0, 'type': '...",3,,Personality,https://genius.com/Lloyd-price-personality-lyrics,"[{'api_path': '/artists/355804', 'header_image...",1959.0
3,The Greatest Hits Of Frankie Avalon,https://genius.com/albums/Frankie-avalon/The-g...,Frankie Avalon,[],"Hey, Venus! Oh, Venus! Venus, if you will Ple...",[],4,,Venus,https://genius.com/Frankie-avalon-venus-lyrics,"[{'api_path': '/artists/1113175', 'header_imag...",1959.0
4,Paul Anka Sings His Big 15,https://genius.com/albums/Paul-anka/Paul-anka-...,Paul Anka,[],I'm just a lonely boy Lonely and blue I'm all ...,[],5,,Lonely Boy,https://genius.com/Paul-anka-lonely-boy-lyrics,[],1959.0
...,...,...,...,...,...,...,...,...,...,...,...,...
6495,BZRP Music Sessions,https://genius.com/albums/Bizarrap/Bzrp-music-...,Bizarrap and Shakira,[],"[Intro: Shakira] (Pa' tipos como tú, uh-uh-uh-...","[{'provider': 'youtube', 'start': 0, 'type': '...",96,2023-01-11,"Bzrp Music Sessions, Vol. 53",https://genius.com/Bizarrap-and-shakira-shakir...,"[{'api_path': '/artists/1405', 'header_image_u...",2023.0
6496,Travis Scott - UTOPIA (Русский перевод),https://genius.com/albums/Genius-russian-trans...,Travis Scott featuring Drake,[],[Часть I] [Интро: Drake] Е Напряжение точно р...,"[{'provider': 'youtube', 'start': 0, 'type': '...",97,2023-07-28,Meltdown,https://genius.com/Genius-russian-translations...,[],2023.0
6497,L3*,https://genius.com/albums/Latto/L3,Latto featuring Cardi B,"[{'api_path': '/artists/621678', 'header_image...",(Go Grizz) Ah (Squat made the beat) What's hap...,"[{'provider': 'youtube', 'start': 0, 'type': '...",98,2023-06-02,Put It on da Floor Again,https://genius.com/Latto-put-it-on-da-floor-ag...,"[{'api_path': '/artists/163578', 'header_image...",2023.0
6498,Born This Way,https://genius.com/albums/Lady-gaga/Born-this-way,Lady Gaga,[],Money [Verse 1] Love is just a history that t...,[{'native_uri': 'spotify:track:11BKm0j4eYoCPPp...,99,2011-05-23,Bloody Mary,https://genius.com/Lady-gaga-bloody-mary-lyrics,"[{'api_path': '/artists/65581', 'header_image_...",2023.0


In [46]:
# Drop unnecessary columns from the dataframe
columns_to_drop = ['Album', 'Album URL', 'Artist', 'Featured Artists', 'Media', 
                   'Rank', 'Release Date', 'Song URL', 'Writers', 'Year']
df = data.drop(columns=columns_to_drop, axis=1)

# Display the first few rows of the modified dataframe
df.head()

Unnamed: 0,Lyrics,Song Title
0,[Verse 1] In 1814 we took a little trip Along ...,The Battle Of New Orleans
1,"Oh the shark, babe Has such teeth, dear And he...",Mack The Knife
2,Over and over I tried to prove my love to you ...,Personality
3,"Hey, Venus! Oh, Venus! Venus, if you will Ple...",Venus
4,I'm just a lonely boy Lonely and blue I'm all ...,Lonely Boy
...,...,...
6495,"[Intro: Shakira] (Pa' tipos como tú, uh-uh-uh-...","Bzrp Music Sessions, Vol. 53"
6496,[Часть I] [Интро: Drake] Е Напряжение точно р...,Meltdown
6497,(Go Grizz) Ah (Squat made the beat) What's hap...,Put It on da Floor Again
6498,Money [Verse 1] Love is just a history that t...,Bloody Mary


In [47]:
# Delete the original data dataframe to free up memory
del data

# Run garbage collection to reclaim memory
gc.collect()

2410

In [48]:
# Function to detect if the language of the text is English
def detect_language(text):
    try:
        # Detect the language of the text
        lang = detect(text)
        # Return True if the detected language is English, False otherwise
        return lang == 'en'
    except:
        # Return False if language detection fails
        return False

# Apply the function to detect language for each row in the 'Lyrics' column
english_lyrics_mask = df['Lyrics'].apply(detect_language)

# Filter the dataframe to keep only rows with English lyrics
df = df[english_lyrics_mask]

# Display the resulting dataframe
print(df)

                                                 Lyrics  \
0     [Verse 1] In 1814 we took a little trip Along ...   
1     Oh the shark, babe Has such teeth, dear And he...   
2     Over and over I tried to prove my love to you ...   
3     Hey, Venus! Oh, Venus!  Venus, if you will Ple...   
4     I'm just a lonely boy Lonely and blue I'm all ...   
...                                                 ...   
6493  This how it sound when I hit your ho Be like t...   
6494  When it comes to hitchin' the boat up Backin' ...   
6497  (Go Grizz) Ah (Squat made the beat) What's hap...   
6498  Money  [Verse 1] Love is just a history that t...   
6499  It was right after senior year Just before the...   

                     Song Title  
0     The Battle Of New Orleans  
1                Mack The Knife  
2                   Personality  
3                         Venus  
4                    Lonely Boy  
...                         ...  
6493        Peaches & Eggplants  
6494           I Wr

In [49]:
import re

# Function to preprocess text (in this case, lyrics)
def preprocessing_text(text):
    text = str(text)  # Convert text to string if not already
    text = text.lower()  # Convert text to lowercase
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    return text

# Apply the preprocessing function to the 'Lyrics' column
df['Lyrics'] = df['Lyrics'].apply(preprocessing_text)

# Display the modified dataframe
print(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Lyrics'] = df['Lyrics'].apply(preprocessing_text)


Unnamed: 0,Lyrics,Song Title
0,verse 1 in 1814 we took a little trip along wi...,The Battle Of New Orleans
1,oh the shark babe has such teeth dear and he s...,Mack The Knife
2,over and over i tried to prove my love to you ...,Personality
3,hey venus oh venus venus if you will please s...,Venus
4,im just a lonely boy lonely and blue im all al...,Lonely Boy
...,...,...
6493,this how it sound when i hit your ho be like t...,Peaches & Eggplants
6494,when it comes to hitchin the boat up backin do...,I Wrote the Book
6497,go grizz ah squat made the beat whats happenin...,Put It on da Floor Again
6498,money verse 1 love is just a history that the...,Bloody Mary


In [50]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Preprocess the data
lyrics = df['Lyrics'].tolist()
titles = df['Song Title'].tolist()

# Tokenize the lyrics
lyrics_tokenizer = Tokenizer()
lyrics_tokenizer.fit_on_texts(lyrics)
lyrics_vocab_size = len(lyrics_tokenizer.word_index) + 1  # Vocabulary size for lyrics

# Tokenize the titles
title_tokenizer = Tokenizer()
title_tokenizer.fit_on_texts(titles)
title_vocab_size = len(title_tokenizer.word_index) + 1  # Vocabulary size for titles

# Pad sequences
max_lyrics_len = max([len(l.split()) for l in lyrics])  # Maximum length of lyrics sequence
max_title_len = max([len(t.split()) for t in titles])  # Maximum length of titles sequence

# Display vocabulary sizes and maximum sequence lengths
print(f"Lyrics Vocabulary Size: {lyrics_vocab_size}")
print(f"Title Vocabulary Size: {title_vocab_size}")
print(f"Max Lyrics Length: {max_lyrics_len}")
print(f"Max Title Length: {max_title_len}")

84383 3425 148742 14


In [51]:
import pandas as pd

# Assuming df is already defined and contains your data

# Drop rows with NaN values in the 'Lyrics' column
df = df.dropna(subset=['Lyrics'])

# Save the cleaned dataframe to a CSV file without index column
df.to_csv('lyrics_data.csv', index=False)

# Confirmation message
print("Cleaned data saved to 'lyrics_data.csv'")

In [52]:
from keras.preprocessing.sequence import pad_sequences

# Convert lyrics to sequences of integers and pad them
lyrics_sequences = lyrics_tokenizer.texts_to_sequences(lyrics)
lyrics_padded = pad_sequences(lyrics_sequences, maxlen=max_lyrics_len, padding='post')

# Convert titles to sequences of integers and pad them
title_sequences = title_tokenizer.texts_to_sequences(titles)
title_padded = pad_sequences(title_sequences, maxlen=max_title_len, padding='post')

In [53]:
# Delete unnecessary variables to free up memory
del title_sequences, lyrics_sequences, df

# Run garbage collection to reclaim memory
gc.collect()

0

In [54]:
# Save lyrics_tokenizer to a pickle file
with open('lyrics_tokenizer.pkl', 'wb') as f:
    pickle.dump(lyrics_tokenizer, f)

# Save title_tokenizer to a pickle file
with open('title_tokenizer.pkl', 'wb') as f:
    pickle.dump(title_tokenizer, f)

In [56]:
class SelfAttention(tf.keras.layers.Layer):
    def __init__(self, d_model):
        super(SelfAttention, self).__init__()

        self.d_model = d_model
        
        # Initialize weight matrices
        self.W_q = tf.keras.layers.Dense(d_model)
        self.W_k = tf.keras.layers.Dense(d_model)
        self.W_v = tf.keras.layers.Dense(d_model)
        
    def call(self, q, k, v):
        # Compute the self attention scores
        scores = tf.matmul(self.W_q(q), tf.transpose(self.W_k(k), perm=[0, 2, 1]))
        
        # Scale and normalize
        scaled_scores = scores / tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        attention_weights = tf.nn.softmax(scaled_scores, axis=-1)
        
        # Apply attention weights to values
        output = tf.matmul(attention_weights, self.W_v(v))
        
        return output, attention_weights

In [57]:
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, num_heads, d_model):
        super(MultiHeadAttention, self).__init__()
        
        self.num_heads = num_heads
        self.d_model = d_model
        self.head_size = d_model // num_heads
        
        # Create multiple SelfAttention heads
        self.attention_heads = [SelfAttention(self.head_size) for _ in range(num_heads)]
        
        # Final dense layer to combine outputs from all heads
        self.dense = tf.keras.layers.Dense(d_model)
        
    def call(self, q, k, v):
        # List comprehension to apply each head to q, k, v
        head_outputs = [head(q, k, v)[0] for head in self.attention_heads]
        
        # Concatenate the outputs of all attention heads
        concatenated = tf.concat(head_outputs, axis=-1)
        
        # Apply the final dense layer
        output = self.dense(concatenated)
        
        return output

In [58]:
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, d_ff):
        super(FeedForward, self).__init__()
        self.dense1 = tf.keras.layers.Dense(d_ff, activation="relu")
        self.dense2 = tf.keras.layers.Dense(d_model)
        
    def call(self, inputs):
        intermediate_output = self.dense1(inputs)
        output = self.dense2(intermediate_output)
        return output

class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, num_heads, d_model, d_ff):
        super(EncoderLayer, self).__init__()
        self.multihead_attention = MultiHeadAttention(num_heads, d_model)
        self.feed_forward = FeedForward(d_model, d_ff)
        self.layer_norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layer_norm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        
    def call(self, inputs, training=False):
        # Self-attention mechanism
        attn_output = self.multihead_attention(inputs, inputs, inputs)
        
        # Residual connection and layer normalization
        attn_output = self.layer_norm1(inputs + attn_output)
        
        # Feed-forward neural network
        ff_output = self.feed_forward(attn_output)
        
        # Residual connection and layer normalization
        output = self.layer_norm2(attn_output + ff_output)
        
        return output


In [59]:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, num_heads, d_model, d_ff):
        super(EncoderLayer, self).__init__()
        self.multihead_attention = MultiHeadAttention(num_heads, d_model)
        self.feed_forward = FeedForward(d_model, d_ff)
        self.layer_norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layer_norm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        
    def call(self, inputs, training=False):
        # Multi-head self-attention
        attn_output = self.multihead_attention(inputs, inputs, inputs)
        
        # Residual connection and layer normalization
        attn_output = self.layer_norm1(inputs + attn_output)
        
        # Feed-forward neural network
        ff_output = self.feed_forward(attn_output)
        
        # Another residual connection and layer normalization
        output = self.layer_norm2(attn_output + ff_output)
        
        return output

In [60]:
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, num_heads, d_model, d_ff):
        super(DecoderLayer, self).__init__()
        
        # Masked self-attention layer
        self.masked_self_attention = MultiHeadAttention(num_heads, d_model)
        
        # Cross-attention layer with encoder output
        self.cross_attention = MultiHeadAttention(num_heads, d_model)
        
        # Feed-forward neural network layer
        self.feed_forward = FeedForward(d_model, d_ff)
        
        # Layer normalization layers
        self.layer_norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layer_norm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layer_norm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
            
    def call(self, inputs, enc_output, training=False):
        # Apply masked self-attention
        masked_attn_output = self.masked_self_attention(inputs, inputs, inputs)
        
        # Residual connection and layer normalization
        masked_attn_output = self.layer_norm1(inputs + masked_attn_output)
        
        # Apply cross-attention with encoder output
        cross_attn_output = self.cross_attention(masked_attn_output, enc_output, enc_output)
        
        # Residual connection and layer normalization
        cross_attn_output = self.layer_norm2(masked_attn_output + cross_attn_output)
        
        # Apply feed-forward neural network
        ff_output = self.feed_forward(cross_attn_output)
        
        # Residual connection and layer normalization
        output = self.layer_norm3(cross_attn_output + ff_output)
        
        return output

In [61]:
class Transformer(tf.keras.Model):
    def __init__(self, num_layers, num_heads, d_model, d_ff, input_vocab_size, target_vocab_size, max_seq_len):
        super(Transformer, self).__init__()
        
        # Initialize encoder layers
        self.encoder_layers = [EncoderLayer(num_heads, d_model, d_ff) for _ in range(num_layers)]
        
        # Initialize decoder layers
        self.decoder_layers = [DecoderLayer(num_heads, d_model, d_ff) for _ in range(num_layers)]
        
        # Embedding layer for input and target sequences
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        
        # Final dense layer for prediction
        self.final_layer = tf.keras.layers.Dense(target_vocab_size)
        
        # Softmax layer for output
        self.softmax = tf.keras.layers.Softmax(axis=-1)
        
        # Maximum sequence length
        self.max_seq_len = max_seq_len
        
    def call(self, inputs, training=False):
        enc_inputs, dec_inputs = inputs
        
        # Encode input sequence
        enc_output = self.encode(enc_inputs, training=training)
        
        # Decode output sequence
        dec_output = self.decode(dec_inputs, enc_output, training=training)
        
        return dec_output

    def encode(self, inputs, training=False):
        # Embedding and positional encoding
        x = self.embedding(inputs)
        
        # Iterate through encoder layers
        for encoder_layer in self.encoder_layers:
            x = encoder_layer(x, training=training)
        
        return x
    
    def decode(self, targets, enc_output, training=False):
        # Embedding and positional encoding
        x = self.embedding(targets)
        
        # Iterate through decoder layers
        for decoder_layer in self.decoder_layers:
            x = decoder_layer(x, enc_output, training=training)
        
        # Final dense layer for prediction
        dec_output = self.final_layer(x)
        
        # Apply softmax activation
        dec_output = self.softmax(dec_output)
        
        return dec_output

In [63]:
# Load lyrics_tokenizer
with open('lyrics_tokenizer.pkl', 'rb') as f:
    lyrics_tokenizer = pickle.load(f)

# Load title_tokenizer
with open('title_tokenizer.pkl', 'rb') as f:
    title_tokenizer = pickle.load(f)

In [64]:
# Load the dataset
df = pd.read_csv('lyrics_data.csv')

# Function to extract first 100 characters from text
def first_100_chars(text):
    return text[:100]

# Apply the function to the 'Lyrics' column
df['Lyrics'] = df['Lyrics'].apply(first_100_chars)

# Constants
max_lyrics_len = 100
max_title_len = 14

# Tokenize lyrics using pre-trained lyrics_tokenizer
lyrics_sequences = lyrics_tokenizer.texts_to_sequences(df['Lyrics'])
lyrics_padded = pad_sequences(lyrics_sequences, maxlen=max_lyrics_len, padding='post')

# Tokenize titles using pre-trained title_tokenizer
title_sequences = title_tokenizer.texts_to_sequences(df['Song Title'])
title_padded = pad_sequences(title_sequences, maxlen=max_title_len, padding='post')

# Split the data into inputs and outputs
title_inputs = title_padded
lyrics_inputs = lyrics_padded[:, :-1]  # Use all but the last token as input
lyrics_outputs = lyrics_padded[:, 1:]  # Predict the next token in the sequence

# Print shapes for verification
print(title_inputs.shape, lyrics_inputs.shape, lyrics_outputs.shape)

Vocabulary Size: 9130
(6231, 14) (6231, 99) (6231, 99)


In [74]:
# Define parameters
num_layers = 2
num_heads = 4
d_model = 128
d_ff = 512
input_vocab_size = 9130
target_vocab_size = 9140
max_lyrics_len = 99
max_title_len = 14

# Define the input shapes
encoder_input = Input(shape=(max_title_len,), dtype=tf.int32, name="encoder_input")
decoder_input = Input(shape=(max_lyrics_len,), dtype=tf.int32, name="decoder_input")

# Instantiate the Transformer model
transformer = Transformer(num_layers, num_heads, d_model, d_ff, input_vocab_size, target_vocab_size, max_lyrics_len)

# Get the outputs by passing the inputs through the transformer model
outputs = transformer([encoder_input, decoder_input])

# Create the Keras model
model = tf.keras.Model(inputs=[encoder_input, decoder_input], outputs=outputs)

# Print the model summary
model.summary()

In [75]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Compile the model with optimizer and loss function
model.compile(optimizer=Adam(learning_rate=1e-3), 
              loss=SparseCategoricalCrossentropy(from_logits=False))

# Define a learning rate scheduler callback
lr_scheduler = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=10, min_lr=1e-7)

# Train the model
history = model.fit([title_inputs, lyrics_inputs], 
                    lyrics_outputs, 
                    epochs=500, 
                    batch_size=64, 
                    callbacks=[lr_scheduler], 
                    verbose=1)

# Print training history
print(history.history)

Epoch 1/500
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 605ms/step - loss: 4.7883 - learning_rate: 0.0010
Epoch 2/500
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 605ms/step - loss: 1.3992 - learning_rate: 0.0010
Epoch 3/500
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 611ms/step - loss: 1.3658 - learning_rate: 0.0010
Epoch 4/500
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 604ms/step - loss: 1.3186 - learning_rate: 0.0010
Epoch 5/500
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 604ms/step - loss: 1.1836 - learning_rate: 0.0010
Epoch 6/500
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 604ms/step - loss: 1.0284 - learning_rate: 0.0010
Epoch 7/500
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 606ms/step - loss: 0.9072 - learning_rate: 0.0010
Epoch 8/500
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 607ms/step - loss: 0.7865 - learni

In [77]:
# Save the model to a file
model.save('tf_model.keras')

In [84]:
import tensorflow as tf
from tensorflow.keras.layers import Input

@register_keras_serializable()
class SelfAttention(tf.keras.layers.Layer):
    def __init__(self, d_model):
        super(SelfAttention, self).__init__()
        self.d_model = d_model
        self.W_q = tf.keras.layers.Dense(d_model)
        self.W_k = tf.keras.layers.Dense(d_model)
        self.W_v = tf.keras.layers.Dense(d_model)
        
    def call(self, q, k, v):
        scores = tf.matmul(self.W_q(q), self.W_k(k), transpose_b=True)
        scaled_scores = scores / tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        attention_weights = tf.nn.softmax(scaled_scores, axis=-1)
        output = tf.matmul(attention_weights, self.W_v(v))
        return output, attention_weights

@register_keras_serializable()
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, num_heads, d_model):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        self.head_size = d_model // num_heads
        self.attention_heads = [SelfAttention(self.head_size) for _ in range(num_heads)]
        self.dense = tf.keras.layers.Dense(d_model)
        
    def call(self, q, k, v):
        head_outputs = [head(q, k, v)[0] for head in self.attention_heads]
        concatenated = tf.concat(head_outputs, axis=-1)
        output = self.dense(concatenated)
        return output

@register_keras_serializable()
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, d_ff):
        super(FeedForward, self).__init__()
        self.dense1 = tf.keras.layers.Dense(d_ff, activation="relu")
        self.dense2 = tf.keras.layers.Dense(d_model)
        
    def call(self, inputs):
        intermediate_output = self.dense1(inputs)
        output = self.dense2(intermediate_output)
        return output

@register_keras_serializable()
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, num_heads, d_model, d_ff):
        super(EncoderLayer, self).__init__()
        self.multihead_attention = MultiHeadAttention(num_heads, d_model)
        self.feed_forward = FeedForward(d_model, d_ff)
        self.layer_norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layer_norm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        
    def call(self, inputs, training=False):
        attn_output = self.multihead_attention(inputs, inputs, inputs)
        attn_output = self.layer_norm1(inputs + attn_output)
        ff_output = self.feed_forward(attn_output)
        output = self.layer_norm2(attn_output + ff_output)
        return output

@register_keras_serializable()
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, num_heads, d_model, d_ff):
        super(DecoderLayer, self).__init__()
        self.masked_self_attention = MultiHeadAttention(num_heads, d_model)
        self.cross_attention = MultiHeadAttention(num_heads, d_model)
        self.feed_forward = FeedForward(d_model, d_ff)
        self.layer_norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layer_norm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layer_norm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
            
    def call(self, inputs, enc_output, training=False):
        masked_attn_output = self.masked_self_attention(inputs, inputs, inputs)
        masked_attn_output = self.layer_norm1(inputs + masked_attn_output)
        cross_attn_output = self.cross_attention(masked_attn_output, enc_output, enc_output)
        cross_attn_output = self.layer_norm2(masked_attn_output + cross_attn_output)
        ff_output = self.feed_forward(cross_attn_output)
        output = self.layer_norm3(cross_attn_output + ff_output)
        return output

@register_keras_serializable()
class Transformer(tf.keras.Model):
    def __init__(self, num_layers, num_heads, d_model, d_ff, input_vocab_size, target_vocab_size, max_seq_len):
        super(Transformer, self).__init__()
        self.encoder_layers = [EncoderLayer(num_heads, d_model, d_ff) for _ in range(num_layers)]
        self.decoder_layers = [DecoderLayer(num_heads, d_model, d_ff) for _ in range(num_layers)]
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.final_layer = tf.keras.layers.Dense(target_vocab_size)
        self.softmax = tf.keras.layers.Softmax(axis=-1)
        self.max_seq_len = max_seq_len
        
    def call(self, inputs, training=False):
        enc_inputs, dec_inputs = inputs
        enc_output = self.encode(enc_inputs, training=training)
        dec_output = self.decode(dec_inputs, enc_output, training=training)
        return dec_output

    def encode(self, inputs, training=False):
        x = self.embedding(inputs)
        for encoder_layer in self.encoder_layers:
            x = encoder_layer(x, training=training)
        return x
    
    def decode(self, targets, enc_output, training=False):
        x = self.embedding(targets)
        for decoder_layer in self.decoder_layers:
            x = decoder_layer(x, enc_output, training=training)
        dec_output = self.final_layer(x)
        dec_output = self.softmax(dec_output)
        return dec_output

In [86]:
model.save('tf_model1.keras')

In [89]:
import numpy as np
import tensorflow as tf

def generate_lyrics(model, tokenizer, title, max_lyrics_len, temperature=1.0, top_k=50):
    # Encode the title
    title_sequence = tokenizer.texts_to_sequences([title])[0]
    title_sequence = np.array(title_sequence).reshape(1, -1)
    
    # Start with the beginning of sequence token
    BOS_token = tokenizer.word_index.get('<bos>', 0)  # Adjust based on your tokenizer
    EOS_token = tokenizer.word_index.get('<eos>', 0)  # Adjust based on your tokenizer
    decoder_input = [BOS_token]
    output_sequence = []
    
    for i in range(max_lyrics_len):
        # Prepare the inputs
        decoder_input_array = np.array(decoder_input).reshape(1, -1)
        
        # Predict the next token
        predictions = model([title_sequence, decoder_input_array], training=False)
        predictions = predictions[0, -1, :]
        
        # Apply temperature scaling
        predictions = predictions / temperature
        
        # Clip the predictions to prevent NaN values
        predictions = tf.clip_by_value(predictions, -1e9, 1e9)
        
        # Apply top-k sampling
        sorted_indices = tf.argsort(predictions, direction='DESCENDING')[:top_k]
        sorted_predictions = tf.gather(predictions, sorted_indices)
        
        # Check for NaN values
        if tf.reduce_any(tf.math.is_nan(sorted_predictions)):
            print("NaN values found in predictions. Skipping this step.")
            break
        
        probabilities = tf.nn.softmax(sorted_predictions).numpy()
        
        # Check for NaN values in probabilities
        if np.isnan(probabilities).any():
            print("NaN values found in probabilities. Skipping this step.")
            break
        
        next_token = np.random.choice(sorted_indices.numpy(), p=probabilities)
        
        # Append the token to the output sequence
        output_sequence.append(next_token)
        
        # Check if the end-of-sequence token is generated
        if next_token == EOS_token:
            break
        
        # Append the next token to the decoder input
        decoder_input.append(next_token)
    
    # Convert the output sequence to text
    generated_lyrics = tokenizer.sequences_to_texts([output_sequence])[0]
    
    return generated_lyrics

In [90]:
title = "like that"
generated_lyrics = generate_lyrics(transformer, lyrics_tokenizer, title, max_lyrics_len=100)
print(f"Generated Lyrics: {generated_lyrics}")

Generated Lyrics: of fire day when only chorus billy l only l make out rest quiet mirror live night never seem clear sunshine s nice stone never start workin weak right rest of together verse control before mirror small trial verse when celebrate faces aw only workin nothing rest whod ne another living together only better summers c shabada make going through to spend weary rocky no my dr control communicating cant barely s seven sale then placing if on living fall through heartache should never reaching my better make clear cant ne ends waste time troubadour please still together only
