### Installing packages and setting up google drive

In [2]:
!pip install datasets tokenizers torch

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Loading the dataset and pre-processing

In [4]:
from datasets import load_from_disk
ds = load_from_disk('/content/drive/My Drive/genius-song-lyrics-dataset')
ds

Loading dataset from disk:   0%|          | 0/19 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['title', 'tag', 'artist', 'year', 'views', 'features', 'lyrics', 'id', 'language_cld3', 'language_ft', 'language'],
        num_rows: 5134856
    })
})

In [5]:
ds_english = ds['train'].filter(lambda x: x['language'] == 'en')
ds_english

Dataset({
    features: ['title', 'tag', 'artist', 'year', 'views', 'features', 'lyrics', 'id', 'language_cld3', 'language_ft', 'language'],
    num_rows: 3374198
})

In [6]:
columns_to_remove = ['title', 'artist', 'year', 'views', 'features', 'id', 'language_cld3', 'language_ft', 'language']
ds_selected = ds_english.remove_columns(columns_to_remove)
ds_selected

Dataset({
    features: ['tag', 'lyrics'],
    num_rows: 3374198
})

### Tokenizing the dataset

In [5]:
import torch
from datasets import Dataset, load_dataset
from tokenizers import Tokenizer, models, pre_tokenizers, trainers
from torch.nn.utils.rnn import pad_sequence

In [8]:
genre_samples = {genre: [] for genre in ['rap', 'rb', 'rock', 'pop', 'misc', 'country']}
sample_size = 2000

for entry in ds_selected:
    genre = entry['tag']
    if genre in genre_samples and len(genre_samples[genre]) < sample_size:
        genre_samples[genre].append(entry)
    if all(len(samples) >= sample_size for samples in genre_samples.values()):
        break

# Convert to Dataset format
ds_test = Dataset.from_list([entry for genre in genre_samples for entry in genre_samples[genre]])

In [9]:
with open("lyrics_corpus.txt", "w", encoding="utf-8") as f:
    for lyric in ds_test["lyrics"]:
        f.write(lyric + "\n")

# Initialize BPE Tokenizer
tokenizer = Tokenizer(models.BPE())
tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
trainer = trainers.BpeTrainer(special_tokens=["<PAD>", "<UNK>", "<SOS>", "<EOS>"], vocab_size=30000)

tokenizer.train(["lyrics_corpus.txt"], trainer)

tokenizer.save("/content/drive/My Drive/lyrics_tokenizer.json")

In [10]:
# Load trained tokenizer
tokenizer = Tokenizer.from_file("/content/drive/My Drive/lyrics_tokenizer.json")

unique_genres = sorted(set(ds_test["tag"]))  # Ensure fixed ordering
genre_to_id = {genre: idx for idx, genre in enumerate(unique_genres)}

def preprocess_data(genre, lyric):
    genre_id = genre_to_id[genre]
    lyric_tokens = tokenizer.encode(lyric).ids
    output_ids = torch.tensor(
        [tokenizer.token_to_id("<SOS>")] + lyric_tokens + [tokenizer.token_to_id("<EOS>")], dtype=torch.long
    )  # Target for the Decoder layer
    return genre_id, output_ids

processed_data = [preprocess_data(genre, lyric) for genre, lyric in zip(ds_test["tag"], ds_test["lyrics"])]

input_ids = torch.tensor([x[0] for x in processed_data], dtype=torch.long)

# Padding lyrics sequences for uniform batch sizes
output_ids = pad_sequence([x[1] for x in processed_data], batch_first=True, padding_value=tokenizer.token_to_id("<PAD>"))

torch.save((input_ids, output_ids, genre_to_id, tokenizer), "/content/drive/My Drive/processed_data.pt")

print("Preprocessing complete! Data saved successfully.")

Preprocessing complete! Data saved successfully.


In [17]:
input_ids, output_ids, genre_to_id, tokenizer = torch.load("/content/drive/My Drive/processed_data.pt")
print(f"Input shape: {input_ids.shape}, Output shape: {output_ids.shape}")

  input_ids, output_ids, genre_to_id, tokenizer = torch.load("/content/drive/My Drive/processed_data.pt")


Input shape: torch.Size([12000]), Output shape: torch.Size([12000, 24097])


### Building the Transformer architecture step by step

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

#### Positional Embeddings: for retaining the word order

In [10]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=512):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = pe.unsqueeze(0)  # Shape: (1, max_len, d_model)

    def forward(self, x):
        return x + self.pe[:, :x.size(1), :].to(x.device)

#### Building Decoder Layer which includes
1. Masked Multi-Head Self-Attention: Prevents looking ahead in sequence
2. Feed-Forward Network (FFN): Processes hidden representations
3. Residual Connections & Layer Normalization : Stabilizes training

In [11]:
class TransformerDecoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, dim_ff, dropout=0.1):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, num_heads, dropout=dropout, batch_first=True)
        self.ffn = nn.Sequential(
            nn.Linear(d_model, dim_ff),
            nn.ReLU(),
            nn.Linear(dim_ff, d_model),
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, attn_mask):
        # Self-attention with masking
        attn_output, _ = self.self_attn(x, x, x, attn_mask=attn_mask)
        x = self.norm1(x + self.dropout(attn_output))

        # Feed-forward network
        ffn_output = self.ffn(x)
        x = self.norm2(x + self.dropout(ffn_output))

        return x

#### Full Transformer Model includes
1. Genre Embedding
2. Token Embedding
3. Positional Encoding
4. Stacked Transformer Decoder Blocks
5. Final Linear Projection to Vocabulary Size

In [12]:
class LyricsTransformer(nn.Module):
    def __init__(self, vocab_size, num_genres, d_model=256, num_heads=8, num_layers=6, dim_ff=512, max_len=512, dropout=0.1):
        super().__init__()

        # Genre embedding (maps genre to a dense vector)
        self.genre_embedding = nn.Embedding(num_genres, d_model)

        # Token embedding (maps words to vectors)
        self.token_embedding = nn.Embedding(vocab_size, d_model)

        # Positional encoding
        self.positional_encoding = PositionalEncoding(d_model, max_len)

        # Transformer decoder layers
        self.decoder_layers = nn.ModuleList([
            TransformerDecoderLayer(d_model, num_heads, dim_ff, dropout) for _ in range(num_layers)
        ])

        # Final linear projection
        self.fc_out = nn.Linear(d_model, vocab_size)

    def generate_mask(self, seq_len, device):
        """Creates a mask to prevent future words from being seen."""
        mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).to(device)
        return mask.masked_fill(mask == 1, float('-inf'))

    def forward(self, genre, input_tokens):
        """
        genre: (batch_size,) -> Genre indices
        input_tokens: (batch_size, seq_len) -> Tokenized lyrics
        """

        # Embed genre and expand to match sequence length
        genre_emb = self.genre_embedding(genre).unsqueeze(1)  # Shape: (batch, 1, d_model)

        # Embed input tokens
        token_emb = self.token_embedding(input_tokens)  # (batch, seq_len, d_model)

        # Apply positional encoding
        x = self.positional_encoding(token_emb)

        # Add genre embedding to the first token's position
        x[:, 0, :] += genre_emb.squeeze(1)

        # Generate causal mask
        mask = self.generate_mask(input_tokens.shape[1], input_tokens.device)

        # Pass through transformer decoder layers
        for layer in self.decoder_layers:
            x = layer(x, attn_mask=mask)

        # Final projection to vocabulary
        logits = self.fc_out(x)  # (batch, seq_len, vocab_size)

        return logits

### Prepare Data for Training

In [12]:
from torch.utils.data import Dataset, DataLoader

class LyricsDataset(Dataset):
    def __init__(self, input_ids, output_ids):
        self.input_ids = input_ids
        self.output_ids = output_ids

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.output_ids[idx]

output_ids = output_ids[:, :512]  # Truncate long sequences

# Initialize dataset
dataset = LyricsDataset(input_ids, output_ids)

# Define batch size
BATCH_SIZE = 8

# Create DataLoader
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

###  Initialize Model & Training Components

In [13]:
# Load tokenizer again
tokenizer = Tokenizer.from_file("/content/drive/My Drive/lyrics_tokenizer.json")

# Get vocabulary size
VOCAB_SIZE = len(tokenizer.get_vocab())
NUM_GENRES = len(genre_to_id)

# Initialize model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LyricsTransformer(vocab_size=VOCAB_SIZE, num_genres=NUM_GENRES).to(device)

# Define optimizer and loss function
loss_fn = nn.CrossEntropyLoss(ignore_index=tokenizer.token_to_id("<PAD>"))
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, betas=(0.9, 0.98), weight_decay=1e-4)

 ### Training Loop

In [15]:
from tqdm import tqdm

# Training loop
NUM_EPOCHS = 5

for epoch in range(NUM_EPOCHS):
    model.train()
    epoch_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}")

    for genre, lyrics in progress_bar:
        genre, lyrics = genre.to(device), lyrics.to(device)

        optimizer.zero_grad()

        # Forward pass
        logits = model(genre, lyrics[:, :-1])  # Remove last token for teacher forcing

        # Compute loss
        loss = loss_fn(logits.reshape(-1, VOCAB_SIZE), lyrics[:, 1:].reshape(-1))  # Shift target left
        loss.backward()

        # Update weights
        optimizer.step()

        epoch_loss += loss.item()
        progress_bar.set_postfix(loss=loss.item())

    print(f"Epoch {epoch+1}: Average Loss = {epoch_loss / len(train_loader)}")

    # Save model after each epoch
    torch.save(model.state_dict(), f"/content/drive/My Drive/lyrics_transformer_epoch{epoch+1}.pth")

Epoch 1/5: 100%|██████████| 1500/1500 [04:07<00:00,  6.06it/s, loss=5.48]


Epoch 1: Average Loss = 5.389508797009786


Epoch 2/5: 100%|██████████| 1500/1500 [04:07<00:00,  6.05it/s, loss=4.89]


Epoch 2: Average Loss = 5.015917940616608


Epoch 3/5: 100%|██████████| 1500/1500 [04:07<00:00,  6.05it/s, loss=5.12]


Epoch 3: Average Loss = 4.805589326699574


Epoch 4/5: 100%|██████████| 1500/1500 [04:08<00:00,  6.05it/s, loss=4.12]


Epoch 4: Average Loss = 4.5994695971806845


Epoch 5/5: 100%|██████████| 1500/1500 [04:06<00:00,  6.08it/s, loss=4.4]


Epoch 5: Average Loss = 4.376181246598562


In [15]:
model.load_state_dict(torch.load("/content/drive/My Drive/lyrics_transformer_epoch5.pth"))

  model.load_state_dict(torch.load("/content/drive/My Drive/lyrics_transformer_epoch5.pth", map_location='cpu'))


<All keys matched successfully>

### Model evaluation
1. Random Sampling (Selects a token based on probabilities instead of always choosing the highest one)
2. Top-k Sampling (Restricts choices to the top k most probable tokens before sampling)

In [40]:
def generate_lyrics_sampling(model, genre_name, max_length=100, temperature=1.0):
  model.eval()
  genre_id = torch.tensor([genre_to_id[genre_name]], dtype=torch.long).to(device)
  generated = [tokenizer.token_to_id("<SOS>")]
  generated_tensor = torch.tensor(generated, dtype=torch.long).unsqueeze(0).to(device)

  for _ in range(max_length):
      with torch.no_grad():
          logits = model(genre_id, generated_tensor)[:, -1, :] / temperature  # Scaling the logits
          probs = F.softmax(logits, dim=-1)
          next_token = torch.multinomial(probs, num_samples=1).item()  # Sample token

      if next_token == tokenizer.token_to_id("<EOS>"):
          break

      generated.append(next_token)
      generated_tensor = torch.tensor(generated, dtype=torch.long).unsqueeze(0).to(device)

  return tokenizer.decode(generated)

generate_lyrics_sampling(model, genre_name="misc", temperature=0.7)

'Don ’ t know you have a little bit of the world , but a little bit of a little bit of the world , and a little hearts , the little , and a little without a little world , and a little thing , and a little thing , and a little bit of the world . It was your little too much to think that ORS to the world . If you leave you read you , you took the little few things to make this — a little girl . It was your little thing ,'

In [68]:
def generate_lyrics_top_k(model, genre_name, max_length=100, k=50, temperature=1.0):
    model.eval()
    genre_id = torch.tensor([genre_to_id[genre_name]], dtype=torch.long).to(device)
    generated = [tokenizer.token_to_id("<SOS>")]
    generated_tensor = torch.tensor(generated, dtype=torch.long).unsqueeze(0).to(device)

    for _ in range(max_length):
        with torch.no_grad():
            logits = model(genre_id, generated_tensor)[:, -1, :] / temperature
            probs = F.softmax(logits, dim=-1)

            # Get top-k tokens
            top_k_probs, top_k_indices = torch.topk(probs, k)
            next_token = top_k_indices.squeeze(0)[torch.multinomial(top_k_probs.squeeze(0), 1)].item()

        if next_token == tokenizer.token_to_id("<EOS>"):
            break

        generated.append(next_token)
        generated_tensor = torch.tensor(generated, dtype=torch.long).unsqueeze(0).to(device)

    return tokenizer.decode(generated)

generate_lyrics_top_k(model, genre_name="pop", k=60, temperature=0.9)

"[ Verse 1 ] What ' s a thing that I ' m going to sing To take it on me When she ' s looking to my head I had to sing my heart of my heart of me And I had a heart of her heart , and me , and me I had a heart , and me and me [ Pre - Chorus ] What ' s my thoughts of me is done , my heart is like a star I like a star , and me That ' s why ' s alright ' cause"

### Perplexity (PPL) Score Analysis: for evaluating lyrics generation
- A score between 1.0 - 10 indicates -> Excellent, fluent lyrics, beyond that indicates -> decent with some uncertain words chosen
- Further I will analyze these scores for some of the genres using the lyrics generated by the two methods defined above

In [22]:
def calculate_perplexity(model, genre_name, generated_text):
    model.eval()
    genre_id = torch.tensor([genre_to_id[genre_name]], dtype=torch.long).to(device)

    # Tokenize generated text
    token_ids = tokenizer.encode(generated_text).ids
    input_tensor = torch.tensor(token_ids, dtype=torch.long).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(genre_id, input_tensor[:, :-1])  # Ignore last token for prediction
        log_probs = F.log_softmax(logits, dim=-1)  # Convert logits to log probabilities

    # Compute perplexity
    target_tokens = input_tensor[:, 1:]  # Shift target tokens
    token_log_probs = log_probs.gather(2, target_tokens.unsqueeze(-1)).squeeze(-1)  # Get token-wise log probs
    avg_log_prob = token_log_probs.mean()  # Average log prob

    perplexity = math.exp(-avg_log_prob.item())  # Apply exp to get PPL score
    return perplexity

#### Testing for Random Sampling method

In [24]:
# Checking the score for genre `country`
generated_lyrics1 = "I ' m a long ride , I ' m a long way around the left this day must ' ve been on a long way I ' m a long time I ' m a long time I ' ve been a long time and I ' m a long time , long time , long time I ' m a long time on a long time I ' m a long time if you do"
generated_lyrics2 = "If I could be a man ' s like a man ' s about a man who ' s talking about daddy and his kids need a man who ' s , he ' s gonna be a man who ' s talking about a man ' s son of the man ' s in New York , he ' s your man , he ' s got a man And you can ' t know , I ' d be a man ' s good man , he ' s gonna be a man"
print("Perplexity 01:", calculate_perplexity(model, "country", generated_lyrics1))
print("Perplexity 02:", calculate_perplexity(model, "country", generated_lyrics2))

Perplexity 01: 5.494203505034539
Perplexity 02: 7.2454486677982075


In [27]:
# Checking the score for genre `rap`
generated_lyrics1 = "I ' m talking , I ' m a simple , I ' m just a lot of shit , it ' s a thing , come , that ' s the world , this is what it ' s that ain ' t nobody tell me what you do ? I ' m just a feeling , this is what I ' m , my style is for you , this is what you are My people right , don ' t know what or they say I ' m talking bout to"
generated_lyrics2 = "Yeah , yeah [ Verse 1 ] I get my money in the knowledge , I ' m a nasty , I ' m a hater , damn , so I ' m a different thing I ' m a hot hit the floor when I ' m a fool , I ' m a lazy shit on the hood , but I ' m a hot thing , I ' m a lot , no one , man , I ' m a hustler , you can ' t handle it , I ' m"
print("Perplexity 01:", calculate_perplexity(model, "rap", generated_lyrics1))
print("Perplexity 02:", calculate_perplexity(model, "rap", generated_lyrics2))

Perplexity 01: 13.556865020324194
Perplexity 02: 10.886218848747491


In [30]:
# Checking the score for genre `rock`
generated_lyrics1 = "What ' s got to be this here , this old and this whole world is here , this is here with a little money Don ' t let it come , this is here but a good time Even if it comes to me ? I know about what I ' m from you Don ' t let it , this is this is here , this is here , this is here to me , this is here with you , this is here , this is right , this is here with you"
generated_lyrics2 = "I ' m going to see you ' re going nowhere I ' m going to see you ' re going to see the way it ' s all that I ' ve been going to see it ' s always gonna see you ' re going to see your turn , I ' m going to see you ' re going to see you ' re going to see you ' re going to see you ' re going to see you ' re going to see you ' re going to see you"
print("Perplexity 01:", calculate_perplexity(model, "rock", generated_lyrics1))
print("Perplexity 02:", calculate_perplexity(model, "rock", generated_lyrics2))

Perplexity 01: 9.79094423678175
Perplexity 02: 3.2399278364704016


In [33]:
# Checking the score for genre `pop`
generated_lyrics1 = "I close my heart is running ' Cause they almost told me to stay , but there ' s no betray me , but I ' m a fresh , but it ' s just a lovely - yeah , but it ' s just a gh ' cause I ' m a while , now it ' s just a little bit , but it ' s just a little bit of us , but a little bit waited , but I know I ' m a record , when I ' m a couple"
generated_lyrics2 = "[ Intro : Robin Thicke ] Yeah , yeah , yeah , yeah , yeah , yeah , yeah , yeah , yeah , yeah , yeah , yeah , yeah , yeah , yeah [ Verse 1 : rog Glocks ] Baby , you can ' t be no more , I ' ll be there ' Cause my time , and I ' m not everything that I ' m from a thing that I ' m saying , I GU into a show you so what I do I ' m from being honest and I"
print("Perplexity 01:", calculate_perplexity(model, "pop", generated_lyrics1))
print("Perplexity 02:", calculate_perplexity(model, "pop", generated_lyrics2))

Perplexity 01: 13.291694520983032
Perplexity 02: 7.987691866098484


#### Testing for Top-k Sampling method

In [43]:
# Checking the score for genre `country`
generated_lyrics1 = "The sun is going out of the end The fire is going to the world is going home [ Pre - Chorus ] The sun is going to get lost my shoes And , in my hands , we were just trying to know That the world is going to get lost my eyes And it ' s going to get lost [ Chorus ] I ' m going to get lost with my hands And all the dreams is going to get lost , you can ' t stand up high , I ' m"
generated_lyrics2 = "The only one that ' s the only one that ' s just been gone , you were all gone I ' m a girl , and I ' m a girl , I ' m a girl , but you know I ' ll go , but I ' m a girl ' s what I ' m a girl , the only one that should know [ Chorus ] I ' ll be a little boy , I ' ll be a little boy , I ' ll be a little boy , it"
print("Perplexity 01:", calculate_perplexity(model, "country", generated_lyrics1))
print("Perplexity 02:", calculate_perplexity(model, "country", generated_lyrics2))

Perplexity 01: 10.395576277687347
Perplexity 02: 5.852830175452453


In [56]:
# Checking the score for genre `rap`
generated_lyrics1 = "Let us get right on , let us get here , let us get right , get right If you ever seen it out , and let ' s let us get so [ Verse 1 : Eminem ] I wanna spend this far wrong , I wanna get it ' Cause I ' m a thing , like a couple weeks of his game I ' ll make ' m just like a man , the whole world ' ll be done , that if anybody ' s happening , I ' m just"
generated_lyrics2 = "You know why you ' re on the floor You know why you need a fuck you more than the ones that we been thinkin ' on the scene You know why you want every money ? You know why you want your money ? You and that ' re on the radio You know how it ' s your money ? You know you know if you want to get a fuck you life ? You know why your money ? You know why you need your money ?"
print("Perplexity 01:", calculate_perplexity(model, "rap", generated_lyrics1))
print("Perplexity 02:", calculate_perplexity(model, "rap", generated_lyrics2))

Perplexity 01: 21.469948331038108
Perplexity 02: 10.276203525243108


In [65]:
# Checking the score for genre `rock`
generated_lyrics1 = "Oh , you ' re not a little girl , your daddy ' s not the perfect , and you ' re not a little girl , but I ' m the one man that the only one Who wants to stay ? You ' re not a little girl , so I ' m the one Who gave you in the world ? Oh , I ' m the one man that I ' m the one and the man that I ' m here , but I ' m the one who ' s"
generated_lyrics2 = "I see you when I see you all the world is the same You are the same old ones who are you and I see you all the world is in the world of the world I see you all the world is always over [ Chorus : Mark Hoppus ] I know you and the world was so many times before I see you and the world was so good for you [ Verse 2 : Mark Hoppus ] I had a dream of your life and I know you , all the"
print("Perplexity 01:", calculate_perplexity(model, "rock", generated_lyrics1))
print("Perplexity 02:", calculate_perplexity(model, "rock", generated_lyrics2))

Perplexity 01: 12.248845064157232
Perplexity 02: 10.090433638679968


In [69]:
# Checking the score for genre `pop`
generated_lyrics1 = "Yeah - I know what to get on you Yeah [ Verse 1 : Zayn ] If you should know what in the time I was always trying to be your man It ' s a lot of the world of the world I was the world , so I don ' t need you , but it ' s already too long I like it But I ' m in my favorite world for you [ Pre - Chorus ] I know , I know you ' re happy it ' s real slow I know"
generated_lyrics2 = "What ' s a thing that I ' m going to sing To take it on me When she ' s looking to my head I had to sing my heart of my heart of me And I had a heart of her heart , and me , and me I had a heart , and me and me [ Pre - Chorus ] What ' s my thoughts of me is done , my heart is like a star I like a star , and me That ' s why ' s alright ' cause"
print("Perplexity 01:", calculate_perplexity(model, "pop", generated_lyrics1))
print("Perplexity 02:", calculate_perplexity(model, "pop", generated_lyrics2))

Perplexity 01: 14.599365367187545
Perplexity 02: 13.90482641370274
