### Set Up the Environment and Define the Model

In [13]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import pandas as pd

# ---- dataset ----
# Read the lyrics CSV from the Kaggle input directory into a DataFrame.
file_path = '/kaggle/input/spotify-million-song-dataset/spotify_millsongdata.csv'
df = pd.read_csv(file_path)

# ---- hyperparameters ----
# Optimisation settings (not referenced by the inference code in this notebook).
batch_size = 64
learning_rate = 1e-3

# Model geometry; these are read as module-level globals by the model classes.
block_size = 128   # size of the causal mask and of the position-embedding table
n_embd = 128       # embedding width
n_head = 4         # attention heads per Block
n_layer = 4        # number of stacked Blocks
dropout = 0.0      # probability handed to every nn.Dropout

# Prefer the GPU when PyTorch can see one.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# ---- corpus ----
# Join every entry of the 'text' column into one string, newline-separated.
text = df['text'].str.cat(sep='\n')

# text tokenize function
def text_tokenize(text):
    """Build a character-level tokenizer for *text* and encode the text with it.

    The vocabulary is the sorted set of distinct characters in *text*; each
    character's id is its position in that sorted order.

    Args:
        text: The full corpus as a single string.

    Returns:
        A 6-tuple ``(data, stoi, itos, vocab_size, encode, decode)``:
        ``data`` is a 1-D ``torch.long`` tensor holding the encoded corpus,
        ``stoi`` / ``itos`` are the char->id and id->char dicts,
        ``vocab_size`` is the number of distinct characters,
        ``encode(s)`` maps a string to a list of ids, and
        ``decode(ids)`` maps an iterable of ids back to a string.

    Raises:
        KeyError: from ``encode`` / ``decode`` when given a character or id
            that is not in the vocabulary.
    """
    chars = sorted(set(text))
    vocab_size = len(chars)
    stoi = {ch: i for i, ch in enumerate(chars)}
    itos = {i: ch for i, ch in enumerate(chars)}

    def encode(s):
        """Return the list of token ids for string *s*."""
        return [stoi[c] for c in s]

    def decode(ids):
        """Return the string spelled by the token ids in *ids*."""
        return ''.join(itos[i] for i in ids)

    data = torch.tensor(encode(text), dtype=torch.long)

    return data, stoi, itos, vocab_size, encode, decode

# Run the tokenizer over the whole corpus; the returned vocabulary objects
# are kept at module level for the model definition and for decoding later.
(data, stoi, itos,
 vocab_size, encode, decode) = text_tokenize(text)

# Define the necessary model components (Head, MultiHeadAttention, FeedFoward, Block)
class Head(nn.Module):
    """A single head of causal self-attention.

    The input is projected to keys, queries and values of width ``head_size``.
    Attention weights come from a softmax over scaled query/key dot products,
    with a lower-triangular mask so no position attends to a later one.
    """

    def __init__(self, head_size):
        super().__init__()
        # Bias-free projections from the embedding width to head_size.
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        # Lower-triangular ones matrix, registered as a buffer rather than a
        # parameter; sliced to the actual sequence length in forward().
        causal_mask = torch.tril(torch.ones(block_size, block_size))
        self.register_buffer('tril', causal_mask)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply masked self-attention to ``x`` of shape (B, T, C).

        Returns a tensor of shape (B, T, head_size).
        """
        _, seq_len, emb_dim = x.shape
        keys = self.key(x)        # (B, T, head_size)
        queries = self.query(x)   # (B, T, head_size)
        values = self.value(x)    # (B, T, head_size)

        # NOTE(review): the scale is taken from the input embedding width, not
        # from head_size. Left untouched because the saved checkpoint was
        # presumably trained with this scaling -- confirm before changing.
        scale = emb_dim ** -0.5
        scores = (queries @ keys.transpose(-2, -1)) * scale   # (B, T, T)

        # Positions where the mask is zero lie in the future: give them -inf so
        # the softmax assigns them zero weight.
        is_future = self.tril[:seq_len, :seq_len] == 0
        scores = scores.masked_fill(is_future, float('-inf'))

        weights = self.dropout(F.softmax(scores, dim=-1))     # (B, T, T)
        return weights @ values                               # (B, T, head_size)

class MultiHeadAttention(nn.Module):
    """Several self-attention heads run in parallel, then projected.

    Each head produces ``head_size`` features; the head outputs are
    concatenated along the last dimension and mapped to ``n_embd`` features by
    a linear projection followed by dropout.

    Args:
        num_heads: Number of ``Head`` modules to create.
        head_size: Output width of each head.
    """

    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
        # The concatenated head outputs are num_heads * head_size wide. Sizing
        # the projection from that (instead of assuming it equals n_embd)
        # keeps the layer valid when n_embd is not divisible by the head
        # count; when it is divisible the shape is the same as before.
        self.proj = nn.Linear(num_heads * head_size, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return the projected concatenation of all heads for input ``x``."""
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.dropout(self.proj(out))
        return out

class FeedFoward(nn.Module):
    """Feed-forward sub-layer: Linear -> ReLU -> Linear -> Dropout.

    The hidden layer is four times as wide as the input; the output has the
    same width as the input.
    """

    def __init__(self, n_embd):
        super().__init__()
        hidden_dim = 4 * n_embd
        layers = [
            nn.Linear(n_embd, hidden_dim),   # expand
            nn.ReLU(),
            nn.Linear(hidden_dim, n_embd),   # project back to input width
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack."""
        return self.net(x)

class Block(nn.Module):
    """One transformer block: self-attention, then a feed-forward layer.

    Both sub-layers are applied to a layer-normalised copy of the input and
    their result is added back to the input (residual connection).
    """

    def __init__(self, n_embd, n_head):
        super().__init__()
        # Each head gets an equal (floor-divided) share of the embedding width.
        self.sa = MultiHeadAttention(n_head, n_embd // n_head)
        self.ffwd = FeedFoward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        """Return ``x`` after the attention and feed-forward residual updates."""
        attended = self.sa(self.ln1(x))
        x = x + attended
        transformed = self.ffwd(self.ln2(x))
        return x + transformed

# Define the main model class: BigramLanguageModel
class BigramLanguageModel(nn.Module):
    """Character-level language model built from a stack of transformer Blocks.

    Token and position embeddings are summed, passed through ``n_layer``
    ``Block`` modules and a final LayerNorm, then mapped to vocabulary logits.
    Sizes come from the module-level ``vocab_size``, ``n_embd``, ``block_size``,
    ``n_head`` and ``n_layer``.
    """

    def __init__(self):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.position_embedding_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd) # final layer norm
        self.lm_head = nn.Linear(n_embd, vocab_size)

    def forward(self, idx, targets=None):
        """Compute next-token logits and, optionally, the training loss.

        Args:
            idx: Integer tensor of token ids, shape (B, T).
            targets: Optional integer tensor of target ids, shape (B, T).

        Returns:
            ``(logits, loss)``. Without targets, ``logits`` has shape
            (B, T, vocab_size) and ``loss`` is ``None``. With targets,
            ``logits`` is flattened to (B*T, vocab_size) and ``loss`` is the
            cross-entropy against the flattened targets.
        """
        B, T = idx.shape
        tok_emb = self.token_embedding_table(idx) # (B,T,C)
        # Build the position indices on the same device as the input rather
        # than on the module-level `device`: the model may have been moved
        # elsewhere (e.g. loaded on CPU while CUDA is available).
        positions = torch.arange(T, device=idx.device)
        pos_emb = self.position_embedding_table(positions) # (T,C)
        x = tok_emb + pos_emb # (B,T,C)
        x = self.blocks(x) # (B,T,C)
        x = self.ln_f(x) # (B,T,C)
        logits = self.lm_head(x) # (B,T,vocab_size)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

    @torch.no_grad()  # sampling never needs gradients; avoids building a graph per step
    def generate(self, idx, max_new_tokens):
        """Autoregressively sample ``max_new_tokens`` tokens after ``idx``.

        Args:
            idx: Integer tensor of prompt token ids, shape (B, T). It is moved
                to the model's device if it is not already there.
            max_new_tokens: Number of tokens to append.

        Returns:
            Integer tensor of shape (B, T + max_new_tokens) on the model's
            device, containing the prompt followed by the sampled tokens.
        """
        # Align the prompt with wherever the weights live so callers do not
        # have to track the model's device themselves.
        idx = idx.to(self.token_embedding_table.weight.device)
        for _ in range(max_new_tokens):
            # The position table only covers block_size positions, so feed at
            # most the last block_size tokens.
            idx_cond = idx[:, -block_size:]
            logits, _ = self(idx_cond)
            logits = logits[:, -1, :] # (B, C) -- prediction for the next token only
            probs = F.softmax(logits, dim=-1) # (B, C)
            idx_next = torch.multinomial(probs, num_samples=1) # (B, 1)
            idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
        return idx

### Load the Saved Model

In [14]:
# load the saved model weights
model = BigramLanguageModel()
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints from a trusted source; on PyTorch versions that
# support it, consider passing weights_only=True.
model.load_state_dict(torch.load('/kaggle/input/lyrics-generator-by-spotify-million-song-dataset/pytorch/default/1/lyric_generator_model.pth',
                                 map_location=torch.device('cpu')))
model = model.to(torch.device('cpu'))
# The model is only used for generation from here on: switch to eval mode so
# that Dropout layers are disabled regardless of the configured probability.
model.eval()

### Generate Text with the Loaded Model

In [15]:
# a function to generate a song
def generate_song(model, max_new_tokens=2000):
    """Sample a song from *model* and return it as text.

    Generation starts from a single token with id 0 and appends
    ``max_new_tokens`` sampled tokens; the resulting ids are turned back into
    characters with the module-level ``decode``.

    Args:
        model: Model exposing ``generate(idx, max_new_tokens=...)``.
        max_new_tokens: Number of tokens to sample.

    Returns:
        The decoded string (seed character plus the sampled characters).
    """
    # Create the seed on the device the model's weights are on. The global
    # `device` may say 'cuda' while the model was loaded onto the CPU, which
    # would make the first embedding lookup fail. Fall back to the global
    # only when the model has no parameters to inspect.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else device
    context = torch.zeros((1, 1), dtype=torch.long, device=model_device)
    generated = model.generate(context, max_new_tokens=max_new_tokens)
    return decode(generated[0].tolist())

# Sample three songs, one call after another; sampling is random, so each
# call yields a different text.
song_1, song_2, song_3 = (
    generate_song(model, max_new_tokens=2000) for _ in range(3)
)

In [16]:
# Print each generated song under its heading.
for heading, lyrics in (
    ("1st Song:\n", song_1),
    ("\n2nd Song:\n", song_2),
    ("\n3rd Song:\n", song_3),
):
    print(heading, lyrics)

1st Song:
 
Woooh tell me down  
Get out of my heart


Boy holla, but something's coastes  
I woohah, I woohah  
Now not when I'm an angry likes  
Cause you look at the school, now I'm getting's coastest  
Caulin, into Jean, but seems and it kinda fade tight


Whoa For Gor Gor  
Oh Gorgin  
Hiding message rock thurks  
Stop from thought the deseculation  
From my boided tonight hope (Christen)  
From the lane young, From yours right bonight your schprite on me  
Been love is so fragil jar, you


Where everything enough, I make your taxis round, let his machine  
Let his everything but a round and situate, let his mind 'round  
Some mind was such a vachane  
And I'd let us alive  
A victor in paper and I'd better now  
Right without it in my life  
So I better now eat the field


This moment agesolution  
This most kinda pan right throughed me  
A young and a lifetime so thing will we make sene  
So my comfort ain't make sense a gentles  
I'll have right back,  
So you still like thing 