In [34]:
import torch
import pandas as pd
import numpy as np

# Load the lyrics table and flatten it into one training corpus.
df=pd.read_csv('Songs.csv')
# Every cell of the 'Lyrics' column is cast to str before joining, and the
# songs are separated by a single space.
# NOTE(review): a missing lyric would be cast to the literal text "nan" by
# astype(str) -- confirm the column has no missing values.
text=" ".join(df['Lyrics'].astype(str))

# Character vocabulary: the sorted set of distinct characters in the corpus,
# with lookup tables in both directions (index <-> character).
chars=sorted(set(text))
vocab_size=len(chars)
idx_to_char=dict(enumerate(chars))
char_to_idx={c:i for i,c in idx_to_char.items()}
print("Unique characters:",vocab_size)


Unique characters: 118


Loads the lyrics dataset, combines all lyrics into one text, and builds the character-to-index and index-to-character lookup tables.

In [45]:
# Hyper-parameters.
hidden_size=512  # width of the recurrent hidden state
seq_length=10    # number of characters in one training window

# Weights start as small Gaussian noise (standard normal scaled by 0.01);
# both biases start at zero.
W_xh=0.01*torch.randn(vocab_size,hidden_size)   # input  -> hidden
W_hh=0.01*torch.randn(hidden_size,hidden_size)  # hidden -> hidden
W_hy=0.01*torch.randn(hidden_size,vocab_size)   # hidden -> output logits
b_h=torch.zeros(1,hidden_size)
b_y=torch.zeros(1,vocab_size)

# Everything the optimizer should update; switch on gradient tracking for
# each tensor in place.
parameters=[W_xh,W_hh,W_hy,b_h,b_y]
for p in parameters:
    p.requires_grad_(True)


In [46]:
def forward_pass(inputs,targets,h_prev):
    """Run the RNN over one character window and accumulate its loss.

    At every step the hidden state is updated as
    tanh(x_t @ W_xh + h @ W_hh + b_h), logits are computed as
    h @ W_hy + b_y, and the cross-entropy of those logits against the
    target character for that step is added to the running loss.

    Args:
        inputs: sequence of integer vocabulary indices, one per time step.
        targets: sequence of integer vocabulary indices; targets[t] is the
            character the network should predict at step t. Must be at
            least as long as inputs.
        h_prev: hidden state to start from; the callers in this notebook
            pass a (1, hidden_size) tensor.

    Returns:
        A tuple (loss, h): the sum (not the mean) of the per-step
        cross-entropy losses, and the hidden state after the last step.
        For an empty inputs sequence this is (0, h_prev).
    """
    loss=0
    h=h_prev
    for t in range(len(inputs)):
        # Multiplying a one-hot row vector by W_xh just selects row
        # inputs[t] of W_xh, so index the row directly instead of
        # allocating a vocab-sized vector and doing a full matmul each step.
        x_proj=W_xh[inputs[t]].unsqueeze(0)
        h=torch.tanh(x_proj+torch.mm(h,W_hh)+b_h)
        y_t=torch.mm(h,W_hy)+b_y
        # Build the target on the same device as the logits so the function
        # keeps working if the parameters are moved off the CPU.
        target_tensor=torch.tensor([targets[t]],device=y_t.device)
        loss+=torch.nn.functional.cross_entropy(y_t,target_tensor)
    return loss,h

In [47]:
def sample_text(h_prev,seed_idx,n):
    """Generate n characters from the RNN, one sampled character at a time.

    Each step feeds the previous character into the network, turns the
    output logits into probabilities with softmax, draws the next character
    index from that distribution, and feeds the drawn character back in.

    Args:
        h_prev: hidden state to start from; the callers in this notebook
            pass a (1, hidden_size) tensor.
        seed_idx: vocabulary index of the character fed in at the first
            step. The seed character itself is not part of the result.
        n: number of characters to generate.

    Returns:
        A string of n sampled characters (empty when n is 0).
    """
    h=h_prev
    idx=seed_idx
    sampled=[]
    # Sampling never needs gradients. The weights have requires_grad=True,
    # so without no_grad() every generated character would be recorded in an
    # autograd graph that is never used, and memory would grow with n.
    with torch.no_grad():
        for _ in range(n):
            # A one-hot vector times W_xh is just row idx of W_xh.
            h=torch.tanh(W_xh[idx].unsqueeze(0)+torch.mm(h,W_hh)+b_h)
            y=torch.mm(h,W_hy)+b_y
            probs=torch.softmax(y,1)
            idx=torch.multinomial(probs,1).item()
            sampled.append(idx_to_char[idx])
    return "".join(sampled)

# Train the character-level RNN with Adam on consecutive, non-overlapping
# windows of seq_length characters.
optimizer=torch.optim.Adam(parameters,lr=0.001)
p=0  # start offset of the current window inside `text`
h_prev=torch.zeros(1,hidden_size)
# NOTE: "epoch" below counts single optimizer steps (one window each), not
# full passes over the corpus.
for epoch in range(30000):
    # Wrap around once the window, plus the one-character look-ahead needed
    # for the targets, would reach the end of the text. The context does not
    # continue across the wrap, so the hidden state is reset as well.
    if p+seq_length+1>=len(text):
        p=0
        h_prev=torch.zeros(1,hidden_size)
    inputs=[char_to_idx[ch] for ch in text[p:p+seq_length]]
    # Targets are the inputs shifted by one character (next-character prediction).
    targets=[char_to_idx[ch] for ch in text[p+1:p+seq_length+1]]
    loss,h_last=forward_pass(inputs,targets,h_prev)
    optimizer.zero_grad()
    loss.backward()
    # Clip the global gradient norm to 5 before the update.
    torch.nn.utils.clip_grad_norm_(parameters,5)
    optimizer.step()
    if epoch%100==0:
        # The reported loss is the sum over the window's time steps.
        print("Epoch",epoch,"Loss:",round(loss.item(),4))
        print("Generated Text:",sample_text(h_prev,inputs[0],100))
        print("-"*30)
    # Windows are consecutive, so carry the final hidden state into the next
    # one instead of restarting from zeros on every step. detach() cuts the
    # autograd graph so the next backward() stays within its own window
    # (truncated backpropagation through time).
    h_prev=h_last.detach()
    p+=seq_length


Epoch 0 Loss: 47.7077
Generated Text: “ .po’L ;boNw/م(ö>T`2'pAa>اkN:اéxم&i—:g A>6êوسس>Z(fN~4(exf,lW—x2àèXç–‚6س–‘سä y“е`S”kK;FLDs/xlFàG“1еc
------------------------------
Epoch 100 Loss: 32.1483
Generated Text: :UaIehe la'wst oo 
wluol
e't no li  u Heeowcw ove   s roee e roednlo oew 'ua  se oIotuodeeoo' t   l 
------------------------------
Epoch 200 Loss: 42.939
Generated Text: stvkeeeoee d'ylo  vlldh  tlaheeoDh eltnev e  'lhieeeyl 
ihe  l nohheelf au eh roekpitn hileadi
heee 
------------------------------
Epoch 300 Loss: 31.3996
Generated Text: ketdrorsn uslod?so  msttoynvo tueemunnvagvnuvnuu auu u cn ttnav(d (iei YunitoYituslI i nnoodussoinvm
------------------------------
Epoch 400 Loss: 38.0773
Generated Text: wrgrlaeon oennraonnIil
usu nuuIwt phaIgenrnc lu aa envn ranunsua   gnugusougnra
uuug e
uann u eehudw
------------------------------
Epoch 500 Loss: 24.7736
Generated Text: 4gt sys s   nb ihtol'lmnso seles leooshbno sc s()srrb euo
lweleeg orsny oib e ba t teeeeoe  
slsydoo
--

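Text sampling from the RNN, optimizer setup, and the full training loop with gradient clipping, loss backpropagation, and periodic lyric generation.
30000 training iterations (labelled "Epoch" in the output); each iteration is one optimizer step on a single 10-character window, not a full pass over the lyrics.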

In [49]:
# Generate a 2000-character sample from the trained network and save it.
# Sampling is seeded with the first character of the corpus and starts from
# an all-zero hidden state.
seed_char = text[0]
seed_idx = char_to_idx[seed_char]
h_prev = torch.zeros(1, hidden_size)
finalt2 = sample_text(h_prev, seed_idx, 2000)

# Written as UTF-8 explicitly rather than relying on the platform default.
with open("finalt2.txt", mode="w", encoding="utf-8") as f:
    f.write(finalt2)
print("done")

done
