<a href="https://colab.research.google.com/github/Manoj1joshi/hindi-lyrics-generator/blob/main/hindi_lyrics_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Open Colab's file-upload dialog and keep whatever it returns in `uploaded`.
# NOTE(review): the recorded output shows this upload was stored as
# "kaggle (1).json", while the next cell moves "kaggle.json" — confirm the
# credentials file that gets installed is the one intended.
from google.colab import files
uploaded = files.upload()


Saving kaggle.json to kaggle (1).json


In [None]:
# Create ~/.kaggle (no error if it already exists), move kaggle.json from the
# working directory into it, and restrict the file to owner read/write (600).
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Download the dataset archive with the Kaggle CLI, then extract it quietly
# (-q) into the working directory.
# NOTE(review): the next cell reads "hindi_lyrics.txt"; the visible cells do
# not show whether that file comes from this archive — confirm its source.
!kaggle datasets download -d stutig29/hinglish-hindi-parallel-corpus
!unzip -q hinglish-hindi-parallel-corpus.zip


Dataset URL: https://www.kaggle.com/datasets/stutig29/hinglish-hindi-parallel-corpus
License(s): CC0-1.0
Downloading hinglish-hindi-parallel-corpus.zip to /content
  0% 0.00/11.1k [00:00<?, ?B/s]
100% 11.1k/11.1k [00:00<00:00, 21.8MB/s]


In [None]:
# Load Hindi lyrics
import numpy as np
# Read the entire lyrics corpus into memory as one UTF-8 string.
with open("hindi_lyrics.txt", mode="r", encoding="utf-8") as f:
    text = f.read()

print(f"Total characters: {len(text)}")

# Vocabulary: every distinct character of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()
# Forward lookup (character -> index) and reverse lookup (index -> character).
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

def vectorize_string(string):
    """Encode ``string`` as character indices using the module-level ``char2idx``.

    Returns a 1-D ``np.int32`` array holding one index per character, in order.
    Raises ``KeyError`` if a character is missing from ``char2idx``.
    """
    indices = [char2idx[symbol] for symbol in string]
    return np.array(indices, dtype=np.int32)

# Encode the whole corpus as an int32 index array.
# NOTE(review): `vectorized_text` is not referenced again in the visible
# cells; the training cell builds its own `text_as_int` list instead.
vectorized_text = vectorize_string(text)


Total characters: 16266


In [None]:
!pip install gtts

Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting click<8.2,>=7.1 (from gtts)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Downloading click-8.1.8-py3-none-any.whl (98 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m98.2/98.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: click, gtts
  Attempting uninstall: click
    Found existing installation: click 8.2.1
    Uninstalling click-8.2.1:
      Successfully uninstalled click-8.2.1
Successfully installed click-8.1.8 gtts-2.5.4


With the `gtts` library installed, the next cell trains the character-level LSTM on the loaded lyrics, generates new text from a seed prompt, and converts the result to speech.

In [None]:
# ‚úÖ Hindi Lyrics LSTM Model using PyTorch - Train & Generate

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
from google.colab import files
from gtts import gTTS
from IPython.display import Audio

# --- 3. Character Mapping ---
# Built from `text`, which the earlier data-loading cell defines.
vocab = list(set(text))
vocab.sort()
char2idx = dict(zip(vocab, range(len(vocab))))        # character -> integer id
idx2char = dict(enumerate(vocab))                     # integer id -> character
text_as_int = list(map(char2idx.__getitem__, text))   # whole corpus as ids

# --- 4. Dataset Preparation ---
class HindiLyricsDataset(Dataset):
    """Sliding-window dataset for next-character prediction.

    Item ``i`` is a pair ``(inputs, targets)`` of ``seq_length`` ids each;
    ``targets`` is the same window shifted one position to the right, so
    ``targets[k]`` is the id that follows ``inputs[k]`` in the corpus.

    Args:
        text_as_int: Sequence of integer character ids for the whole corpus.
        seq_length: Number of ids in every input/target window.
    """

    def __init__(self, text_as_int, seq_length):
        self.seq_length = seq_length
        # Convert once up front; slicing a tensor per item is far cheaper than
        # rebuilding a tensor from a Python list slice on every access.
        self.data = torch.tensor(text_as_int, dtype=torch.long)

    def __len__(self):
        # Each window needs one extra id for the shifted target. Clamp at 0 so
        # a corpus shorter than seq_length reports "empty" instead of making
        # len() fail on a negative value.
        return max(0, len(self.data) - self.seq_length)

    def __getitem__(self, idx):
        """Return the ``(inputs, targets)`` window starting at ``idx``.

        Negative indices count from the end. Raises ``IndexError`` when the
        index is out of range, which also lets plain iteration terminate.
        """
        size = len(self)
        if idx < 0:
            idx += size
        if not 0 <= idx < size:
            raise IndexError(f"index out of range for dataset of length {size}")
        stop = idx + self.seq_length
        # clone() hands back independent tensors, so callers cannot modify the
        # stored corpus through a returned view.
        return (
            self.data[idx:stop].clone(),
            self.data[idx + 1:stop + 1].clone(),
        )

# Window length (ids per training example) and mini-batch size.
seq_length, batch_size = 100, 64
dataset = HindiLyricsDataset(text_as_int=text_as_int, seq_length=seq_length)
# shuffle=True: windows are drawn in random order rather than corpus order.
data_loader = DataLoader(dataset=dataset, shuffle=True, batch_size=batch_size)

# --- 5. Define Model ---
class CharLSTM(nn.Module):
    """Character-level language model: embedding -> LSTM -> linear projection.

    Args:
        vocab_size: Number of distinct character ids; sets both the number of
            embedding rows and the number of output logits.
        embed_dim: Size of each character embedding vector.
        hidden_dim: Size of the LSTM hidden state.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden=None):
        """Compute next-character logits for every position of ``x``.

        Args:
            x: Integer id tensor of shape ``(batch, seq_len)`` (the LSTM is
                built with ``batch_first=True``).
            hidden: Optional ``(h, c)`` LSTM state from a previous call.

        Returns:
            ``(logits, hidden)`` where ``logits`` has shape
            ``(batch, seq_len, vocab_size)`` and ``hidden`` is the updated
            LSTM state.
        """
        embedded = self.embed(x)
        # nn.LSTM already treats hidden=None as "start from a zero state", so
        # one call covers both the fresh and the continued case.
        out, hidden = self.lstm(embedded, hidden)
        return self.fc(out), hidden

vocab_size = len(vocab)
# Choose the device once and reuse it for the model and for every batch, so
# the two can never end up on different devices.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CharLSTM(vocab_size, embed_dim=256, hidden_dim=512).to(device)

# --- 6. Training ---
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
criterion = nn.CrossEntropyLoss()

EPOCHS = 10
model.train()
for epoch in range(EPOCHS):
    total_loss = 0.0
    for x, y in data_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        # The hidden state is not carried between batches: windows are
        # shuffled, so each batch starts from the LSTM's default state.
        output, _ = model(x)
        # Flatten (batch, seq, vocab) logits and (batch, seq) targets so the
        # loss is averaged over every character position.
        loss = criterion(output.view(-1, vocab_size), y.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Mean per-batch loss for the epoch.
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {total_loss / len(data_loader):.4f}")

# --- 7. Text Generation ---
def generate_text(model, start_string, char2idx, idx2char, generation_length=500, temperature=1.0):
    """Sample text from a character model, one character at a time.

    Args:
        model: Module called as ``model(ids, hidden)`` that returns
            ``(logits, hidden)`` with logits of shape
            ``(batch, seq_len, vocab_size)``.
        start_string: Non-empty prompt; every character must be a key of
            ``char2idx``.
        char2idx: Mapping from character to integer id.
        idx2char: Mapping (or indexable sequence) from integer id to character.
        generation_length: Number of characters to sample after the prompt.
        temperature: Positive divisor applied to the logits before softmax;
            values below 1 sharpen the distribution, values above 1 flatten it.

    Returns:
        ``start_string`` followed by ``generation_length`` sampled characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
        KeyError: If ``start_string`` contains a character missing from
            ``char2idx``.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Run on whatever device the model's parameters live on instead of relying
    # on a module-level `device` variable.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    prompt_ids = [char2idx[c] for c in start_string]
    input_eval = torch.tensor(prompt_ids, dtype=torch.long, device=run_device).unsqueeze(0)
    hidden = None
    pieces = [start_string]

    # Sampling needs no gradients. Without no_grad the hidden state carried
    # from step to step would keep an ever-growing autograd graph alive.
    with torch.no_grad():
        for _ in range(generation_length):
            output, hidden = model(input_eval, hidden)
            # Only the last position predicts the next character.
            logits = output[:, -1, :] / temperature
            # squeeze(0) drops just the batch axis, so a one-symbol vocabulary
            # still yields a 1-D distribution.
            probabilities = F.softmax(logits, dim=-1).squeeze(0)
            next_char_idx = torch.multinomial(probabilities, 1).item()
            pieces.append(idx2char[next_char_idx])
            # Feed only the new character back in; context lives in `hidden`.
            input_eval = torch.tensor([[next_char_idx]], dtype=torch.long, device=run_device)

    return "".join(pieces)

# Sample lyrics from the trained model, seeded with a short prompt, and print
# them under a banner.
print("\n\nüéµ Generated Hindi Lyrics üéµ\n")
lyrics = generate_text(model, start_string="‡§§‡•Ç ", char2idx=char2idx, idx2char=idx2char)
print(lyrics)

# --- 8. Convert to Audio ---
# Pass the lyrics to gTTS with lang='hi', save the result as an MP3 in the
# working directory, and leave the Audio object as the cell's last expression
# so the notebook displays it.
tts = gTTS(text=lyrics, lang='hi')
tts.save("generated_song.mp3")
Audio("generated_song.mp3")

Epoch 1/10, Loss: 0.7107
Epoch 2/10, Loss: 0.1030
Epoch 3/10, Loss: 0.0937
Epoch 4/10, Loss: 0.0900
Epoch 5/10, Loss: 0.0878
Epoch 6/10, Loss: 0.0864
Epoch 7/10, Loss: 0.0850
Epoch 8/10, Loss: 0.0842
Epoch 9/10, Loss: 0.0830
Epoch 10/10, Loss: 0.0827


üéµ Generated Hindi Lyrics üéµ

‡§§‡•Ç 
‡§ú‡§¨ ‡§§‡§ï ‡§ú‡§π‡§æ‡§® ‡§Æ‡•á‡§Ç ‡§∏‡•Å‡§¨‡§π ‡§∂‡§æ‡§Æ ‡§π‡•à
‡§§‡§¨ ‡§§‡§ï ‡§Æ‡•á‡§∞‡•á ‡§®‡§æ‡§Æ ‡§§‡•Ç
‡§ú‡§¨ ‡§§‡§ï ‡§ú‡§π‡§æ‡§® ‡§Æ‡•á‡§Ç ‡§Æ‡•á‡§∞‡§æ ‡§®‡§æ‡§Æ ‡§π‡•à
‡§§‡§¨ ‡§§‡§ï ‡§Æ‡•á‡§∞‡•á ‡§®‡§æ‡§Æ ‡§§‡•Ç‡§â‡§≤‡§ù‡§® ‡§≠‡•Ä ‡§π‡•Ç‡§Å ‡§§‡•á‡§∞‡•Ä
‡§â‡§≤‡§ù‡§® ‡§ï‡§æ ‡§π‡§≤ ‡§≠‡•Ä ‡§π‡•Ç‡§Å ‡§Æ‡•à‡§Ç
‡§•‡•ã‡§°‡§º‡§æ ‡§∏‡§æ ‡§ú‡§ø‡§¶‡•ç‡§¶‡•Ä ‡§π‡•Ç‡§Å
‡§•‡•ã‡§°‡§º‡§æ ‡§™‡§æ‡§ó‡§≤ ‡§≠‡•Ä ‡§π‡•Ç‡§Å ‡§Æ‡•à‡§Ç‡§¨‡§∞‡§ñ‡§æ ‡§¨‡§ø‡§ú‡§≤‡•Ä ‡§¨‡§æ‡§¶‡§≤ ‡§ù‡•Ç‡§†‡•á
‡§ù‡•Ç‡§†‡•Ä ‡§´‡•Ç‡§≤‡•ã‡§Ç ‡§ï‡•Ä ‡§∏‡•å‡§ó‡§æ‡§§‡•á‡§Ç
‡§∏‡§ö‡•ç‡§ö‡•Ä ‡§§‡•Ç ‡§π‡•à ‡§∏‡§ö‡•ç‡§ö‡§æ ‡§Æ‡•à‡§Ç ‡§π‡•Ç‡§Å

‡§∏‡§ö‡•ç‡§ö‡•Ä ‡§Ö‡§™‡§®‡•á ‡§¶‡§ø‡§≤ ‡§ï‡•Ä ‡§¨‡§æ‡§§‡•á‡§Ç 
‡§¶‡§∏‡•ç‡§§‡§ñ‡§º‡§§ ‡§π‡§æ‡§•‡•ã‡§Ç

In [None]:
# Re-run generation with a different seed prompt. This cell relies on `model`,
# `generate_text`, `char2idx`, `idx2char`, `gTTS` and `Audio` from the
# training cell, so that cell must be run first.
print("\n\nüéµ Generated Hindi Lyrics üéµ\n")
# Change the prompt here to anything meaningful
# (generate_text looks every prompt character up in char2idx, so each one
# must be present in that mapping).
lyrics = generate_text(model, start_string="‡§ö‡§æ‡§Ç‡§¶‡§®‡•Ä ", char2idx=char2idx, idx2char=idx2char)
print(lyrics)

# --- 8. Convert to Audio ---
# Writes to the same "generated_song.mp3" path as the previous cell, replacing
# that file; the Audio object is the last expression so it is displayed.
tts = gTTS(text=lyrics, lang='hi')
tts.save("generated_song.mp3")
Audio("generated_song.mp3")




üéµ Generated Hindi Lyrics üéµ

‡§ö‡§æ‡§Ç‡§¶‡§®‡•Ä ‡§Æ‡•á‡§Ç ‡§Æ‡§æ‡§∞‡•Ç‡§Å ‡§§‡•ã ‡§õ‡•ã‡§∞‡•Ä ‡§™‡§ü ‡§ú‡§æ‡§è
‡§õ‡•ã‡§∞‡•Ä ‡§™‡§ü ‡§ú‡§æ‡§è‡§≠‡§Ç‡§ï‡§∏‡§¶‡§ø‡§≤ ‡§Æ‡•á‡§Ç ‡§§‡•á‡§∞‡•á ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à ‡§Æ‡•Å‡§ù‡§ï‡•ã ‡§¨‡§§‡§æ ‡§¶‡•á
‡§§‡•á‡§∞‡•á ‡§Æ‡•á‡§∞‡•á ‡§¨‡•Ä‡§ö ‡§ï‡§æ ‡§Ø‡•á ‡§™‡§∞‡•ç‡§¶‡§æ ‡§π‡§ü‡§æ ‡§¶‡•á‡§Ü‡§ú‡§æ ‡§Æ‡•á‡§∞‡•Ä ‡§∞‡§æ‡§®‡•Ä ‡§§‡•Å‡§ù‡•á ‡§ó‡§≤‡•á ‡§∏‡•á ‡§≤‡§ó‡§æ‡§ä‡§Ç
‡§¨‡•ç‡§Ø‡§æ‡§ï‡•Å‡§≤ ‡§ú‡§ø‡§Ø‡§æ ‡§Æ‡•á‡§∞‡§æ ‡§¨‡§∏ ‡§Ø‡§π‡•Ä ‡§ö‡§æ‡§π‡•á‡§Ç‡§§‡•á‡§∞‡•á ‡§™‡§æ‡§∏ ‡§§‡•ã
‡§ú‡•ã ‡§§‡•á‡§∞‡•á ‡§™‡•ç‡§Ø‡§æ‡§∞ ‡§ï‡§ø‡§§‡§®‡§æ   
# ‡§¨‡§æ‡§∞‡§ø‡§∂ 
# ‡§π‡§Æ‡§®‡•á ‡§∞‡•á‡§§ ‡§™‡•á   
# ‡§ó‡§π‡§∞‡§æ ‡§π‡§≤‡•ç‡§ï‡§æ  
# ‡§π‡§∂‡•ç‡§∞ ‡§∏‡•á ‡§™‡§π‡§≤‡•á   ‡§§‡•ã ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•Å‡§Ü ‡§ú‡•Å‡§¶‡§æ ‡§π‡•Å‡§è
‡§Æ‡§ó‡§∞ ‡§π‡•à ‡§ñ‡•Å‡§∂‡•Ä ‡§Æ‡§ø‡§≤‡•á ‡§§‡•ã ‡§•‡•á
‡§§‡•ã ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•Å‡§Ü ‡§Æ‡•Å‡•ú‡•á ‡§∞‡§æ‡§∏‡•ç‡§§‡•á
‡§ï‡•Å‡§õ ‡§¶‡•Ç‡§∞ ‡§∏‡§Ç‡§ó ‡§ö‡§≤‡•á ‡§§‡•ã ‡§•‡•á‡§¶‡•ã‡§¨‡§æ‡§∞‡§æ ‡§Æ‡§ø‡§≤‡•á‡§Ç‡§ó‡•á ‡§ï‡§ø‡§∏‡•Ä ‡§Æ‡•ã‡•ú ‡§™‡•á
‡§ú‡•