In [65]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import json
from collections import Counter
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from torch.optim import Adam
from torch.nn.functional import softmax
from torch import multinomial, tensor
import numpy as np

# Device selection: use CUDA when PyTorch reports it as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [66]:
class LyricsDataset(Dataset):
    """Character-level dataset of (input, target) windows over a text corpus.

    Every item is a pair of index tensors of length ``seq_length``; the target
    window is the input window shifted one position to the right.

    Args:
        text: The corpus; it is iterated character by character.
        seq_length: Number of characters in each input/target window.
    """

    def __init__(self, text, seq_length=50):
        # The vocabulary is every distinct character plus the '<unk>' placeholder.
        self.chars = sorted(set(text) | {'<unk>'})
        self.char_to_int = {ch: i for i, ch in enumerate(self.chars)}
        self.int_to_char = {i: ch for i, ch in enumerate(self.chars)}
        unk_id = self.char_to_int['<unk>']
        # Encode the corpus once; characters missing from the map fall back to '<unk>'.
        self.data = [self.char_to_int.get(ch, unk_id) for ch in text]
        self.seq_length = seq_length

    def __len__(self):
        # Clamp at zero: a corpus shorter than one window has no samples, and a
        # negative value would make len() raise ValueError.
        return max(0, len(self.data) - self.seq_length)

    def __getitem__(self, index):
        """Return the (input, target) tensors for the window starting at ``index``.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if index < 0:
            index += size
        # Slicing never fails on its own, so without this check plain iteration
        # over the dataset would never stop and would yield truncated windows.
        if not 0 <= index < size:
            raise IndexError("LyricsDataset index out of range")
        stop = index + self.seq_length
        return (
            torch.tensor(self.data[index:stop], dtype=torch.long),
            torch.tensor(self.data[index + 1:stop + 1], dtype=torch.long),
        )

# LSTM Model
class LSTMModel(nn.Module):
    """Embedding -> multi-layer LSTM -> linear projection onto the vocabulary.

    Args:
        vocab_size: Number of embedding rows and of output logits per step.
        embedding_dim: Size of each embedding vector.
        hidden_dim: Hidden size of the LSTM.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embedding_dim=256, hidden_dim=512, num_layers=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # batch_first=True: inputs and outputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim

    def forward(self, x, states=None):
        """Run one forward pass.

        Args:
            x: Tensor of token indices fed to the embedding layer.
            states: ``(h, c)`` tuple for the LSTM; ``None`` lets ``nn.LSTM``
                start from zero states.

        Returns:
            ``(logits, states)``: per-step logits from the linear layer and the
            LSTM states after the last step.
        """
        embedded = self.embedding(x)
        lstm_out, states = self.lstm(embedded, states)
        return self.fc(lstm_out), states

    def init_states(self, batch_size):
        """Return zero-filled ``(h, c)`` states for ``batch_size`` sequences.

        The states are allocated from the model's own weights, so they always
        share the model's device and dtype instead of depending on a
        notebook-level ``device`` variable.
        """
        reference = self.fc.weight
        shape = (self.num_layers, batch_size, self.hidden_dim)
        return (reference.new_zeros(shape), reference.new_zeros(shape))

In [67]:
def load__data(filepath):
    """Read a parquet file and return its 'Lyrics_clean' column as one string.

    Args:
        filepath: Location of the parquet file, passed to ``pd.read_parquet``.

    Returns:
        The entries of the 'Lyrics_clean' column concatenated with a single
        space between them.
    """
    lyrics_frame = pd.read_parquet(filepath)
    lyrics_column = lyrics_frame['Lyrics_clean']
    corpus = lyrics_column.str.cat(sep=' ')
    return corpus

In [68]:
def train_model(model, dataloader, epochs=10, lr=0.001):
    """Train ``model`` with Adam and cross-entropy loss, printing the mean loss per epoch.

    Args:
        model: Module exposing ``init_states(batch_size)`` and a forward call
            ``model(inputs, states)`` that returns ``(logits, states)``.
        dataloader: Sized iterable of ``(inputs, targets)`` tensor batches.
        epochs: Number of passes over ``dataloader``.
        lr: Learning rate handed to Adam.

    Returns:
        A list with the mean batch loss of every epoch (``nan`` for an epoch
        over an empty dataloader).
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Move batches to wherever the model's weights live, rather than relying on
    # a notebook-level ``device`` variable being defined and in sync.
    model_device = next(model.parameters()).device
    num_batches = len(dataloader)
    epoch_losses = []
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(model_device), targets.to(model_device)
            optimizer.zero_grad()
            # Fresh states for every batch: windows are treated as independent.
            states = model.init_states(inputs.size(0))
            outputs, _ = model(inputs, states)
            # CrossEntropyLoss wants the class dimension second, so swap the
            # last two axes of the logits before comparing with the targets.
            loss = criterion(outputs.transpose(1, 2), targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Guard against an empty dataloader instead of dividing by zero.
        mean_loss = total_loss / num_batches if num_batches else float('nan')
        epoch_losses.append(mean_loss)
        print(f'Epoch {epoch+1}, Loss: {mean_loss:.4f}')
    return epoch_losses

In [79]:
def generate_lyrics2(model, start_str, int_to_char, char_to_int, total_length, diversity=0.8):
    """Sample ``total_length`` characters from ``model``, seeded with ``start_str``.

    Args:
        model: Module exposing ``init_states(batch_size)`` and a forward call
            ``model(input_tensor, states)`` that returns ``(logits, states)``.
        start_str: Seed text. It is lower-cased for the lookup only; the
            returned text keeps the seed as typed.
        int_to_char: Mapping from sampled index to the string to append.
        char_to_int: Mapping from character to index; must contain '<unk>',
            which is used for seed characters that are not in the mapping.
        total_length: Number of sampling steps to run.
        diversity: Temperature; logits are divided by it before the softmax.

    Returns:
        ``start_str`` followed by the sampled characters.
    """
    model.eval()
    unk_id = char_to_int['<unk>']
    seed_ids = [char_to_int.get(char, unk_id) for char in start_str.lower()]
    states = model.init_states(1)
    # Put the inputs on the same device as the recurrent states the model hands out.
    target_device = states[0].device
    input_tensor = torch.tensor([seed_ids], dtype=torch.long, device=target_device)
    pieces = [start_str]

    # Sampling needs no gradients. Without no_grad the autograd graph would keep
    # growing through the carried-over states at every step.
    with torch.no_grad():
        for _ in range(total_length):
            output, states = model(input_tensor, states)
            # Only the logits of the last time step decide the next character.
            probabilities = softmax(output[:, -1, :] / diversity, dim=1)
            char_id = multinomial(probabilities, 1).item()
            pieces.append(int_to_char[char_id])
            # After the seed, feed back only the character that was just sampled.
            input_tensor = torch.tensor([[char_id]], dtype=torch.long, device=target_device)

    return ''.join(pieces)

In [75]:
def generate_lyrics(model, start_str, int_to_char, char_to_int, total_length, diversity=0.8):
    """Generate text character by character, starting from ``start_str``.

    Args:
        model: Module exposing ``init_states(batch_size)`` and a forward call
            ``model(input_tensor, states)`` that returns ``(logits, states)``.
        start_str: Seed text. It is lower-cased for the index lookup only; the
            returned text keeps the seed as typed.
        int_to_char: Mapping from sampled index to the string to append.
        char_to_int: Mapping from character to index; must contain '<unk>',
            which stands in for seed characters missing from the mapping.
        total_length: Number of characters to sample.
        diversity: Temperature; logits are divided by it before the softmax.

    Returns:
        ``start_str`` followed by ``total_length`` sampled characters.
    """
    model.eval()
    unk_id = char_to_int['<unk>']
    input_indices = [char_to_int.get(char, unk_id) for char in start_str.lower()]
    states = model.init_states(1)
    # Keep the inputs on the device of the states returned by the model.
    run_device = states[0].device
    input_tensor = torch.tensor([input_indices], dtype=torch.long, device=run_device)
    text = start_str

    # Inference only: disabling autograd stops the graph from accumulating
    # through the recurrent states over the whole generation.
    with torch.no_grad():
        for _ in range(total_length):
            output, states = model(input_tensor, states)
            last_step_logits = output[:, -1, :] / diversity
            # ``softmax`` is the name imported from torch.nn.functional; the
            # previous ``F.softmax`` referred to an alias that is never imported.
            probabilities = softmax(last_step_logits, dim=1)
            char_id = multinomial(probabilities, 1).item()
            text += int_to_char[char_id]
            # The next step sees only the character that was just sampled.
            input_tensor = torch.tensor([[char_id]], dtype=torch.long, device=run_device)

    return text

In [80]:
def generate_and_distribute_lyrics(model, start_str, int_to_char, char_to_int, total_length, diversities, names=None):
    """Generate lyrics at several diversities and split each result between singers.

    For every diversity the text produced by ``generate_lyrics2`` is split on
    whitespace into ``len(names)`` consecutive parts of equal word count; any
    words left over after the even split are appended to the last part.

    Args:
        model, start_str, int_to_char, char_to_int, total_length: Passed
            through unchanged to ``generate_lyrics2``.
        diversities: Iterable of diversity values, one generation per value.
        names: Labels for the parts, in order. Defaults to
            ``["Harry", "Louis", "Zayn", "Niall", "Liam"]``.

    Returns:
        Dict mapping each diversity to the formatted text, made of one
        ``"<name>:\\n<words>\\n\\n"`` section per name.

    Raises:
        ValueError: If ``names`` is given but empty.
    """
    if names is None:
        names = ["Harry", "Louis", "Zayn", "Niall", "Liam"]
    if not names:
        raise ValueError("names must contain at least one entry")

    lyrics_with_names = {}
    for diversity in diversities:
        extended_lyrics = generate_lyrics2(model, start_str, int_to_char, char_to_int, total_length, diversity)
        words = extended_lyrics.split()
        part_length = len(words) // len(names)
        parts = [words[i * part_length:(i + 1) * part_length] for i in range(len(names))]
        # Give the last part only the words the even split did not cover. The
        # earlier slice also re-appended words that part already held,
        # duplicating lyrics in the final section.
        parts[-1].extend(words[part_length * len(names):])
        lyrics_with_names[diversity] = "".join(
            f"{name}:\n{' '.join(part)}\n\n" for name, part in zip(names, parts)
        )

    return lyrics_with_names


In [71]:

    
# Where the cleaned lyrics are read from and where the trained weights are written.
lyrics_path = "C:\\Users\\Ananya\\anaconda3\\Dissertation - UL\\Music-Generation-with-AI-1\\Project\\data\\One_Direction_cleaned_lyrics.parquet"
model_save_path = "C:\\Users\\Ananya\\anaconda3\\Dissertation - UL\\Music-Generation-with-AI-1\\Project\\lstm_model_lyrics_generation.pth"

# Build the character-level dataset and a shuffled mini-batch loader over it.
text = load__data(lyrics_path)
dataset = LyricsDataset(text)
dataloader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)

# NOTE(review): the model gets one more output class than the dataset has
# vocabulary entries; that extra index has no key in dataset.int_to_char.
# Confirm whether the "+ 1" is intentional before changing it, since it
# determines the shapes stored in the saved checkpoint.
vocab_size = len(dataset.char_to_int) + 1
model = LSTMModel(vocab_size)
model = model.to(device)
train_model(model, dataloader, epochs=10, lr=0.001)

# Only the parameters (state_dict) are saved, not the module object itself.
torch.save(model.state_dict(), model_save_path)
print("Model saved successfully.")

    

Epoch 1, Loss: 0.7130
Epoch 2, Loss: 0.2572
Epoch 3, Loss: 0.2344
Epoch 4, Loss: 0.2250
Epoch 5, Loss: 0.2195
Epoch 6, Loss: 0.2155
Epoch 7, Loss: 0.2126
Epoch 8, Loss: 0.2102
Epoch 9, Loss: 0.2084
Epoch 10, Loss: 0.2069
Model saved successfully.


In [85]:
def save_lyrics(div, lyrics, filename="generated_lyrics.txt"):
    """Write ``lyrics`` to a file whose name includes the diversity value.

    The diversity is inserted before the file extension (for example
    ``generated_lyrics_diversity_0.5.txt``), so saving several diversities in a
    row no longer overwrites the same file each time.

    Args:
        div: Diversity the lyrics were generated with; becomes part of the
            output file name.
        lyrics: Text to write.
        filename: Base file name or path that the output name is derived from.

    Returns:
        The path of the file that was written.
    """
    import os  # local import: os is not among the notebook's top-level imports

    stem, extension = os.path.splitext(filename)
    target = f"{stem}_diversity_{div}{extension}"
    # Explicit UTF-8 keeps non-ASCII characters in the lyrics from failing under
    # a platform-specific default encoding.
    with open(target, 'w', encoding='utf-8') as file:
        file.write(lyrics)
    return target


In [86]:
# Ask for the seed text, then generate one lyric sheet per diversity setting.
user_input = input("Enter the starting words of the song: ")
diversities = [0.04, 0.1, 0.2, 0.5, 1.0, 1.2, 1.5]
final_lyrics = generate_and_distribute_lyrics(
    model,
    user_input,
    dataset.int_to_char,
    dataset.char_to_int,
    500,
    diversities,
)

# Save and display every generated variant, in the order it was produced.
for div in final_lyrics:
    lyrics = final_lyrics[div]
    save_lyrics(div, lyrics)
    print(f"Diversity {div}:\n{lyrics}\n")

Diversity 0.04:
Harry:
saw you in a blue dress and it hurts 'cause i know you won't be mine tonight no one ever makes me

Louis:
feel like you do when you smile baby tell me how to make it right girl i don't wanna say goodbye you

Zayn:
should probably stay be with me a couple more days come on let me change your ticket home come on let me

Niall:
change your ticket home come on let me change your ticket home come on let me change your ticket home come on

Liam:
let me change your ticket home come on let me change your ticket home come on let me change your ticket home



Diversity 0.1:
Harry:
saw you in a blue dress and it hurts 'cause i know you won't be mine tonight no one ever makes me

Louis:
feel like you do when you smile baby tell me how to make it right girl i don't wanna say goodbye you

Zayn:
should probably stay be with me a couple more days come on let me change your ticket home come on let me

Niall:
change your ticket home come on let me change your ticket home come on l

In [None]:
def save_lyrics(lyrics, filename="generated_lyrics.txt"):
    """Write ``lyrics`` to ``filename``, replacing any existing content.

    Args:
        lyrics: Text to write.
        filename: Destination path; the file is opened in write mode.
    """
    # NOTE(review): this two-argument definition reuses the name of the earlier
    # save_lyrics(div, lyrics, filename) cell and replaces it once this cell
    # runs; confirm which signature callers are meant to use.
    # Explicit UTF-8 avoids encode errors for non-ASCII lyrics under a
    # platform-specific default encoding.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(lyrics)
