RNN for music generation

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [5]:
torch.manual_seed(72)

<torch._C.Generator at 0x217f298ead0>

In [8]:
class MusicRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(MusicRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
    
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first = True)
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, x, hidden):
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out)
        return out, hidden
    
    def init_hidden(self, batch_size):
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size),
                torch.zeros(self.num_layers, batch_size, self.hidden_size))

In [9]:
input_size = 8 
hidden_size = 72
output_size = 8
num_layers = 4
sequence_length = 16
batch_size = 32
num_epochs = 100
learning_rate = 0.01

In [10]:
def create_dummy_data(num_sequences):
    data = []
    for _ in range(num_sequences):
        seq = np.random.randint(0, input_size, size = sequence_length)
        data.append(seq)
    return np.array(data)

In [18]:
train_data = create_dummy_data(100)
train_data = torch.LongTensor(train_data)

In [12]:
model = MusicRNN(input_size, hidden_size, output_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [19]:
for epoch in range(num_epochs):
    total_loss = 0
    hidden = model.init_hidden(batch_size)
    for i in range(0, len(train_data) - batch_size, batch_size):
        batch = train_data[i : i + batch_size]
        inputs = nn.functional.one_hot(batch[:, : - 1], num_classes = input_size).float()
        targets = batch[:, 1:]
        
        optimizer.zero_grad()
        outputs, hidden = model(inputs, hidden)
        
        hidden = tuple(h.detach() for h in hidden)
        
        outputs = outputs.reshape(-1, input_size)
        targets = targets.reshape(-1)
        
        loss = criterion(outputs.view(-1, input_size), targets.view(-1))
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
    if (epoch + 1) % 10 == 0:
        avg_loss = total_loss / (len(train_data) // batch_size)
        print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}')

Epoch [10/100], Average Loss: 2.0781
Epoch [20/100], Average Loss: 2.0781
Epoch [30/100], Average Loss: 2.0781
Epoch [40/100], Average Loss: 2.0781
Epoch [50/100], Average Loss: 2.0781
Epoch [60/100], Average Loss: 2.0781
Epoch [70/100], Average Loss: 2.0781
Epoch [80/100], Average Loss: 2.0781
Epoch [90/100], Average Loss: 2.0781
Epoch [100/100], Average Loss: 2.0779


In [20]:
def generated_music(model, seed, length):
    with torch.no_grad():
        current_seq = torch.LongTensor(seed).unsqueeze(0)
        hidden = model.init_hidden(1)
        generated = list(seed)
        
        for _ in range(length):
            input_seq = nn.functional.one_hot(current_seq, num_classes=input_size).float()
            output, hidden = model(input_seq, hidden)
            probabilities = nn.functional.softmax(output[0, -1], dim=0)
            next_note = torch.multinomial(probabilities, 1).item()
            generated.append(next_note)
            current_seq = torch.LongTensor([next_note]).unsqueeze(0)
    
    return generated

seed = [0, 1, 2, 3]
music = generated_music(model, seed, 50)
print("Generated melody:", music)

Generated melody: [0, 1, 2, 3, 5, 0, 5, 5, 3, 1, 6, 4, 6, 3, 3, 5, 7, 0, 7, 0, 7, 6, 0, 2, 2, 7, 6, 1, 5, 2, 7, 7, 7, 6, 6, 5, 6, 2, 4, 2, 3, 7, 2, 5, 6, 0, 1, 3, 6, 6, 1, 1, 3, 0]
