<a href="https://colab.research.google.com/github/BSteiner1/Music-Gen/blob/main/2d_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pickle
import numpy as np
import random
import math
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F

In [2]:
from google.colab import drive

In [3]:
drive.mount('/content/drive')
path = '/content/drive/MyDrive/jsb-chorales-quarter.pkl'

Mounted at /content/drive


In [4]:
with open(path, 'rb') as p:
    data = pickle.load(p, encoding="latin1")

In [5]:
data = data['train'] + data['test'] + data['valid']

In [6]:
len(data)

382

In [7]:
data[55] = data[55][2:]

In [8]:
def fill_rests(data):

  filled_phrases = []

  for phrase in data:
    for i in range(len(phrase)):
      if len(phrase[i]) == 3:
        fill_note = random.randint(65,95)
        phrase[i] = phrase[i] + (fill_note,)

    filled_phrases.append(phrase)

  return filled_phrases

In [9]:
l = fill_rests(data)

In [16]:
lengths = []
for i in range(len(data)):
  lengths.append(len(data[i]))

max_length = max(lengths)
max(lengths)

160

In [17]:
def phrases_to_arrays(data):

  array_data = []

  for phrase in data:
    transposed_phrase = np.array(phrase).T
    array_data.append(transposed_phrase)

  return array_data

In [18]:
array_data = phrases_to_arrays(data)

  transposed_phrase = np.array(phrase).T


In [19]:
array_data[-1]

array([[60, 60, 62, 62, 63, 62, 60, 59, 60, 62, 63, 60, 56, 53, 58, 58,
        51, 51, 51, 51, 51, 53, 55, 56, 58, 58, 57, 57, 58, 58, 56, 56,
        55, 55, 55, 55, 48, 58, 56, 53, 48, 48, 48, 48],
       [63, 63, 65, 65, 67, 65, 63, 62, 63, 65, 65, 63, 63, 63, 62, 62,
        63, 63, 63, 63, 63, 63, 63, 63, 62, 65, 63, 72, 70, 70, 65, 65,
        67, 67, 65, 65, 64, 64, 65, 65, 65, 65, 64, 64],
       [72, 72, 70, 70, 70, 71, 72, 74, 72, 70, 70, 72, 72, 72, 70, 70,
        70, 70, 70, 70, 70, 70, 70, 70, 70, 74, 72, 75, 74, 74, 72, 72,
        72, 72, 72, 71, 67, 67, 68, 68, 68, 68, 67, 67],
       [79, 79, 82, 82, 79, 79, 79, 79, 80, 80, 79, 79, 77, 77, 77, 77,
        75, 75, 75, 75, 79, 79, 82, 82, 77, 77, 77, 77, 77, 77, 66, 74,
        75, 75, 74, 74, 72, 72, 72, 72, 72, 72, 72, 72]])

In [22]:
def melody_and_bass(array_data):

  two_part_phrases = []

  for phrase in array_data:
    if len(phrase) == 4:
      melody = phrase[3]
      bass = phrase[0]
      new_phrase = np.vstack((bass, melody))
      two_part_phrases.append(new_phrase)

  return two_part_phrases

In [23]:
two_parts = melody_and_bass(array_data)

In [24]:
def greatest_multiple(melodies):

  sliced_melodies = []

  for melody in melodies:
    seq_length = len(melody[0])
    remainder = seq_length % 4
    max_length = seq_length - remainder
    sliced_melody = melody[:max_length]
    sliced_melodies.append(sliced_melody)

  return sliced_melodies


In [25]:
sliced_melodies = greatest_multiple(two_parts)

In [26]:
def all_transpositions(sliced_melodies):

  all_transposed = []
  count = 0

  for melody in sliced_melodies:
    if melody != ():
      diff = melody[0][0] - 60
      melody = melody - diff
      for i in range(12):
        transposed = melody + i
        all_transposed.append(transposed)

  return all_transposed

In [27]:
transposed = all_transpositions(sliced_melodies)

  if melody != ():


In [66]:
def get_seq_and_label(transposed_data, input_length, output_length):

  input_data = []
  labels = []

  for melody in sliced_melodies:
    #print(melody)

    # Slide the 16-note window every 4 notes
    for i in range(0, len(melody[0]) - (input_length + output_length -1), 4): # input length + output length - 1
        input_segment = melody[:, i: i+input_length] # input length
        label_segment = melody[:, i+input_length: i+ (input_length+output_length)] # input, input + output

        input_data.append(input_segment)
        labels.append(label_segment)

  return input_data, labels

In [93]:
input_length, output_length = 16, 8

In [94]:
sequences, labels = get_seq_and_label(transposed, input_length, output_length)

In [95]:
len(sequences)

3386

In [96]:
# Convert input data and labels to NumPy arrays
seq_arr = np.array(sequences)
labels_arr = np.array(labels)

# Convert NumPy arrays to PyTorch tensors
seq_tensors = torch.from_numpy(seq_arr)
labels_tensors = torch.from_numpy(labels_arr)

# Create a list of tuples, each containing the input and label tensors
tensor_tuples = [(seq_tensors[i], labels_tensors[i]) for i in range(len(seq_tensors))]

In [97]:
tensor_tuples[0]

(tensor([[60, 72, 67, 69, 67, 65, 65, 58, 69, 68, 69, 66, 67, 60, 60, 60],
         [88, 68, 84, 76, 88, 89, 89, 86, 86, 86, 84, 84, 83, 84, 84, 84]]),
 tensor([[67, 66, 62, 67, 64, 69, 69, 69],
         [86, 86, 86, 86, 88, 84, 84, 84]]))

In [83]:
# Define a custom dataset
class MusicDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Create an instance of your custom dataset
music_dataset = MusicDataset(tensor_tuples)

# Create a DataLoader to iterate through your data in batches
batch_size = 256
dataloader = DataLoader(music_dataset, batch_size=batch_size, shuffle=True)

In [98]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, temp):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.temperature = temp

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout = 0.9)
        self.fc1 = nn.Linear(hidden_size, 2*output_size)
        self.fc2 = nn.Linear(100, 40)
        self.fc3 = nn.Linear(40, 2*output_size)
        self.fc4 = nn.Linear(120, 2*output_size)
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()

    def forward(self, x, output_size):
        batch_size, seq_length, input_features = x.size()
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))

        # Apply FC layers
        out = self.fc1(out[:, -1, :])  # First FC layer
        #out = self.dropout(out)
        #out = self.relu(out)
        #out = self.fc2(out)
        #out = self.dropout(out)
        #out = self.relu(out)
        #out = self.fc3(out)
        #out = self.dropout(out)
        #out = self.relu(out)
        #out = self.fc4(out)
        #out = self.dropout(out)
        #out = self.sigmoid(out)

        # Reshape the output to match the desired shape
        out_size = (batch_size, 2, output_size)
        out = out.view(out_size)

        # Adjust logits with temperature
        adjusted_logits = out / 3

        return adjusted_logits * 128

In [101]:
# Define model and training parameters
input_size = input_length
output_size = output_length

hidden_size = 30
num_layers = 3
learning_rate = 0.0002
num_epochs = 500
temperature = 0.8

In [102]:
model = LSTMModel(input_size, hidden_size, output_size, num_layers, temperature)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = StepLR(optimizer, step_size=4, gamma=0.95)

# Training loop
for epoch in range(num_epochs):
    for batch in dataloader:
        inputs, labels = batch
        optimizer.zero_grad()
        outputs = model(inputs.float(), output_size)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()

    scheduler.step()  # Update the learning rate

    # Print the current learning rate (optional)
    current_lr = np.round(optimizer.param_groups[0]['lr'], 5)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}, Learning Rate: {current_lr}')

    random_int = random.randint(0, len(sequences)-1)
    input_sequence = torch.tensor([sequences[random_int]], dtype=torch.float32)

    # Generate a prediction for the single example
    with torch.no_grad():
        prediction = model(input_sequence, output_size)

    predicted_notes = prediction.squeeze().tolist()
    bass_prediction = predicted_notes[0]
    melody_prediction = predicted_notes[1]

    int_bass_pred = [int(round(note)) for note in bass_prediction]
    int_melody_pred = [int(round(note)) for note in melody_prediction]

    melody_input = [int(note) for note in input_sequence.tolist()[0][1]]
    bass_input = [int(note) for note in input_sequence.tolist()[0][0]]

    full_melody = melody_input + int_melody_pred
    full_bass = bass_input + int_bass_pred
    print(full_melody)
    print(full_bass)

Epoch [1/500], Loss: 5412.568359375, Learning Rate: 0.0002
[93, 91, 91, 89, 89, 88, 84, 86, 86, 88, 89, 91, 88, 86, 86, 84, 8, -5, -6, 4, 3, -1, 6, -1]
[65, 65, 65, 62, 62, 60, 72, 71, 67, 72, 69, 67, 69, 65, 67, 60, 0, -8, -5, 0, 0, 3, 9, 4]
Epoch [2/500], Loss: 5425.3525390625, Learning Rate: 0.0002
[80, 79, 79, 79, 79, 77, 79, 82, 82, 83, 80, 80, 79, 94, 89, 77, 8, -3, -6, 4, 3, 0, 6, -1]
[58, 51, 51, 51, 60, 72, 70, 67, 56, 60, 60, 63, 67, 70, 70, 58, 1, -8, -4, -1, 1, 4, 9, 5]
Epoch [3/500], Loss: 5305.7177734375, Learning Rate: 0.0002
[81, 79, 77, 76, 68, 81, 81, 79, 84, 83, 81, 79, 79, 84, 79, 81, 8, 2, -10, 6, 4, 1, 9, -3]
[65, 60, 62, 64, 69, 65, 62, 59, 59, 55, 60, 55, 60, 57, 60, 53, 6, -8, -4, -4, 5, 7, 11, 8]
Epoch [4/500], Loss: 5189.5810546875, Learning Rate: 0.00019
[84, 87, 86, 86, 84, 84, 84, 84, 79, 79, 80, 79, 77, 77, 75, 75, 10, -4, -5, 6, 1, 2, 7, 1]
[63, 60, 67, 55, 60, 60, 60, 60, 60, 63, 56, 60, 57, 58, 51, 51, 3, -7, -4, 0, 2, 6, 10, 7]
Epoch [5/500], Loss: 51

In [None]:
random_int = random.randint(0, len(sequences))
input_sequence = torch.tensor([sequences[random_int]], dtype=torch.float32)

In [None]:
# Generate a prediction for the single example
with torch.no_grad():
    prediction = model(input_sequence, output_size)

predicted_notes = prediction.squeeze().tolist()
bass_prediction = predicted_notes[0]
melody_prediction = predicted_notes[1]

In [None]:
int_bass_pred = [int(round(note)) for note in bass_prediction]
int_melody_pred = [int(round(note)) for note in melody_prediction]

melody_input = [int(note) for note in input_sequence.tolist()[0][1]]
bass_input = [int(note) for note in input_sequence.tolist()[0][0]]

In [None]:
full_melody = melody_input + int_melody_pred
full_bass = bass_input + int_bass_pred
print(full_melody)
print(full_bass)

[91, 86, 86, 88, 90, 91, 91, 91, 91, 93, 91, 86, 89, 88, 86, 84, 84, 84, 84, 86, 89, 88, 91, 91, 86, 85, 87, 85, 86, 91, 88, 88]
[64, 67, 67, 72, 69, 67, 67, 67, 72, 65, 64, 67, 62, 64, 67, 60, 60, 60, 72, 71, 69, 72, 60, 64, 67, 66, 66, 64, 64, 67, 66, 64]


In [None]:
gen_sample = np.array([full_melody,
                       full_melody,
                       full_bass,
                       full_bass
                       ])

In [None]:
gen_sample

array([[91, 86, 86, 88, 90, 91, 91, 91, 91, 93, 91, 86, 89, 88, 86, 84,
        84, 84, 84, 86, 89, 88, 91, 91, 86, 85, 87, 85, 86, 91, 88, 88],
       [91, 86, 86, 88, 90, 91, 91, 91, 91, 93, 91, 86, 89, 88, 86, 84,
        84, 84, 84, 86, 89, 88, 91, 91, 86, 85, 87, 85, 86, 91, 88, 88],
       [64, 67, 67, 72, 69, 67, 67, 67, 72, 65, 64, 67, 62, 64, 67, 60,
        60, 60, 72, 71, 69, 72, 60, 64, 67, 66, 66, 64, 64, 67, 66, 64],
       [64, 67, 67, 72, 69, 67, 67, 67, 72, 65, 64, 67, 62, 64, 67, 60,
        60, 60, 72, 71, 69, 72, 60, 64, 67, 66, 66, 64, 64, 67, 66, 64]])