<a href="https://colab.research.google.com/github/BSteiner1/Music-Gen/blob/main/JSB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pickle
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [2]:
from google.colab import drive

In [3]:
drive.mount('/content/drive')
path = '/content/drive/MyDrive/jsb-chorales-quarter.pkl'

Mounted at /content/drive


In [4]:
with open(path, 'rb') as p:
    data = pickle.load(p, encoding="latin1")

In [5]:
data = data['train'] + data['test'] + data['valid']

In [6]:
len(data)

382

In [7]:
def fill_rests(data):

  filled_phrases = []

  for phrase in data:
    for i in range(len(phrase)):
      if len(phrase[i]) == 3:
        fill_note = random.randint(65,95)
        phrase[i] = phrase[i] + (fill_note,)

    filled_phrases.append(phrase)

  return filled_phrases

In [8]:
l = fill_rests(data)

In [9]:
np.array(l[22]).T

array([[72, 71, 67, 72, 76, 65, 67, 60, 67, 72, 71, 74, 62, 67, 67, 67,
        67, 64, 67, 71, 72, 64, 65, 67, 64, 69, 72, 72, 69, 69, 67, 64,
        73, 74, 69, 65, 67, 60, 60, 60, 60],
       [76, 74, 79, 76, 79, 77, 74, 72, 71, 79, 74, 81, 74, 74, 74, 74,
        74, 76, 74, 74, 79, 72, 72, 79, 71, 72, 76, 77, 77, 76, 69, 71,
        76, 77, 81, 81, 79, 76, 76, 76, 76],
       [79, 79, 83, 84, 84, 84, 83, 79, 79, 88, 83, 91, 83, 83, 83, 83,
        83, 84, 83, 79, 88, 76, 81, 83, 88, 84, 84, 81, 83, 81, 84, 88,
        81, 81, 84, 84, 83, 79, 79, 79, 79],
       [84, 91, 91, 91, 91, 93, 91, 88, 86, 76, 91, 75, 90, 91, 91, 91,
        91, 91, 91, 91, 69, 91, 89, 86, 91, 89, 88, 86, 86, 84, 88, 91,
        91, 89, 88, 86, 86, 84, 84, 84, 84]])

In [10]:
lengths = []
for i in range(len(data)):
  lengths.append(len(data[i]))

max_length = max(lengths)
max(lengths)

160

In [11]:
def pad_phrases(data_train):

  padded_phrases = []

  for phrase in data_train:
    phrase_length = len(phrase)
    padding_length = max_length - phrase_length

    for _ in range(padding_length):
      phrase.append((0,0,0,0))

    padded_phrases.append(phrase)

  return padded_phrases

In [12]:
padded_phrases = pad_phrases(l)

In [13]:
np.array(padded_phrases[32]).shape

(160, 4)

In [34]:
def four_part_phrases(padded_phrases):

  four_parts = []

  for phrase in padded_phrases:
      phrase_array = np.array(phrase, dtype=object)
      if phrase_array.shape == (160,4):
        transposed_phrase = phrase_array.T
        #sliced_array = transposed_phrase[:, :25]
        four_parts.append(transposed_phrase)


  # Stack the list of arrays into a single NumPy array
  array_data = np.stack(four_parts)

  return four_parts

In [35]:
arr = four_part_phrases(padded_phrases)

In [36]:
len(arr)

350

In [37]:
tensor_data = torch.tensor(arr, dtype=torch.float32)

In [38]:
tensor_data[0].shape

torch.Size([4, 160])

In [39]:
len(tensor_data)

350

In [40]:
# Define the Generator and Discriminator networks

class Generator(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(Generator, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Define the LSTM layers
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, dropout=0.2)

        # Add 3 fully connected layers
        self.fc1 = nn.Linear(hidden_dim, output_dim)
        self.fc2 = nn.Linear(80, output_dim)
        self.fc3 = nn.Linear(150, output_dim)
        self.fc4 = nn.Linear(150, output_dim)

        # Sigmoid activation
        self.sigmoid = nn.Sigmoid()

        # ReLU activation
        self.ReLU = nn.ReLU()

        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        batch_size = x.size(0)
        seq_length = x.size(1)

        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim)

        out, _ = self.lstm(x, (h0, c0))

        # Pass LSTM output through 3 FC layers with ReLU activation
        out = self.fc1(out)
        out = self.dropout(out)
        out = self.sigmoid(out)
        #out = self.fc2(out)
        #out = self.dropout(out)
        #out = self.sigmoid(out)
        #out = self.sigmoid(out)
        #out = self.dropout(out)
        #out = self.fc3(out)
        #out = self.sigmoid(out)
        #out = self.fc4(out)
        #out = self.sigmoid(out)
        #out = self.dropout(out)

        # Apply sigmoid activation to squash the values between 0 and 1
        # Then scale to the range [0, 128]
        out = torch.sigmoid(out) * 128

        return out

class Discriminator(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(Discriminator, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # LSTM layers
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, dropout=0.2)

        # FC layers
        self.fc1 = nn.Linear(hidden_dim, output_dim)
        self.fc2 = nn.Linear(80, output_dim)
        self.fc3 = nn.Linear(150, output_dim)
        self.fc4 = nn.Linear(150, output_dim)

        # ReLU activation
        self.ReLU = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)

        out, _ = self.lstm(x, (h0, c0))

        # Apply the FC layers and activations to LSTM output
        out = self.fc1(out[:, -1, :])
        out = self.dropout(out)
        out = self.sigmoid(out)
        #out = self.fc2(out)
        #out = self.dropout(out)
        #out = self.sigmoid(out)
        #out = self.fc3(out)
        #out = self.sigmoid(out)
        #out = self.fc4(out)
        #out = self.sigmoid(out)
        #out = self.dropout(out)

        return out

class LSTMGAN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(LSTMGAN, self).__init__()
        self.input_dim = input_dim
        self.generator = Generator(input_dim, hidden_dim, input_dim, num_layers)
        self.discriminator = Discriminator(input_dim, hidden_dim, 1, num_layers)

    def train(self, data_loader, epochs):
        self.generator
        self.discriminator

        criterion = nn.BCEWithLogitsLoss()
        g_optimizer = torch.optim.Adam(self.generator.parameters(), lr=0.005)
        d_optimizer = torch.optim.Adam(self.discriminator.parameters(), lr=0.005)

        for epoch in range(epochs):
            d_loss_sum = 0.0
            g_loss_sum = 0.0

            for batch in data_loader:
                real_data = batch

                batch_size = real_data.size(0)
                real_labels = torch.ones(batch_size, 1).long()
                fake_labels = torch.zeros(batch_size, 1).long()

                d_optimizer.zero_grad()
                real_labels = torch.ones(batch_size, 1).long()
                fake_labels = torch.zeros(batch_size, 1).long()

                noise = torch.randn(batch_size, 4, input_dim)
                fake_data = self.generator(noise)

                real_data = real_data.view(batch_size, 4, input_dim)
                real_outputs = self.discriminator(real_data)

                fake_outputs = self.discriminator(fake_data.detach())

                real_outputs = real_outputs.view(batch_size, -1)
                fake_outputs = fake_outputs.view(batch_size, -1)

                d_real_loss = criterion(real_outputs, real_labels.float())
                d_fake_loss = criterion(fake_outputs, fake_labels.float())

                d_loss = d_real_loss + d_fake_loss
                d_loss.backward()
                torch.nn.utils.clip_grad_norm_(self.discriminator.parameters(), 2)
                d_optimizer.step()

                g_optimizer.zero_grad()
                noise = torch.randn(batch_size, 4, input_dim)#.to(device)
                fake_data = self.generator(noise)
                fake_data = fake_data.view(batch_size, 4, input_dim)
                fake_outputs = self.discriminator(fake_data)

                fake_outputs = fake_outputs.view(batch_size, -1)

                g_loss = criterion(fake_outputs, fake_labels.float())
                g_loss.backward()
                torch.nn.utils.clip_grad_norm_(self.generator.parameters(), 2)
                g_optimizer.step()

                d_loss_sum += d_loss.item()
                g_loss_sum += g_loss.item()

            # Calculate the average loss for this epoch
            avg_d_loss = d_loss_sum / len(data_loader)
            avg_g_loss = g_loss_sum / len(data_loader)

            print(f"Epoch {epoch + 1}/{epochs}, D Loss: {avg_d_loss}, G Loss: {avg_g_loss}")

            # Generate and print an example at the end of each epoch
            with torch.no_grad():
                example_noise = torch.randn(1, 4, input_dim)
                sample = self.generator(example_noise).int()
                print(sample)


In [41]:
class MyDataset(Dataset):
    """
    A class to build a dataloader

    Args:
        - data (list): A list of the arrays

    Returns:
        - A dataset that can be converted into a PyTorch dataloader
    """

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        return sample

batch_size = 12
dataset = MyDataset(tensor_data)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

In [43]:
# Instantiate the GAN
input_dim = 160
# Number of LSTM layers
num_layers = 1
# Number of neurons in each LSTM layer
hidden_dim = 800

# Create an instance of the GAN
gan = LSTMGAN(input_dim, hidden_dim, num_layers)

# Train the GAN using the DataLoader
epochs = 3
gan.train(dataloader, epochs)

Epoch 1/3, D Loss: 1.2375133119780442, G Loss: 0.8378636055979235
tensor([[[79, 79, 79, 75, 77, 77, 75, 76, 76, 77, 79, 75, 79, 75, 81, 76, 79,
          72, 79, 79, 78, 76, 78, 79, 76, 79, 79, 79, 79, 75, 79, 79, 75, 82,
          79, 76, 79, 79, 79, 74, 75, 79, 79, 79, 77, 79, 79, 75, 80, 80, 79,
          79, 79, 79, 77, 80, 75, 80, 79, 82, 79, 78, 79, 79, 79, 79, 76, 77,
          75, 79, 80, 79, 79, 75, 81, 79, 79, 79, 80, 75, 76, 82, 79, 79, 79,
          79, 79, 79, 83, 79, 83, 82, 84, 80, 79, 81, 79, 79, 79, 79, 79, 79,
          79, 79, 77, 79, 84, 79, 78, 79, 79, 75, 79, 83, 79, 78, 77, 80, 79,
          79, 84, 79, 83, 79, 79, 74, 81, 81, 79, 79, 79, 79, 84, 85, 79, 79,
          83, 82, 79, 79, 79, 83, 82, 79, 79, 80, 79, 82, 79, 79, 81, 77, 79,
          79, 84, 79, 83, 83, 81, 81],
         [79, 79, 79, 79, 78, 79, 78, 79, 79, 79, 74, 80, 79, 77, 79, 82, 79,
          79, 78, 80, 76, 77, 79, 79, 79, 79, 79, 79, 79, 74, 79, 77, 80, 82,
          79, 80, 77, 77, 77, 79, 79,

In [46]:
from copy import deepcopy

# Create a copy of the generator from your GAN model
generator_copy = deepcopy(gan.generator)

# Load the trained weights into the copy
generator_copy.load_state_dict(gan.generator.state_dict())

# Set the generator copy to evaluation mode
generator_copy.eval()

# Generate a single noise vector
noise = torch.randn(1, 4, 160)

# Generate a single sample using the generator copy
with torch.no_grad():
    generated_sample = generator_copy(noise)

# Post-process or visualize the generated sample as needed

In [47]:
generated_sample = torch.round(generated_sample).int()
generated_sample

tensor([[[80, 77, 80, 80, 77, 79, 79, 78, 77, 81, 79, 78, 78, 81, 83, 76, 81,
          77, 82, 79, 81, 79, 78, 82, 78, 81, 81, 81, 81, 79, 83, 79, 79, 81,
          80, 79, 83, 82, 80, 78, 80, 77, 78, 80, 81, 80, 81, 76, 78, 78, 80,
          81, 77, 80, 83, 81, 77, 80, 78, 82, 78, 80, 78, 82, 80, 78, 80, 79,
          79, 83, 81, 83, 82, 78, 81, 78, 79, 81, 80, 79, 76, 83, 79, 78, 76,
          82, 81, 77, 79, 80, 82, 80, 82, 79, 80, 81, 82, 78, 80, 78, 81, 79,
          79, 80, 78, 82, 79, 77, 77, 82, 80, 79, 81, 78, 82, 78, 76, 80, 81,
          77, 81, 82, 78, 77, 81, 80, 78, 83, 78, 77, 79, 82, 80, 84, 80, 81,
          81, 80, 81, 80, 81, 82, 82, 79, 82, 79, 80, 80, 81, 82, 81, 79, 80,
          80, 81, 76, 81, 82, 79, 82],
         [73, 75, 72, 77, 71, 72, 72, 74, 71, 73, 73, 75, 75, 74, 80, 71, 74,
          75, 75, 71, 74, 77, 72, 72, 71, 82, 74, 76, 72, 72, 75, 74, 74, 83,
          77, 77, 78, 75, 74, 75, 73, 77, 71, 81, 74, 85, 76, 73, 81, 81, 72,
          78, 75, 73, 82,