<a href="https://colab.research.google.com/github/BSteiner1/Music-Gen/blob/main/Notebooks/TrainGAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/BSteiner1/Music-Gen

Cloning into 'Music-Gen'...
remote: Enumerating objects: 103, done.[K
remote: Counting objects: 100% (103/103), done.[K
remote: Compressing objects: 100% (64/64), done.[K
remote: Total 103 (delta 42), reused 86 (delta 32), pack-reused 0[K
Receiving objects: 100% (103/103), 657.94 KiB | 2.71 MiB/s, done.
Resolving deltas: 100% (42/42), done.


In [23]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [3]:
from google.colab import drive

%cd /content/Music-Gen
from utils.ExtractData import *

/content/Music-Gen


In [4]:
drive.mount('/content/drive')
path = '/content/drive/MyDrive/DoodleSample'

Mounted at /content/drive


In [53]:
array_data = get_cleaned_phrases(path, 1000)

In [54]:
data = [torch.Tensor(array) for array in array_data]

In [55]:
dataset = TensorDataset(*data)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

In [56]:
# Define the Generator and Discriminator networks
class Generator(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(Generator, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        batch_size = x.size(0)
        seq_length = x.size(1)  # Get the sequence length

        # Initialize the hidden and cell states
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out)

        # Apply sigmoid activation to squash the values between 0 and 1
        out = torch.sigmoid(out) * 128  # Scale to the range [0, 128]

        return out

class Discriminator(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(Discriminator, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

class LSTMGAN(nn.Module):
    def __init__(self, input_dim, generator_input_dim, discriminator_input_dim, hidden_dim, num_layers):
        super(LSTMGAN, self).__init__()
        self.input_dim = input_dim
        self.generator = Generator(generator_input_dim, hidden_dim, input_dim, num_layers)
        self.discriminator = Discriminator(discriminator_input_dim, hidden_dim, 1, num_layers)

    def train(self, data_loader, epochs, batch_size):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(device)
        self.generator.to(device)
        self.discriminator.to(device)

        criterion = nn.BCEWithLogitsLoss()
        g_optimizer = torch.optim.Adam(self.generator.parameters(), lr=0.001)
        d_optimizer = torch.optim.Adam(self.discriminator.parameters(), lr=0.001)

        for epoch in range(epochs):
            for batch in data_loader:  # Iterate through the DataLoader
                real_data = batch.to(device)

                batch_size = real_data.size(0)  # Get the batch size
                real_labels = torch.ones(batch_size, 1).long().to(device)  # Change to LongTensor
                fake_labels = torch.zeros(batch_size, 1).long().to(device)  # Change to LongTensor

                d_optimizer.zero_grad()
                real_labels = torch.ones(batch_size, 1).long().to(device)  # Change to LongTensor
                fake_labels = torch.zeros(batch_size, 1).long().to(device)  # Change to LongTensor

                noise = torch.randn(batch_size, 4, generator_input_dim).to(device)
                fake_data = self.generator(noise)

                #print(real_data.shape)

                real_data = real_data.view(batch_size, 4, input_dim)
                real_outputs = self.discriminator(real_data)

                fake_outputs = self.discriminator(fake_data.detach())

                # Reshape both real and fake outputs
                real_outputs = real_outputs.view(batch_size, -1)  # Reshape to (batch_size, 2560)
                fake_outputs = fake_outputs.view(batch_size, -1)  # Reshape to (batch_size, 2560)

                #print(real_outputs)
                #print(fake_outputs)

                # Apply softmax activation to real and fake outputs
                #real_outputs_softmax = torch.softmax(real_outputs, dim=-1)
                #fake_outputs_softmax = torch.softmax(fake_outputs, dim=-1)

                # Compute CrossEntropyLoss
                d_real_loss = criterion(real_outputs, real_labels.float())
                d_fake_loss = criterion(fake_outputs, fake_labels.float())

                d_loss = d_real_loss + d_fake_loss
                d_loss.backward()
                d_optimizer.step()

                # Inside your training loop
                g_optimizer.zero_grad()
                noise = torch.randn(batch_size, 4, generator_input_dim).to(device)
                fake_data = self.generator(noise)
                fake_data = fake_data.view(batch_size, 4, input_dim)
                fake_outputs = self.discriminator(fake_data)

                # Reshape fake_outputs
                fake_outputs = fake_outputs.view(batch_size, -1)  # Reshape to (batch_size, 2560)

                #print("hi")

                g_loss = criterion(fake_outputs, fake_labels.float())  # Use flattened data
                #print("done)")
                g_loss.backward()
                g_optimizer.step()

            print(f"Epoch {epoch + 1}/{epochs}, D Loss: {d_loss.item()}, G Loss: {g_loss.item()}")


In [57]:
# Create a DataLoader directly from the list of tensors
batch_size = 20
dataset = TensorDataset(*data)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

# Define the Generator and Discriminator networks
# ... (Your Generator and Discriminator class definitions)

# Instantiate the GAN
input_dim = 32  # Number of features in each sequence
generator_input_dim = 32  # Input dimension for the generator (number of notes)
discriminator_input_dim = 32  # Input dimension for the discriminator (number of notes)
hidden_dim = 64
num_layers = 2
gan = LSTMGAN(input_dim, generator_input_dim, discriminator_input_dim, hidden_dim, num_layers)

# Train the GAN using the DataLoader
epochs = 10
gan.train(data_loader, epochs, batch_size = 2)

Epoch 1/10, D Loss: 1.002524971961975, G Loss: 0.5440442562103271
Epoch 2/10, D Loss: 0.12233742326498032, G Loss: 0.07987654954195023
Epoch 3/10, D Loss: 0.018833357840776443, G Loss: 0.01192763913422823
Epoch 4/10, D Loss: 0.009176447987556458, G Loss: 0.005569863598793745
Epoch 5/10, D Loss: 0.006349492818117142, G Loss: 0.003788155736401677
Epoch 6/10, D Loss: 0.004915283527225256, G Loss: 0.0028864534106105566
Epoch 7/10, D Loss: 0.003915386740118265, G Loss: 0.0022327350452542305
Epoch 8/10, D Loss: 0.0031801071017980576, G Loss: 0.0017726199002936482
Epoch 9/10, D Loss: 0.002657230943441391, G Loss: 0.0014605213655158877
Epoch 10/10, D Loss: 0.002278430387377739, G Loss: 0.001235967967659235
