This model utilizes a DCGAN architecture

In [7]:
import numpy as np 
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
from pytorch_model_summary import summary
import torch.optim as optim
import music21
import matplotlib as plt


NameError: name '_C' is not defined

In [6]:
BATCH_SIZE = 32
EPOCHS = 100
NOISE_DIM = 100
NUM_CLASSES = 18



In [3]:


class Generator(nn.Module):
    def __init__(self, latent_dim, num_classes):
        super(Generator, self).__init__()

        # Embedding for the labels (num_classes = 18 for each composer)
        self.label_emb = nn.Embedding(num_classes, 50)  # (N, 50)

        # Dense layer for the label embedding
        self.fc_label = nn.Linear(50, 8 * 16)  # (N, 128) for reshaping to 8x16

        # Dense layer for the latent noise input
        self.fc_noise = nn.Linear(latent_dim, 128 * 8 * 16)  # (N, 128 * 8 * 16)

        # ConvTranspose layers for upsampling
        self.conv1 = nn.ConvTranspose2d(129, 128, kernel_size=4, stride=2, padding=1)  # Upsample to 16x32
        self.conv2 = nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1)   # Upsample to 32x64
        self.conv3 = nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1)    # Upsample to 64x128
        self.conv4 = nn.ConvTranspose2d(32, 1, kernel_size=4, stride=2, padding=1)     # Upsample to 128x256

        # LeakyReLU activation function
        self.activation = nn.ReLU(0.2)

    def forward(self, noise, labels):
        # Step 1: Embed the labels
        label_embedding = self.label_emb(labels)  # (N, 50)
        label_embedding = self.fc_label(label_embedding)  # (N, 128)
        label_embedding = label_embedding.view(-1, 8, 16, 1)  # Reshape to (N, 8, 16, 1)
        print("Label embedding layer: ", label_embedding.shape)
        # Step 2: Process the latent noise input through a dense layer
        noise_embedding = self.fc_noise(noise)  # (N, 128 * 8 * 16)
        noise_embedding = self.activation(noise_embedding)  # Apply LeakyReLU
        noise_embedding = noise_embedding.view(-1, 128, 8, 16)  # Reshape to (N, 128, 8, 16)
        print("Noise embedding layer: ", noise_embedding.shape)
        # Step 3: Concatenate the noise and label embeddings along the channel axis
        x = torch.cat((noise_embedding, label_embedding.permute(0, 3, 1, 2)), dim=1)  # (N, 129, 8, 16)
        print("After concatenation: ", x.shape)
        # Step 4: Upsample to 16x32
        x = self.conv1(x)  # (N, 128, 16, 32)
        x = self.activation(x)
        print("First layer: ", x.shape)
        # Step 5: Upsample to 32x64
        x = self.conv2(x)  # (N, 64, 32, 64)
        x = self.activation(x)
        print("Second layer: ", x.shape)
        # Step 6: Upsample to 64x128
        x = self.conv3(x)  # (N, 32, 64, 128)
        x = self.activation(x)
        print("Third layer: ", x.shape)
        # Step 7: Upsample to 128x256
        out = self.conv4(x)  # (N, 1, 128, 256)
        print("Final layer: ", out.shape)
        out = torch.sigmoid(out)
        
         # Step 9: Apply thresholding to get binary output (0 or 1)
        out_binary = (out > 0.5).float()  # Convert to 0 or 1

        return out_binary

# Instantiate the model
latent_dim = 100
num_classes = 10
model = Generator(latent_dim, num_classes)

# Example input (noise and labels)
batch_size = 1
noise = torch.randn(batch_size, latent_dim)  # (N, latent_dim)
labels = torch.randint(0, num_classes, (batch_size,))  # (N,)

# Forward pass
output = model(noise, labels)




        


Label embedding layer:  torch.Size([1, 8, 16, 1])
Noise embedding layer:  torch.Size([1, 128, 8, 16])
After concatenation:  torch.Size([1, 129, 8, 16])
First layer:  torch.Size([1, 128, 16, 32])
Second layer:  torch.Size([1, 64, 32, 64])
Third layer:  torch.Size([1, 32, 64, 128])
Final layer:  torch.Size([1, 1, 128, 256])


In [4]:


class Discriminator(nn.Module):
    def __init__(self, in_shape=(1, 128, 256), num_classes=10):
        super(Discriminator, self).__init__()
        
        # Embedding for labels
        self.label_emb = nn.Embedding(num_classes, 50)  # (N, 50)
        
        # Fully connected layer for label embedding, to match pianoroll matrix size
        self.fc_label = nn.Linear(50, in_shape[1] * in_shape[2])  # (N, 128 * 256)
        
        # Convolutional layers for processing the concatenated image and label
        self.conv1 = nn.Conv2d(2, 128, kernel_size=3, stride=2, padding=1)  # (N, 128, 64, 128)
        self.conv2 = nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1)  # (N, 128, 32, 64)
        
        # Dropout layer for regularization
        self.dropout = nn.Dropout(0.4)
        
        # Fully connected output layer
        self.fc = nn.Linear(128 * 32 * 64, 1)
        
        # Sigmoid activation function
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, img, labels):
        # Step 1: Embed the labels
        label_embedding = self.label_emb(labels)  # (N, 50)
        
        # Step 2: Process label embedding through a dense layer to match image dimensions
        label_embedding = self.fc_label(label_embedding)  # (N, 128 * 256)
        
        # Step 3: Reshape label embedding to add as a channel to the image
        label_embedding = label_embedding.view(-1, 1, 128, 256)  # Reshape to (N, 1, 128, 256)
        print("Label embedding: ", label_embedding.shape)
        # Step 4: Concatenate the label embedding with the image input
        x = torch.cat((img, label_embedding), dim=1)  # Concatenate along channel dimension: (N, 2, 128, 256)
        print("Concatenation: ", x.shape)
        # Step 5: Apply convolutional layers to process the image and label
        x = nn.LeakyReLU(0.2)(self.conv1(x))  # (N, 128, 64, 128)
        print("First layer: ", x.shape)
        x = nn.LeakyReLU(0.2)(self.conv2(x))  # (N, 128, 32, 64)
        print("Second layer: ", x.shape)
        # Step 6: Flatten the feature maps
        x = x.view(x.size(0), -1)  # (N, 128 * 32 * 64 = 262144)
        print("Flatten ", x.shape)
        # Step 7: Apply dropout for regularization
        x = self.dropout(x)
        
        # Step 8: Fully connected layer and sigmoid activation for binary classification
        out = self.fc(x)  # (N, 1)
        out = self.sigmoid(out)  # Sigmoid activation to get values between 0 and 1
        print("Final: ", out.shape)
        return out

# Instantiate the discriminator
discriminator = Discriminator(in_shape=(1, 128, 256), num_classes=10)


# Example inputs: random image and label
batch_size = 1
image = torch.randn(batch_size, 1, 128, 256)  # Random image input
#labels = torch.randint(0, 10, (batch_size,))  # Random labels (10 classes)

# Forward pass
output1 = discriminator(output, labels)

print(output1.shape)  # Output should be (batch_size, 1)
print(output1)


Label embedding:  torch.Size([1, 1, 128, 256])
Concatenation:  torch.Size([1, 2, 128, 256])
First layer:  torch.Size([1, 128, 64, 128])
Second layer:  torch.Size([1, 128, 32, 64])
Flatten  torch.Size([1, 262144])
Final:  torch.Size([1, 1])
torch.Size([1, 1])
tensor([[0.5015]], grad_fn=<SigmoidBackward0>)


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Define hyperparameters
latent_dim = 100
num_classes = 10
batch_size = 64
num_epochs = 5
learning_rate = 0.0002
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate models
generator = Generator(latent_dim, num_classes).to(device)
discriminator = Discriminator(in_shape=(1, 128, 256), num_classes=num_classes).to(device)

# Define loss function and optimizers
criterion = nn.BCELoss()
optimizer_G = optim.Adam(generator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=learning_rate, betas=(0.5, 0.999))

# Placeholder: Create sample data (replace this with your actual dataset)
# Random input data for music sequences (x, 128, 128) and corresponding labels (x, 1)
num_samples = 1000
real_data = torch.randn(num_samples, 1, 128, 256)  # (N, 1, 128, 256)
labels = torch.randint(0, num_classes, (num_samples,))  # (N,)

# Create DataLoader
dataset = TensorDataset(real_data, labels)

dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

print("Begin training")
# Training loop
for epoch in range(num_epochs):
    
    for i, (real_imgs, class_labels) in enumerate(dataloader):
        batch_size = real_imgs.size(0)
        real_imgs = real_imgs.to(device)
        class_labels = class_labels.to(device)

        # Generate labels for real and fake data
        valid = torch.ones(batch_size, 1).to(device)  # Real labels
        fake = torch.zeros(batch_size, 1).to(device)  # Fake labels

        # ===================
        # Train Discriminator
        # ===================

        # Zero the parameter gradients
        optimizer_D.zero_grad()

        # Train on real data
        real_loss = criterion(discriminator(real_imgs, class_labels), valid)

        # Generate fake images
        noise = torch.randn(batch_size, latent_dim).to(device)
        gen_labels = torch.randint(0, num_classes, (batch_size,)).to(device)
        fake_imgs = generator(noise, gen_labels)

        # Train on fake data
        fake_loss = criterion(discriminator(fake_imgs.detach(), gen_labels), fake)

        # Total discriminator loss
        d_loss = real_loss + fake_loss
        d_loss.backward()
        optimizer_D.step()

        # ===================
        # Train Generator
        # ===================

        # Zero the parameter gradients
        optimizer_G.zero_grad()

        # Generate fake images and calculate loss
        gen_imgs = generator(noise, gen_labels)
        g_loss = criterion(discriminator(gen_imgs, gen_labels), valid)

        # Backprop and optimize
        g_loss.backward()
        optimizer_G.step()

        # Print training progress
        if i % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{len(dataloader)}], "
                  f"D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}")

print("Training completed.")

KeyboardInterrupt: 