In [1]:
# Load IPython's autoreload extension and select mode 2 so that edits to the
# project modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import cycle

# Apply seaborn's "white" style; palette=None is passed explicitly.
sns.set_theme(style="white", palette=None)

# Read the active matplotlib colour cycle once, then expose it both as an
# indexable list and as an endless iterator over the same colours.
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(color_pal)

In [3]:
# Make the project's source tree importable. The path is relative, so it is
# resolved against the kernel's current working directory.
import sys
sys.path.append('../src')
# Project-local modules (found via the '../src' entry added above).
from data.audio_dataset import AudioSpectrogramDataset
from models.PGAN_model.PGenerator import PGenerator
from models.PGAN_model.PDiscriminator import PDiscriminator

# Third-party dependencies.
import torch
import torch.nn.functional as F
import pytorch_lightning as pl
import soundfile as sf
from torch.utils.data import DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# # hyperparameters
# (the commented-out entries below are currently disabled)
# z_dim = 100
batch_size = 32
num_epochs = 50
latent_dim = 100
# learning_rate = 0.0002
# beta1 = 0.5

# Root folder of the audio files; the NSynth path is kept as a disabled
# alternative.
# base_directory = "../data/raw/NSynth/audio"
base_directory = "../data/raw/Bass"

# Build the dataset with spectro_type='mel' and wrap it in a shuffling
# DataLoader that yields batches of `batch_size` items.
dataset = AudioSpectrogramDataset(base_directory=base_directory, spectro_type='mel')
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [5]:
# Pull a single batch from the loader and show its tensor dimensions.
first_batch = next(iter(dataloader))
print(first_batch.shape)

torch.Size([32, 1, 256, 160])


In [6]:
# Random stand-in tensor with dimensions 32 x 1 x 256 x 160, used for shape
# experiments in the cells that follow.
temp = torch.rand(32, 1, 256, 160)
print(temp.shape)

torch.Size([32, 1, 256, 160])


In [7]:
import torch.nn.functional as F

# Full target dimensions as a list, plus the trailing two entries
# (height, width) as a tuple.
goal_size = [32, 1, 128, 80]
goal = tuple(goal_size[-2:])

In [8]:
# Display the last two entries of goal_size (the values later passed as the
# interpolation size).
goal_size[-2:]

[128, 80]

In [9]:
# Shrink the stand-in batch to the target height/width in two different ways:
# adaptive average pooling and nearest-neighbour interpolation.
low_res_real = F.adaptive_avg_pool2d(temp, goal)
low_res_real2 = F.interpolate(temp, size=goal, mode='nearest')

In [9]:
# Use the first CUDA device when one is available, otherwise fall back to the
# CPU; n_devices is the CUDA device count, or 1 on the CPU path.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
n_devices = torch.cuda.device_count() if device.type == "cuda" else 1
device

device(type='cuda', index=0)

In [24]:
# Report the dimensions of both downsampled tensors, one per line.
print(
    f'low_res_real: {low_res_real.shape}',
    f'low_res_real2: {low_res_real2.shape}',
    sep='\n',
)

low_res_real: torch.Size([32, 1, 128, 80])
low_res_real2: torch.Size([32, 1, 128, 80])


In [30]:
# Smoke test: build the discriminator, grow it block by block and push the
# random stand-in batch through it.
discriminator = PDiscriminator().to(device)

# NOTE(review): this rebinds low_res_real to the full-size stand-in tensor
# (temp) moved to `device`, replacing the pooled tensor assigned earlier in
# the notebook.
low_res_real = temp.to(device)

# Append the progressive blocks in order of decreasing depth.
for block_depth in (256, 128, 64, 32, 16):
    discriminator.add_next_block(new_depth=block_depth)

# presumably alpha = 1.0 means the newest block is fully blended in - confirm
# against PDiscriminator.set_alpha.
discriminator.set_alpha(1.0)

dummy_output = discriminator(low_res_real)
print(dummy_output.shape)
print(dummy_output)

torch.Size([32, 1])
tensor([[-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165],
        [-0.0165]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [41]:
# Smoke test: feed a single random latent vector through a freshly built
# generator and report the size of what comes out.
p_latent_dim = 256
dummy_latent_vector = torch.randn(1, p_latent_dim).to(device)
generator = PGenerator(latent_dim=p_latent_dim).to(device)

# Progressive growth is switched off for now; uncomment to add the blocks.
# generator.add_next_block(new_depth=256)
# generator.add_next_block(new_depth=128)
# generator.add_next_block(new_depth=64)
# generator.add_next_block(new_depth=32)
# generator.add_next_block(new_depth=16)
# generator.set_alpha(1.0)

dummy_output = generator(dummy_latent_vector)
print(dummy_output.shape)

torch.Size([1, 1, 8, 5])


In [36]:
# Print the generator's module tree.
# NOTE(review): the saved output below carries execution count 36, i.e. it
# predates the last run of the generator cell (count 41); it shows a class
# named PGenerator2 and a Linear layer with in_features=100 - re-run to refresh.
print(generator)

PGenerator2(
  (blocks): ModuleList(
    (0): ModuleList(
      (0): Sequential(
        (0): Upsample(scale_factor=2.0, mode=nearest)
        (1): ConvTranspose2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (2): LeakyReLU(negative_slope=0.2)
        (3): ConvTranspose2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): LeakyReLU(negative_slope=0.2)
      )
      (1): Sequential(
        (0): Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1))
        (1): Upsample(scale_factor=2.0, mode=nearest)
      )
    )
  )
  (normalizationLayer): NormalizationLayer()
  (l1): Sequential(
    (0): Linear(in_features=100, out_features=10240, bias=True)
    (1): LeakyReLU(negative_slope=0.2)
  )
  (base_block): ModuleList(
    (0): Sequential(
      (0): ConvTranspose2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): LeakyReLU(negative_slope=0.2)
    )
    (1): Sequential(
      (0): Conv2d(256, 1, kernel_size=(1, 1), stride=(1,