In [2]:
import data.audio_dataset_v1 as data_utils
import models.inversion_v1 as model_utils
from abstract_model import AbstractModel

import torch
import torch.nn as nn
from torch import optim

from argparse import Namespace
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

from ipywidgets import IntProgress

In [3]:
AudioDataset = data_utils.AudioDataset
InversionV1 = model_utils.InversionV1

In [6]:
data_paths = {}
data_paths['train'] = '/scratch/prs392/incubator/data/LibriSpeech/train-clean-360'
data_paths['val'] = '/scratch/prs392/incubator/data/LibriSpeech/dev-clean'
data_paths['test'] = '/scratch/prs392/incubator/data/LibriSpeech/test-clean'

def optimizer(model_parameters, hparams):
    optimizer = optim.SGD(model_parameters, lr=hparams.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, hparams.scheduler_epoch, hparams.scheduler_step_size)
    return [optimizer], [scheduler]

args = {
    'batch_size': 32, # Compulsory
    'lr': 0.0002, # Compulsory
    'scheduler_epoch': 3, # Compulsory
    'scheduler_step_size': 0.1 # Compulsory
}

hparams = Namespace(**args)

In [8]:
model = AbstractModel(
    hparams=hparams,
    data_paths = data_paths, 
    dataset_model = AudioDataset,
    model = InversionV1(), 
    criterion = nn.MSELoss(),
    optimizer = optimizer
)

model = AbstractModel.load_from_checkpoint('/scratch/prs392/incubator/checkpoints/openl3_librispeech/inversion_v1/no_optimization/version_0/checkpoints/epoch=6.ckpt', 
                                            data_paths = data_paths, 
                                            dataset_model = AudioDataset,
                                            model = InversionV1()
                                          )


In [9]:
audio_dataset = AudioDataset(root_dir='/scratch/prs392/incubator/data/LibriSpeech/train-clean-360')

for i in range(len(audio_dataset)):
    emb, spec, j = audio_dataset[i]
    print(emb.shape, spec.shape, j)
    pred = model(emb)
    print("Embeddings shape: " + str(emb.shape))
    print("Expected Spectrogram shape: " + str(spec.shape))
    print("Predicted Spectrogram shape: " + str(pred.shape))
    if i == 0:
        break

torch.Size([6144]) torch.Size([1, 128, 199]) tensor(105)
Embeddings shape: torch.Size([6144])
Expected Spectrogram shape: torch.Size([1, 128, 199])
Predicted Spectrogram shape: torch.Size([1, 1, 128, 199])


In [10]:
spec

tensor([[[-15.1928, -15.5141, -16.5235,  ..., -15.6203, -17.5699, -19.1182],
         [-14.0530, -14.3904, -15.9192,  ..., -15.5312, -16.3387, -17.6766],
         [-13.4978, -14.0089, -15.8368,  ..., -17.4850, -17.5224, -17.5436],
         ...,
         [-32.7973, -33.6334, -35.9862,  ..., -42.2527, -39.9076, -39.0768],
         [-32.8173, -33.6534, -36.0061,  ..., -42.2751, -39.9301, -39.0992],
         [-32.8295, -33.6657, -36.0184,  ..., -42.2889, -39.9439, -39.1131]]])

In [11]:
pred

tensor([[[[-18.8010, -14.3573, -12.4874,  ..., -15.1127, -15.1407, -19.8973],
          [-14.1781, -12.2489, -13.3498,  ..., -15.2939, -13.3789, -15.6543],
          [-13.5861, -13.6307, -16.0765,  ..., -16.0832, -14.5984, -15.0446],
          ...,
          [-29.8552, -30.6372, -32.5927,  ..., -42.1061, -39.5374, -40.2745],
          [-30.6450, -30.3381, -32.4032,  ..., -41.4409, -39.0865, -39.7729],
          [-34.6549, -33.3636, -35.8674,  ..., -43.6624, -41.8636, -41.6395]]]],
       grad_fn=<NativeBatchNormBackward>)