In [125]:
import sys
sys.path.append("..") # Adds higher directory to python modules path.

import numpy as np
from tqdm import tqdm

import torch
import torch.utils.data as data
import pickle

from cycleGAN_VC3.model import Generator, Discriminator
from args.cycleGAN_train_arg_parser import CycleGANTrainArgParser
from dataset.dataset import Dataset
from dataset.vc_dataset import trainingDataset
from cycleGAN_VC3.utils import get_audio_transforms, data_processing, decode_melspectrogram, get_img_from_fig, get_waveform_fig, get_mel_spectrogram_fig
from logger.train_logger import TrainLogger
from saver.model_saver import ModelSaver

In [2]:
# Instantiate the project's training argument parser.
# NOTE(review): no cell visible in this notebook reads `parser`; the run
# configuration comes from the hand-written `args_dict` instead — confirm
# whether this cell is still needed.
parser = CycleGANTrainArgParser()

In [43]:
# Hand-written run configuration, kept in the same key order as before because
# the namedtuple built from it takes its field order from this mapping.
# NOTE(review): `name` and the dataset paths point at VOC speaker 10 ->
# CORAAL PRV_se0_ag2_f_03, while `source_id` / `target_id` name other
# speakers — confirm which of these the downstream code actually reads.
args_dict = dict(
    batch_size=1,
    ckpt_path=None,
    continue_train=False,
    converted=False,
    converted_source_ids=["10", "13", "24", "18", "7", "0"],
    coraal=False,
    cycle_loss_lambda=10,
    data_dir="/home/data/",
    decay_after=10000.0,
    discriminator_lr=0.0001,
    epochs_per_plot=25,
    epochs_per_save=250,
    generator_lr=0.0002,
    gpu_ids="0",
    identity_loss_lambda=5,
    isTrain=True,
    load_epoch=6000,
    lr=0.0005,
    manifest_path="./manifests/",
    max_ckpts=3,
    max_mask_len=25,
    name="source_voc_10_target_coraal_PRV_se0_ag2_f_03",
    norm_stats_A_path="/data1/datasets/melspec_dataset/voc/10/norm_stat_voc.npz",
    norm_stats_B_path="/data1/datasets/melspec_dataset/coraal/PRV_se0_ag2_f_03/norm_stat_coraal.npz",
    normalized_dataset_A_path="/data1/datasets/melspec_dataset/voc/10/voc_normalized.pickle",
    normalized_dataset_B_path="/data1/datasets/melspec_dataset/coraal/PRV_se0_ag2_f_03/coraal_normalized.pickle",
    num_epochs=6172,
    num_frames=64,
    num_frames_validation=320,
    num_workers=2,
    return_pair=False,
    sample_rate=22050,
    save_dir="/data1/cycleGAN_VC3",
    seed=0,
    small_dataset=False,
    source_id="28",
    start_epoch=1,
    steps_per_print=100,
    target_id="DCB_se2_ag3_m_02_1",
    voc=False,
    ckpt_dir="/data1/cycleGAN_VC3/source_voc_10_target_coraal_PRV_se0_ag2_f_03/ckpts",
)

In [44]:
from collections import namedtuple

In [45]:
# Expose the configuration as attribute-style, immutable `args`: field names
# come from the dict keys and every value is bound by keyword.
args = namedtuple('args', list(args_dict))(**args_dict)

In [46]:
def loadPickleFile(fileName):
    """Read one pickled object from ``fileName`` and return it.

    Args:
        fileName: Path of the pickle file, opened in binary read mode.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.

    Note:
        Unpickling executes code embedded in the file, so only use this on
        files from a trusted source.
    """
    with open(fileName, mode='rb') as pickle_stream:
        contents = pickle.load(pickle_stream)
    return contents

In [47]:
# Side A: pickled normalized dataset plus the 'mean' / 'std' arrays stored in
# its .npz statistics file.
dataset_A = loadPickleFile(args.normalized_dataset_A_path)
dataset_A_norm_stats = np.load(args.norm_stats_A_path)
dataset_A_mean, dataset_A_std = (
    dataset_A_norm_stats[stat_key] for stat_key in ('mean', 'std')
)

# Side B: same layout, read from the B-side paths.
dataset_B = loadPickleFile(args.normalized_dataset_B_path)
dataset_B_norm_stats = np.load(args.norm_stats_B_path)
dataset_B_mean, dataset_B_std = (
    dataset_B_norm_stats[stat_key] for stat_key in ('mean', 'std')
)

In [48]:
# Build the project's ModelSaver from the notebook configuration.
# NOTE(review): `saver` is not used by the cells visible here (checkpoints are
# read directly with torch.load below) — confirm whether it is still needed.
saver = ModelSaver(args)

In [49]:
# Two freshly initialised generators, one per conversion direction, created in
# A2B-then-B2A order and moved to the GPU. Their trained weights are loaded
# from checkpoints in later cells.
generator_A2B, generator_B2A = (Generator().to('cuda') for _ in range(2))

In [51]:
# Load the A->B generator checkpoint for the configured epoch.
# The path is derived from args.ckpt_dir / args.load_epoch rather than repeated
# as a literal, so changing the configuration changes the file that is loaded
# (with the current config it resolves to the same file as before).
# map_location='cpu' deserializes the tensors onto the CPU.
checkpoint = torch.load(
    f"{args.ckpt_dir}/{args.load_epoch}_generator_A2B.pth.tar",
    map_location='cpu',
)

In [52]:
# Copy the weights stored under the checkpoint's 'model_state' key into the
# A->B generator. The cell output is load_state_dict's return value, which
# reports whether the checkpoint keys matched the module's parameters.
generator_A2B.load_state_dict(checkpoint['model_state'])

<All keys matched successfully>

In [105]:
# Load the B->A generator checkpoint for the configured epoch. This rebinds
# `checkpoint`, so the A->B checkpoint loaded earlier is no longer reachable
# under that name.
# The path is derived from args.ckpt_dir / args.load_epoch rather than repeated
# as a literal, so changing the configuration changes the file that is loaded
# (with the current config it resolves to the same file as before).
checkpoint = torch.load(
    f"{args.ckpt_dir}/{args.load_epoch}_generator_B2A.pth.tar",
    map_location='cpu',
)

In [106]:
# Copy the weights stored under 'model_state' into the B->A generator.
# `checkpoint` must be the B2A file at this point: the name is shared with the
# A2B load, so this cell has to run after the B2A torch.load cell.
generator_B2A.load_state_dict(checkpoint['model_state'])

<All keys matched successfully>

In [107]:
# Obtain the vocoder through torch.hub: the 'load_melgan' entry point of the
# 'descriptinc/melgan-neurips' repository. As the log line below shows, a
# locally cached copy of the repository is used when one exists.
vocoder = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')

Using cache found in /home/szalouk/.cache/torch/hub/descriptinc_melgan-neurips_master


In [140]:
# Number of items in dataset_A; used to pick a valid sample index for the
# inference cell that follows.
len(dataset_A)

436

In [168]:
# Convert one A-side sample with the A->B generator.
# - eval(): put the module in inference mode (modules are in training mode by
#   default, which changes the behaviour of layers such as dropout/batch norm).
# - no_grad(): this is inference only, so skip autograd bookkeeping.
# The second generator argument is an all-ones tensor shaped like the input
# (presumably a mask meaning "nothing masked" — confirm against Generator.forward).
# The B->A direction is intentionally left disabled, as in the original cell.
generator_A2B.eval()
with torch.no_grad():
    real_A = torch.tensor(dataset_A[434]).unsqueeze(0).to('cuda')
    # real_B = torch.tensor(dataset_B[2305]).unsqueeze(0).to('cuda')
    fake_B = generator_A2B(real_A, torch.ones_like(real_A))
    # fake_A = generator_B2A(real_B, torch.ones_like(real_B))

In [169]:
# Turn the real A sample and its A->B conversion back into waveforms with the
# vocoder. Each call receives the mean/std of the side its spectrogram belongs
# to: A statistics for real_A, B statistics for fake_B.
real_wav_A = decode_melspectrogram(
    vocoder,
    real_A[0].detach().cpu(),
    dataset_A_mean,
    dataset_A_std,
).cpu()
# B-side decoding stays disabled together with real_B / fake_A:
# fake_wav_A = decode_melspectrogram(vocoder, fake_A[0].detach(
# ).cpu(), dataset_A_mean, dataset_A_std).cpu()
# real_wav_B = decode_melspectrogram(vocoder, real_B[0].detach(
# ).cpu(), dataset_B_mean, dataset_B_std).cpu()
fake_wav_B = decode_melspectrogram(
    vocoder,
    fake_B[0].detach().cpu(),
    dataset_B_mean,
    dataset_B_std,
).cpu()

In [170]:
import IPython.display as display

In [171]:
# Play the vocoded original A-side sample (first entry of the decoded batch).
display.display(
    display.Audio(data=real_wav_A.detach().numpy()[0], rate=22050)
)

In [172]:
# Play the A->B converted sample (first entry of the decoded batch).
display.display(
    display.Audio(data=fake_wav_B.detach().numpy()[0], rate=22050)
)

In [173]:
# Play the vocoded real B-side sample, if it exists.
# No active code assigns `real_wav_B` (the lines producing real_B and
# real_wav_B are commented out), so on a fresh kernel this cell used to fail
# with NameError and otherwise replayed stale kernel state. Skip with a clear
# message instead of breaking Restart & Run All.
if "real_wav_B" in globals():
    display.display(display.Audio(real_wav_B.detach().numpy()[0], rate=22050))
else:
    print("real_wav_B is not defined: re-enable the commented-out real_B / "
          "real_wav_B lines in the inference and decode cells to play it.")

In [174]:
# Play the B->A converted sample, if it exists.
# No active code assigns `fake_wav_A` (the lines producing fake_A and
# fake_wav_A are commented out), so on a fresh kernel this cell used to fail
# with NameError and otherwise replayed stale kernel state. Skip with a clear
# message instead of breaking Restart & Run All.
if "fake_wav_A" in globals():
    display.display(display.Audio(fake_wav_A.detach().numpy()[0], rate=22050))
else:
    print("fake_wav_A is not defined: re-enable the commented-out real_B / "
          "fake_A / fake_wav_A lines in the inference and decode cells to play it.")

In [175]:
import os
import pandas as pd

In [176]:
# Read the VOC manifest and keep only the rows of speaker '10'.
# speaker_id is cast to str first so the comparison is against the string '10'
# regardless of how the CSV column was parsed.
manifest_path = os.path.join('../manifests', 'voc_manifest.csv')
df = (
    pd.read_csv(manifest_path, sep=',')
    .astype({'speaker_id': str})
    .loc[lambda manifest: manifest['speaker_id'] == '10']
)

# Paths of that speaker's wav files, in manifest order.
wav_files = df['wav_file'].tolist()

In [177]:
# Renumber the filtered rows 0..n-1, dropping the original manifest index.
# Rebinding the name instead of mutating with inplace=True keeps the cell
# idempotent and leaves any other reference to the filtered frame untouched.
df = df.reset_index(drop=True)

In [178]:
# Display the filtered manifest (pandas truncates the middle rows).
# NOTE(review): the frame shown below ends at index 434, while len(dataset_A)
# printed 436 earlier, so row i here may not correspond to dataset_A[i] —
# confirm the alignment before pairing transcripts with samples.
df

Unnamed: 0,wav_file,txt_file,groundtruth_text_raw,groundtruth_text_train,duration,aave,speaker_id,split,sr
0,data_processed_voc/wav/voc_10_part_1.wav,data_processed_voc/txt/voc_10_part_1.txt,oh it was a wonderful place to grow up as a ch...,OH IT WAS A WONDERFUL PLACE TO GROW UP AS A CH...,17.221,0,10,train,44100
1,data_processed_voc/wav/voc_10_part_2.wav,data_processed_voc/txt/voc_10_part_2.txt,oh yeah ... it was much smaller geographically...,OH YEAH IT WAS MUCH SMALLER GEOGRAPHICALLY YEA...,19.496,0,10,train,44100
2,data_processed_voc/wav/voc_10_part_3.wav,data_processed_voc/txt/voc_10_part_3.txt,because he taught at the high school um ... an...,BECAUSE HE TAUGHT AT THE HIGH SCHOOL UM AND IT...,12.751,0,10,train,44100
3,data_processed_voc/wav/voc_10_part_4.wav,data_processed_voc/txt/voc_10_part_4.txt,yeah [laughter] then he built when I was three...,YEAH THEN HE BUILT WHEN I WAS THREE HE BUILT A...,20.383,0,10,train,44100
4,data_processed_voc/wav/voc_10_part_5.wav,data_processed_voc/txt/voc_10_part_5.txt,I grew up in the country ... but the city move...,I GREW UP IN THE COUNTRY BUT THE CITY MOVED OU...,7.688,0,10,train,44100
...,...,...,...,...,...,...,...,...,...
431,data_processed_voc/wav/voc_10_part_435.wav,data_processed_voc/txt/voc_10_part_435.txt,uh about speech in Kern county ... and I wonde...,UH ABOUT SPEECH IN KERN COUNTY AND I WONDERED ...,20.080,0,10,train,44100
432,data_processed_voc/wav/voc_10_part_436.wav,data_processed_voc/txt/voc_10_part_436.txt,and that type thing and I wondered if you woul...,AND THAT TYPE THING AND I WONDERED IF YOU WOUL...,20.112,0,10,train,44100
433,data_processed_voc/wav/voc_10_part_437.wav,data_processed_voc/txt/voc_10_part_437.txt,it's just that they're interviewing people who...,IT'S JUST THAT THEY'RE INTERVIEWING PEOPLE WHO...,21.887,0,10,train,44100
434,data_processed_voc/wav/voc_10_part_438.wav,data_processed_voc/txt/voc_10_part_438.txt,uh maybe different nationalities and what they...,UH MAYBE DIFFERENT NATIONALITIES AND WHAT THEY...,20.243,0,10,train,44100
