In [1]:
import os
import pickle
import random

import IPython.display as ipd
import numpy as np
import soundfile as sf
import torch

from util.convert import get_trans_mel,get_mel,convert,save_wave
from factory.AutoVC import AutoVC
from factory.ASGANVC import ASGANVC
from factory.VQVC import VQVC
from factory.AgainVC import AgainVC

In [2]:
# Prefer the first CUDA GPU, but fall back to the CPU so the notebook still
# runs on machines without CUDA instead of failing at the first `.to(device)`.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [3]:
# Configuration for the AgainVC model. The 'model' -> 'params' sub-dict is
# unpacked into the AgainVC constructor when the models are built; the
# 'optimizer' section is not read by any cell visible in this notebook.
build_config = {
    'model_name': 'again',
    'model': {
        'params': {
            'encoder_params': {
                'c_in': 80,
                'c_h': 256,
                'c_out': 4,
                'n_conv_blocks': 6,
                'subsample': [1] * 6,
            },
            'decoder_params': {
                'c_in': 4,
                'c_h': 256,
                'c_out': 80,
                'n_conv_blocks': 6,
                'upsample': [1] * 6,
            },
            'activation_params': {
                'act': 'sigmoid',
                'params': {'alpha': 0.1},
            },
        },
    },
    'optimizer': {
        'params': {
            'lr': 0.0005,
            'betas': [0.9, 0.999],
            'amsgrad': True,
            'weight_decay': 0.0001,
        },
        'grad_norm': 3,
    },
}

In [44]:
# Build the four voice-conversion models and restore their checkpoints.
# Every model and every checkpoint is mapped onto the single shared `device`
# (AgainVC previously hard-coded "cuda:0", which could disagree with `device`).
# NOTE(review): the models are not switched to eval mode here; confirm whether
# the inference helpers do that themselves before relying on these outputs.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
autovc = AutoVC(32, 256, 512, 16).to(device)
autovc.load_state_dict(torch.load("model/autovc_128.pt", map_location=device))

asganvc = ASGANVC(32, 256, 512, 16).to(device)
asganvc.load_state_dict(torch.load("model/asganvc_128.pt", map_location=device))

vqvc = VQVC(80, 64, 64).to(device)
vqvc.load_state_dict(torch.load("model/vqvc+.pt", map_location=device))

againvc = AgainVC(**build_config['model']['params']).to(device)
# Kept as the last expression so the cell still displays the key-matching result.
againvc.load_state_dict(torch.load("model/againvc.pt", map_location=device))

<All keys matched successfully>

In [6]:
# ROOT: directory holding train.pkl and the mel-spectrogram .npy files.
# SAVE_DIR: directory the generated .wav files are written to.
ROOT = "train_spmel_vctk80"
SAVE_DIR = "generate_wav"

# Create the output directory up front so the save cells at the end of the
# notebook do not depend on it already existing.
os.makedirs(SAVE_DIR, exist_ok=True)

In [7]:
# Load the dataset index. Later cells read entry[0] (speaker label),
# entry[1] (a NumPy array used as the speaker embedding) and entry[k]
# (relative paths of spectrogram files).
# NOTE: pickle can execute arbitrary code while loading; only open metadata
# files that you generated yourself.
# The context manager closes the file handle, which the bare open() leaked.
with open(f'{ROOT}/train.pkl', "rb") as metadata_file:
    metadata = pickle.load(metadata_file)

In [129]:
# Sampling rate passed to the audio players and to save_wave.
sample_rate = 22050

# Seeded, local RNG so that Restart & Run All always picks the same source
# utterance; change SEED to audition a different one.
SEED = 0
_rng = random.Random(SEED)

# Indices of the source / target speakers in `metadata`
# (earlier experiments drew these with random.randint(0, 79)).
s = 0
t = 41

# Indices of the utterance entries inside each speaker's metadata record.
sound_id = _rng.randint(6, 7)  # source utterance: 6 or 7 (inclusive bounds)
t_id = 4  # target utterance (earlier experiments used random.randint(15, 20))

In [130]:
# Show the first field of the chosen source and target metadata entries
# (the speaker labels, e.g. 'p225').
source_speaker, target_speaker = metadata[s][0], metadata[t][0]
(source_speaker, target_speaker)

('p225', 'p268')

In [131]:
def _relative_mel_path(entry, utterance_idx):
    """Return the stored spectrogram path with backslashes turned into '/'."""
    return entry[utterance_idx].replace("\\", "/")


# Paths of the selected source / target spectrograms, relative to ROOT.
source_path = _relative_mel_path(metadata[s], sound_id)
target_path = _relative_mel_path(metadata[t], t_id)

# Load both spectrograms from disk (source first, then target).
mel_source, mel_target = (
    np.load(f"{ROOT}/{rel_path}") for rel_path in (source_path, target_path)
)

# Speaker embeddings: field 1 of each metadata entry, given a leading
# dimension of size 1 and moved to the compute device.
emb_org, emb_trg = (
    torch.from_numpy(metadata[speaker_idx][1]).unsqueeze(0).to(device)
    for speaker_idx in (s, t)
)

## Mel Source

In [132]:
# Turn the source spectrogram into audio with the project's `convert` helper.
# The tensor is moved to `device` first, matching how the target spectrogram
# and the converted spectrograms are handed to `convert` elsewhere in this
# notebook (this cell previously passed a CPU tensor).
source_wave = convert(torch.from_numpy(mel_source).unsqueeze(0).to(device))
ipd.Audio(source_wave, rate=sample_rate)

## Mel Target

In [133]:
# Add a leading dimension of size 1 to the target spectrogram, move it to the
# compute device, then turn it into audio with `convert` and play it back.
mel_target_batch = torch.from_numpy(mel_target).unsqueeze(0).to(device)
target_wave = convert(mel_target_batch)
ipd.Audio(data=target_wave, rate=sample_rate)

In [134]:
# Prepare the AgainVC inputs: add a leading dimension, swap the last two axes
# and move the tensors to the compute device.
m_s = torch.from_numpy(mel_source).unsqueeze(0).transpose(1, 2).to(device)
m_t = torch.from_numpy(mel_target).unsqueeze(0).transpose(1, 2).to(device)

# Pure inference: disable autograd so no computation graph is recorded
# (saves memory and yields a tensor that does not require gradients).
with torch.no_grad():
    mel_trans = againvc.inference(m_s, m_t)

In [135]:
# Swap the last two axes of the AgainVC output back before handing the
# spectrogram to `convert`.
againvc_wave = convert(torch.transpose(mel_trans, 1, 2))

## AgainVC

In [136]:
# Play back the AgainVC conversion result.
ipd.Audio(data=againvc_wave, rate=sample_rate)

## VQVC+

In [137]:
# Run the VQVC+ model through the shared `get_trans_mel` helper.
# Of the three mode flags, only isVQ is enabled for this model.
vqvc_flags = {"isAdain": False, "isVQ": True, "isAgainVC": False}
vqvc_wave = get_trans_mel(
    vqvc, mel_source, mel_target, emb_org, emb_trg, **vqvc_flags
)

In [138]:
# Play back the VQVC+ conversion result.
ipd.Audio(data=vqvc_wave, rate=sample_rate)

## AutoVC

In [139]:
# Run the AutoVC model through the shared `get_trans_mel` helper.
# All three mode flags are disabled for this model.
autovc_flags = {"isAdain": False, "isVQ": False, "isAgainVC": False}
autovc_wave = get_trans_mel(
    autovc, mel_source, mel_target, emb_org, emb_trg, **autovc_flags
)

In [140]:
# Play back the AutoVC conversion result.
ipd.Audio(data=autovc_wave, rate=sample_rate)

## ASGANVC

In [141]:
# Run the ASGANVC model through the shared `get_trans_mel` helper.
# Of the three mode flags, only isAdain is enabled for this model.
asganvc_flags = {"isAdain": True, "isVQ": False, "isAgainVC": False}
asganvc_wave = get_trans_mel(
    asganvc, mel_source, mel_target, emb_org, emb_trg, **asganvc_flags
)

In [142]:
# Play back the ASGANVC conversion result.
ipd.Audio(data=asganvc_wave, rate=sample_rate)

In [21]:
# Write the target audio to SAVE_DIR, named after the target speaker label.
target_wav_path = f'{SAVE_DIR}/target_{metadata[t][0]}.wav'
save_wave(target_wav_path, target_wave, sample_rate)

In [22]:
# Write the source audio to SAVE_DIR, named after the source speaker label.
source_wav_path = f'{SAVE_DIR}/source_{metadata[s][0]}.wav'
save_wave(source_wav_path, source_wave, sample_rate)

In [23]:
# Write the AutoVC result to SAVE_DIR; the file name carries the source and
# target speaker labels.
autovc_wav_path = f'{SAVE_DIR}/autovc_trans_{metadata[s][0]}_{metadata[t][0]}.wav'
save_wave(autovc_wav_path, autovc_wave, sample_rate)

In [24]:
# Write the VQVC+ result to SAVE_DIR; the file name carries the source and
# target speaker labels.
vqvc_wav_path = f'{SAVE_DIR}/vqvc_trans_{metadata[s][0]}_{metadata[t][0]}.wav'
save_wave(vqvc_wav_path, vqvc_wave, sample_rate)

In [25]:
# Write the ASGANVC result to SAVE_DIR; the file name carries the source and
# target speaker labels.
# This cell previously referenced `autovc3_wave`, which no cell defines, so it
# raised NameError on a fresh kernel. The ASGANVC output was the one result
# never saved, so it is used here and the file is named after that model.
# NOTE(review): confirm that "autovc3" was the earlier name for ASGANVC.
save_wave(f'{SAVE_DIR}/asganvc_trans_{metadata[s][0]}_{metadata[t][0]}.wav',asganvc_wave,sample_rate)