In [1]:
#python2のコードを、python3準拠で動かす
from __future__ import division
from __future__ import print_function

import argparse
from dateutil.relativedelta import relativedelta
from distutils.util import strtobool
import logging
import itertools
import os
import sys
import time

import numpy as np
import six
import torch
from torch.autograd import Variable
from torch import nn
from torchvision import transforms
from torch.utils.data import DataLoader

sys.path.append('/cyclevae-vc/src/utils')
#utils.pyからモジュールをインポート
from utils import find_files
from utils import read_hdf5
from utils import read_txt

sys.path.append('/cyclevae-vc/src/nets')
#gru_vae.pyからモジュールをインポート
from gru_vae import initialize
from gru_vae import GRU_RNN
from gru_vae import TWFSEloss
from gru_vae import sampling_vae_batch, loss_vae

#dataset.pyからモジュールをインポート
from dataset import FeatureDatasetSingleVAE, padding

from dtw_c import dtw_c as dtw



In [2]:
# Sanity check that the kernel is running inside the expected virtualenv
# (venv37pt1cu10): print every installed distribution as "name==version",
# sorted alphabetically.
import pkg_resources

installed_packages = sorted(
    "{}=={}".format(dist.key, dist.version)
    for dist in pkg_resources.working_set
)
for package in installed_packages:
    print(package)

absl-py==2.1.0
aiofiles==22.1.0
aiosqlite==0.19.0
anyio==3.7.1
argon2-cffi-bindings==21.2.0
argon2-cffi==23.1.0
arrow==1.2.3
astunparse==1.6.3
attrs==24.2.0
audioread==3.0.1
babel==2.14.0
backcall==0.2.0
beautifulsoup4==4.12.3
bleach==6.0.0
cached-property==1.5.2
cachetools==5.5.1
certifi==2024.12.14
cffi==1.15.1
charset-normalizer==3.4.1
comm==0.1.4
cycler==0.11.0
cython==3.0.11
debugpy==1.7.0
decorator==5.1.1
defusedxml==0.7.1
dtw-c==1.0.2
entrypoints==0.4
exceptiongroup==1.2.2
fastjsonschema==2.21.1
flatbuffers==25.1.24
fonttools==4.38.0
fqdn==1.5.1
gast==0.4.0
google-auth-oauthlib==0.4.6
google-auth==2.38.0
google-pasta==0.2.0
grpcio==1.62.3
h5py==3.8.0
idna==3.10
importlib-metadata==6.7.0
importlib-resources==5.12.0
ipykernel==6.16.2
ipython-genutils==0.2.0
ipython==7.34.0
ipywidgets==8.1.5
isoduration==20.11.0
jedi==0.19.2
jinja2==3.1.5
joblib==1.3.2
json5==0.9.16
jsonpointer==3.0.0
jsonschema==4.17.3
jupyter-client==7.4.9
jupyter-console==6.6.3
jupyter-core==4.12.0
jupyter-event

In [3]:
def _advance_spc_window(j, spcidcs_src, flens_spc_src, src_idx_s, src_idx_e,
                        spcidcs_src_s_idx, spcidcs_src_e_idx, s_flag, e_flag):
    """Update, in place, utterance j's window over spcidcs_src for one frame chunk.

    Scans spcidcs_src[j, i] for i from spcidcs_src_e_idx[j]+1 up to
    flens_spc_src[j]-1 and records in spcidcs_src_s_idx[j] / spcidcs_src_e_idx[j]
    the first / last positions whose stored value lies inside the frame range
    [src_idx_s, src_idx_e].  If the first candidate value already exceeds
    src_idx_e, spcidcs_src_s_idx[j] is set to -1 and the end position is left
    untouched.  s_flag / e_flag are the per-utterance scan-state arrays shared
    between successive calls.

    Presumably spcidcs_src holds ascending indices of speech frames -- TODO confirm
    against the dataset class.
    """
    last_i = flens_spc_src[j]-1
    for i in range(spcidcs_src_e_idx[j]+1, flens_spc_src[j]):
        if not s_flag[j] and spcidcs_src[j,i] >= src_idx_s:
            if spcidcs_src[j,i] > src_idx_e:
                # nothing falls inside this chunk
                spcidcs_src_s_idx[j] = -1
                break
            spcidcs_src_s_idx[j] = i
            s_flag[j] = True
            e_flag[j] = False
            if i == last_i:
                # the start position is also the very last entry: close the window here
                spcidcs_src_e_idx[j] = i
                s_flag[j] = False
                e_flag[j] = True
                break
        elif not e_flag[j] and (spcidcs_src[j,i] >= src_idx_e or i == last_i):
            if spcidcs_src[j,i] > src_idx_e:
                # overshot the chunk: the previous entry is the last one inside it
                spcidcs_src_e_idx[j] = i-1
            else:
                spcidcs_src_e_idx[j] = i
            s_flag[j] = False
            e_flag[j] = True
            break


def train_generator(dataloader, device, batch_size=80):
    """Generate training / evaluation batches from ``dataloader``.

    Args:
        dataloader: iterable of batch dicts with the keys 'flen_src',
            'flen_spc_src', 'flen_src_trg', 'flen_spc_src_trg', 'h_src',
            'src_code', 'trg_code', 'cv_src', 'spcidx_src', 'h_src_trg',
            'spcidx_src_trg', 'featfile_src' and 'featfile_src_trg'.
        device: target passed to ``.to(device)`` for every feature tensor.
        batch_size: number of frames per mini-batch.  A value > 0 selects
            mini-batch mode; any other value selects utterance mode.

    Yields:
        Mini-batch mode (22-tuple): hs_src, src_codes[:, s:e+1],
        trg_codes[:, s:e+1], hs_src_trg, cvs_src, src_idx_s, src_idx_e,
        spcidcs_src_s_idx, spcidcs_src_e_idx, c_idx, idx, spcidcs_src,
        spcidcs_src_trg, featfiles_src, featfiles_src_trg, flens,
        flens_src_trg, flens_spc_src, flens_spc_src_trg, select_utt_idx,
        flen_acc, n_batch_utt.  One tuple is yielded per chunk of up to
        ``batch_size`` frames.  The index / flag arrays are mutated in place
        between yields, so copy them if they must outlive the next ``next()``.

        Utterance mode (16-tuple): hs_src, src_codes, trg_codes, hs_src_trg,
        cvs_src, c_idx, idx, spcidcs_src, spcidcs_src_trg, featfiles_src,
        featfiles_src_trg, flens, flens_src_trg, flens_spc_src,
        flens_spc_src_trg, n_batch_utt.

        After one full pass over ``dataloader`` a terminator tuple of the same
        arity is yielded, filled with empty lists and with c_idx = idx = -1;
        the generator then ends.
    """
    print("train_generatorの処理を開始")
    while True:
        c_idx = 0
        # process over all of files
        for idx, batch in enumerate(dataloader):
            flens = batch['flen_src'].data.numpy()
            max_flen = np.max(flens)  # max frame length in this batch
            flens_spc_src = batch['flen_spc_src'].data.numpy()
            max_flen_spc_src = np.max(flens_spc_src)
            flens_src_trg = batch['flen_src_trg'].data.numpy()
            max_flen_src_trg = np.max(flens_src_trg)
            flens_spc_src_trg = batch['flen_spc_src_trg'].data.numpy()
            max_flen_spc_src_trg = np.max(flens_spc_src_trg)
            # trim the padded second axis to the longest item, then move to the device
            hs_src = batch['h_src'][:,:max_flen].to(device)
            src_codes = batch['src_code'][:,:max_flen].to(device)
            trg_codes = batch['trg_code'][:,:max_flen].to(device)
            cvs_src = batch['cv_src'][:,:max_flen].to(device)
            spcidcs_src = batch['spcidx_src'][:,:max_flen_spc_src].to(device)
            hs_src_trg = batch['h_src_trg'][:,:max_flen_src_trg].to(device)
            spcidcs_src_trg = batch['spcidx_src_trg'][:,:max_flen_spc_src_trg].to(device)
            featfiles_src = batch['featfile_src']
            featfiles_src_trg = batch['featfile_src_trg']
            # The leading dimension of tensors coming out of the dataloader is the
            # number of utterances in the batch.
            n_batch_utt = hs_src.size(0)
            print(f"featfiles_src:{featfiles_src}")
            print(f"hs_src.shape：{hs_src.shape}")

            # use mini batch
            # (`> 0` matches the terminator selection at the end of this function)
            if batch_size > 0:
                # split the frames into chunks of batch_size frames
                src_idx_s = 0
                src_idx_e = batch_size-1

                spcidcs_src_s_idx = np.repeat(-1,n_batch_utt)
                spcidcs_src_e_idx = np.repeat(-1,n_batch_utt)

                s_flag = np.repeat(False,n_batch_utt)
                e_flag = np.repeat(True,n_batch_utt)
                flen_acc = np.repeat(batch_size,n_batch_utt)

                for j in range(n_batch_utt):
                    _advance_spc_window(j, spcidcs_src, flens_spc_src, src_idx_s, src_idx_e,
                                        spcidcs_src_s_idx, spcidcs_src_e_idx, s_flag, e_flag)
                select_utt_idx = list(range(n_batch_utt))
                print("yield1を返却")
                print(f"開始フレーム:{src_idx_s}")
                print(f"終了フレーム:{src_idx_e}")
                # first chunk: frames 0 .. batch_size-1
                yield (hs_src, src_codes[:,src_idx_s:src_idx_e+1], trg_codes[:,src_idx_s:src_idx_e+1],
                       hs_src_trg, cvs_src, src_idx_s, src_idx_e, spcidcs_src_s_idx, spcidcs_src_e_idx,
                       c_idx, idx, spcidcs_src, spcidcs_src_trg, featfiles_src, featfiles_src_trg,
                       flens, flens_src_trg, flens_spc_src, flens_spc_src_trg,
                       select_utt_idx, flen_acc, n_batch_utt)

                # remaining chunks, batch_size frames at a time, up to max_flen-1
                while src_idx_e < max_flen-1:
                    src_idx_s = src_idx_e + 1
                    src_idx_e = src_idx_s+batch_size-1
                    if src_idx_e >= max_flen:
                        src_idx_e = max_flen-1
                    select_utt_idx = []
                    for j in range(n_batch_utt):
                        # only utterances whose spcidx entries are not exhausted take part
                        if spcidcs_src_e_idx[j] < flens_spc_src[j]-1:
                            # NOTE(review): src_idx_e is clamped to max_flen-1 above, so for an
                            # utterance whose length equals max_flen this never holds and
                            # flen_acc[j] keeps batch_size on the final, shorter chunk --
                            # confirm this is intended.
                            if src_idx_e >= flens[j]:
                                flen_acc[j] = flens[j]-src_idx_s
                            _advance_spc_window(j, spcidcs_src, flens_spc_src, src_idx_s, src_idx_e,
                                                spcidcs_src_s_idx, spcidcs_src_e_idx, s_flag, e_flag)
                            select_utt_idx.append(j)

                    print("yield2を返却")
                    print(f"featfiles_src:{featfiles_src}")
                    print(f"開始フレーム:{src_idx_s}")
                    print(f"終了フレーム:{src_idx_e}")
                    yield (hs_src, src_codes[:,src_idx_s:src_idx_e+1], trg_codes[:,src_idx_s:src_idx_e+1],
                           hs_src_trg, cvs_src, src_idx_s, src_idx_e, spcidcs_src_s_idx, spcidcs_src_e_idx,
                           c_idx, idx, spcidcs_src, spcidcs_src_trg, featfiles_src, featfiles_src_trg,
                           flens, flens_src_trg, flens_spc_src, flens_spc_src_trg,
                           select_utt_idx, flen_acc, n_batch_utt)
                    print("ループを再開")
            # use utterance batch
            else:
                yield (hs_src, src_codes, trg_codes, hs_src_trg, cvs_src, c_idx, idx,
                       spcidcs_src, spcidcs_src_trg, featfiles_src, featfiles_src_trg,
                       flens, flens_src_trg, flens_spc_src, flens_spc_src_trg, n_batch_utt)

        print("すべての発話処理を完了")
        # c_idx is reset to 0 at the top of every pass, so this always breaks after
        # exactly one pass over the dataloader.
        c_idx += 1
        if c_idx > 0:
            break
    # every frame range has been emitted: signal the end of the epoch
    print("yield3を返却")

    if batch_size > 0:
        yield [], [], [], [], [], [], [], [], [], -1, -1, [], [], [], [], [], [], [], [], [], [], []
    else:
        yield [], [], [], [], [], -1, -1, [], [], [], [], [], [], [], [], []

In [4]:
# ---- working directory ------------------------------------------------------
working_dir = "/cyclevae-vc/egs/one-to-one"
os.chdir(working_dir)

# ---- speaker pair and data-set names ----------------------------------------
spk_src = "VCC2SF1"
spk_trg = "VCC2TF1"
train_src = f"tr50_{spk_src}_{spk_trg}"
train_src_trg = f"trt50_{spk_src}_{spk_trg}"
train_trg = f"tr50_{spk_trg}_{spk_src}"
train_trg_src = f"trt50_{spk_trg}_{spk_src}"
test_src = f"ts50_{spk_src}_{spk_trg}"
test_trg = f"ts50_{spk_trg}_{spk_src}"

# ---- paths ------------------------------------------------------------------
expdir = "/cyclevae-vc/egs/one-to-one/exp/my_train_log"
stats_jnt = f"/cyclevae-vc/egs/one-to-one/data/{train_src}/stats_jnt.h5"
stats_src = f"/cyclevae-vc/egs/one-to-one/data/{train_src}/stats.h5"
stats_trg = f"/cyclevae-vc/egs/one-to-one/data/{train_trg}/stats.h5"
feats_src = f"/cyclevae-vc/egs/one-to-one/data/{train_src}/feats.scp"
feats_src_trg = f"/cyclevae-vc/egs/one-to-one/data/{train_src_trg}/feats.scp"
feats_trg = f"/cyclevae-vc/egs/one-to-one/data/{train_trg}/feats.scp"
feats_trg_src = f"/cyclevae-vc/egs/one-to-one/data/{train_trg_src}/feats.scp"
feats_eval_src = f"/cyclevae-vc/egs/one-to-one/data/{test_src}/feats.scp"
feats_eval_trg = f"/cyclevae-vc/egs/one-to-one/data/{test_trg}/feats.scp"

# ---- run settings -----------------------------------------------------------
GPU_device = 0
seed = 1
resume = None
epoch_count = 1
n_workers = 2

# ---- model / training hyper-parameters --------------------------------------
n_cyc = 3
stdim = 4
in_dim = 54
out_dim = 50
lat_dim = 16
hidden_layers = 1
hidden_units = 1024
kernel_size = 3
dilation_size = 2
do_prob = 0.5
batch_size_utt = 1
batch_size_utt_eval = 1
batch_size = 80
lr = 1e-4
pad_len = 2200

# ---- experiment directory (a failure is reported, not raised) ---------------
try:
    if not os.path.exists(expdir):
        os.makedirs(expdir)
except Exception as err:
    print(f"正常にディレクトリが作成されませんでした。: {err}")

# ---- GPU selection: must be set before the first CUDA query below -----------
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(GPU_device)

# ---- seeding ----------------------------------------------------------------
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# ---- device: training on CPU is rejected ------------------------------------
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
if device.type == "cpu":
    raise ValueError('ERROR: Training by CPU is not acceptable.')

# n_cyc < 1 sets the half_cyc flag; the cycle count itself is then forced to 1.
half_cyc = n_cyc < 1
if half_cyc:
    n_cyc = 1

#torch.save(args, expdir + "/model.conf")

stdim_ = stdim+1

# Load the joint statistics once each (they were previously read twice from the
# same file) and convert them to float tensors.
# Per the original author's note these are the mean / scale over all features of
# both the source and the target speaker.
_mean_feat_jnt = read_hdf5(stats_jnt, "/mean_feat_org_lf0_jnt")
_scale_feat_jnt = read_hdf5(stats_jnt, "/scale_feat_org_lf0_jnt")
mean_jnt = torch.FloatTensor(_mean_feat_jnt)
std_jnt = torch.FloatTensor(_scale_feat_jnt)
# Same statistics with the first `stdim` dimensions dropped (the mel-cepstrum
# part, per the original author's note).
mean_jnt_trg = torch.FloatTensor(_mean_feat_jnt[stdim:])
std_jnt_trg = torch.FloatTensor(_scale_feat_jnt[stdim:])

if torch.cuda.is_available():
    mean_jnt = mean_jnt.cuda()
    std_jnt = std_jnt.cuda()
    mean_jnt_trg = mean_jnt_trg.cuda()
    std_jnt_trg = std_jnt_trg.cuda()
print(mean_jnt.shape)
print(std_jnt.shape)
print(mean_jnt_trg.shape)
print(std_jnt_trg.shape)

# Model definition
# Encoder: maps in_dim input features to lat_dim*2 outputs
# (presumably the parameters of the latent distribution -- TODO confirm in gru_vae).

model_encoder = GRU_RNN(
    in_dim = in_dim,
    out_dim= lat_dim*2,
    hidden_layers= hidden_layers,
    hidden_units= hidden_units,
    kernel_size= kernel_size,
    dilation_size= dilation_size,
    do_prob= do_prob,
    scale_out_flag=False)
# NOTE(review): `config` is not used anywhere in the visible cells.
config = model_encoder.get_config()
# print(config)

# Decoder: takes lat_dim+2 inputs (presumably the latent plus a 2-dim speaker
# code -- TODO confirm) and produces out_dim features.
model_decoder = GRU_RNN(
    in_dim=lat_dim+2,
    out_dim=out_dim,
    hidden_layers=hidden_layers,
    hidden_units=hidden_units,
    kernel_size=kernel_size,
    dilation_size=dilation_size,
    do_prob=do_prob,
    scale_in_flag=False)
# print(model_encoder)
# print(model_decoder)

# Loss based on mean squared error (per the original author's note; TWFSEloss
# itself is defined in gru_vae).
criterion_mcd = TWFSEloss()
# print(criterion_mcd)

# Move the models and the criterion to the GPU
if torch.cuda.is_available():
    model_encoder.cuda()
    model_decoder.cuda()
    criterion_mcd.cuda()
# Apply the `initialize` function to every sub-module and switch to training mode.
model_encoder.apply(initialize)
model_encoder.train()
model_decoder.apply(initialize)
model_decoder.train()
# Overwrite the scaling layers with fixed statistics:
#   encoder scale_in : weight = diag(1/std), bias = -mean/std
#   decoder scale_out: weight = diag(std),   bias = mean
# (unsqueeze(2) adds a trailing size-1 axis to the weight -- presumably a
# kernel-size-1 convolution; TODO confirm in gru_vae).
model_encoder.scale_in.weight = torch.nn.Parameter(torch.diag(1.0/std_jnt.data).unsqueeze(2))
model_encoder.scale_in.bias = torch.nn.Parameter(-(mean_jnt.data/std_jnt.data))
model_decoder.scale_out.weight = torch.nn.Parameter(torch.diag(std_jnt_trg.data).unsqueeze(2))
model_decoder.scale_out.bias = torch.nn.Parameter(mean_jnt_trg.data)

# Initialise the training epoch counter
epoch_idx = 0

# Initial inputs for training batches:
#   y_in_pp          : zeros of shape (batch_size_utt, 1, lat_dim*2) on the GPU
#   y_in_src/y_in_trg: the value 0 normalised as (0-mean)/std, shaped
#                      (batch_size_utt, 1, len(mean_jnt_trg)); both names refer
#                      to the same tensor object.
init_pp = np.zeros((batch_size_utt,1,lat_dim*2))
y_in_pp = torch.FloatTensor(init_pp).cuda()
y_in_src = y_in_trg = torch.unsqueeze(torch.unsqueeze((0-mean_jnt_trg)/std_jnt_trg,0),0).repeat(batch_size_utt,1,1)
# Same initial inputs sized for evaluation batches.
with torch.no_grad():
    init_pp_eval = np.zeros((batch_size_utt_eval,1,lat_dim*2))
    y_in_pp_eval = torch.FloatTensor(init_pp_eval).cuda()
    y_in_src_eval = y_in_trg_eval = torch.unsqueeze(torch.unsqueeze((0-mean_jnt_trg)/std_jnt_trg,0),0).repeat(batch_size_utt_eval,1,1)

# Enable gradients for everything, then freeze the fixed scaling layers.
for param in model_encoder.parameters():
    param.requires_grad = True
for param in model_decoder.parameters():
    param.requires_grad = True
for param in model_encoder.scale_in.parameters():
    param.requires_grad = False
for param in model_decoder.scale_out.parameters():
    param.requires_grad = False
# Only the conv, gru and out_1 parameters of both models are handed to the optimizer.
module_list = list(model_encoder.conv.parameters())
module_list += list(model_encoder.gru.parameters()) + list(model_encoder.out_1.parameters())
module_list += list(model_decoder.conv.parameters())
module_list += list(model_decoder.gru.parameters()) + list(model_decoder.out_1.parameters())
optimizer = torch.optim.Adam(module_list, lr=lr)
# NOTE(review): `checkpoint` is not defined in any visible cell; with `resume`
# set to a non-None value this line raises NameError unless a checkpoint is
# loaded elsewhere first.
if resume is not None:
    optimizer.load_state_dict(checkpoint["optimizer"])

# Report the number of trainable parameters (in millions) for each model.
parameters = filter(lambda p: p.requires_grad, model_encoder.parameters())
parameters = sum([np.prod(p.size()) for p in parameters]) / 1000000
print('Trainable Parameters (encoder): %.3f million' % parameters)
parameters = filter(lambda p: p.requires_grad, model_decoder.parameters())
parameters = sum([np.prod(p.size()) for p in parameters]) / 1000000
print('Trainable Parameters (decoder): %.3f million' % parameters)

def _load_feat_list(feats, opt_name):
    """Return the feature-file list designated by ``feats``.

    If ``feats`` is a directory, the sorted result of
    find_files(feats, "*.h5", use_dir_name=False) is returned; if it is a
    regular file, the result of read_txt(feats) is returned.  Otherwise the
    message "--<opt_name> should be directory or list." is printed and the
    process exits with status 1.
    """
    if os.path.isdir(feats):
        return sorted(find_files(feats, "*.h5", use_dir_name=False))
    if os.path.isfile(feats):
        return read_txt(feats)
    print("--%s should be directory or list." % opt_name)
    sys.exit(1)


# define generator training
feat_list_src = _load_feat_list(feats_src, "feats_src")
feat_list_src_trg = _load_feat_list(feats_src_trg, "feats_src_trg")
assert len(feat_list_src) == len(feat_list_src_trg)
print("ソース話者訓練データ = %d." % len(feat_list_src))
feat_list_trg = _load_feat_list(feats_trg, "feats_trg")
feat_list_trg_src = _load_feat_list(feats_trg_src, "feats_trg_src")
assert len(feat_list_trg) == len(feat_list_trg_src)
print("ターゲット話者の訓練データ = %d." % len(feat_list_trg))

# Size of the last, incomplete training batch (0 when the data divide evenly).
n_train_data = len(feat_list_src) + len(feat_list_trg)
mod_train_batch = n_train_data % batch_size_utt

# Initial inputs sized for that incomplete batch; only defined when it exists.
if mod_train_batch > 0:
    init_pp_mod = np.zeros((mod_train_batch,1,lat_dim*2))
    y_in_pp_mod = torch.FloatTensor(init_pp_mod).cuda()
    y_in_src_mod = y_in_trg_mod = torch.unsqueeze(torch.unsqueeze((0-mean_jnt_trg)/std_jnt_trg,0),0).repeat(mod_train_batch,1,1)

# define generator evaluation
feat_list_eval_src = _load_feat_list(feats_eval_src, "feats_eval_src")
feat_list_eval_trg = _load_feat_list(feats_eval_trg, "feats_eval_trg")
# print(feat_list_eval_trg)

assert len(feat_list_eval_src) == len(feat_list_eval_trg)
print("evalデータ数 = %d." % len(feat_list_eval_src))

# Same remainder handling for the evaluation set.
n_eval_data = len(feat_list_eval_src)
mod_eval_batch = n_eval_data % batch_size_utt_eval
if mod_eval_batch > 0:
    with torch.no_grad():
        init_pp_eval_mod = np.zeros((mod_eval_batch,1,lat_dim*2))
        y_in_pp_eval_mod = torch.FloatTensor(init_pp_eval_mod).cuda()
        y_in_src_eval_mod = y_in_trg_eval_mod = torch.unsqueeze(torch.unsqueeze((0-mean_jnt_trg)/std_jnt_trg,0),0).repeat(mod_eval_batch,1,1)
# data
def zero_pad(x):
    """Return padding(x, pad_len, value=0.0), i.e. x padded with zeros up to pad_len."""
    return padding(x, pad_len, value=0.0)


pad_transform = transforms.Compose([zero_pad])

# Training set: source-speaker and target-speaker utterances mixed together.
dataset = FeatureDatasetSingleVAE(
    feat_list_src+feat_list_trg,
    feat_list_src_trg+feat_list_trg_src,
    pad_transform,
    spk_src,
)
dataloader = DataLoader(
    dataset,
    batch_size=batch_size_utt,
    shuffle=True,
    num_workers=n_workers,
)

# Evaluation sets, one per conversion direction (never shuffled).
dataset_eval_src = FeatureDatasetSingleVAE(
    feat_list_eval_src, feat_list_eval_trg, pad_transform, spk_src
)
dataloader_eval_src = DataLoader(
    dataset_eval_src, batch_size=batch_size_utt_eval, num_workers=n_workers
)
dataset_eval_trg = FeatureDatasetSingleVAE(
    feat_list_eval_trg, feat_list_eval_src, pad_transform, spk_src
)
dataloader_eval_trg = DataLoader(
    dataset_eval_trg, batch_size=batch_size_utt_eval, num_workers=n_workers
)

# Create the generator objects; values are pulled from them with next().
# Training uses frame mini-batches, evaluation uses whole utterances (batch_size=0).
generator_src = train_generator(dataloader, device, batch_size=batch_size)
generator_eval_src = train_generator(dataloader_eval_src, device, batch_size=0)
generator_eval_trg = train_generator(dataloader_eval_trg, device, batch_size=0)

# Keep everything except the first entry (50 -> 49, per the original author's note).
gv_trg_mean = read_hdf5(stats_trg, "/gv_range_mean")[1:]
gv_src_mean = read_hdf5(stats_src, "/gv_range_mean")[1:]

#print(f"gv_src_mean : {gv_src_mean.shape}")

# train
# Per-cycle slot lists: each holds n_cyc entries, all initialised to None.
batch_lat_src = [None for _ in range(n_cyc)]
y_in_pp_src = [None for _ in range(n_cyc)]
h_in_pp_src = [None for _ in range(n_cyc)]
batch_trj_src_src = [None for _ in range(n_cyc)]
y_in_src_src = [None for _ in range(n_cyc)]
h_in_src_src = [None for _ in range(n_cyc)]
batch_trj_src_trg = [None for _ in range(n_cyc)]
y_in_src_trg = [None for _ in range(n_cyc)]
h_in_src_trg = [None for _ in range(n_cyc)]
batch_lat_src_trg = [None for _ in range(n_cyc)]
y_in_pp_src_trg = [None for _ in range(n_cyc)]
h_in_pp_src_trg = [None for _ in range(n_cyc)]
batch_trj_src_trg_src = [None for _ in range(n_cyc)]
y_in_src_trg_src = [None for _ in range(n_cyc)]
h_in_src_trg_src = [None for _ in range(n_cyc)]
batch_lat_trg_ = [None for _ in range(n_cyc)]
batch_trj_trg_trg_ = [None for _ in range(n_cyc)]
batch_trj_trg_src_ = [None for _ in range(n_cyc)]
batch_lat_trg_src_ = [None for _ in range(n_cyc)]
batch_trj_trg_src_trg_ = [None for _ in range(n_cyc)]
batch_lat_src_ = [None for _ in range(n_cyc)]
batch_trj_src_src_ = [None for _ in range(n_cyc)]
batch_trj_src_trg_ = [None for _ in range(n_cyc)]
batch_lat_src_trg_ = [None for _ in range(n_cyc)]
batch_trj_src_trg_src_ = [None for _ in range(n_cyc)]
# per-cycle batch losses
batch_loss_mcd_trg_trg = [None for _ in range(n_cyc)]
batch_loss_mcd_trg_src_trg = [None for _ in range(n_cyc)]
batch_loss_mcd_trg_src = [None for _ in range(n_cyc)]
batch_loss_mcd_src_src = [None for _ in range(n_cyc)]
batch_loss_mcd_src_trg_src = [None for _ in range(n_cyc)]
batch_loss_mcd_src_trg = [None for _ in range(n_cyc)]
batch_loss_lat_src = [None for _ in range(n_cyc)]
batch_loss_lat_trg = [None for _ in range(n_cyc)]
batch_loss_lat_src_cv = [None for _ in range(n_cyc)]
batch_loss_lat_trg_cv = [None for _ in range(n_cyc)]
# per-cycle batch metrics, target-side inputs
batch_gv_trg_trg = [None for _ in range(n_cyc)]
batch_mcdpow_trg_trg = [None for _ in range(n_cyc)]
batch_mcd_trg_trg = [None for _ in range(n_cyc)]
batch_gv_trg_src_trg = [None for _ in range(n_cyc)]
batch_mcdpow_trg_src_trg = [None for _ in range(n_cyc)]
batch_mcd_trg_src_trg = [None for _ in range(n_cyc)]
batch_gv_trg_src = [None for _ in range(n_cyc)]
batch_mcdpow_trg_src = [None for _ in range(n_cyc)]
batch_mcd_trg_src = [None for _ in range(n_cyc)]
batch_lat_dist_trgsrc1 = [None for _ in range(n_cyc)]
batch_lat_dist_trgsrc2 = [None for _ in range(n_cyc)]
batch_lat_cdist_trgsrc1 = [None for _ in range(n_cyc)]
batch_lat_cdist_trgsrc2 = [None for _ in range(n_cyc)]
# per-cycle batch metrics, source-side inputs
batch_gv_src_src = [None for _ in range(n_cyc)]
batch_mcdpow_src_src = [None for _ in range(n_cyc)]
batch_mcd_src_src = [None for _ in range(n_cyc)]
batch_gv_src_trg_src = [None for _ in range(n_cyc)]
batch_mcdpow_src_trg_src = [None for _ in range(n_cyc)]
batch_mcd_src_trg_src = [None for _ in range(n_cyc)]
batch_gv_src_trg = [None for _ in range(n_cyc)]
batch_mcdpow_src_trg = [None for _ in range(n_cyc)]
batch_mcd_src_trg = [None for _ in range(n_cyc)]
batch_lat_dist_srctrg1 = [None for _ in range(n_cyc)]
batch_lat_dist_srctrg2 = [None for _ in range(n_cyc)]
batch_lat_cdist_srctrg1 = [None for _ in range(n_cyc)]
batch_lat_cdist_srctrg2 = [None for _ in range(n_cyc)]

# Flat log of the overall loss.
loss = []

# Per-cycle logs: each starts empty and receives one empty sub-list per cycle below.
loss_mcd_trg_trg = []
loss_mcd_trg_src_trg = []
loss_mcd_trg_src = []
loss_mcd_src_src = []
loss_mcd_src_trg_src = []
loss_mcd_src_trg = []
loss_lat_src = []
loss_lat_trg = []
loss_lat_src_cv = []
loss_lat_trg_cv = []
gv_trg_trg = []
mcdpow_trg_trg = []
mcd_trg_trg = []
gv_trg_src_trg = []
mcdpow_trg_src_trg = []
mcd_trg_src_trg = []
gv_trg_src = []
mcdpow_trg_src = []
mcd_trg_src = []
lat_dist_trgsrc1 = []
lat_dist_trgsrc2 = []
gv_src_src = []
mcdpow_src_src = []
mcd_src_src = []
gv_src_trg_src = []
mcdpow_src_trg_src = []
mcd_src_trg_src = []
gv_src_trg = []
mcdpow_src_trg = []
mcd_src_trg = []
lat_dist_srctrg1 = []
lat_dist_srctrg2 = []

_per_cycle_logs = (
    loss_mcd_trg_trg, loss_mcd_trg_src_trg, loss_mcd_trg_src,
    loss_mcd_src_src, loss_mcd_src_trg_src, loss_mcd_src_trg,
    loss_lat_src, loss_lat_trg, loss_lat_src_cv, loss_lat_trg_cv,
    gv_trg_trg, mcdpow_trg_trg, mcd_trg_trg,
    gv_trg_src_trg, mcdpow_trg_src_trg, mcd_trg_src_trg,
    gv_trg_src, mcdpow_trg_src, mcd_trg_src,
    lat_dist_trgsrc1, lat_dist_trgsrc2,
    gv_src_src, mcdpow_src_src, mcd_src_src,
    gv_src_trg_src, mcdpow_src_trg_src, mcd_src_trg_src,
    gv_src_trg, mcdpow_src_trg, mcd_src_trg,
    lat_dist_srctrg1, lat_dist_srctrg2,
)
# One fresh, independent sub-list per cycle in every per-cycle log.
for i in range(n_cyc):
    for _cycle_log in _per_cycle_logs:
        _cycle_log.append([])

# Flat log named `total` (filled elsewhere).
total = []

# Number of cycles tracked during evaluation.
n_ev_cyc = 1

# Per-cycle evaluation slots: each list holds n_ev_cyc entries initialised to None.
eval_loss_mcd_trg_trg = [None]*n_ev_cyc
eval_loss_mcd_trg_src_trg = [None]*n_ev_cyc
eval_loss_mcd_trg_src = [None]*n_ev_cyc
eval_loss_mcd_src_src = [None]*n_ev_cyc
eval_loss_mcd_src_trg_src = [None]*n_ev_cyc
eval_loss_mcd_src_trg = [None]*n_ev_cyc
eval_loss_lat_src = [None]*n_ev_cyc
eval_loss_lat_trg = [None]*n_ev_cyc
eval_loss_lat_src_cv = [None]*n_ev_cyc
eval_loss_lat_trg_cv = [None]*n_ev_cyc
eval_gv_trg_trg = [None]*n_ev_cyc
eval_mcdpow_trg_trg = [None]*n_ev_cyc
eval_mcd_trg_trg = [None]*n_ev_cyc
eval_gv_trg_src_trg = [None]*n_ev_cyc
eval_mcdpow_trg_src_trg = [None]*n_ev_cyc
eval_mcd_trg_src_trg = [None]*n_ev_cyc
eval_gv_trg_src = [None]*n_ev_cyc
eval_mcdpow_trg_src = [None]*n_ev_cyc
eval_mcdpowstd_trg_src = [None]*n_ev_cyc
eval_mcd_trg_src = [None]*n_ev_cyc
eval_mcdstd_trg_src = [None]*n_ev_cyc
eval_lat_dist_trgsrc1 = [None]*n_ev_cyc
eval_lat_dist_trgsrc2 = [None]*n_ev_cyc
eval_gv_src_src = [None]*n_ev_cyc
eval_mcdpow_src_src = [None]*n_ev_cyc
eval_mcd_src_src = [None]*n_ev_cyc
eval_gv_src_trg_src = [None]*n_ev_cyc
eval_mcdpow_src_trg_src = [None]*n_ev_cyc
eval_mcd_src_trg_src = [None]*n_ev_cyc
eval_gv_src_trg = [None]*n_ev_cyc
eval_mcdpow_src_trg = [None]*n_ev_cyc
eval_mcdpowstd_src_trg = [None]*n_ev_cyc
eval_mcd_src_trg = [None]*n_ev_cyc
eval_mcdstd_src_trg = [None]*n_ev_cyc
eval_lat_dist_srctrg1 = [None]*n_ev_cyc
eval_lat_dist_srctrg2 = [None]*n_ev_cyc
# Array of batch_size_utt empty strings (presumably the feature files of the
# previous batch -- TODO confirm in the training loop).
prev_featfile_src = np.repeat("",batch_size_utt)
# Iteration counters, both starting at zero.
iter_idx = 0 
iter_count = 0 
# Best-so-far trackers: index -1 and very large sentinel values as starting points.
min_idx = -1
min_eval_mcdpow_src_trg = 99999999.99
min_eval_mcdpowstd_src_trg = 99999999.99
min_eval_mcd_src_trg = 99999999.99
min_eval_mcdstd_src_trg = 99999999.99
# Restore the NumPy / torch RNG states when resuming.
# NOTE(review): `checkpoint` is not defined in any visible cell; with `resume`
# set to a non-None value these lines raise NameError unless a checkpoint is
# loaded elsewhere first.
if resume is not None:
    np.random.set_state(checkpoint["numpy_random_state"])
    torch.set_rng_state(checkpoint["torch_random_state"])

#学習ループ

torch.Size([54])
torch.Size([54])
torch.Size([50])
torch.Size([50])
Trainable Parameters (encoder): 5.039 million
Trainable Parameters (decoder): 3.884 million
ソース話者訓練データ = 40.
ターゲット話者の訓練データ = 41.
evalデータ数 = 35.


In [5]:
# Announce the epoch that is about to run (printed as epoch_idx + 1).
print("==%d EPOCH==" % (epoch_idx+1))

# Main training loop: repeats while epoch_idx is below epoch_count.
# NOTE(review): epoch_idx is not advanced in the visible part of the body — presumably incremented further down; confirm.
while epoch_idx < epoch_count:
    print(f"4:ループ先頭")
    # Wall-clock timestamp taken at the top of the iteration.
    start = time.time()
    # Step 1: receive the minibatch values (author's note: batch_size = 80).

    # Fetch the next source minibatch from generator_src.
    # With batch_size > 0 the generator yields 22 values (including src_idx_s / src_idx_e and
    # the spcidx_*_idx window indices); otherwise it yields 16 values (see the else branch).
    if batch_size > 0:
        if iter_count > 0:
            # Keep the previous minibatch's features (trailing-underscore copies) before
            # the names are rebound by the next(generator_src) call below.
            featfile_src_ = featfile_src
            featfile_src_trg_ = featfile_src_trg
            spcidx_src_ = spcidx_src
            prev_flens_src = flens_src
            flens_spc_src_ = flens_spc_src
            batch_src_trg_ = batch_src_trg
            spcidx_src_trg_ = spcidx_src_trg
            flens_spc_src_trg_ = flens_spc_src_trg
            n_batch_utt_ = n_batch_utt
        # Fetch the minibatch values.
        print(f"21:1回目train_generator起動")
        batch_src, batch_src_src_code, batch_src_trg_code, batch_src_trg, batch_cv_src, src_idx_s, src_idx_e, spcidx_src_s_idx, spcidx_src_e_idx, c_idx_src, utt_idx_src, spcidx_src, spcidx_src_trg, featfile_src, featfile_src_trg, flens_src, flens_src_trg, flens_spc_src, flens_spc_src_trg, select_utt_idx, flen_acc, n_batch_utt = next(generator_src)
        # print(batch_src, batch_src_src_code, batch_src_trg_code, batch_src_trg, batch_cv_src, src_idx_s, src_idx_e, spcidx_src_s_idx, spcidx_src_e_idx, c_idx_src, utt_idx_src, spcidx_src, spcidx_src_trg, featfile_src, featfile_src_trg, flens_src, flens_src_trg, flens_spc_src, flens_spc_src_trg, select_utt_idx, flen_acc, n_batch_utt)

        # Score the PREVIOUS minibatch once the new batch starts at frame 0 or the generator
        # reports c_idx_src < 0 (handled further down as the end-of-epoch case).
        # NOTE(review): trj_src_trg and trj_lat_src are not assigned in this block — presumably
        # left over from the training step of the previous iteration; confirm.
        if iter_count > 0 and (src_idx_s == 0 or c_idx_src < 0):
            with torch.no_grad():
                # Encode the previous batch's paired features; y_in_pp is used when that batch
                # held batch_size_utt utterances, y_in_pp_mod otherwise.
                # NOTE(review): presumably initial states sized for full vs. partial batches — confirm.
                if n_batch_utt_ == batch_size_utt:
                    trj_lat_srctrg, _, _ = model_encoder(batch_src_trg_, y_in_pp, clamp_vae=True, lat_dim=lat_dim)
                else:
                    trj_lat_srctrg, _, _ = model_encoder(batch_src_trg_, y_in_pp_mod, clamp_vae=True, lat_dim=lat_dim)
            print(f"32: 2エポック以上かつ開始フレームが0：trj_lat_srctrgは：{trj_lat_srctrg.shape}")

            for i in range(n_batch_utt_):
                # DTW between the converted trajectory and the paired features, both restricted to
                # the rows listed in spcidx_*; the third return value is kept as the distortion.
                # The first call uses all columns from stdim on, the second drops one more column
                # on each side ([:,1:] vs. stdim_:) — presumably excluding the power term; confirm.
                _, _, batch_mcdpow_src_trg[0], _ = dtw.dtw_org_to_trg(np.array(torch.index_select(trj_src_trg[i],0,spcidx_src_[i,:flens_spc_src_[i]]).cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_src_trg_[i][:,stdim:],0,spcidx_src_trg_[i,:flens_spc_src_trg_[i]]).cpu().data.numpy(), dtype=np.float64))
                _, _, batch_mcd_src_trg[0], _ = dtw.dtw_org_to_trg(np.array(torch.index_select(trj_src_trg[i][:,1:],0,spcidx_src_[i,:flens_spc_src_[i]]).cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_src_trg_[i][:,stdim_:],0,spcidx_src_trg_[i,:flens_spc_src_trg_[i]]).cpu().data.numpy(), dtype=np.float64))
                # Latent trajectories of the paired features and of the source, as float64 arrays.
                trj_lat_srctrg_ = np.array(torch.index_select(trj_lat_srctrg[i],0,spcidx_src_trg_[i,:flens_spc_src_trg_[i]]).cpu().data.numpy(), dtype=np.float64)
                trj_lat_src_ = np.array(torch.index_select(trj_lat_src[i],0,spcidx_src_[i,:flens_spc_src_[i]]).cpu().data.numpy(), dtype=np.float64)
                # Align in both directions; each *_dist value is the mean over columns of the
                # per-column RMSE between the aligned and the reference trajectory.
                # The *_cdist values are the third return of a DTW call made with mcd=0
                # (meaning of mcd=0 is defined by dtw_c — TODO confirm).
                aligned_lat_srctrg1, _, _, _ = dtw.dtw_org_to_trg(trj_lat_src_, trj_lat_srctrg_)
                batch_lat_dist_srctrg1[0] = np.mean(np.sqrt(np.mean((aligned_lat_srctrg1-trj_lat_srctrg_)**2, axis=0)))
                _, _, batch_lat_cdist_srctrg1[0], _ = dtw.dtw_org_to_trg(trj_lat_srctrg_, trj_lat_src_, mcd=0)
                aligned_lat_srctrg2, _, _, _ = dtw.dtw_org_to_trg(trj_lat_srctrg_, trj_lat_src_)
                batch_lat_dist_srctrg2[0] = np.mean(np.sqrt(np.mean((aligned_lat_srctrg2-trj_lat_src_)**2, axis=0)))
                _, _, batch_lat_cdist_srctrg2[0], _ = dtw.dtw_org_to_trg(trj_lat_src_, trj_lat_srctrg_, mcd=0)

                # Route the scores by speaker: the feature file's parent directory name is
                # compared with spk_src. Matching files feed the src->trg accumulators,
                # all others feed the trg->src accumulators.
                if os.path.basename(os.path.dirname(featfile_src_[i])) == spk_src:
                    mcdpow_src_trg[0].append(batch_mcdpow_src_trg[0])
                    mcd_src_trg[0].append(batch_mcd_src_trg[0])
                    # Average the two alignment directions; slot 1 becomes the mean RMSE,
                    # slot 2 is overwritten with the mean of the two cdist values.
                    batch_lat_dist_srctrg1[0] = (batch_lat_dist_srctrg1[0]+batch_lat_dist_srctrg2[0])/2
                    lat_dist_srctrg1[0].append(batch_lat_dist_srctrg1[0])
                    batch_lat_dist_srctrg2[0] = (batch_lat_cdist_srctrg1[0]+batch_lat_cdist_srctrg2[0])/2
                    lat_dist_srctrg2[0].append(batch_lat_dist_srctrg2[0])
                    print("batch srctrg loss %s %s = %.3f dB %.3f dB , %.3f %.3f" % (featfile_src_[i], featfile_src_trg_[i], batch_mcdpow_src_trg[0], batch_mcd_src_trg[0], batch_lat_dist_srctrg1[0], batch_lat_dist_srctrg2[0]))
                else:
                    mcdpow_trg_src[0].append(batch_mcdpow_src_trg[0])
                    mcd_trg_src[0].append(batch_mcd_src_trg[0])
                    batch_lat_dist_trgsrc1[0] = (batch_lat_dist_srctrg1[0]+batch_lat_dist_srctrg2[0])/2
                    lat_dist_trgsrc1[0].append(batch_lat_dist_trgsrc1[0])
                    batch_lat_dist_trgsrc2[0] = (batch_lat_cdist_srctrg1[0]+batch_lat_cdist_srctrg2[0])/2
                    lat_dist_trgsrc2[0].append(batch_lat_dist_trgsrc2[0])
                    print("61:batch trgsrc loss %s %s = %.3f dB %.3f dB , %.3f %.3f" % (featfile_src_[i], featfile_src_trg_[i], batch_mcdpow_src_trg[0], batch_mcd_src_trg[0], batch_lat_dist_trgsrc1[0], batch_lat_dist_trgsrc2[0]))
    else:
        # batch_size <= 0: the generator yields the 16-value form (no frame-window indices).
        batch_src, batch_src_src_code, batch_src_trg_code, batch_src_trg, batch_cv_src, c_idx_src, utt_idx_src, spcidx_src, spcidx_src_trg, featfile_src, featfile_src_trg, flens_src, flens_src_trg, flens_spc_src, flens_spc_src_trg, n_batch_utt = next(generator_src)     
    print(f"64：c_idx_src : {c_idx_src}")

    #
    # c_idx_src < 0 is the generator's signal handled here as "epoch finished":
    # rebuild the data pipeline, fetch the first batch of the next pass, then report.
    if c_idx_src < 0:
        # NOTE(review): this snapshot is only consumed by the commented-out save_checkpoint call
        # and is overwritten a few lines below without being read.
        numpy_random_state = np.random.get_state()
        torch_random_state = torch.get_rng_state()
        # save current epoch model
        #save_checkpoint(expdir, model_encoder, model_decoder, optimizer, numpy_random_state, torch_random_state, epoch_idx + 1)
        # Fetch the second yield (author's note): create a fresh shuffled DataLoader and generator.
        dataloader = DataLoader(dataset, batch_size=batch_size_utt, shuffle=True, num_workers=n_workers)
        generator_src = train_generator(dataloader, device, batch_size=batch_size)
        print(f"73:2回めtrain_generator起動：c_idx_src = -1")
        if batch_size > 0:
            # 22-value form of the generator output.
            batch_src, batch_src_src_code, batch_src_trg_code, batch_src_trg, batch_cv_src, src_idx_s, src_idx_e, spcidx_src_s_idx, spcidx_src_e_idx, c_idx_src, utt_idx_src, spcidx_src, spcidx_src_trg, featfile_src, featfile_src_trg, flens_src, flens_src_trg, flens_spc_src, flens_spc_src_trg, select_utt_idx, flen_acc, n_batch_utt = next(generator_src)
            #print(f"{batch_src, batch_src_src_code, batch_src_trg_code, batch_src_trg, batch_cv_src, src_idx_s, src_idx_e, spcidx_src_s_idx, spcidx_src_e_idx, c_idx_src, utt_idx_src, spcidx_src, spcidx_src_trg, featfile_src, featfile_src_trg, flens_src, flens_src_trg, flens_spc_src, flens_spc_src_trg, select_utt_idx, flen_acc, n_batch_utt}")
        else:
        # Receive the yield that returns 16 values.
            batch_src, batch_src_src_code, batch_src_trg_code, batch_src_trg, batch_cv_src, c_idx_src, utt_idx_src, spcidx_src, spcidx_src_trg, featfile_src, featfile_src_trg, flens_src, flens_src_trg, flens_spc_src, flens_spc_src_trg, n_batch_utt = next(generator_src)

        # Trace the processing for the count = 0 case (author's note).

        # Snapshot both RNG states again, now after the new generator has produced its first batch.
        numpy_random_state = np.random.get_state()
        torch_random_state = torch.get_rng_state()

        # Epoch summary: overall mean loss first, then one formatted group per evaluation cycle.
        text_log = "%.3f ;; " % np.mean(loss)
        print(f"88:{epoch_idx+1}エポックのloss:{np.mean(loss)}")

        def _log_gv_gap(gv_samples, gv_reference):
            """Mean absolute gap between log(mean of gv_samples over axis 0) and log(gv_reference)."""
            return np.mean(np.sqrt(np.square(np.log(np.mean(gv_samples, axis=0))-np.log(gv_reference))))

        _cycle_fmt = "[%d] %.3f %.3f %.3f %.3f %.3f %.3f ; %.3f %.3f %.3f %.3f ; %.6f %.3f dB %.6f dB , %.3f %.3f dB %.3f dB , %.6f %.3f dB (+- %.3f) %.6f dB (+- %.3f) , %.6f %.6f ; %.6f %.3f dB %.6f dB , %.3f %.3f dB %.3f dB , %.6f %.3f dB (+- %.3f) %.6f dB (+- %.3f) , %.6f %.6f ;; "
        for i in range(n_ev_cyc):
            # GV gaps: target-side outputs are compared with gv_trg_mean, source-side with gv_src_mean.
            eval_gv_trg_trg[i] = _log_gv_gap(gv_trg_trg[i], gv_trg_mean)
            eval_gv_src_trg[i] = _log_gv_gap(gv_src_trg[i], gv_trg_mean)
            eval_gv_trg_src_trg[i] = _log_gv_gap(gv_trg_src_trg[i], gv_trg_mean)
            eval_gv_src_src[i] = _log_gv_gap(gv_src_src[i], gv_src_mean)
            eval_gv_trg_src[i] = _log_gv_gap(gv_trg_src[i], gv_src_mean)
            eval_gv_src_trg_src[i] = _log_gv_gap(gv_src_trg_src[i], gv_src_mean)

            # Assemble the 37 format arguments group by group, in the order the format string expects.
            _row = [i+1]
            _row += [np.mean(_acc[i]) for _acc in (loss_mcd_trg_trg, loss_mcd_trg_src_trg, loss_mcd_trg_src,
                                                   loss_mcd_src_src, loss_mcd_src_trg_src, loss_mcd_src_trg)]
            _row += [np.mean(_acc[i]) for _acc in (loss_lat_trg, loss_lat_trg_cv, loss_lat_src, loss_lat_src_cv)]
            _row += [eval_gv_trg_trg[i], np.mean(mcdpow_trg_trg[i]), np.mean(mcd_trg_trg[i])]
            _row += [eval_gv_trg_src_trg[i], np.mean(mcdpow_trg_src_trg[i]), np.mean(mcd_trg_src_trg[i])]
            _row += [eval_gv_trg_src[i], np.mean(mcdpow_trg_src[i]), np.std(mcdpow_trg_src[i]), np.mean(mcd_trg_src[i]), np.std(mcd_trg_src[i])]
            _row += [np.mean(lat_dist_trgsrc1[i]), np.mean(lat_dist_trgsrc2[i])]
            _row += [eval_gv_src_src[i], np.mean(mcdpow_src_src[i]), np.mean(mcd_src_src[i])]
            _row += [eval_gv_src_trg_src[i], np.mean(mcdpow_src_trg_src[i]), np.mean(mcd_src_trg_src[i])]
            _row += [eval_gv_src_trg[i], np.mean(mcdpow_src_trg[i]), np.std(mcdpow_src_trg[i]), np.mean(mcd_src_trg[i]), np.std(mcd_src_trg[i])]
            _row += [np.mean(lat_dist_srctrg1[i]), np.mean(lat_dist_srctrg2[i])]
            text_log += _cycle_fmt % tuple(_row)

        print(f"loss_mcd_src_src[i]:{loss_mcd_src_src[0]}")
        # Total seconds accumulated in `total`; reused for the remaining-time estimate.
        _elapsed_sec = np.sum(total)
        print("(EPOCH:%d) average optimization loss = %s  (%.3f min., %.3f sec / batch)" % (epoch_idx + 1, text_log, _elapsed_sec / 60.0, np.mean(total)))
        _remaining = relativedelta(seconds=int((epoch_count - (epoch_idx + 1)) * _elapsed_sec))
        print("estimated training required time = {0.days:02}:{0.hours:02}:{0.minutes:02}:{0.seconds:02}".format(_remaining))
        # Put both networks into eval mode, then disable gradient tracking on every parameter.
        for _net in (model_encoder, model_decoder):
            _net.eval()
        for _param in itertools.chain(model_encoder.parameters(), model_decoder.parameters()):
            _param.requires_grad = False
        # compute loss in evaluation data
        # Clear every accumulator before the evaluation pass. Each per-cycle accumulator becomes
        # a list of n_ev_cyc independent empty lists (one per evaluation cycle).
        loss = []
        # -- optimisation losses
        loss_mcd_trg_trg = [[] for _ in range(n_ev_cyc)]
        loss_mcd_trg_src_trg = [[] for _ in range(n_ev_cyc)]
        loss_mcd_trg_src = [[] for _ in range(n_ev_cyc)]
        loss_mcd_src_src = [[] for _ in range(n_ev_cyc)]
        loss_mcd_src_trg_src = [[] for _ in range(n_ev_cyc)]
        loss_mcd_src_trg = [[] for _ in range(n_ev_cyc)]
        loss_lat_src = [[] for _ in range(n_ev_cyc)]
        loss_lat_trg = [[] for _ in range(n_ev_cyc)]
        loss_lat_src_cv = [[] for _ in range(n_ev_cyc)]
        loss_lat_trg_cv = [[] for _ in range(n_ev_cyc)]
        # -- target-side metrics
        gv_trg_trg = [[] for _ in range(n_ev_cyc)]
        mcdpow_trg_trg = [[] for _ in range(n_ev_cyc)]
        mcd_trg_trg = [[] for _ in range(n_ev_cyc)]
        gv_trg_src_trg = [[] for _ in range(n_ev_cyc)]
        mcdpow_trg_src_trg = [[] for _ in range(n_ev_cyc)]
        mcd_trg_src_trg = [[] for _ in range(n_ev_cyc)]
        gv_trg_src = [[] for _ in range(n_ev_cyc)]
        mcdpow_trg_src = [[] for _ in range(n_ev_cyc)]
        mcd_trg_src = [[] for _ in range(n_ev_cyc)]
        lat_dist_trgsrc1 = [[] for _ in range(n_ev_cyc)]
        lat_dist_trgsrc2 = [[] for _ in range(n_ev_cyc)]
        # -- source-side metrics
        gv_src_src = [[] for _ in range(n_ev_cyc)]
        mcdpow_src_src = [[] for _ in range(n_ev_cyc)]
        mcd_src_src = [[] for _ in range(n_ev_cyc)]
        gv_src_trg_src = [[] for _ in range(n_ev_cyc)]
        mcdpow_src_trg_src = [[] for _ in range(n_ev_cyc)]
        mcd_src_trg_src = [[] for _ in range(n_ev_cyc)]
        gv_src_trg = [[] for _ in range(n_ev_cyc)]
        mcdpow_src_trg = [[] for _ in range(n_ev_cyc)]
        mcd_src_trg = [[] for _ in range(n_ev_cyc)]
        lat_dist_srctrg1 = [[] for _ in range(n_ev_cyc)]
        lat_dist_srctrg2 = [[] for _ in range(n_ev_cyc)]
        # The timing list is reset exactly once, independent of n_ev_cyc. It used to sit inside
        # the per-cycle loop, so it was re-created per cycle and skipped when n_ev_cyc == 0.
        total = []
        iter_count = 0
        print(f"1epoch(1発話)の学習が完了")
        print("189:########学習したモデルの性能評価を行います#########")
        # Evaluation pass: gradients are disabled for everything inside this block.
        with torch.no_grad():
            # Loop until the evaluation generator reports a negative c_idx.
            while True:
                start = time.time()
                # Pull one batch from each evaluation generator (16 values each).
                # NOTE: c_idx, utt_idx and n_batch_utt_ are assigned by both lines, so the values
                # from generator_eval_trg overwrite those from generator_eval_src.
                batch_src_, batch_src_src_code_, batch_src_trg_code_, batch_src_trg_, batch_cv_src_, c_idx, utt_idx, spcidx_src_, spcidx_src_trg_, featfile_src_, featfile_src_trg_, flens_src_, flens_src_trg_, flens_spc_src_, flens_spc_src_trg_, n_batch_utt_ = next(generator_eval_src)
                batch_trg_, batch_trg_trg_code_, batch_trg_src_code_, batch_trg_src_, batch_cv_trg_, c_idx, utt_idx, spcidx_trg_, spcidx_trg_src_, featfile_trg_, featfile_trg_src_, flens_trg_, flens_trg_src_, flens_spc_trg_, flens_spc_trg_src_, n_batch_utt_ = next(generator_eval_trg)
                # A negative c_idx (from the target generator) ends the evaluation pass.
                if c_idx < 0:
                    break
                # Log file names and the four length values for each utterance pair in the batch.
                for i in range(n_batch_utt_):
                    _src_info = (featfile_src_[i], featfile_src_trg_[i], flens_src_[i], flens_src_trg_[i], flens_spc_src_[i], flens_spc_src_trg_[i])
                    print("%s %s %d %d %d %d" % _src_info)
                    _trg_info = (featfile_trg_[i], featfile_trg_src_[i], flens_trg_[i], flens_trg_src_[i], flens_spc_trg_[i], flens_spc_trg_src_[i])
                    print("%s %s %d %d %d %d" % _trg_info)

                # Use the regular y_in_* set when the batch holds batch_size_utt_eval utterances,
                # and the *_mod set otherwise.
                _is_full_batch = n_batch_utt_ == batch_size_utt_eval
                y_in_pp_eval_, y_in_trg_eval_, y_in_src_eval_ = (
                    (y_in_pp_eval, y_in_trg_eval, y_in_src_eval)
                    if _is_full_batch
                    else (y_in_pp_eval_mod, y_in_trg_eval_mod, y_in_src_eval_mod)
                )

                # Encode the paired features of both directions; only the first return value is kept.
                _enc_kwargs = dict(clamp_vae=True, lat_dim=lat_dim)
                trj_lat_srctrg, _, _ = model_encoder(batch_src_trg_, y_in_pp_eval_, **_enc_kwargs)
                trj_lat_trgsrc, _, _ = model_encoder(batch_trg_src_, y_in_pp_eval_, **_enc_kwargs)
                for i in range(n_ev_cyc):
                    # Give every per-batch score container a fresh empty list for cycle i.
                    for _scores in (
                        batch_mcdpow_src_src, batch_mcd_src_src,
                        batch_mcdpow_src_trg_src, batch_mcd_src_trg_src,
                        batch_mcdpow_src_trg, batch_mcd_src_trg,
                        batch_mcdpow_trg_trg, batch_mcd_trg_trg,
                        batch_mcdpow_trg_src_trg, batch_mcd_trg_src_trg,
                        batch_mcdpow_trg_src, batch_mcd_trg_src,
                        batch_lat_dist_srctrg1, batch_lat_dist_srctrg2,
                        batch_lat_dist_trgsrc1, batch_lat_dist_trgsrc2,
                    ):
                        _scores[i] = []
                    # Forward pass for cycle i. Both branches run the same sequence:
                    #   encode -> decode with own / other speaker code -> re-encode the converted
                    #   output -> decode back with the own speaker code.
                    # They differ only in the encoder input: cycles after the first concatenate the
                    # first stdim columns of the original batch with the previous cycle's cyclic
                    # reconstruction, while cycle 0 feeds the original batch directly.
                    # The sampling_vae_batch calls must keep this order.
                    # NOTE(review): presumably sampling_vae_batch draws random samples, so reordering would change the results — confirm.
                    if i > 0:
                        batch_lat_trg_[i], _, _ = model_encoder(torch.cat((batch_trg_[:,:,:stdim], batch_trj_trg_src_trg_[i-1]),2), y_in_pp_eval_, clamp_vae=True, lat_dim=lat_dim)
                        batch_lat_src_[i], _, _ = model_encoder(torch.cat((batch_src_[:,:,:stdim], batch_trj_src_trg_src_[i-1]),2), y_in_pp_eval_, clamp_vae=True, lat_dim=lat_dim)

                        # Target latent decoded with the target code (reconstruction) and the source code (conversion).
                        batch_trj_trg_trg_[i], _, _ = model_decoder(torch.cat((batch_trg_trg_code_, sampling_vae_batch(batch_lat_trg_[i], lat_dim=lat_dim)),2), y_in_trg_eval_)
                        batch_trj_trg_src_[i], _, _ = model_decoder(torch.cat((batch_trg_src_code_, sampling_vae_batch(batch_lat_trg_[i], lat_dim=lat_dim)),2), y_in_src_eval_)

                        # Source latent decoded with the source code (reconstruction) and the target code (conversion).
                        batch_trj_src_src_[i], _, _ = model_decoder(torch.cat((batch_src_src_code_, sampling_vae_batch(batch_lat_src_[i], lat_dim=lat_dim)),2), y_in_src_eval_)
                        batch_trj_src_trg_[i], _, _ = model_decoder(torch.cat((batch_src_trg_code_, sampling_vae_batch(batch_lat_src_[i], lat_dim=lat_dim)),2), y_in_trg_eval_)

                        # Re-encode the converted outputs, prefixed with batch_cv_trg_ / batch_cv_src_.
                        batch_lat_trg_src_[i], _, _ = model_encoder(torch.cat((batch_cv_trg_, batch_trj_trg_src_[i]),2), y_in_pp_eval_, clamp_vae=True, lat_dim=lat_dim)
                        batch_lat_src_trg_[i], _, _ = model_encoder(torch.cat((batch_cv_src_, batch_trj_src_trg_[i]),2), y_in_pp_eval_, clamp_vae=True, lat_dim=lat_dim)

                        # Decode the re-encoded latents back with the original speaker code (cyclic reconstruction).
                        batch_trj_trg_src_trg_[i], _, _ = model_decoder(torch.cat((batch_trg_trg_code_, sampling_vae_batch(batch_lat_trg_src_[i], lat_dim=lat_dim)),2), y_in_trg_eval_)
                        batch_trj_src_trg_src_[i], _, _ = model_decoder(torch.cat((batch_src_src_code_, sampling_vae_batch(batch_lat_src_trg_[i], lat_dim=lat_dim)),2), y_in_src_eval_)
                    else:
                        # Cycle 0: encode the original batches as they are.
                        batch_lat_trg_[0], _, _ = model_encoder(batch_trg_, y_in_pp_eval_, clamp_vae=True, lat_dim=lat_dim)
                        batch_lat_src_[0], _, _ = model_encoder(batch_src_, y_in_pp_eval_, clamp_vae=True, lat_dim=lat_dim)

                        batch_trj_trg_trg_[0], _, _ = model_decoder(torch.cat((batch_trg_trg_code_, sampling_vae_batch(batch_lat_trg_[0], lat_dim=lat_dim)),2), y_in_trg_eval_)
                        batch_trj_trg_src_[0], _, _ = model_decoder(torch.cat((batch_trg_src_code_, sampling_vae_batch(batch_lat_trg_[0], lat_dim=lat_dim)),2), y_in_src_eval_)

                        batch_trj_src_src_[0], _, _ = model_decoder(torch.cat((batch_src_src_code_, sampling_vae_batch(batch_lat_src_[0], lat_dim=lat_dim)),2), y_in_src_eval_)
                        batch_trj_src_trg_[0], _, _ = model_decoder(torch.cat((batch_src_trg_code_, sampling_vae_batch(batch_lat_src_[0], lat_dim=lat_dim)),2), y_in_trg_eval_)

                        batch_lat_trg_src_[0], _, _ = model_encoder(torch.cat((batch_cv_trg_, batch_trj_trg_src_[0]),2), y_in_pp_eval_, clamp_vae=True, lat_dim=lat_dim)
                        batch_lat_src_trg_[0], _, _ = model_encoder(torch.cat((batch_cv_src_, batch_trj_src_trg_[0]),2), y_in_pp_eval_, clamp_vae=True, lat_dim=lat_dim)

                        batch_trj_trg_src_trg_[0], _, _ = model_decoder(torch.cat((batch_trg_trg_code_, sampling_vae_batch(batch_lat_trg_src_[0], lat_dim=lat_dim)),2), y_in_trg_eval_)
                        batch_trj_src_trg_src_[0], _, _ = model_decoder(torch.cat((batch_src_src_code_, sampling_vae_batch(batch_lat_src_trg_[0], lat_dim=lat_dim)),2), y_in_src_eval_)

                        # Per-utterance scoring; this loop sits in the i == 0 branch, so it only runs for the first cycle.
                        for j in range(n_batch_utt_):
                            # Variance along axis 0 over the first flens_*[j] rows of each generated
                            # trajectory, with column 0 dropped ([..., 1:]); one vector per utterance
                            # is appended to the matching gv_* accumulator of cycle i.
                            gv_src_src[i].append(np.var(batch_trj_src_src_[i][j,:flens_src_[j],1:].cpu().data.numpy(), axis=0))
                            gv_src_trg[i].append(np.var(batch_trj_src_trg_[i][j,:flens_src_[j],1:].cpu().data.numpy(), axis=0))
                            gv_src_trg_src[i].append(np.var(batch_trj_src_trg_src_[i][j,:flens_src_[j],1:].cpu().data.numpy(), axis=0))
                            gv_trg_trg[i].append(np.var(batch_trj_trg_trg_[i][j,:flens_trg_[j],1:].cpu().data.numpy(), axis=0))
                            gv_trg_src[i].append(np.var(batch_trj_trg_src_[i][j,:flens_trg_[j],1:].cpu().data.numpy(), axis=0))
                            gv_trg_src_trg[i].append(np.var(batch_trj_trg_src_trg_[i][j,:flens_trg_[j],1:].cpu().data.numpy(), axis=0))

                            # --- Latent distance, source side ---
                            # Rows selected by spcidx_* from the latent of the paired features and
                            # from the cycle-0 source latent, converted to float64 arrays.
                            trj_lat_srctrg_ = np.array(torch.index_select(trj_lat_srctrg[j],0,spcidx_src_trg_[j,:flens_spc_src_trg_[j]]).cpu().data.numpy(), dtype=np.float64)
                            trj_lat_src_ = np.array(torch.index_select(batch_lat_src_[0][j],0,spcidx_src_[j,:flens_spc_src_[j]]).cpu().data.numpy(), dtype=np.float64)
                            # DTW in both directions: *_dist is the mean over columns of the per-column RMSE
                            # after alignment; *_cdist is the third return of a DTW call with mcd=0
                            # (semantics of mcd=0 are defined by dtw_c — TODO confirm).
                            aligned_lat_srctrg1, _, _, _ = dtw.dtw_org_to_trg(trj_lat_src_, trj_lat_srctrg_)
                            tmp_batch_lat_dist_srctrg1 = np.mean(np.sqrt(np.mean((aligned_lat_srctrg1-trj_lat_srctrg_)**2, axis=0)))
                            _, _, tmp_batch_lat_cdist_srctrg1, _ = dtw.dtw_org_to_trg(trj_lat_srctrg_, trj_lat_src_, mcd=0)
                            aligned_lat_srctrg2, _, _, _ = dtw.dtw_org_to_trg(trj_lat_srctrg_, trj_lat_src_)
                            tmp_batch_lat_dist_srctrg2 = np.mean(np.sqrt(np.mean((aligned_lat_srctrg2-trj_lat_src_)**2, axis=0)))
                            _, _, tmp_batch_lat_cdist_srctrg2, _ = dtw.dtw_org_to_trg(trj_lat_src_, trj_lat_srctrg_, mcd=0)

                            # Average the two directions. From here on "...srctrg1" holds the mean RMSE and
                            # "...srctrg2" is overwritten with the mean of the two cdist values.
                            tmp_batch_lat_dist_srctrg1 = (tmp_batch_lat_dist_srctrg1+tmp_batch_lat_dist_srctrg2)/2
                            lat_dist_srctrg1[0].append(tmp_batch_lat_dist_srctrg1)
                            tmp_batch_lat_dist_srctrg2 = (tmp_batch_lat_cdist_srctrg1+tmp_batch_lat_cdist_srctrg2)/2
                            lat_dist_srctrg2[0].append(tmp_batch_lat_dist_srctrg2)

                            batch_lat_dist_srctrg1[0].append(tmp_batch_lat_dist_srctrg1)
                            batch_lat_dist_srctrg2[0].append(tmp_batch_lat_dist_srctrg2)

                            # --- Latent distance, target side (same procedure with the trg/trgsrc tensors) ---
                            trj_lat_trgsrc_ = np.array(torch.index_select(trj_lat_trgsrc[j],0,spcidx_trg_src_[j,:flens_spc_trg_src_[j]]).cpu().data.numpy(), dtype=np.float64)
                            trj_lat_trg_ = np.array(torch.index_select(batch_lat_trg_[0][j],0,spcidx_trg_[j,:flens_spc_trg_[j]]).cpu().data.numpy(), dtype=np.float64)
                            aligned_lat_trgsrc1, _, _, _ = dtw.dtw_org_to_trg(trj_lat_trg_, trj_lat_trgsrc_)
                            tmp_batch_lat_dist_trgsrc1 = np.mean(np.sqrt(np.mean((aligned_lat_trgsrc1-trj_lat_trgsrc_)**2, axis=0)))
                            _, _, tmp_batch_lat_cdist_trgsrc1, _ = dtw.dtw_org_to_trg(trj_lat_trgsrc_, trj_lat_trg_, mcd=0)
                            aligned_lat_trgsrc2, _, _, _ = dtw.dtw_org_to_trg(trj_lat_trgsrc_, trj_lat_trg_)
                            tmp_batch_lat_dist_trgsrc2 = np.mean(np.sqrt(np.mean((aligned_lat_trgsrc2-trj_lat_trg_)**2, axis=0)))
                            _, _, tmp_batch_lat_cdist_trgsrc2, _ = dtw.dtw_org_to_trg(trj_lat_trg_, trj_lat_trgsrc_, mcd=0)

                            tmp_batch_lat_dist_trgsrc1 = (tmp_batch_lat_dist_trgsrc1+tmp_batch_lat_dist_trgsrc2)/2
                            lat_dist_trgsrc1[0].append(tmp_batch_lat_dist_trgsrc1)
                            tmp_batch_lat_dist_trgsrc2 = (tmp_batch_lat_cdist_trgsrc1+tmp_batch_lat_cdist_trgsrc2)/2
                            lat_dist_trgsrc2[0].append(tmp_batch_lat_dist_trgsrc2)

                            batch_lat_dist_trgsrc1[0].append(tmp_batch_lat_dist_trgsrc1)
                            batch_lat_dist_trgsrc2[0].append(tmp_batch_lat_dist_trgsrc2)

                            # --- Distortion scores for utterance j ---
                            # Reference target features at the spcidx_trg_ rows: columns from stdim on, and
                            # columns from stdim_ on (paired below with outputs that drop column 0 —
                            # presumably the power coefficient; confirm).
                            batch_trg_spc_ = np.array(torch.index_select(batch_trg_[j,:,stdim:],0,spcidx_trg_[j,:flens_spc_trg_[j]]).cpu().data.numpy(), dtype=np.float64)
                            batch_trg_spc__ = np.array(torch.index_select(batch_trg_[j,:,stdim_:],0,spcidx_trg_[j,:flens_spc_trg_[j]]).cpu().data.numpy(), dtype=np.float64)

                            # Reconstruction (trg->trg): same rows on both sides, so calc_mcd is used; its first return is kept.
                            tmp_batch_mcdpow_trg_trg, _ = dtw.calc_mcd(batch_trg_spc_, np.array(torch.index_select(batch_trj_trg_trg_[i][j],0,spcidx_trg_[j,:flens_spc_trg_[j]]).cpu().data.numpy(), dtype=np.float64))
                            tmp_batch_mcd_trg_trg, _ = dtw.calc_mcd(batch_trg_spc__, np.array(torch.index_select(batch_trj_trg_trg_[i][j,:,1:],0,spcidx_trg_[j,:flens_spc_trg_[j]]).cpu().data.numpy(), dtype=np.float64))

                            # Cyclic reconstruction (trg->src->trg).
                            tmp_batch_mcdpow_trg_src_trg, _ = dtw.calc_mcd(batch_trg_spc_, np.array(torch.index_select(batch_trj_trg_src_trg_[i][j],0,spcidx_trg_[j,:flens_spc_trg_[j]]).cpu().data.numpy(), dtype=np.float64))
                            tmp_batch_mcd_trg_src_trg, _ = dtw.calc_mcd(batch_trg_spc__, np.array(torch.index_select(batch_trj_trg_src_trg_[i][j,:,1:],0,spcidx_trg_[j,:flens_spc_trg_[j]]).cpu().data.numpy(), dtype=np.float64))

                            # Conversion (trg->src): the two sides use different row selections, so DTW is used; its third return is kept.
                            _, _, tmp_batch_mcdpow_trg_src, _ = dtw.dtw_org_to_trg(np.array(torch.index_select(batch_trj_trg_src_[i][j],0,spcidx_trg_[j,:flens_spc_trg_[j]]).cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_trg_src_[j,:,stdim:],0,spcidx_trg_src_[j,:flens_spc_trg_src_[j]]).cpu().data.numpy(), dtype=np.float64))
                            _, _, tmp_batch_mcd_trg_src, _ = dtw.dtw_org_to_trg(np.array(torch.index_select(batch_trj_trg_src_[i][j,:,1:],0,spcidx_trg_[j,:flens_spc_trg_[j]]).cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_trg_src_[j,:,stdim_:],0,spcidx_trg_src_[j,:flens_spc_trg_src_[j]]).cpu().data.numpy(), dtype=np.float64))

                            # Same three comparisons for the source side.
                            batch_src_spc_ = np.array(torch.index_select(batch_src_[j,:,stdim:],0,spcidx_src_[j,:flens_spc_src_[j]]).cpu().data.numpy(), dtype=np.float64)
                            batch_src_spc__ = np.array(torch.index_select(batch_src_[j,:,stdim_:],0,spcidx_src_[j,:flens_spc_src_[j]]).cpu().data.numpy(), dtype=np.float64)

                            tmp_batch_mcdpow_src_src, _ = dtw.calc_mcd(batch_src_spc_, np.array(torch.index_select(batch_trj_src_src_[i][j],0,spcidx_src_[j,:flens_spc_src_[j]]).cpu().data.numpy(), dtype=np.float64))
                            tmp_batch_mcd_src_src, _ = dtw.calc_mcd(batch_src_spc__, np.array(torch.index_select(batch_trj_src_src_[i][j,:,1:],0,spcidx_src_[j,:flens_spc_src_[j]]).cpu().data.numpy(), dtype=np.float64))

                            tmp_batch_mcdpow_src_trg_src, _ = dtw.calc_mcd(batch_src_spc_, np.array(torch.index_select(batch_trj_src_trg_src_[i][j],0,spcidx_src_[j,:flens_spc_src_[j]]).cpu().data.numpy(), dtype=np.float64))
                            tmp_batch_mcd_src_trg_src, _ = dtw.calc_mcd(batch_src_spc__, np.array(torch.index_select(batch_trj_src_trg_src_[i][j,:,1:],0,spcidx_src_[j,:flens_spc_src_[j]]).cpu().data.numpy(), dtype=np.float64))

                            _, _, tmp_batch_mcdpow_src_trg, _ = dtw.dtw_org_to_trg(np.array(torch.index_select(batch_trj_src_trg_[i][j],0,spcidx_src_[j,:flens_spc_src_[j]]).cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_src_trg_[j,:,stdim:],0,spcidx_src_trg_[j,:flens_spc_src_trg_[j]]).cpu().data.numpy(), dtype=np.float64))
                            _, _, tmp_batch_mcd_src_trg, _ = dtw.dtw_org_to_trg(np.array(torch.index_select(batch_trj_src_trg_[i][j,:,1:],0,spcidx_src_[j,:flens_spc_src_[j]]).cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_src_trg_[j,:,stdim_:],0,spcidx_src_trg_[j,:flens_spc_src_trg_[j]]).cpu().data.numpy(), dtype=np.float64))

                            # (per-batch container, epoch-level container, score of utterance j)
                            _utt_scores = (
                                (batch_mcdpow_trg_trg, mcdpow_trg_trg, tmp_batch_mcdpow_trg_trg),
                                (batch_mcd_trg_trg, mcd_trg_trg, tmp_batch_mcd_trg_trg),
                                (batch_mcdpow_trg_src_trg, mcdpow_trg_src_trg, tmp_batch_mcdpow_trg_src_trg),
                                (batch_mcd_trg_src_trg, mcd_trg_src_trg, tmp_batch_mcd_trg_src_trg),
                                (batch_mcdpow_trg_src, mcdpow_trg_src, tmp_batch_mcdpow_trg_src),
                                (batch_mcd_trg_src, mcd_trg_src, tmp_batch_mcd_trg_src),
                                (batch_mcdpow_src_src, mcdpow_src_src, tmp_batch_mcdpow_src_src),
                                (batch_mcd_src_src, mcd_src_src, tmp_batch_mcd_src_src),
                                (batch_mcdpow_src_trg_src, mcdpow_src_trg_src, tmp_batch_mcdpow_src_trg_src),
                                (batch_mcd_src_trg_src, mcd_src_trg_src, tmp_batch_mcd_src_trg_src),
                                (batch_mcdpow_src_trg, mcdpow_src_trg, tmp_batch_mcdpow_src_trg),
                                (batch_mcd_src_trg, mcd_src_trg, tmp_batch_mcd_src_trg),
                            )
                            # First the per-batch lists (slot 0), then the epoch-level lists (slot i).
                            for _batch_acc, _epoch_acc, _score in _utt_scores:
                                _batch_acc[0].append(_score)
                            for _batch_acc, _epoch_acc, _score in _utt_scores:
                                _epoch_acc[i].append(_score)

                            # One log line per direction for this utterance.
                            _trg_report = (
                                featfile_trg_[j], featfile_trg_src_[j],
                                tmp_batch_mcdpow_trg_trg, tmp_batch_mcd_trg_trg,
                                tmp_batch_mcdpow_trg_src_trg, tmp_batch_mcd_trg_src_trg,
                                tmp_batch_mcdpow_trg_src, tmp_batch_mcd_trg_src,
                                tmp_batch_lat_dist_trgsrc1, tmp_batch_lat_dist_trgsrc2,
                            )
                            print("batch trgsrc loss %s %s = %.3f dB %.3f dB , %.3f dB %.3f dB , %.3f dB %.3f dB , %.3f %.3f" % _trg_report)
                            _src_report = (
                                featfile_src_[j], featfile_src_trg_[j],
                                tmp_batch_mcdpow_src_src, tmp_batch_mcd_src_src,
                                tmp_batch_mcdpow_src_trg_src, tmp_batch_mcd_src_trg_src,
                                tmp_batch_mcdpow_src_trg, tmp_batch_mcd_src_trg,
                                tmp_batch_lat_dist_srctrg1, tmp_batch_lat_dist_srctrg2,
                            )
                            print("batch srctrg loss %s %s = %.3f dB %.3f dB , %.3f dB %.3f dB , %.3f dB %.3f dB , %.3f %.3f" % _src_report)

                        # Collapse each per-batch score list of cycle i into its mean value.
                        for _scores in (
                            batch_mcdpow_src_src, batch_mcd_src_src,
                            batch_mcdpow_src_trg_src, batch_mcd_src_trg_src,
                            batch_mcdpow_src_trg, batch_mcd_src_trg,
                            batch_mcdpow_trg_trg, batch_mcd_trg_trg,
                            batch_mcdpow_trg_src_trg, batch_mcd_trg_src_trg,
                            batch_mcdpow_trg_src, batch_mcd_trg_src,
                            batch_lat_dist_srctrg1, batch_lat_dist_srctrg2,
                            batch_lat_dist_trgsrc1, batch_lat_dist_trgsrc2,
                        ):
                            _scores[i] = np.mean(_scores[i])

                    # Evaluation losses of cycle i, one value per utterance j, measured on the
                    # first flens_*_[j] frames and concatenated along dim 0 into the
                    # batch_loss_*[i] tensors.
                    for j in range(n_batch_utt_):
                        utt_len_trg = flens_trg_[j]
                        utt_len_src = flens_src_[j]
                        # Reference features: columns from `stdim` onward of the input batch.
                        utt_ref_trg = batch_trg_[j,:utt_len_trg,stdim:]
                        utt_ref_src = batch_src_[j,:utt_len_src,stdim:]

                        # Only the middle element of criterion_mcd's 3-tuple is used.
                        _, tmp_batch_loss_mcd_trg_trg, _ = criterion_mcd(batch_trj_trg_trg_[i][j,:utt_len_trg], utt_ref_trg, L2=False, GV=False)
                        _, tmp_batch_loss_mcd_trg_src, _ = criterion_mcd(batch_trj_trg_src_[i][j,:utt_len_trg], utt_ref_trg, L2=False, GV=False)
                        _, tmp_batch_loss_mcd_src_src, _ = criterion_mcd(batch_trj_src_src_[i][j,:utt_len_src], utt_ref_src, L2=False, GV=False)
                        _, tmp_batch_loss_mcd_src_trg, _ = criterion_mcd(batch_trj_src_trg_[i][j,:utt_len_src], utt_ref_src, L2=False, GV=False)
                        _, tmp_batch_loss_mcd_trg_src_trg, _ = criterion_mcd(batch_trj_trg_src_trg_[i][j,:utt_len_trg], utt_ref_trg, L2=False, GV=False)
                        _, tmp_batch_loss_mcd_src_trg_src, _ = criterion_mcd(batch_trj_src_trg_src_[i][j,:utt_len_src], utt_ref_src, L2=False, GV=False)

                        # Latent losses for the encoded inputs and for the re-encoded conversions.
                        tmp_batch_loss_lat_trg = loss_vae(batch_lat_trg_[i][j,:utt_len_trg], lat_dim=lat_dim)
                        tmp_batch_loss_lat_src = loss_vae(batch_lat_src_[i][j,:utt_len_src], lat_dim=lat_dim)
                        tmp_batch_loss_lat_trg_cv = loss_vae(batch_lat_trg_src_[i][j,:utt_len_trg], lat_dim=lat_dim)
                        tmp_batch_loss_lat_src_cv = loss_vae(batch_lat_src_trg_[i][j,:utt_len_src], lat_dim=lat_dim)

                        # The first utterance starts each accumulator; later ones are appended.
                        for stacked_loss, utt_loss in (
                            (batch_loss_mcd_trg_trg, tmp_batch_loss_mcd_trg_trg),
                            (batch_loss_mcd_trg_src, tmp_batch_loss_mcd_trg_src),
                            (batch_loss_mcd_src_src, tmp_batch_loss_mcd_src_src),
                            (batch_loss_mcd_src_trg, tmp_batch_loss_mcd_src_trg),
                            (batch_loss_mcd_trg_src_trg, tmp_batch_loss_mcd_trg_src_trg),
                            (batch_loss_mcd_src_trg_src, tmp_batch_loss_mcd_src_trg_src),
                            (batch_loss_lat_trg, tmp_batch_loss_lat_trg),
                            (batch_loss_lat_src, tmp_batch_loss_lat_src),
                            (batch_loss_lat_trg_cv, tmp_batch_loss_lat_trg_cv),
                            (batch_loss_lat_src_cv, tmp_batch_loss_lat_src_cv),
                        ):
                            utt_loss = utt_loss.unsqueeze(0)
                            stacked_loss[i] = torch.cat((stacked_loss[i], utt_loss)) if j > 0 else utt_loss

                    # Average the per-utterance losses of cycle i into scalar tensors
                    # (target-side terms first, then source-side terms).
                    batch_loss_mcd_trg_trg[i] = torch.mean(batch_loss_mcd_trg_trg[i])
                    batch_loss_mcd_trg_src_trg[i] = torch.mean(batch_loss_mcd_trg_src_trg[i])
                    batch_loss_mcd_trg_src[i] = torch.mean(batch_loss_mcd_trg_src[i])
                    batch_loss_lat_trg[i] = torch.mean(batch_loss_lat_trg[i])
                    batch_loss_lat_trg_cv[i] = torch.mean(batch_loss_lat_trg_cv[i])

                    batch_loss_mcd_src_src[i] = torch.mean(batch_loss_mcd_src_src[i])
                    batch_loss_mcd_src_trg_src[i] = torch.mean(batch_loss_mcd_src_trg_src[i])
                    batch_loss_mcd_src_trg[i] = torch.mean(batch_loss_mcd_src_trg[i])
                    batch_loss_lat_src[i] = torch.mean(batch_loss_lat_src[i])
                    batch_loss_lat_src_cv[i] = torch.mean(batch_loss_lat_src_cv[i])

                    # Record the batch means as Python floats in the epoch-level lists of cycle i.
                    loss_mcd_trg_trg[i].append(batch_loss_mcd_trg_trg[i].item())
                    loss_mcd_trg_src[i].append(batch_loss_mcd_trg_src[i].item())

                    loss_mcd_src_src[i].append(batch_loss_mcd_src_src[i].item())
                    # Debug trace. The label says [i], so print the list of the cycle being
                    # processed (the original always printed cycle 0, which is misleading
                    # whenever more than one evaluation cycle is run).
                    print(f"1013:loss_mcd_src_src[i]:{loss_mcd_src_src[i]}")
                    loss_mcd_src_trg[i].append(batch_loss_mcd_src_trg[i].item())

                    loss_mcd_trg_src_trg[i].append(batch_loss_mcd_trg_src_trg[i].item())
                    loss_mcd_src_trg_src[i].append(batch_loss_mcd_src_trg_src[i].item())

                    loss_lat_trg[i].append(batch_loss_lat_trg[i].item())
                    loss_lat_src[i].append(batch_loss_lat_src[i].item())

                    loss_lat_trg_cv[i].append(batch_loss_lat_trg_cv[i].item())
                    loss_lat_src_cv[i].append(batch_loss_lat_src_cv[i].item())

                    # Loss contributed by cycle i: reconstruction + latent terms, plus the
                    # cyclic reconstruction and converted-latent terms unless half_cyc is set.
                    if half_cyc:
                        cycle_total_loss = batch_loss_mcd_trg_trg[i] + batch_loss_mcd_src_src[i] + batch_loss_lat_trg[i] + batch_loss_lat_src[i]
                    else:
                        cycle_total_loss = batch_loss_mcd_trg_trg[i] + batch_loss_mcd_src_src[i] + batch_loss_mcd_trg_src_trg[i] + batch_loss_mcd_src_trg_src[i] + batch_loss_lat_trg[i] + batch_loss_lat_src[i] + batch_loss_lat_trg_cv[i] + batch_loss_lat_src_cv[i]
                    # The first cycle starts the batch total; later cycles add to it in place.
                    if i > 0:
                        batch_loss += cycle_total_loss
                    else:
                        batch_loss = cycle_total_loss

                # Record the total loss of this evaluation batch and print one log line made of
                # the total followed by one bracketed group of statistics per evaluation cycle.
                batch_loss_value = batch_loss.item()
                loss.append(batch_loss_value)
                batch_cycle_fmt = "[%d] %.3f %.3f %.3f %.3f %.3f %.3f ; %.3f %.3f %.3f %.3f ; %.3f dB %.3f dB , %.3f dB %.3f dB , %.3f dB %.3f dB , %.3f %.3f ; %.3f dB %.3f dB , %.3f dB %.3f dB , %.3f dB %.3f dB , %.3f %.3f ;; "
                text_log = "%.3f ;; " % batch_loss_value
                for i in range(n_ev_cyc):
                    batch_cycle_values = (
                        i+1,
                        # criterion losses: target side, then source side
                        batch_loss_mcd_trg_trg[i].item(), batch_loss_mcd_trg_src_trg[i].item(), batch_loss_mcd_trg_src[i].item(),
                        batch_loss_mcd_src_src[i].item(), batch_loss_mcd_src_trg_src[i].item(), batch_loss_mcd_src_trg[i].item(),
                        # latent losses
                        batch_loss_lat_trg[i].item(), batch_loss_lat_trg_cv[i].item(), batch_loss_lat_src[i].item(), batch_loss_lat_src_cv[i].item(),
                        # batch statistics for target-speaker inputs
                        batch_mcdpow_trg_trg[i], batch_mcd_trg_trg[i], batch_mcdpow_trg_src_trg[i], batch_mcd_trg_src_trg[i],
                        batch_mcdpow_trg_src[i], batch_mcd_trg_src[i], batch_lat_dist_trgsrc1[i], batch_lat_dist_trgsrc2[i],
                        # batch statistics for source-speaker inputs
                        batch_mcdpow_src_src[i], batch_mcd_src_src[i], batch_mcdpow_src_trg_src[i], batch_mcd_src_trg_src[i],
                        batch_mcdpow_src_trg[i], batch_mcd_src_trg[i], batch_lat_dist_srctrg1[i], batch_lat_dist_srctrg2[i],
                    )
                    text_log += batch_cycle_fmt % batch_cycle_values
                print("batch eval loss [%d] = %s  (%.3f sec)" % (c_idx+1, text_log, time.time() - start))
                # Elapsed time since `start`, sampled again after printing.
                total.append(time.time() - start)
            # Epoch-level evaluation loss: mean of the per-batch totals collected in `loss`.
            eval_loss = np.mean(loss)
        # Condense everything collected during the evaluation pass into one summary value
        # per statistic and evaluation cycle.
        for i in range(n_ev_cyc):
            # Plain means of the collected values.
            for summary, collected in (
                (eval_loss_mcd_trg_trg, loss_mcd_trg_trg),
                (eval_loss_mcd_trg_src_trg, loss_mcd_trg_src_trg),
                (eval_loss_mcd_trg_src, loss_mcd_trg_src),
                (eval_loss_mcd_src_src, loss_mcd_src_src),
                (eval_loss_mcd_src_trg_src, loss_mcd_src_trg_src),
                (eval_loss_mcd_src_trg, loss_mcd_src_trg),
                (eval_loss_lat_src_cv, loss_lat_src_cv),
                (eval_loss_lat_trg_cv, loss_lat_trg_cv),
                (eval_loss_lat_src, loss_lat_src),
                (eval_loss_lat_trg, loss_lat_trg),
                (eval_mcdpow_trg_trg, mcdpow_trg_trg),
                (eval_mcd_trg_trg, mcd_trg_trg),
                (eval_mcdpow_trg_src_trg, mcdpow_trg_src_trg),
                (eval_mcd_trg_src_trg, mcd_trg_src_trg),
                (eval_mcdpow_trg_src, mcdpow_trg_src),
                (eval_mcd_trg_src, mcd_trg_src),
                (eval_lat_dist_trgsrc1, lat_dist_trgsrc1),
                (eval_lat_dist_trgsrc2, lat_dist_trgsrc2),
                (eval_mcdpow_src_src, mcdpow_src_src),
                (eval_mcd_src_src, mcd_src_src),
                (eval_mcdpow_src_trg_src, mcdpow_src_trg_src),
                (eval_mcd_src_trg_src, mcd_src_trg_src),
                (eval_mcdpow_src_trg, mcdpow_src_trg),
                (eval_mcd_src_trg, mcd_src_trg),
                (eval_lat_dist_srctrg1, lat_dist_srctrg1),
                (eval_lat_dist_srctrg2, lat_dist_srctrg2),
            ):
                summary[i] = np.mean(collected[i])

            # Standard deviations, kept only for the two cross-speaker conversions.
            for summary, collected in (
                (eval_mcdpowstd_trg_src, mcdpow_trg_src),
                (eval_mcdstd_trg_src, mcd_trg_src),
                (eval_mcdpowstd_src_trg, mcdpow_src_trg),
                (eval_mcdstd_src_trg, mcd_src_trg),
            ):
                summary[i] = np.std(collected[i])

            # GV distance: mean absolute difference between the log of the averaged collected
            # variances (averaged over axis 0) and the log of the reference GV of the
            # speaker the output is meant to sound like.
            for summary, collected, gv_reference in (
                (eval_gv_trg_trg, gv_trg_trg, gv_trg_mean),
                (eval_gv_trg_src_trg, gv_trg_src_trg, gv_trg_mean),
                (eval_gv_trg_src, gv_trg_src, gv_src_mean),
                (eval_gv_src_src, gv_src_src, gv_src_mean),
                (eval_gv_src_trg_src, gv_src_trg_src, gv_src_mean),
                (eval_gv_src_trg, gv_src_trg, gv_trg_mean),
            ):
                log_gv = np.log(np.mean(collected[i], axis=0))
                summary[i] = np.mean(np.sqrt(np.square(log_gv-np.log(gv_reference))))
        # Epoch summary line: overall evaluation loss followed by one bracketed group of
        # summary statistics per evaluation cycle, then the timing of the pass.
        eval_cycle_fmt = "[%d] %.3f %.3f %.3f %.3f %.3f %.3f ; %.3f %.3f %.3f %.3f ; %.6f %.3f dB %.6f dB , %.3f %.3f dB %.3f dB , %.6f %.3f dB (+- %.3f) %.6f dB (+- %.3f) , %.6f %.6f ; %.6f %.3f dB %.6f dB , %.3f %.3f dB %.3f dB , %.6f %.3f dB (+- %.3f) %.6f dB (+- %.3f) , %.6f %.6f ;; "
        text_log = "%.3f ;; " % eval_loss
        for i in range(n_ev_cyc):
            eval_cycle_values = (
                i+1,
                # criterion and latent losses
                eval_loss_mcd_trg_trg[i], eval_loss_mcd_trg_src_trg[i], eval_loss_mcd_trg_src[i],
                eval_loss_mcd_src_src[i], eval_loss_mcd_src_trg_src[i], eval_loss_mcd_src_trg[i],
                eval_loss_lat_trg[i], eval_loss_lat_trg_cv[i], eval_loss_lat_src[i], eval_loss_lat_src_cv[i],
                # target-speaker inputs
                eval_gv_trg_trg[i], eval_mcdpow_trg_trg[i], eval_mcd_trg_trg[i],
                eval_gv_trg_src_trg[i], eval_mcdpow_trg_src_trg[i], eval_mcd_trg_src_trg[i],
                eval_gv_trg_src[i], eval_mcdpow_trg_src[i], eval_mcdpowstd_trg_src[i], eval_mcd_trg_src[i], eval_mcdstd_trg_src[i],
                eval_lat_dist_trgsrc1[i], eval_lat_dist_trgsrc2[i],
                # source-speaker inputs
                eval_gv_src_src[i], eval_mcdpow_src_src[i], eval_mcd_src_src[i],
                eval_gv_src_trg_src[i], eval_mcdpow_src_trg_src[i], eval_mcd_src_trg_src[i],
                eval_gv_src_trg[i], eval_mcdpow_src_trg[i], eval_mcdpowstd_src_trg[i], eval_mcd_src_trg[i], eval_mcdstd_src_trg[i],
                eval_lat_dist_srctrg1[i], eval_lat_dist_srctrg2[i],
            )
            text_log += eval_cycle_fmt % eval_cycle_values
        print("(EPOCH:%d) average evaluation loss = %s  (%.3f min., %.3f sec / batch)" % (epoch_idx + 1, text_log, np.sum(total) / 60.0, np.mean(total)))
        # Model selection: an epoch becomes the new best when the sum of mean and standard
        # deviation of both source-to-target distortion measures (first cycle only) is no
        # worse than the best sum seen so far. Ties favour the newer epoch.
        epoch_selection_score = eval_mcdpow_src_trg[0]+eval_mcdpowstd_src_trg[0]+eval_mcd_src_trg[0]+eval_mcdstd_src_trg[0]
        best_selection_score = min_eval_mcdpow_src_trg+min_eval_mcdpowstd_src_trg+min_eval_mcd_src_trg+min_eval_mcdstd_src_trg
        if epoch_selection_score <= best_selection_score:
            # Snapshot every first-cycle summary statistic of this epoch.
            (min_eval_loss_mcd_trg_trg, min_eval_loss_mcd_trg_src_trg, min_eval_loss_mcd_trg_src) = (
                eval_loss_mcd_trg_trg[0], eval_loss_mcd_trg_src_trg[0], eval_loss_mcd_trg_src[0])
            (min_eval_loss_mcd_src_src, min_eval_loss_mcd_src_trg_src, min_eval_loss_mcd_src_trg) = (
                eval_loss_mcd_src_src[0], eval_loss_mcd_src_trg_src[0], eval_loss_mcd_src_trg[0])
            (min_eval_loss_lat_src, min_eval_loss_lat_trg, min_eval_loss_lat_src_cv, min_eval_loss_lat_trg_cv) = (
                eval_loss_lat_src[0], eval_loss_lat_trg[0], eval_loss_lat_src_cv[0], eval_loss_lat_trg_cv[0])
            (min_eval_gv_trg_trg, min_eval_mcdpow_trg_trg, min_eval_mcd_trg_trg) = (
                eval_gv_trg_trg[0], eval_mcdpow_trg_trg[0], eval_mcd_trg_trg[0])
            (min_eval_gv_trg_src_trg, min_eval_mcdpow_trg_src_trg, min_eval_mcd_trg_src_trg) = (
                eval_gv_trg_src_trg[0], eval_mcdpow_trg_src_trg[0], eval_mcd_trg_src_trg[0])
            (min_eval_gv_trg_src, min_eval_mcdpow_trg_src, min_eval_mcdpowstd_trg_src, min_eval_mcd_trg_src, min_eval_mcdstd_trg_src) = (
                eval_gv_trg_src[0], eval_mcdpow_trg_src[0], eval_mcdpowstd_trg_src[0], eval_mcd_trg_src[0], eval_mcdstd_trg_src[0])
            (min_eval_lat_dist_trgsrc1, min_eval_lat_dist_trgsrc2) = (
                eval_lat_dist_trgsrc1[0], eval_lat_dist_trgsrc2[0])
            (min_eval_gv_src_src, min_eval_mcdpow_src_src, min_eval_mcd_src_src) = (
                eval_gv_src_src[0], eval_mcdpow_src_src[0], eval_mcd_src_src[0])
            (min_eval_gv_src_trg_src, min_eval_mcdpow_src_trg_src, min_eval_mcd_src_trg_src) = (
                eval_gv_src_trg_src[0], eval_mcdpow_src_trg_src[0], eval_mcd_src_trg_src[0])
            (min_eval_gv_src_trg, min_eval_mcdpow_src_trg, min_eval_mcdpowstd_src_trg, min_eval_mcd_src_trg, min_eval_mcdstd_src_trg) = (
                eval_gv_src_trg[0], eval_mcdpow_src_trg[0], eval_mcdpowstd_src_trg[0], eval_mcd_src_trg[0], eval_mcdstd_src_trg[0])
            (min_eval_lat_dist_srctrg1, min_eval_lat_dist_srctrg2) = (
                eval_lat_dist_srctrg1[0], eval_lat_dist_srctrg2[0])
            # Remember which epoch produced the snapshot.
            min_idx = epoch_idx
        # Print the statistics of the best epoch so far, in the same field order as the
        # per-cycle groups of the epoch summary (without the cycle index).
        best_epoch_values = (
            # criterion and latent losses
            min_eval_loss_mcd_trg_trg, min_eval_loss_mcd_trg_src_trg, min_eval_loss_mcd_trg_src,
            min_eval_loss_mcd_src_src, min_eval_loss_mcd_src_trg_src, min_eval_loss_mcd_src_trg,
            min_eval_loss_lat_trg, min_eval_loss_lat_trg_cv, min_eval_loss_lat_src, min_eval_loss_lat_src_cv,
            # target-speaker inputs
            min_eval_gv_trg_trg, min_eval_mcdpow_trg_trg, min_eval_mcd_trg_trg,
            min_eval_gv_trg_src_trg, min_eval_mcdpow_trg_src_trg, min_eval_mcd_trg_src_trg,
            min_eval_gv_trg_src, min_eval_mcdpow_trg_src, min_eval_mcdpowstd_trg_src, min_eval_mcd_trg_src, min_eval_mcdstd_trg_src,
            min_eval_lat_dist_trgsrc1, min_eval_lat_dist_trgsrc2,
            # source-speaker inputs
            min_eval_gv_src_src, min_eval_mcdpow_src_src, min_eval_mcd_src_src,
            min_eval_gv_src_trg_src, min_eval_mcdpow_src_trg_src, min_eval_mcd_src_trg_src,
            min_eval_gv_src_trg, min_eval_mcdpow_src_trg, min_eval_mcdpowstd_src_trg, min_eval_mcd_src_trg, min_eval_mcdstd_src_trg,
            min_eval_lat_dist_srctrg1, min_eval_lat_dist_srctrg2,
        )
        text_log = "%.3f %.3f %.3f %.3f %.3f %.3f ; %.3f %.3f %.3f %.3f ; %.6f %.3f dB %.6f dB , %.3f %.3f dB %.3f dB , %.6f %.3f dB (+- %.3f) %.6f dB (+- %.3f) , %.6f %.6f ; %.6f %.3f dB %.6f dB , %.3f %.3f dB %.3f dB , %.6f %.3f dB (+- %.3f) %.6f dB (+- %.3f) , %.6f %.6f ;; " % best_epoch_values
        print("min_eval_acc= %s min_idx=%d" % (text_log, min_idx+1))
        # Start the next epoch with empty accumulators: `loss` is a flat list, every other
        # accumulator is a list holding one empty list per cycle.
        loss = []
        loss_mcd_trg_trg, loss_mcd_trg_src_trg, loss_mcd_trg_src = [], [], []
        loss_mcd_src_src, loss_mcd_src_trg_src, loss_mcd_src_trg = [], [], []
        loss_lat_src, loss_lat_trg, loss_lat_src_cv, loss_lat_trg_cv = [], [], [], []
        gv_trg_trg, mcdpow_trg_trg, mcd_trg_trg = [], [], []
        gv_trg_src_trg, mcdpow_trg_src_trg, mcd_trg_src_trg = [], [], []
        gv_trg_src, mcdpow_trg_src, mcd_trg_src = [], [], []
        lat_dist_trgsrc1, lat_dist_trgsrc2 = [], []
        gv_src_src, mcdpow_src_src, mcd_src_src = [], [], []
        gv_src_trg_src, mcdpow_src_trg_src, mcd_src_trg_src = [], [], []
        gv_src_trg, mcdpow_src_trg, mcd_src_trg = [], [], []
        lat_dist_srctrg1, lat_dist_srctrg2 = [], []
        per_cycle_accumulators = (
            loss_mcd_trg_trg, loss_mcd_trg_src_trg, loss_mcd_trg_src,
            loss_mcd_src_src, loss_mcd_src_trg_src, loss_mcd_src_trg,
            loss_lat_src, loss_lat_trg, loss_lat_src_cv, loss_lat_trg_cv,
            gv_trg_trg, mcdpow_trg_trg, mcd_trg_trg,
            gv_trg_src_trg, mcdpow_trg_src_trg, mcd_trg_src_trg,
            gv_trg_src, mcdpow_trg_src, mcd_trg_src,
            lat_dist_trgsrc1, lat_dist_trgsrc2,
            gv_src_src, mcdpow_src_src, mcd_src_src,
            gv_src_trg_src, mcdpow_src_trg_src, mcd_src_trg_src,
            gv_src_trg, mcdpow_src_trg, mcd_src_trg,
            lat_dist_srctrg1, lat_dist_srctrg2,
        )
        # One fresh slot per training cycle in each accumulator.
        for i in range(n_cyc):
            for cycle_accumulator in per_cycle_accumulators:
                cycle_accumulator.append([])
        # Timing list and iteration counter restart as well; advance the epoch counter.
        total = []
        iter_count = 0
        epoch_idx += 1

        # Put back the NumPy and PyTorch RNG states held in numpy_random_state /
        # torch_random_state.
        np.random.set_state(numpy_random_state)
        torch.set_rng_state(torch_random_state)

        # Switch both networks to training mode and make all of their parameters trainable.
        for trainable_net in (model_encoder, model_decoder):
            trainable_net.train()
            for param in trainable_net.parameters():
                param.requires_grad = True
        # ...then freeze the encoder's scale_in and the decoder's scale_out again.
        for frozen_layer in (model_encoder.scale_in, model_decoder.scale_out):
            for param in frozen_layer.parameters():
                param.requires_grad = False


        # Announce the next epoch and restart its timer, unless training is finished.
        if epoch_idx < epoch_count:
            start = time.time()
            print("==%d EPOCH==" % (epoch_idx+1))
            print("Training data")

    # feedforward and backpropagate current batch
    if epoch_idx < epoch_count:
        print("######### %d iteration [%d] ##########" % (iter_idx+1, epoch_idx+1))

        if batch_size > 0: # frame-length mini-batch
            # Debug dump: for every utterance in the mini-batch print a column legend and
            # then the file pair, frame lengths, chunk bounds and spcidx bookkeeping.
            for i in range(n_batch_utt):
                print(f"674:ソース話者　ターゲット話者　ソのフレーム長　タのフレーム長　整後のソのフレーム長　整後のタのフレーム長　ミニバッチの開始F 終了F　有声Fの開始インデックス　終了インデックス　有声Fの開始インデックス内のフレーム番号　終了インデックス内のフレーム番号")
                # Values stored in spcidx_src at the start / end positions for this utterance.
                spc_first_value = spcidx_src[i,spcidx_src_s_idx[i]].item()
                spc_last_value = spcidx_src[i,spcidx_src_e_idx[i]].item()
                utt_bookkeeping = (
                    featfile_src[i], featfile_src_trg[i],
                    flens_src[i], flens_src_trg[i], flens_spc_src[i], flens_spc_src_trg[i],
                    src_idx_s, src_idx_e,
                    spcidx_src_s_idx[i], spcidx_src_e_idx[i],
                    spc_first_value, spc_last_value,
                )
                print("%s %s %d %d %d %d %d %d %d %d %d %d" % utt_bookkeeping)
            print(f"1255:prev_featfile_src:{prev_featfile_src}")
            # Forward pass over frames src_idx_s..src_idx_e of the current mini-batch, for every cycle.
            # Two cases:
            #   * continuation: the chunk does not start at frame 0, the feature files are the same as
            #     in the previous iteration and this is not the first iteration of the epoch. The
            #     y_in_*/h_in_* values returned by the previous chunk are fed back in, detached so no
            #     gradient flows into the previous chunk's graph.
            #   * first chunk: the networks start from the initial inputs y_in_pp/y_in_src/y_in_trg
            #     (or their *_mod variants) and no h_in is passed.
            # Per cycle the order is: encode -> decode with the source code (reconstruction) and with
            # the target code (conversion) -> re-encode the conversion -> decode it back with the
            # source code (cyclic reconstruction). Each decoder input is a freshly sampled latent.
            if src_idx_s > 0 and prev_featfile_src == featfile_src and iter_count > 0:
                print(f"680:80フレーム以降のバッチの順伝搬")
                for i in range(n_cyc):
                    if i > 0:
                        # Later cycles: the encoder sees the first `stdim` feature columns of the input
                        # joined with the previous cycle's cyclic reconstruction.
                        batch_lat_src[i], y_in_pp_src[i], h_in_pp_src[i] = model_encoder(torch.cat((batch_src[:,src_idx_s:src_idx_e+1,:stdim], batch_trj_src_trg_src[i-1]),2), Variable(y_in_pp_src[i].data).detach(), h_in=Variable(h_in_pp_src[i].data).detach(), do=True, clamp_vae=True, lat_dim=lat_dim)
                        batch_trj_src_src[i], y_in_src_src[i], h_in_src_src[i] = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src[i], lat_dim=lat_dim, training=True)),2), Variable(y_in_src_src[i].data).detach(), h_in=Variable(h_in_src_src[i].data).detach(), do=True)
                        batch_trj_src_trg[i], y_in_src_trg[i], h_in_src_trg[i] = model_decoder(torch.cat((batch_src_trg_code, sampling_vae_batch(batch_lat_src[i], lat_dim=lat_dim, training=True)),2), Variable(y_in_src_trg[i].data).detach(), h_in=Variable(h_in_src_trg[i].data).detach(), do=True)
                        batch_lat_src_trg[i], y_in_pp_src_trg[i], h_in_pp_src_trg[i] = model_encoder(torch.cat((batch_cv_src[:,src_idx_s:src_idx_e+1], batch_trj_src_trg[i]),2), Variable(y_in_pp_src_trg[i].data).detach(), h_in=Variable(h_in_pp_src_trg[i].data).detach(), do=True, clamp_vae=True, lat_dim=lat_dim)
                        batch_trj_src_trg_src[i], y_in_src_trg_src[i], h_in_src_trg_src[i] = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src_trg[i], lat_dim=lat_dim, training=True)),2), Variable(y_in_src_trg_src[i].data).detach(), h_in=Variable(h_in_src_trg_src[i].data).detach(), do=True)
                    else:
                        # First cycle: the encoder sees the input features of the chunk as they are.
                        batch_lat_src[0], y_in_pp_src[0], h_in_pp_src[0] = model_encoder(batch_src[:,src_idx_s:src_idx_e+1], Variable(y_in_pp_src[0].data).detach(), h_in=Variable(h_in_pp_src[0].data).detach(), do=True, clamp_vae=True, lat_dim=lat_dim)
                        batch_trj_src_src[0], y_in_src_src[0], h_in_src_src[0] = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src[0], lat_dim=lat_dim, training=True)),2), Variable(y_in_src_src[0].data).detach(), h_in=Variable(h_in_src_src[0].data).detach(), do=True)
                        batch_trj_src_trg[0], y_in_src_trg[0], h_in_src_trg[0] = model_decoder(torch.cat((batch_src_trg_code, sampling_vae_batch(batch_lat_src[0], lat_dim=lat_dim, training=True)),2), Variable(y_in_src_trg[0].data).detach(), h_in=Variable(h_in_src_trg[0].data).detach(), do=True)
                        batch_lat_src_trg[0], y_in_pp_src_trg[0], h_in_pp_src_trg[0] = model_encoder(torch.cat((batch_cv_src[:,src_idx_s:src_idx_e+1], batch_trj_src_trg[0]),2), Variable(y_in_pp_src_trg[0].data).detach(), h_in=Variable(h_in_pp_src_trg[0].data).detach(), do=True, clamp_vae=True, lat_dim=lat_dim)
                        batch_trj_src_trg_src[0], y_in_src_trg_src[0], h_in_src_trg_src[0] = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src_trg[0], lat_dim=lat_dim, training=True)),2), Variable(y_in_src_trg_src[0].data).detach(), h_in=Variable(h_in_src_trg_src[0].data).detach(), do=True)
                        # Extend the whole-utterance buffers along axis 1 with this chunk's first-cycle
                        # outputs. The NumPy copies drop the first column of the last axis
                        # (presumably the power coefficient -- TODO confirm).
                        tmp_src_src = np.concatenate((tmp_src_src, batch_trj_src_src[0][:,:,1:].cpu().data.numpy()), axis=1)
                        tmp_src_trg = np.concatenate((tmp_src_trg, batch_trj_src_trg[0][:,:,1:].cpu().data.numpy()), axis=1)
                        tmp_src_trg_src = np.concatenate((tmp_src_trg_src, batch_trj_src_trg_src[0][:,:,1:].cpu().data.numpy()), axis=1)
                        trj_src_trg = torch.cat((trj_src_trg, batch_trj_src_trg[0]),1)
                        trj_lat_src = torch.cat((trj_lat_src, batch_lat_src[0]),1)
            else:
                print(f"700:0フレーム目のバッチの順伝搬")
                # Choose the initial inputs by comparing the number of utterances in this mini-batch
                # with batch_size_utt (presumably the *_mod variants are sized for a smaller
                # mini-batch -- verify where they are created).
                if n_batch_utt == batch_size_utt:
                    print(f"1279:batch_size_utt={batch_size_utt}")
                    y_in_pp_ = y_in_pp
                    y_in_src_ = y_in_src
                    y_in_trg_ = y_in_trg
                else:
                    y_in_pp_ = y_in_pp_mod
                    y_in_src_ = y_in_src_mod
                    y_in_trg_ = y_in_trg_mod
                for i in range(n_cyc):
                    if i > 0:
                        # Same wiring as in the continuation case, but starting from the initial
                        # inputs and without an h_in argument.
                        batch_lat_src[i], y_in_pp_src[i], h_in_pp_src[i] = model_encoder(torch.cat((batch_src[:,src_idx_s:src_idx_e+1,:stdim], batch_trj_src_trg_src[i-1]),2), y_in_pp_, do=True, clamp_vae=True, lat_dim=lat_dim)
                        batch_trj_src_src[i], y_in_src_src[i], h_in_src_src[i] = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src[i], lat_dim=lat_dim, training=True)),2), y_in_src_, do=True)
                        batch_trj_src_trg[i], y_in_src_trg[i], h_in_src_trg[i] = model_decoder(torch.cat((batch_src_trg_code, sampling_vae_batch(batch_lat_src[i], lat_dim=lat_dim, training=True)),2), y_in_trg_, do=True)
                        batch_lat_src_trg[i], y_in_pp_src_trg[i], h_in_pp_src_trg[i] = model_encoder(torch.cat((batch_cv_src[:,src_idx_s:src_idx_e+1], batch_trj_src_trg[i]),2), y_in_pp_, do=True, clamp_vae=True, lat_dim=lat_dim)
                        batch_trj_src_trg_src[i], y_in_src_trg_src[i], h_in_src_trg_src[i] = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src_trg[i], lat_dim=lat_dim, training=True)),2), y_in_src_, do=True)
                    else:
                        batch_lat_src[0], y_in_pp_src[0], h_in_pp_src[0] = model_encoder(batch_src[:,src_idx_s:src_idx_e+1], y_in_pp_, do=True, clamp_vae=True, lat_dim=lat_dim)
                        batch_trj_src_src[0], y_in_src_src[0], h_in_src_src[0] = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src[0], lat_dim=lat_dim, training=True)),2), y_in_src_, do=True)
                        batch_trj_src_trg[0], y_in_src_trg[0], h_in_src_trg[0] = model_decoder(torch.cat((batch_src_trg_code, sampling_vae_batch(batch_lat_src[0], lat_dim=lat_dim, training=True)),2), y_in_trg_, do=True)
                        batch_lat_src_trg[0], y_in_pp_src_trg[0], h_in_pp_src_trg[0] = model_encoder(torch.cat((batch_cv_src[:,src_idx_s:src_idx_e+1], batch_trj_src_trg[0]),2), y_in_pp_, do=True, clamp_vae=True, lat_dim=lat_dim)
                        batch_trj_src_trg_src[0], y_in_src_trg_src[0], h_in_src_trg_src[0] = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src_trg[0], lat_dim=lat_dim, training=True)),2), y_in_src_, do=True)
                        # Before the buffers are overwritten, turn the buffered outputs of the previous
                        # mini-batch into per-dimension variances over their first prev_flens_src[j]
                        # frames. The name of the directory holding the feature file decides whether
                        # they are filed under the src_* or the trg_* lists.
                        if iter_count > 0:
                            for j in range(n_batch_utt):
                                if os.path.basename(os.path.dirname(prev_featfile_src[j])) == spk_src:
                                    gv_src_src[i].append(np.var(tmp_src_src[j,:prev_flens_src[j]], axis=0))
                                    gv_src_trg[i].append(np.var(tmp_src_trg[j,:prev_flens_src[j]], axis=0))
                                    gv_src_trg_src[i].append(np.var(tmp_src_trg_src[j,:prev_flens_src[j]], axis=0))
                                else:
                                    gv_trg_trg[i].append(np.var(tmp_src_src[j,:prev_flens_src[j]], axis=0))
                                    gv_trg_src[i].append(np.var(tmp_src_trg[j,:prev_flens_src[j]], axis=0))
                                    gv_trg_src_trg[i].append(np.var(tmp_src_trg_src[j,:prev_flens_src[j]], axis=0))
                        # Restart the buffers with the first chunk of the new mini-batch.
                        tmp_src_src = batch_trj_src_src[0][:,:,1:].cpu().data.numpy()
                        tmp_src_trg = batch_trj_src_trg[0][:,:,1:].cpu().data.numpy()
                        tmp_src_trg_src = batch_trj_src_trg_src[0][:,:,1:].cpu().data.numpy()
                        trj_src_trg = batch_trj_src_trg[0]
                        trj_lat_src = batch_lat_src[0]
            # Remembered so the next iteration can tell whether it continues the same files.
            prev_featfile_src = featfile_src

            if len(select_utt_idx) > 0:
                print(f"1310")
                # Give each cycle an empty list in the batch-level MCD containers.
                for i in range(n_cyc):
                    for batch_stat in (batch_mcdpow_src_src, batch_mcd_src_src, batch_mcdpow_src_trg_src, batch_mcd_src_trg_src):
                        batch_stat[i] = []

                # Training losses per cycle over the selected utterances.
                # For each cycle i and selected utterance j the losses are computed on the first
                # flen_acc[j] frames of the chunk, logged into the epoch-level lists of the
                # speaker the utterance belongs to, and stacked into batch_loss_*[i]. The sums of
                # the stacked losses make up batch_loss (the value that is back-propagated);
                # afterwards the stacks are reduced to their means.
                # The conversion loss batch_loss_mcd_src_trg is tracked but never added to batch_loss.
                for i in range(n_cyc):
                    for k, j in enumerate(select_utt_idx):
                        print(f"750 :criterion_mcd")
                        # Exclusive end of this utterance's frames in the input features for the current chunk.
                        src_idx_e_ = src_idx_s + flen_acc[j]
                        _, tmp_batch_loss_mcd_src_src, _ = criterion_mcd(batch_trj_src_src[i][j,:flen_acc[j]], batch_src[j,src_idx_s:src_idx_e_,stdim:], L2=False, GV=False)
                        _, tmp_batch_loss_mcd_src_trg_src, _ = criterion_mcd(batch_trj_src_trg_src[i][j,:flen_acc[j]], batch_src[j,src_idx_s:src_idx_e_,stdim:], L2=False, GV=False)
                        _, tmp_batch_loss_mcd_src_trg, _ = criterion_mcd(batch_trj_src_trg[i][j,:flen_acc[j]], batch_src[j,src_idx_s:src_idx_e_,stdim:], L2=False, GV=False)

                        tmp_batch_loss_lat_src = loss_vae(batch_lat_src[i][j,:flen_acc[j]], lat_dim=lat_dim)
                        tmp_batch_loss_lat_src_cv = loss_vae(batch_lat_src_trg[i][j,:flen_acc[j]], lat_dim=lat_dim)

                        if os.path.basename(os.path.dirname(featfile_src[j])) == spk_src:
                            # The file sits in the source speaker's directory: log under src_*.
                            loss_mcd_src_src[i].append(tmp_batch_loss_mcd_src_src.item())
                            loss_mcd_src_trg_src[i].append(tmp_batch_loss_mcd_src_trg_src.item())
                            loss_mcd_src_trg[i].append(tmp_batch_loss_mcd_src_trg.item())

                            loss_lat_src_cv[i].append(tmp_batch_loss_lat_src_cv.item())
                            loss_lat_src[i].append(tmp_batch_loss_lat_src.item())
                        else:
                            # Any other directory is treated as the target speaker: log under trg_*.
                            loss_mcd_trg_trg[i].append(tmp_batch_loss_mcd_src_src.item())
                            loss_mcd_trg_src_trg[i].append(tmp_batch_loss_mcd_src_trg_src.item())
                            loss_mcd_trg_src[i].append(tmp_batch_loss_mcd_src_trg.item())

                            loss_lat_trg_cv[i].append(tmp_batch_loss_lat_src_cv.item())
                            loss_lat_trg[i].append(tmp_batch_loss_lat_src.item())

                        if k > 0:
                            batch_loss_mcd_src_src[i] = torch.cat((batch_loss_mcd_src_src[i], tmp_batch_loss_mcd_src_src.unsqueeze(0)))
                            batch_loss_mcd_src_trg_src[i] = torch.cat((batch_loss_mcd_src_trg_src[i], tmp_batch_loss_mcd_src_trg_src.unsqueeze(0)))
                            batch_loss_mcd_src_trg[i] = torch.cat((batch_loss_mcd_src_trg[i], tmp_batch_loss_mcd_src_trg.unsqueeze(0)))
                            batch_loss_lat_src[i] = torch.cat((batch_loss_lat_src[i], tmp_batch_loss_lat_src.unsqueeze(0)))
                            # Fix: extend the converted-latent accumulator itself. The original
                            # concatenated onto batch_loss_lat_src[i], which threw away the
                            # converted-latent losses of earlier utterances and counted the
                            # source-latent losses a second time in batch_loss.
                            batch_loss_lat_src_cv[i] = torch.cat((batch_loss_lat_src_cv[i], tmp_batch_loss_lat_src_cv.unsqueeze(0)))
                        else:
                            # First selected utterance: start every accumulator.
                            batch_loss_mcd_src_src[i] = tmp_batch_loss_mcd_src_src.unsqueeze(0)
                            batch_loss_mcd_src_trg_src[i] = tmp_batch_loss_mcd_src_trg_src.unsqueeze(0)
                            batch_loss_mcd_src_trg[i] = tmp_batch_loss_mcd_src_trg.unsqueeze(0)
                            batch_loss_lat_src[i] = tmp_batch_loss_lat_src.unsqueeze(0)
                            batch_loss_lat_src_cv[i] = tmp_batch_loss_lat_src_cv.unsqueeze(0)

                    # Add this cycle's summed losses to the optimisation target. With half_cyc
                    # only the reconstruction and source-latent terms are used.
                    if i > 0: 
                        if not half_cyc:
                            batch_loss += batch_loss_mcd_src_src[i].sum() + batch_loss_mcd_src_trg_src[i].sum() + batch_loss_lat_src[i].sum() + batch_loss_lat_src_cv[i].sum()
                            print(f"792:batch_loss累積和:{batch_loss}")
                        else:
                            batch_loss += batch_loss_mcd_src_src[i].sum() + batch_loss_lat_src[i].sum()
                    else:
                        if not half_cyc:
                            batch_loss = batch_loss_mcd_src_src[0].sum() + batch_loss_mcd_src_trg_src[0].sum() + batch_loss_lat_src[0].sum() + batch_loss_lat_src_cv[0].sum()
                            print(f"798:batch_loss:{batch_loss}")
                        else:
                            batch_loss = batch_loss_mcd_src_src[0].sum() + batch_loss_lat_src[0].sum()

                    # Reduce the stacks to their means only after the sums have been used above.
                    batch_loss_mcd_src_src[i] = torch.mean(batch_loss_mcd_src_src[i])
                    batch_loss_mcd_src_trg_src[i] = torch.mean(batch_loss_mcd_src_trg_src[i])
                    batch_loss_mcd_src_trg[i] = torch.mean(batch_loss_mcd_src_trg[i])
                    batch_loss_lat_src[i] = torch.mean(batch_loss_lat_src[i])
                    batch_loss_lat_src_cv[i] = torch.mean(batch_loss_lat_src_cv[i])
                print(f"807: 勾配更新")
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()
                #print(f"811:gv_trg_trg:{gv_trg_trg}")
                loss.append(batch_loss.item())
                #print(f"損失（loss)：{loss}")

                print_mcd_flag = False
                for i in range(n_cyc):
                    batch_mcdpow_src_src[i] = []
                    batch_mcd_src_src[i] = []
                    batch_mcdpow_src_trg_src[i] = []
                    batch_mcd_src_trg_src[i] = []

                for j in select_utt_idx:
                    if spcidx_src_s_idx[j] >= 0:
                        print_mcd_flag = True
                        for i in range(n_cyc):
                            tmp_batch_mcdpow_src_src, _ = dtw.calc_mcd(np.array(torch.index_select(batch_src[j],0,spcidx_src[j,spcidx_src_s_idx[j]:spcidx_src_e_idx[j]+1])[:,stdim:].cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_trj_src_src[i][j],0,spcidx_src[j,spcidx_src_s_idx[j]:spcidx_src_e_idx[j]+1]-src_idx_s).cpu().data.numpy(), dtype=np.float64))
                            tmp_batch_mcd_src_src, _ = dtw.calc_mcd(np.array(torch.index_select(batch_src[j],0,spcidx_src[j,spcidx_src_s_idx[j]:spcidx_src_e_idx[j]+1])[:,stdim_:].cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_trj_src_src[i][j],0,spcidx_src[j,spcidx_src_s_idx[j]:spcidx_src_e_idx[j]+1]-src_idx_s)[:,1:].cpu().data.numpy(), dtype=np.float64))

                            tmp_batch_mcdpow_src_trg_src, _ = dtw.calc_mcd(np.array(torch.index_select(batch_src[j],0,spcidx_src[j,spcidx_src_s_idx[j]:spcidx_src_e_idx[j]+1])[:,stdim:].cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_trj_src_trg_src[i][j],0,spcidx_src[j,spcidx_src_s_idx[j]:spcidx_src_e_idx[j]+1]-src_idx_s).cpu().data.numpy(), dtype=np.float64))
                            tmp_batch_mcd_src_trg_src, _ = dtw.calc_mcd(np.array(torch.index_select(batch_src[j],0,spcidx_src[j,spcidx_src_s_idx[j]:spcidx_src_e_idx[j]+1])[:,stdim_:].cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_trj_src_trg_src[i][j],0,spcidx_src[j,spcidx_src_s_idx[j]:spcidx_src_e_idx[j]+1]-src_idx_s)[:,1:].cpu().data.numpy(), dtype=np.float64))

                            batch_mcdpow_src_src[i].append(tmp_batch_mcdpow_src_src)
                            batch_mcd_src_src[i].append(tmp_batch_mcd_src_src)

                            batch_mcdpow_src_trg_src[i].append(tmp_batch_mcdpow_src_trg_src)
                            batch_mcd_src_trg_src[i].append(tmp_batch_mcd_src_trg_src)

                            if os.path.basename(os.path.dirname(featfile_src[j])) == spk_src:
                                print(f"839: mcdpow_src_srcをappend")
                                mcdpow_src_src[i].append(tmp_batch_mcdpow_src_src)
                                mcd_src_src[i].append(tmp_batch_mcd_src_src)

                                mcdpow_src_trg_src[i].append(tmp_batch_mcdpow_src_trg_src)
                                mcd_src_trg_src[i].append(tmp_batch_mcd_src_trg_src)
                            else:
                                print(f"846: mcdpow_trg_trgをappend")
                                mcdpow_trg_trg[i].append(tmp_batch_mcdpow_src_src)
                                mcd_trg_trg[i].append(tmp_batch_mcd_src_src)

                                mcdpow_trg_src_trg[i].append(tmp_batch_mcdpow_src_trg_src)
                                mcd_trg_src_trg[i].append(tmp_batch_mcd_src_trg_src)

                text_log = "%.3f ;; " % batch_loss.item()
                if print_mcd_flag:
                    for i in range(n_cyc):
                        text_log += "[%d] %.3f %.3f %.3f ; %.3f %.3f ; %.3f dB %.3f dB , %.3f dB %.3f dB ;; " % (
                                     i+1, batch_loss_mcd_src_src[i].item(), batch_loss_mcd_src_trg_src[i].item(), batch_loss_mcd_src_trg[i].item(),
                                         batch_loss_lat_src[i].item(), batch_loss_lat_src_cv[i].item(),
                                             np.mean(batch_mcdpow_src_src[i]), np.mean(batch_mcd_src_src[i]), np.mean(batch_mcdpow_src_trg_src[i]), np.mean(batch_mcd_src_trg_src[i]))
                else:
                    for i in range(n_cyc):
                        text_log += "[%d] %.3f %.3f %.3f ; %.3f %.3f ;; " % (
                                     i+1, batch_loss_mcd_src_src[i].item(), batch_loss_mcd_src_trg_src[i].item(), batch_loss_mcd_src_trg[i].item(),
                                         batch_loss_lat_src[i].item(), batch_loss_lat_src_cv[i].item())
                print("865:batch loss [%d] = %s  (%.3f sec)" % (c_idx_src+1, text_log, time.time() - start))
                #print(f"batch_loss_mcd_src_src[i]:{batch_loss_mcd_src_src[0]},")
                iter_idx += 1
                iter_count += 1
            total.append(time.time() - start)
            print(f"870: ループ終端")

        else: # utterance-length mini-batch
            for i in range(n_batch_utt):
                print("%s %s %d %d %d %d" % (featfile_src[i], featfile_src_trg[i], flens_src[i], flens_src_trg[i], flens_spc_src[i], flens_spc_src_trg[i]))

            if n_batch_utt == batch_size_utt:
                y_in_pp_ = y_in_pp
                y_in_trg_ = y_in_trg
                y_in_src_ = y_in_src
            else:
                y_in_pp_ = y_in_pp_mod
                y_in_trg_ = y_in_trg_mod
                y_in_src_ = y_in_src_mod

            with torch.no_grad():
                trj_lat_srctrg, _, _ = model_encoder(batch_src_trg, y_in_pp_, clamp_vae=True, lat_dim=lat_dim)
            for i in range(n_cyc):
                batch_mcdpow_src_src[i] = []
                batch_mcd_src_src[i] = []
                batch_mcdpow_src_trg_src[i] = []
                batch_mcd_src_trg_src[i] = []

                if i > 0:
                    batch_lat_src[i], _, _ = model_encoder(torch.cat((batch_src[:,:,:stdim], batch_trj_src_trg_src[i-1]),2), y_in_pp_, clamp_vae=True, lat_dim=lat_dim, do=True)
                    batch_trj_src_src[i], _, _ = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src[i], lat_dim=lat_dim)),2), y_in_src_, do=True)
                    batch_trj_src_trg[i], _, _ = model_decoder(torch.cat((batch_src_trg_code, sampling_vae_batch(batch_lat_src[i], lat_dim=lat_dim)),2), y_in_trg_, do=True)

                    batch_lat_src_trg[i], _, _ = model_encoder(torch.cat((batch_cv_src, batch_trj_src_trg[i]),2), y_in_pp_, clamp_vae=True, lat_dim=lat_dim, do=True)
                    batch_trj_src_trg_src[i], _, _ = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src_trg[i], lat_dim=lat_dim)),2), y_in_src_, do=True)
                else:
                    batch_lat_src[0], _, _ = model_encoder(batch_src, y_in_pp_, clamp_vae=True, lat_dim=lat_dim, do=True)
                    batch_trj_src_src[0], _, _ = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src[0], lat_dim=lat_dim)),2), y_in_src_, do=True)
                    batch_trj_src_trg[0], _, _ = model_decoder(torch.cat((batch_src_trg_code, sampling_vae_batch(batch_lat_src[0], lat_dim=lat_dim)),2), y_in_trg_, do=True)

                    batch_lat_src_trg[0], _, _ = model_encoder(torch.cat((batch_cv_src, batch_trj_src_trg[0]),2), y_in_pp_, clamp_vae=True, lat_dim=lat_dim, do=True)
                    batch_trj_src_trg_src[0], _, _ = model_decoder(torch.cat((batch_src_src_code, sampling_vae_batch(batch_lat_src_trg[0], lat_dim=lat_dim)),2), y_in_src_, do=True)

                    batch_mcdpow_src_trg[i] = []
                    batch_mcd_src_trg[i] = []
                    batch_lat_dist_srctrg1[i] = []
                    batch_lat_dist_srctrg2[i] = []
                    for j in range(n_batch_utt):
                        if os.path.basename(os.path.dirname(featfile_src[j])) == spk_src:
                            gv_src_src[i].append(np.var(batch_trj_src_src[i][j,:flens_src[j],1:].cpu().data.numpy(), axis=0))
                            gv_src_trg[i].append(np.var(batch_trj_src_trg[i][j,:flens_src[j],1:].cpu().data.numpy(), axis=0))
                            gv_src_trg_src[i].append(np.var(batch_trj_src_trg_src[i][j,:flens_src[j],1:].cpu().data.numpy(), axis=0))
                        else:
                            gv_trg_trg[i].append(np.var(batch_trj_src_src[i][j,:flens_src[j],1:].cpu().data.numpy(), axis=0))
                            gv_trg_src[i].append(np.var(batch_trj_src_trg[i][j,:flens_src[j],1:].cpu().data.numpy(), axis=0))
                            gv_trg_src_trg[i].append(np.var(batch_trj_src_trg_src[i][j,:flens_src[j],1:].cpu().data.numpy(), axis=0))

                        trj_lat_srctrg_ = np.array(torch.index_select(trj_lat_srctrg[j],0,spcidx_src_trg[j,:flens_spc_src_trg[j]]).cpu().data.numpy(), dtype=np.float64)
                        trj_lat_src_ = np.array(torch.index_select(batch_lat_src[0][j],0,spcidx_src[j,:flens_spc_src[j]]).cpu().data.numpy(), dtype=np.float64)
                        aligned_lat_srctrg1, _, _, _ = dtw.dtw_org_to_trg(trj_lat_src_, trj_lat_srctrg_)
                        tmp_batch_lat_dist_srctrg1 = np.mean(np.sqrt(np.mean((aligned_lat_srctrg1-trj_lat_srctrg_)**2, axis=0)))
                        _, _, tmp_batch_lat_cdist_srctrg1, _ = dtw.dtw_org_to_trg(trj_lat_srctrg_, trj_lat_src_, mcd=0)
                        aligned_lat_srctrg2, _, _, _ = dtw.dtw_org_to_trg(trj_lat_srctrg_, trj_lat_src_)
                        tmp_batch_lat_dist_srctrg2 = np.mean(np.sqrt(np.mean((aligned_lat_srctrg2-trj_lat_src_)**2, axis=0)))
                        _, _, tmp_batch_lat_cdist_srctrg2, _ = dtw.dtw_org_to_trg(trj_lat_src_, trj_lat_srctrg_, mcd=0)

                        tmp_batch_lat_dist_srctrg1 = (tmp_batch_lat_dist_srctrg1+tmp_batch_lat_dist_srctrg2)/2
                        tmp_batch_lat_dist_srctrg2 = (tmp_batch_lat_cdist_srctrg1+tmp_batch_lat_cdist_srctrg2)/2

                        batch_lat_dist_srctrg1[0].append(tmp_batch_lat_dist_srctrg1)
                        batch_lat_dist_srctrg2[0].append(tmp_batch_lat_dist_srctrg2)

                        _, _, tmp_batch_mcdpow_src_trg, _ = dtw.dtw_org_to_trg(np.array(torch.index_select(batch_trj_src_trg[i][j],0,spcidx_src[j,:flens_spc_src[j]]).cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_src_trg[j,:,stdim:],0,spcidx_src_trg[j,:flens_spc_src_trg[j]]).cpu().data.numpy(), dtype=np.float64))
                        _, _, tmp_batch_mcd_src_trg, _ = dtw.dtw_org_to_trg(np.array(torch.index_select(batch_trj_src_trg[i][j,:,1:],0,spcidx_src[j,:flens_spc_src[j]]).cpu().data.numpy(), dtype=np.float64), np.array(torch.index_select(batch_src_trg[j,:,stdim_:],0,spcidx_src_trg[j,:flens_spc_src_trg[j]]).cpu().data.numpy(), dtype=np.float64))

                        batch_mcdpow_src_trg[0].append(tmp_batch_mcdpow_src_trg)
                        batch_mcd_src_trg[0].append(tmp_batch_mcd_src_trg)

                        text_log = "%s %s = %.3f dB %.3f dB , %.3f %.3f" % (
                                featfile_src[j], featfile_src_trg[j], tmp_batch_mcdpow_src_trg, tmp_batch_mcd_src_trg, tmp_batch_lat_dist_srctrg1, tmp_batch_lat_dist_srctrg2)
                        if os.path.basename(os.path.dirname(featfile_src[j])) == spk_src:
                            mcdpow_src_trg[i].append(tmp_batch_mcdpow_src_trg)
                            mcd_src_trg[i].append(tmp_batch_mcd_src_trg)
                            lat_dist_srctrg1[0].append(tmp_batch_lat_dist_srctrg1)
                            lat_dist_srctrg2[0].append(tmp_batch_lat_dist_srctrg2)
                            print("batch srctrg loss %s " % (text_log))
                        else:
                            mcdpow_trg_src[i].append(tmp_batch_mcdpow_src_trg)
                            mcd_trg_src[i].append(tmp_batch_mcd_src_trg)
                            lat_dist_trgsrc1[0].append(tmp_batch_lat_dist_srctrg1)
                            lat_dist_trgsrc2[0].append(tmp_batch_lat_dist_srctrg2)
                            print("batch trgsrc loss %s " % (text_log))

                    batch_mcdpow_src_trg[i] = np.mean(batch_mcdpow_src_trg[i])
                    batch_mcd_src_trg[i] = np.mean(batch_mcd_src_trg[i])
                    batch_lat_dist_srctrg1[i] = np.mean(batch_lat_dist_srctrg1[i])
                    batch_lat_dist_srctrg2[i] = np.mean(batch_lat_dist_srctrg2[i])

                for j in range(n_batch_utt):
                    _, tmp_batch_loss_mcd_src_src, _ = criterion_mcd(batch_trj_src_src[i][j,:flens_src[j]], batch_src[j,:flens_src[j],stdim:], L2=False, GV=False)
                    _, tmp_batch_loss_mcd_src_trg, _ = criterion_mcd(batch_trj_src_trg[i][j,:flens_src[j]], batch_src[j,:flens_src[j],stdim:], L2=False, GV=False)
                    _, tmp_batch_loss_mcd_src_trg_src, _ = criterion_mcd(batch_trj_src_trg_src[i][j,:flens_src[j]], batch_src[j,:flens_src[j],stdim:], L2=False, GV=False)

                    tmp_batch_loss_lat_src = loss_vae(batch_lat_src[i][j,:flens_src[j]], lat_dim=lat_dim)
                    tmp_batch_loss_lat_src_cv = loss_vae(batch_lat_src_trg[i][j,:flens_src[j]], lat_dim=lat_dim)

                    batch_src_spc_ = np.array(torch.index_select(batch_src[j,:,stdim:],0,spcidx_src[j,:flens_spc_src[j]]).cpu().data.numpy(), dtype=np.float64)
                    batch_src_spc__ = np.array(torch.index_select(batch_src[j,:,stdim_:],0,spcidx_src[j,:flens_spc_src[j]]).cpu().data.numpy(), dtype=np.float64)

                    tmp_batch_mcdpow_src_src, _ = dtw.calc_mcd(batch_src_spc_, np.array(torch.index_select(batch_trj_src_src[i][j],0,spcidx_src[j,:flens_spc_src[j]]).cpu().data.numpy(), dtype=np.float64))
                    tmp_batch_mcd_src_src, _ = dtw.calc_mcd(batch_src_spc__, np.array(torch.index_select(batch_trj_src_src[i][j,:,1:],0,spcidx_src[j,:flens_spc_src[j]]).cpu().data.numpy(), dtype=np.float64))

                    tmp_batch_mcdpow_src_trg_src, _ = dtw.calc_mcd(batch_src_spc_, np.array(torch.index_select(batch_trj_src_trg_src[i][j],0,spcidx_src[j,:flens_spc_src[j]]).cpu().data.numpy(), dtype=np.float64))
                    tmp_batch_mcd_src_trg_src, _ = dtw.calc_mcd(batch_src_spc__, np.array(torch.index_select(batch_trj_src_trg_src[i][j,:,1:],0,spcidx_src[j,:flens_spc_src[j]]).cpu().data.numpy(), dtype=np.float64))

                    if j > 0:
                        batch_loss_mcd_src_src[i] = torch.cat((batch_loss_mcd_src_src[i], tmp_batch_loss_mcd_src_src.unsqueeze(0)))
                        batch_loss_mcd_src_trg[i] = torch.cat((batch_loss_mcd_src_trg[i], tmp_batch_loss_mcd_src_trg.unsqueeze(0)))
                        batch_loss_mcd_src_trg_src[i] = torch.cat((batch_loss_mcd_src_trg_src[i], tmp_batch_loss_mcd_src_trg_src.unsqueeze(0)))

                        batch_loss_lat_src[i] = torch.cat((batch_loss_lat_src[i], tmp_batch_loss_lat_src.unsqueeze(0)))
                        batch_loss_lat_src_cv[i] = torch.cat((batch_loss_lat_src_cv[i], tmp_batch_loss_lat_src_cv.unsqueeze(0)))
                    else:
                        batch_loss_mcd_src_src[i] = tmp_batch_loss_mcd_src_src.unsqueeze(0)
                        batch_loss_mcd_src_trg[i] = tmp_batch_loss_mcd_src_trg.unsqueeze(0)
                        batch_loss_mcd_src_trg_src[i] = tmp_batch_loss_mcd_src_trg_src.unsqueeze(0)

                        batch_loss_lat_src[i] = tmp_batch_loss_lat_src.unsqueeze(0)
                        batch_loss_lat_src_cv[i] = tmp_batch_loss_lat_src_cv.unsqueeze(0)

                    if os.path.basename(os.path.dirname(featfile_src[j])) == spk_src:
                        mcdpow_src_src[i].append(tmp_batch_mcdpow_src_src)
                        mcd_src_src[i].append(tmp_batch_mcd_src_src)
                        mcdpow_src_trg_src[i].append(tmp_batch_mcdpow_src_trg_src)
                        mcd_src_trg_src[i].append(tmp_batch_mcd_src_trg_src)

                        loss_mcd_src_src[i].append(tmp_batch_loss_mcd_src_src.item())
                        loss_mcd_src_trg[i].append(tmp_batch_loss_mcd_src_trg.item())
                        loss_mcd_src_trg_src[i].append(tmp_batch_loss_mcd_src_trg_src.item())

                        loss_lat_src[i].append(tmp_batch_loss_lat_src.item())
                        loss_lat_src_cv[i].append(tmp_batch_loss_lat_src_cv.item())
                    else:
                        mcdpow_trg_trg[i].append(tmp_batch_mcdpow_src_src)
                        mcd_trg_trg[i].append(tmp_batch_mcd_src_src)
                        mcdpow_trg_src_trg[i].append(tmp_batch_mcdpow_src_trg_src)
                        mcd_trg_src_trg[i].append(tmp_batch_mcd_src_trg_src)

                        loss_mcd_trg_trg[i].append(tmp_batch_loss_mcd_src_src.item())
                        loss_mcd_trg_src[i].append(tmp_batch_loss_mcd_src_trg.item())
                        loss_mcd_trg_src_trg[i].append(tmp_batch_loss_mcd_src_trg_src.item())

                        loss_lat_trg[i].append(tmp_batch_loss_lat_src.item())
                        loss_lat_trg_cv[i].append(tmp_batch_loss_lat_src_cv.item())

                    batch_mcdpow_src_src[i].append(tmp_batch_mcdpow_src_src)
                    batch_mcd_src_src[i].append(tmp_batch_mcd_src_src)
                    batch_mcdpow_src_trg_src[i].append(tmp_batch_mcdpow_src_trg_src)
                    batch_mcd_src_trg_src[i].append(tmp_batch_mcd_src_trg_src)

                batch_mcdpow_src_src[i] = np.mean(batch_mcdpow_src_src[i])
                batch_mcd_src_src[i] = np.mean(batch_mcd_src_src[i])
                batch_mcdpow_src_trg_src[i] = np.mean(batch_mcdpow_src_trg_src[i])
                batch_mcd_src_trg_src[i] = np.mean(batch_mcd_src_trg_src[i])

                if i > 0: 
                    if not half_cyc:
                        batch_loss += batch_loss_mcd_src_src[i].sum() + batch_loss_mcd_src_trg_src[i].sum() + batch_loss_lat_src[i].sum() + batch_loss_lat_src_cv[i].sum()
                    else:
                        batch_loss += batch_loss_mcd_src_src[i].sum() + batch_loss_lat_src[i].sum()
                else:
                    if not half_cyc:
                        batch_loss = batch_loss_mcd_src_src[0].sum() + batch_loss_mcd_src_trg_src[0].sum() + batch_loss_lat_src[0].sum() + batch_loss_lat_src_cv[0].sum()
                    else:
                        batch_loss = batch_loss_mcd_src_src[0].sum() + batch_loss_lat_src[0].sum()

                batch_loss_mcd_src_src[i] = torch.mean(batch_loss_mcd_src_src[i])
                batch_loss_mcd_src_trg_src[i] = torch.mean(batch_loss_mcd_src_trg_src[i])
                batch_loss_mcd_src_trg[i] = torch.mean(batch_loss_mcd_src_trg[i])
                batch_loss_lat_src[i] = torch.mean(batch_loss_lat_src[i])
                batch_loss_lat_src_cv[i] = torch.mean(batch_loss_lat_src_cv[i])

            print(f"1609:勾配更新")
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            loss.append(batch_loss.item())

            text_log = "%.3f ;; " % batch_loss.item()
            for i in range(n_cyc):
                text_log += "[%d] %.3f %.3f %.3f ; %.3f %.3f ; %.3f dB %.3f dB , %.3f dB %.3f dB;; " % (
                             i+1, batch_loss_mcd_src_src[i].item(), batch_loss_mcd_src_trg_src[i].item(), batch_loss_mcd_src_trg[i].item(),
                                 batch_loss_lat_src[i].item(), batch_loss_lat_src_cv[i].item(), batch_mcdpow_src_src[i], batch_mcd_src_src[i],
                                     batch_mcdpow_src_trg_src[i], batch_mcd_src_trg_src[i])
            print("batch loss [%d] = %s  (%.3f sec)" % (c_idx_src+1, text_log, time.time() - start))
            iter_idx += 1
            iter_count += 1
            total.append(time.time() - start)

# Write the final checkpoint: both networks are moved to the CPU first, then
# their state dicts are stored together in a single file under expdir.
model_encoder.cpu()
model_decoder.cpu()
torch.save(
    {
        "model_encoder": model_encoder.state_dict(),
        "model_decoder": model_decoder.state_dict(),
    },
    expdir + "/checkpoint-final.pkl",
)
print("final checkpoint created.")

==1 EPOCH==
4:ループ先頭
21:1回目train_generator起動
train_generatorの処理を開始
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10075.h5']
hs_src.shape：torch.Size([1, 545, 54])
yield1を返却
開始フレーム:0
終了フレーム:79
64：c_idx_src : 0
######### 1 iteration [1] ##########
674:ソース話者　ターゲット話者　ソのフレーム長　タのフレーム長　整後のソのフレーム長　整後のタのフレーム長　ミニバッチの開始F 終了F　有声Fの開始インデックス　終了インデックス　有声Fの開始インデックス内のフレーム番号　終了インデックス内のフレーム番号
hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10075.h5 hdf5/trt50_VCC2TF1_VCC2SF1/VCC2SF1/10075.h5 545 553 484 459 0 79 0 65 14 79
1255:prev_featfile_src:['']
700:0フレーム目のバッチの順伝搬
1279:batch_size_utt=1
1310
750 :criterion_mcd
798:batch_loss:103.17999267578125
750 :criterion_mcd
792:batch_loss累積和:205.89678955078125
750 :criterion_mcd
792:batch_loss累積和:308.3981628417969
807: 勾配更新
846: mcdpow_trg_trgをappend
846: mcdpow_trg_trgをappend
846: mcdpow_trg_trgをappend
865:batch loss [1] = 308.398 ;; [1] 49.877 50.423 51.605 ; 2.375 0.506 ; 16.070 dB 10.330 dB , 16.775 dB 10.628 dB ;; [2] 51.196 50.596 50.754 ; 0.426 0.498 ; 17.519 dB 10.43

In [6]:
# Scratch check: show the shape of y_in_src left in the kernel by the training cell.
y_in_src.shape

torch.Size([1, 1, 50])

In [7]:
# Scratch check: pull one more item from generator_src by hand. The generator's
# own prints report which frame window it produced.
test=next(generator_src)


yield2を返却
featfiles_src:['hdf5/tr50_VCC2SF1_VCC2TF1/VCC2SF1/10007.h5']
開始フレーム:80
終了フレーム:159


In [8]:
# Dump the item fetched from the generator.
# NOTE(review): this prints full tensors; printing shapes instead would keep the
# notebook output readable.
print(test)

(tensor([[[ 0.0000e+00,  5.8651e+00, -8.6857e-12,  ...,  2.9362e-02,
          -2.0928e-02,  1.2867e-02],
         [ 0.0000e+00,  5.8652e+00, -8.6857e-12,  ..., -5.6030e-02,
           5.4229e-02, -4.9499e-02],
         [ 0.0000e+00,  5.8650e+00, -8.6857e-12,  ...,  1.3119e-04,
           9.8673e-03, -1.9781e-02],
         ...,
         [ 0.0000e+00,  5.4911e+00, -8.6857e-12,  ...,  5.5624e-03,
          -1.9808e-02,  2.7706e-02],
         [ 0.0000e+00,  5.4919e+00, -8.6857e-12,  ..., -3.9925e-02,
           4.9828e-02, -5.5701e-02],
         [ 0.0000e+00,  5.4917e+00, -8.6857e-12,  ...,  3.8200e-02,
          -3.0959e-02,  2.3699e-02]]], device='cuda:0'), tensor([[[1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1., 0.],
         [1.

In [9]:
# Debug trace of the frame-windowing logic, written as a flat cell. Every `yield`
# is commented out and prints report the window boundaries instead, so this looks
# like a copy of the train_generator body (TODO confirm against the real generator).
# It reads dataloader, device and batch_size from the kernel namespace and makes
# exactly one pass over the dataloader.
print(f"train_generatorの処理を開始")
while True:
    c_idx = 0
    # process over all of files
    for idx, batch in enumerate(dataloader):
        flens = batch['flen_src'].data.numpy()
        max_flen = np.max(flens) ## get max frame length
        flens_spc_src = batch['flen_spc_src'].data.numpy()
        max_flen_spc_src = np.max(flens_spc_src) ## get max frame length
        flens_src_trg = batch['flen_src_trg'].data.numpy()
        max_flen_src_trg = np.max(flens_src_trg) ## get max frame length
        flens_spc_src_trg = batch['flen_spc_src_trg'].data.numpy()
        max_flen_spc_src_trg = np.max(flens_spc_src_trg) ## get max frame length
        # Trim each padded tensor to the longest length in this batch and move it to device.
        hs_src = batch['h_src'][:,:max_flen].to(device)
        src_codes = batch['src_code'][:,:max_flen].to(device)
        trg_codes = batch['trg_code'][:,:max_flen].to(device)
        cvs_src = batch['cv_src'][:,:max_flen].to(device)
        spcidcs_src = batch['spcidx_src'][:,:max_flen_spc_src].to(device)
        hs_src_trg = batch['h_src_trg'][:,:max_flen_src_trg].to(device)
        spcidcs_src_trg = batch['spcidx_src_trg'][:,:max_flen_spc_src_trg].to(device)
        featfiles_src = batch['featfile_src']
        featfiles_src_trg = batch['featfile_src_trg']
        n_batch_utt = hs_src.size(0)
        # Tensors taken from the dataloader carry the number of samples in their
        # leading dimension. Here a batch is a single utterance, so it is 1.
        print(f"featfiles_src:{featfiles_src}")
        print(f"hs_src.shape：{hs_src.shape}")
        #print(f"n_batch_utt：{n_batch_utt}")

        # use mini batch
        if batch_size != 0:
            # Split the feature frames into windows of batch_size frames.

            # First window covers frames [src_idx_s, src_idx_e], both inclusive.
            src_idx_s = 0
            src_idx_e = batch_size-1 # 79 in the recorded run


            # Per-utterance positions inside spcidcs_src of the first/last entry that
            # falls in the current window; -1 means "none found".
            # presumably spcidcs_src holds speech-frame indices — TODO confirm
            spcidcs_src_s_idx = np.repeat(-1,n_batch_utt)
            spcidcs_src_e_idx = np.repeat(-1,n_batch_utt)

            # s_flag: a start entry has been found and the end is still being searched.
            # e_flag: the end has been found (or nothing is open).
            s_flag = np.repeat(False,n_batch_utt)
            e_flag = np.repeat(True,n_batch_utt)
            # Number of frames of each utterance inside the current window.
            flen_acc = np.repeat(batch_size,n_batch_utt)

            for j in range(n_batch_utt):
                # Scan forward from the entry after the previous window's end entry.
                for i in range(spcidcs_src_e_idx[j]+1,flens_spc_src[j]):
                    if not s_flag[j] and spcidcs_src[j,i] >= src_idx_s:
                        if spcidcs_src[j,i] > src_idx_e:
                            # Next entry lies beyond this window: nothing to select here.
                            spcidcs_src_s_idx[j] = -1
                            break
                        spcidcs_src_s_idx[j] = i
                        s_flag[j] = True
                        e_flag[j] = False
                        if i == flens_spc_src[j]-1:
                            # The start entry is also the last entry overall, so it is the end too.
                            spcidcs_src_e_idx[j] = i
                            s_flag[j] = False
                            e_flag[j] = True
                            break
                    elif not e_flag[j] and (spcidcs_src[j,i] >= src_idx_e or i == flens_spc_src[j]-1):
                        if spcidcs_src[j,i] > src_idx_e:
                            # This entry overshoots the window; the previous one is the end.
                            spcidcs_src_e_idx[j] = i-1
                        else:
                            spcidcs_src_e_idx[j] = i
                        s_flag[j] = False
                        e_flag[j] = True
                        break
            # Every utterance takes part in the first window.
            select_utt_idx = [i for i in range(n_batch_utt)]
            print(f"yield1を返却")
            print(f"開始フレーム:{src_idx_s}")
            print(f"終了フレーム:{src_idx_e}")
            # The first window (frames 0-79 in the recorded run) would be yielded here.
            #yield hs_src, src_codes[:,src_idx_s:src_idx_e+1], trg_codes[:,src_idx_s:src_idx_e+1], hs_src_trg, cvs_src, src_idx_s, src_idx_e, spcidcs_src_s_idx, spcidcs_src_e_idx, c_idx, idx, spcidcs_src, spcidcs_src_trg, featfiles_src, featfiles_src_trg, flens, flens_src_trg, flens_spc_src, flens_spc_src_trg, select_utt_idx, flen_acc, n_batch_utt

            # Keep processing the following windows (80-159, 160-..., and so on).
            while src_idx_e < max_flen-1:
                src_idx_s = src_idx_e + 1
                src_idx_e = src_idx_s+batch_size-1
                # Clamp the last window to the longest utterance in the batch.
                if src_idx_e >= max_flen:
                    src_idx_e = max_flen-1
                select_utt_idx  = []
                for j in range(n_batch_utt):
                    # Only utterances with entries left in spcidcs_src are processed and selected.
                    if spcidcs_src_e_idx[j] < flens_spc_src[j]-1:
                        if src_idx_e >= flens[j]:
                            # The window runs past the end of utterance j: keep only its remaining frames.
                            flen_acc[j] = flens[j]-src_idx_s
                        # Same start/end search as for the first window.
                        for i in range(spcidcs_src_e_idx[j]+1,flens_spc_src[j]):
                            if not s_flag[j] and spcidcs_src[j,i] >= src_idx_s:
                                if spcidcs_src[j,i] > src_idx_e:
                                    spcidcs_src_s_idx[j] = -1
                                    break
                                spcidcs_src_s_idx[j] = i
                                s_flag[j] = True
                                e_flag[j] = False
                                if i == flens_spc_src[j]-1:
                                    spcidcs_src_e_idx[j] = i
                                    s_flag[j] = False
                                    e_flag[j] = True
                                    break
                            elif not e_flag[j] and (spcidcs_src[j,i] >= src_idx_e or i == flens_spc_src[j]-1):
                                if spcidcs_src[j,i] > src_idx_e:
                                    spcidcs_src_e_idx[j] = i-1
                                else:
                                    spcidcs_src_e_idx[j] = i
                                s_flag[j] = False
                                e_flag[j] = True
                                break
                        select_utt_idx.append(j)

                print(f"yield2を返却")
                print(f"featfiles_src:{featfiles_src}")
                print(f"開始フレーム:{src_idx_s}")
                print(f"終了フレーム:{src_idx_e}")
                #yield hs_src, src_codes[:,src_idx_s:src_idx_e+1], trg_codes[:,src_idx_s:src_idx_e+1], hs_src_trg, cvs_src, src_idx_s, src_idx_e, spcidcs_src_s_idx, spcidcs_src_e_idx, c_idx, idx, spcidcs_src, spcidcs_src_trg, featfiles_src, featfiles_src_trg, flens, flens_src_trg, flens_spc_src, flens_spc_src_trg, select_utt_idx, flen_acc, n_batch_utt

        # use utterance batch
        else:
            #yield hs_src, src_codes, trg_codes, hs_src_trg, cvs_src, c_idx, idx, spcidcs_src, spcidcs_src_trg, featfiles_src, featfiles_src_trg, flens, flens_src_trg, flens_spc_src, flens_spc_src_trg, n_batch_utt
            # print() keeps this branch non-empty now that the yield is commented out.
            print()
    print(f"すべての発話処理を完了")
    # c_idx is reset to 0 at the top of each pass, so after this increment the
    # condition below always holds and the cell stops after one pass over the dataloader.
    c_idx += 1
    if c_idx > 0:
    #if c_idx > 1:
    #if c_idx > 2:
        break
    # All frame windows have been emitted.
    # End-of-epoch condition.
print(f"yield3を返却")

# The commented-out yields below have -1 in the positions where the per-batch
# yields above carry c_idx and idx; print() stands in for them.
if batch_size > 0:
    #yield [], [], [], [], [], [], [], [], [], -1, -1, [], [], [], [], [], [], [], [], [], [], []
    print()
else:
    #yield [], [], [], [], [], -1, -1, [], [], [], [], [], [], [], [], []
    print()

train_generatorの処理を開始
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10052.h5']
hs_src.shape：torch.Size([1, 878, 54])
yield1を返却
開始フレーム:0
終了フレーム:79
yield2を返却
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10052.h5']
開始フレーム:80
終了フレーム:159
yield2を返却
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10052.h5']
開始フレーム:160
終了フレーム:239
yield2を返却
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10052.h5']
開始フレーム:240
終了フレーム:319
yield2を返却
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10052.h5']
開始フレーム:320
終了フレーム:399
yield2を返却
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10052.h5']
開始フレーム:400
終了フレーム:479
yield2を返却
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10052.h5']
開始フレーム:480
終了フレーム:559
yield2を返却
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10052.h5']
開始フレーム:560
終了フレーム:639
yield2を返却
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10052.h5']
開始フレーム:640
終了フレーム:719
yield2を返却
featfiles_src:['hdf5/tr50_VCC2TF1_VCC2SF1/VCC2TF1/10052.h5']
開始フレーム:720
終了フレーム:799
yield2を返却
feat