In [2]:
from functools import partial
from logging import Logger
from pathlib import Path

import hydra
import torch
from hydra.utils import to_absolute_path
from matplotlib import pyplot as plt
from omegaconf import DictConfig
from torch import nn
from tqdm import tqdm

In [3]:
# from ttslearn.contrib.multispk_util import collate_fn_ms_tacotron, setup
from utils.multispk_util import collate_fn_ms_tacotron, setup
from ttslearn.tacotron.frontend.openjtalk import sequence_to_text
from ttslearn.util import make_non_pad_mask
from ttslearn.train_util import (
    get_epochs_with_optional_tqdm,
    plot_2d_feats,
    plot_attention,
    save_checkpoint,
)

logger: Logger = None

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from pathlib import Path

import hydra
import numpy as np
import torch
from hydra.utils import to_absolute_path
from omegaconf import OmegaConf
from torch import nn, optim
from torch.utils import data as data_utils
from torch.utils.tensorboard import SummaryWriter
from ttslearn.logger import getLogger
from ttslearn.train_util import (
    ensure_divisible_by,
    num_trainable_params,
    set_epochs_based_on_max_steps_,
)
from ttslearn.util import init_seed, load_utt_list, pad_1d, pad_2d

### データ用意テスト

In [32]:
class Dataset(data_utils.Dataset): 

    def __init__(self, in_paths, out_paths, spk_paths):
        self.in_paths = in_paths
        self.out_paths = out_paths
        self.spk_paths = spk_paths

    def __getitem__(self, idx):
        spk_id = np.load(self.spk_paths[idx])
        return np.load(self.in_paths[idx]), np.load(self.out_paths[idx]), spk_id

    def __len__(self):
        return len(self.in_paths)

In [76]:
def collate_fn_ms_tacotron(batch, reduction_factor=1):
    xs = [x[0] for x in batch]
    ys = [ensure_divisible_by(x[1], reduction_factor) for x in batch]
    spk_ids = torch.tensor([int(x[2]) for x in batch], dtype=torch.long).view(-1, 1)
    in_lens = [len(x) for x in xs]
    out_lens = [len(y) for y in ys]
    in_max_len = max(in_lens)
    out_max_len = max(out_lens)
    x_batch = torch.stack([torch.from_numpy(pad_1d(x, in_max_len)) for x in xs])
    y_batch = torch.stack([torch.from_numpy(pad_2d(y, out_max_len)) for y in ys])
    il_batch = torch.tensor(in_lens, dtype=torch.long)
    ol_batch = torch.tensor(out_lens, dtype=torch.long)
    stop_flags = torch.zeros(y_batch.shape[0], y_batch.shape[1])
    for idx, out_len in enumerate(out_lens):
        stop_flags[idx, out_len - 1 :] = 1.0

    return x_batch, il_batch, y_batch, ol_batch, stop_flags, spk_ids

In [77]:
# def get_data_loaders(data_config, collate_fn):
def get_data_loaders(collate_fn):
    data_loaders = {}

    for phase in ["train", "dev"]:
        utt_ids = load_utt_list(to_absolute_path(f"data/{phase}.list"))
        in_dir = Path(to_absolute_path(f"dump/hfc_men_sr24000/norm/{phase}/in_tacotron"))
        out_dir = Path(to_absolute_path(f"dump/hfc_men_sr24000/norm/{phase}/out_tacotron"))

        in_feats_paths = [in_dir / f"{utt_id}-feats.npy" for utt_id in utt_ids]
        out_feats_paths = [out_dir / f"{utt_id}-feats.npy" for utt_id in utt_ids]
        # spk_id_paths = [in_dir / f"{utt_id}-spk.npy" for utt_id in utt_ids]
        spk_id_paths = [ "data/fine-spk.npy" for utt_id in utt_ids]

        dataset = Dataset(in_feats_paths, out_feats_paths, spk_id_paths) # <- spk_idを自身で実装するため、その処理の変更を学習処理で行う
        data_loaders[phase] = data_utils.DataLoader(
            dataset,
            batch_size=32,
            collate_fn=collate_fn,
            pin_memory=True,
            num_workers=4,
            shuffle=phase.startswith("train"),
        )

    return data_loaders

In [85]:
np.save(
    "data/fine-spk.npy",
    np.array([10], dtype=np.int64),
    allow_pickle=False,
)

collate_fn = partial(
        collate_fn_ms_tacotron, reduction_factor=2
    )
data_loaders = get_data_loaders(collate_fn)
data_loaders["train"].__len__()


467

In [63]:
# in_x = np.load("dump/hfc_men_sr24000/norm/train/in_tacotron/Seikatsu01_A-A__000010-feats.npy")
# out = np.load("dump/hfc_men_sr24000/norm/train/out_tacotron/Seikatsu01_A-A__000010-feats.npy")
# spk = np.load("data/fine-spk.npy")

In [35]:
def setup(config, device, collate_fn):
    """Setup for traiining

    Args:
        config (dict): configuration for training
        device (torch.device): device to use for training
        collate_fn (callable): function to collate mini-batches

    Returns:
        (tuple): tuple containing model, optimizer, learning rate scheduler,
            data loaders, tensorboard writer, and logger.
    """
    # NOTE: hydra は内部で stream logger を追加するので、二重に追加しないことに注意
    logger = getLogger(100, add_stream_handler=False)
    logger.info(f"PyTorch version: {torch.__version__}")
    logger.info(f"Random seed: {773}")
    init_seed(773)

    # モデルのインスタンス化
    model = hydra.utils.instantiate(config.model.netG).to(device)
    logger.info(model)
    logger.info(
        "Number of trainable params: {:.3f} million".format(
            num_trainable_params(model) / 1000000.0
        )
    )

    # (optional) 学習済みモデルの読み込み
    # ファインチューニングしたい場合
    pretrained_checkpoint = config.train.pretrained.checkpoint
    if pretrained_checkpoint is not None and len(pretrained_checkpoint) > 0:
        logger.info(
            "Fine-tuning! Loading a checkpoint: {}".format(pretrained_checkpoint)
        )
        checkpoint = torch.load(pretrained_checkpoint, map_location=device)
        state_dict = checkpoint["state_dict"]
        model_dict = model.state_dict()
        state_dict = {k: v for k, v in state_dict.items() if k in model_dict}
        invalid_keys = []
        for k, v in state_dict.items():
            if model_dict[k].shape != v.shape:
                logger.info(f"Skip loading {k}")
                invalid_keys.append(k)
        for k in invalid_keys:
            state_dict.pop(k)
        model_dict.update(state_dict)
        model.load_state_dict(model_dict)

    # 複数 GPU 対応
    if config.data_parallel:
        model = nn.DataParallel(model)

    # Optimizer
    optimizer_class = getattr(optim, config.train.optim.optimizer.name)
    optimizer = optimizer_class( model.parameters(), **config.train.optim.optimizer.params )

    # 学習率スケジューラ
    lr_scheduler_class = getattr( optim.lr_scheduler, config.train.optim.lr_scheduler.name )
    lr_scheduler = lr_scheduler_class( optimizer, **config.train.optim.lr_scheduler.params )

    # DataLoader
    data_loaders = get_data_loaders(config.data, collate_fn)

    set_epochs_based_on_max_steps_(config.train, len(data_loaders["train"]), logger)

    # Tensorboard の設定
    writer = SummaryWriter(to_absolute_path(config.train.log_dir))

    # config ファイルを保存しておく
    out_dir = Path(to_absolute_path(config.train.out_dir))
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / "model.yaml", "w") as f:
        OmegaConf.save(config.model, f)
    with open(out_dir / "config.yaml", "w") as f:
        OmegaConf.save(config, f)

    return model, optimizer, lr_scheduler, data_loaders, writer, logger

In [36]:
# @hydra.main(config_path="conf/train_tacotron", config_name="config")
# def my_app(config: DictConfig) -> None:
def my_app():
    global logger
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    collate_fn = partial(
        collate_fn_ms_tacotron, reduction_factor=2
    )
    model, optimizer, lr_scheduler, data_loaders, writer, logger = setup(
        device, collate_fn
    )
    # train_loop(config, device, model, optimizer, lr_scheduler, data_loaders, writer)



In [37]:
my_app()

NameError: name 'config' is not defined

#### Configテスト

In [117]:
import hydra
from hydra.utils import to_absolute_path
from hydra import compose, initialize
from hydra.core.global_hydra import GlobalHydra
from omegaconf import OmegaConf

def load_config():
    hydra_instance = GlobalHydra.instance()
    if not hydra_instance.is_initialized():
        hydra_instance.clear()
        initialize(config_path="conf/train_tacotron")
    
    cfg = compose(config_name="config")
    return cfg


In [119]:
config = load_config()
print(config.data.train.utt_list)

data/train.list


The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path="conf/train_tacotron")


### Speaker Idの取り扱い

In [25]:
np.save(
    "dump/test/test-spk.npy",
    np.array([10], dtype=np.int64),
    allow_pickle=False,
)

In [26]:
data = np.load("dump/test/test-spk.npy", allow_pickle=False)
print(data)

[10]


### バッチの確認

In [75]:
tensor1 = torch.tensor([[1], [2], [3]])
tensor2 = torch.tensor([[4], [5], [6]])
tuple1 = (tensor1, tensor2)
stack = torch.stack(tuple1, 0)
print(tuple1)
print(stack)

(tensor([[1],
        [2],
        [3]]), tensor([[4],
        [5],
        [6]]))
tensor([[[1],
         [2],
         [3]],

        [[4],
         [5],
         [6]]])


#### mask test

---

In [106]:
import torch
from utils.util import make_non_pad_mask, make_pad_mask

lengths = [5, 3, 4]
maxlen = 6
mask = make_pad_mask(lengths, maxlen).unsqueeze(-1)
non_mask = make_non_pad_mask(lengths, maxlen)

# 結果を出力
print(mask.shape)
print(mask)
print(non_mask)

torch.Size([3, 6, 1])
tensor([[[False],
         [False],
         [False],
         [False],
         [False],
         [ True]],

        [[False],
         [False],
         [False],
         [ True],
         [ True],
         [ True]],

        [[False],
         [False],
         [False],
         [False],
         [ True],
         [ True]]])
tensor([[ True,  True,  True,  True,  True, False],
        [ True,  True,  True, False, False, False],
        [ True,  True,  True,  True, False, False]])


In [108]:
# ダミーのテンソルを作成
tensor = torch.tensor([[1, 2, 3, 4, 5, 0], [4, 5, 6, 0, 0, 0], [7, 8, 9, 10, 0, 0]])
masked_tensor = tensor.masked_select(mask.squeeze(-1))
non_masked_tensor = tensor.masked_select(non_mask)
print(masked_tensor)
print(non_masked_tensor)

tensor([0, 0, 0, 0, 0, 0])
tensor([ 1,  2,  3,  4,  5,  4,  5,  6,  7,  8,  9, 10])


##### バッチ数の確認

In [5]:
utt_ids_train = load_utt_list(to_absolute_path("data/train.list"))
utt_ids_dev = load_utt_list(to_absolute_path("data/dev.list"))
print(len(utt_ids_train) / 32)
print(len(utt_ids_dev) / 32)
print(len(utt_ids_dev))

466.375
58.28125
1865


In [4]:

print((1000 / 32) * 4)
"31.25 per epoch"

125.0


'31.25 per epoch'

### test early stopping class

In [12]:
%load_ext autoreload
%autoreload 2
from utils.early_stopping import EarlyStopping
import random

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [51]:

early_stopping = EarlyStopping(patience=5)
flag = False
for epoch in range(10):
    for phase in ["train", "dev"]:
        sum_loss = 0
        for iter in range(50):
            if random.randint(1, 10) < 6:
                sum_loss += 500
            else:
                sum_loss -= 1500
        if phase == "dev":
            loss = sum_loss / 50
            flag = early_stopping(loss)
    
    if flag:
        print("flag is : True")
        break



Early Stopping! loss value exceed consecutively 6 times
flag is : True
