In [None]:
!pip install sentencepiece
!pip install python-Levenshtein
#!pip install warp_rnnt
!pip install hydra-core

In [None]:
!pip install wget

In [None]:
!pip install jamo

In [None]:
!wget https://www.openslr.org/resources/12/test-clean.tar.gz
!wget https://www.openslr.org/resources/12/dev-clean.tar.gz

In [None]:
!tar xvzf dev-clean.tar.gz
!tar xvzf test-clean.tar.gz

In [None]:
!python -m pip install -U pip

In [None]:
!pip install warp-rnnt

In [None]:
!git clone https://github.com/1ytic/warp-rnnt
#cd warp-rnnt/pytorch_binding
!python warp-rnnt/pytorch_binding/setup.py install

In [None]:
!git clone https://github.com/sooftware/kospeech.git

**Codes from :** https://github.com/sooftware/kospeech

* https://github.com/sooftware/kospeech/blob/latest/LICENSE


In [1]:
import Levenshtein as Lev

In [2]:
def wer(s1,s2):
    b = set(s1.split()+s2.split())
    word_map = dict(zip(b,range(len(b))))
    print("word_map:",word_map)
    w1 = [chr(word_map[w]) for w in s1.split()]
    w2 = [chr(word_map[w]) for w in s2.split()]
    dev = max(len(s2.split()),1)
    score = Lev.distance("".join(w1),"".join(w2))/dev
    return score

def cer(s1,s2):
    w1 = s1.replace(" ","")
    w2 = s2.replace(" ","")
    dev = max(len(s2),1)
    score = Lev.distance((w1),(w2))/dev
    return score

In [3]:
# Copyright (c) 2020, Soohwan Kim. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import torch.nn as nn
import torch.nn.init as init
from torch import Tensor


class ResidualConnectionModule(nn.Module):
    """
    Residual Connection Module.
    outputs = (module(inputs) x module_factor + inputs x input_factor)
    """
    def __init__(
            self,
            module: nn.Module,
            module_factor: float = 1.0,
            input_factor: float = 1.0,
    ) -> None:
        super(ResidualConnectionModule, self).__init__()
        self.module = module
        self.module_factor = module_factor
        self.input_factor = input_factor

    def forward(self, inputs: Tensor) -> Tensor:
        return (self.module(inputs) * self.module_factor) + (inputs * self.input_factor)


class Linear(nn.Module):
    """
    Wrapper class of torch.nn.Linear
    Weight initialize by xavier initialization and bias initialize to zeros.
    """
    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
        super(Linear, self).__init__()
        self.linear = nn.Linear(in_features, out_features, bias=bias)
        init.xavier_uniform_(self.linear.weight)
        if bias:
            init.zeros_(self.linear.bias)

    def forward(self, x: Tensor) -> Tensor:
        return self.linear(x)


class View(nn.Module):
    """ Wrapper class of torch.view() for Sequential module. """
    def __init__(self, shape: tuple, contiguous: bool = False):
        super(View, self).__init__()
        self.shape = shape
        self.contiguous = contiguous

    def forward(self, inputs):
        if self.contiguous:
            inputs = inputs.contiguous()
        return inputs.view(*self.shape)


class Transpose(nn.Module):
    """ Wrapper class of torch.transpose() for Sequential module. """
    def __init__(self, shape: tuple):
        super(Transpose, self).__init__()
        self.shape = shape

    def forward(self, inputs: Tensor):
        return inputs.transpose(*self.shape)

In [4]:
import torch.nn as nn
from torch import Tensor
from typing import Tuple


class EncoderInterface(nn.Module):
    """ Base Interface of Encoder """
    def __init__(self):
        super(EncoderInterface, self).__init__()

    def count_parameters(self) -> int:
        """ Count parameters of encoder """
        return sum([p.numel for p in self.parameters()])

    def update_dropout(self, dropout_p: float) -> None:
        """ Update dropout probability of encoder """
        for name, child in self.named_children():
            if isinstance(child, nn.Dropout):
                child.p = dropout_p

    def forward(self, inputs: Tensor, input_lengths: Tensor):
        """
        Forward propagate for encoder training.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        """
        raise NotImplementedError


class BaseEncoder(EncoderInterface):
    """ ASR Encoder Super Class for KoSpeech model implementation """
    """
    supported_extractors = {
        'ds2': DeepSpeech2Extractor,
        'vgg': VGGExtractor,
    }
    """

    def __init__(
            self,
            input_dim: int,
            extractor: str = 'vgg',
            d_model: int = None,
            num_classes: int = None,
            dropout_p: float = None,
            activation: str = 'hardtanh',
            joint_ctc_attention: bool = False,
    ) -> None:
        super(BaseEncoder, self).__init__()
        if joint_ctc_attention:
            assert num_classes, "If `joint_ctc_attention` True, `num_classes` should be not None"
            assert dropout_p, "If `joint_ctc_attention` True, `dropout_p` should be not None"
            assert d_model, "If `joint_ctc_attention` True, `d_model` should be not None"

        if extractor is not None:
            extractor = self.supported_extractors[extractor.lower()]
            self.conv = extractor(input_dim=input_dim, activation=activation)

        self.conv_output_dim = self.conv.get_output_dim()
        self.num_classes = num_classes
        self.joint_ctc_attention = joint_ctc_attention

        if self.joint_ctc_attention:
            self.fc = nn.Sequential(
                nn.BatchNorm1d(d_model),
                Transpose(shape=(1, 2)),
                nn.Dropout(dropout_p),
                Linear(d_model, num_classes, bias=False),
            )

    def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Forward propagate a `inputs` for  encoder training.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        Returns:
            (Tensor, Tensor, Tensor):
            * encoder_outputs: A output sequence of encoder. `FloatTensor` of size ``(batch, seq_length, dimension)``
            * encoder_output_lengths: The length of encoder outputs. ``(batch)``
            * encoder_log_probs: Log probability of encoder outputs will be passed to CTC Loss.
                If joint_ctc_attention is False, return None.
        """
        raise NotImplementedError


class TransducerEncoder(EncoderInterface):
    """ ASR Transducer Encoder Super class for KoSpeech model implementation """
    def __init__(self):
        super(TransducerEncoder, self).__init__()

    def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tensor:
        """
        Forward propagate a `inputs` for  encoder training.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        Returns:
            (Tensor, Tensor)
            * outputs (torch.FloatTensor): A output sequence of encoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
            * output_lengths (torch.LongTensor): The length of output tensor. ``(batch)``
        """
        raise NotImplementedError

In [5]:
import torch
import torch.nn as nn
from torch import Tensor
from typing import Tuple


class DecoderInterface(nn.Module):
    def __init__(self):
        super(DecoderInterface, self).__init__()

    def count_parameters(self) -> int:
        """ Count parameters of encoder """
        return sum([p.numel for p in self.parameters()])

    def update_dropout(self, dropout_p: float) -> None:
        """ Update dropout probability of encoder """
        for name, child in self.named_children():
            if isinstance(child, nn.Dropout):
                child.p = dropout_p


class BaseDecoder(DecoderInterface):
    """ ASR Decoder Super Class for KoSpeech model implementation """
    def __init__(self):
        super(BaseDecoder, self).__init__()

    def forward(self, targets: Tensor, encoder_outputs: Tensor, **kwargs) -> Tensor:
        """
        Forward propagate a `encoder_outputs` for training.
        Args:
            targets (torch.LongTensr): A target sequence passed to decoder. `IntTensor` of size ``(batch, seq_length)``
            encoder_outputs (torch.FloatTensor): A output sequence of encoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
        Returns:
            * predicted_log_probs (torch.FloatTensor): Log probability of model predictions.
        """
        raise NotImplementedError

    @torch.no_grad()
    def decode(self, encoder_outputs: Tensor, *args) -> Tensor:
        """
        Decode encoder_outputs.
        Args:
            encoder_outputs (torch.FloatTensor): A output sequence of encoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
        Returns:
            * predicted_log_probs (torch.FloatTensor): Log probability of model predictions.
        """
        raise NotImplementedError


class TransducerDecoder(DecoderInterface):
    """ ASR Transducer Decoder Super Class for KoSpeech model implementation """
    def __init__(self):
        super(TransducerDecoder, self).__init__()

    def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tuple[Tensor, Tensor]:
        """
        Forward propage a `inputs` (targets) for training.
        Args:
            inputs (torch.LongTensor): A target sequence passed to decoder. `IntTensor` of size ``(batch, seq_length)``
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        Returns:
            (Tensor, Tensor):
            * decoder_outputs (torch.FloatTensor): A output sequence of decoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
            * hidden_states (torch.FloatTensor): A hidden state of decoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
        """
        raise NotImplementedError

In [6]:
import torch
import torch.nn as nn
from torch import Tensor
from typing import Tuple



class BaseModel(nn.Module):
    def __init__(self):
        super(BaseModel, self).__init__()

    def count_parameters(self) -> int:
        """ Count parameters of encoder """
        return sum([p.numel for p in self.parameters()])

    def update_dropout(self, dropout_p: float) -> None:
        """ Update dropout probability of encoder """
        for name, child in self.named_children():
            if isinstance(child, nn.Dropout):
                child.p = dropout_p

    @torch.no_grad()
    def recognize(self, inputs: Tensor, input_lengths: Tensor):
        raise NotImplementedError


class EncoderModel(BaseModel):
    """ Super class of KoSpeech's Encoder only Models """
    def __init__(self):
        super(EncoderModel, self).__init__()
        self.decoder = None

    def set_decoder(self, decoder):
        """ Setter for decoder """
        self.decoder = decoder

    def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tuple[Tensor, Tensor]:
        """
        Forward propagate a `inputs` for  ctc training.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        Returns:
            (Tensor, Tensor):
            * predicted_log_prob (torch.FloatTensor)s: Log probability of model predictions.
            * output_lengths (torch.LongTensor): The length of output tensor ``(batch)``
        """
        raise NotImplementedError

    @torch.no_grad()
    def decode(self, predicted_log_probs: Tensor) -> Tensor:
        """
        Decode encoder_outputs.
        Args:
            predicted_log_probs (torch.FloatTensor):Log probability of model predictions. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
        Returns:
            * predictions (torch.FloatTensor): Result of model predictions.
        """
        return predicted_log_probs.max(-1)[1]

    @torch.no_grad()
    def recognize(self, inputs: Tensor, input_lengths: Tensor) -> Tensor:
        """
        Recognize input speech.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        Returns:
            * predictions (torch.FloatTensor): Result of model predictions.
        """
        predicted_log_probs, _ = self.forward(inputs, input_lengths)
        if self.decoder is not None:
            return self.decoder.decode(predicted_log_probs)
        return self.decode(predicted_log_probs)


class EncoderDecoderModel(BaseModel):
    """ Super class of KoSpeech's Encoder-Decoder Models """
    def __init__(self, encoder: BaseEncoder, decoder: BaseDecoder) -> None:
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def set_encoder(self, encoder):
        """ Setter for encoder """
        self.encoder = encoder

    def set_decoder(self, decoder):
        """ Setter for decoder """
        self.decoder = decoder

    def count_parameters(self) -> int:
        """ Count parameters of encoder """
        num_encoder_parameters = self.encoder.count_parameters()
        num_decoder_parameters = self.decoder.count_parameters()
        return num_encoder_parameters + num_decoder_parameters

    def update_dropout(self, dropout_p) -> None:
        """ Update dropout probability of model """
        self.encoder.update_dropout(dropout_p)
        self.decoder.update_dropout(dropout_p)

    def forward(
            self,
            inputs: Tensor,
            input_lengths: Tensor,
            targets: Tensor,
            *args,
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Forward propagate a `inputs` and `targets` pair for training.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
            targets (torch.LongTensr): A target sequence passed to decoder. `IntTensor` of size ``(batch, seq_length)``
        Returns:
            (Tensor, Tensor, Tensor)
            * predicted_log_probs (torch.FloatTensor): Log probability of model predictions.
            * encoder_output_lengths: The length of encoder outputs. ``(batch)``
            * encoder_log_probs: Log probability of encoder outputs will be passed to CTC Loss.
                If joint_ctc_attention is False, return None.
        """
        raise NotImplementedError

    @torch.no_grad()
    def recognize(self, inputs: Tensor, input_lengths: Tensor) -> Tensor:
        """
        Recognize input speech. This method consists of the forward of the encoder and the decode() of the decoder.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        Returns:
            * predictions (torch.FloatTensor): Result of model predictions.
        """
        encoder_outputs, encoder_output_lengths, _ = self.encoder(inputs, input_lengths)
        return self.decoder.decode(encoder_outputs, encoder_output_lengths)


class TransducerModel(BaseModel):
    """ Super class of KoSpeech's Transducer Models """
    def __init__(
            self,
            encoder: TransducerEncoder,
            decoder: TransducerDecoder,
            d_model: int,
            num_classes: int,
    ) -> None:
        super(TransducerModel, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.fc = Linear(d_model<<1, num_classes, bias=False)

    def set_encoder(self, encoder):
        """ Setter for encoder """
        self.encoder = encoder

    def set_decoder(self, decoder):
        """ Setter for decoder """
        self.decoder = decoder

    def count_parameters(self) -> int:
        """ Count parameters of encoder """
        num_encoder_parameters = self.encoder.count_parameters()
        num_decoder_parameters = self.decoder.count_parameters()
        return num_encoder_parameters + num_decoder_parameters

    def update_dropout(self, dropout_p) -> None:
        """ Update dropout probability of model """
        self.encoder.update_dropout(dropout_p)
        self.decoder.update_dropout(dropout_p)

    def joint(self, encoder_outputs: Tensor, decoder_outputs: Tensor) -> Tensor:
        """
        Joint `encoder_outputs` and `decoder_outputs`.
        Args:
            encoder_outputs (torch.FloatTensor): A output sequence of encoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
            decoder_outputs (torch.FloatTensor): A output sequence of decoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
        Returns:
            * outputs (torch.FloatTensor): outputs of joint `encoder_outputs` and `decoder_outputs`..
        """
        if encoder_outputs.dim() == 3 and decoder_outputs.dim() == 3:
            input_length = encoder_outputs.size(1)
            target_length = decoder_outputs.size(1)

            encoder_outputs = encoder_outputs.unsqueeze(2)
            decoder_outputs = decoder_outputs.unsqueeze(1)

            encoder_outputs = encoder_outputs.repeat([1, 1, target_length, 1])
            decoder_outputs = decoder_outputs.repeat([1, input_length, 1, 1])

            #print("  encoder_outputs:",encoder_outputs.shape)
            #print("  decoder_outputs:",decoder_outputs.shape)
        #outputs = (encoder_outputs + decoder_outputs)
        outputs = torch.cat((encoder_outputs, decoder_outputs), dim=-1)
        #print(" outputs:",outputs.shape)
        outputs = self.fc(outputs).log_softmax(dim=-1)
        #print(" outputs2:",outputs.shape)
        return outputs

    def forward(
            self,
            inputs: Tensor,
            input_lengths: Tensor,
            targets: Tensor,
            target_lengths: Tensor,
    ) -> Tensor:
        """
        Forward propagate a `inputs` and `targets` pair for training.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
            targets (torch.LongTensr): A target sequence passed to decoder. `IntTensor` of size ``(batch, seq_length)``
            target_lengths (torch.LongTensor): The length of target tensor. ``(batch)``
        Returns:
            * predictions (torch.FloatTensor): Result of model predictions.
        """
        encoder_outputs, _ = self.encoder(inputs, input_lengths)
        decoder_outputs, _ = self.decoder(targets, target_lengths)
        return self.joint(encoder_outputs, decoder_outputs)

    @torch.no_grad()
    def decode(self, encoder_output: Tensor, max_length: int) -> Tensor:
        """
        Decode `encoder_outputs`.
        Args:
            encoder_output (torch.FloatTensor): A output sequence of encoder. `FloatTensor` of size
                ``(seq_length, dimension)``
            max_length (int): max decoding time step
        Returns:
            * predicted_log_probs (torch.FloatTensor): Log probability of model predictions.
        """
        pred_tokens, hidden_state = list(), None
        decoder_input = encoder_output.new_tensor([[self.decoder.sos_id]], dtype=torch.long)

        for t in range(max_length):
            decoder_output, hidden_state = self.decoder(decoder_input, hidden_states=hidden_state)
            step_output = self.joint(encoder_output[t].view(-1), decoder_output.view(-1))
            step_output = step_output.softmax(dim=0)
            pred_token = step_output.argmax(dim=0)
            pred_token = int(pred_token.item())
            pred_tokens.append(pred_token)
            decoder_input = step_output.new_tensor([[pred_token]], dtype=torch.long)

        return torch.LongTensor(pred_tokens)

    @torch.no_grad()
    def recognize(self, inputs: Tensor, input_lengths: Tensor) -> Tensor:
        """
        Recognize input speech. This method consists of the forward of the encoder and the decode() of the decoder.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        Returns:
            * outputs (torch.FloatTensor): Result of model predictions.
        """
        outputs = list()

        encoder_outputs, output_lengths = self.encoder(inputs, input_lengths)
        max_length = encoder_outputs.size(1)

        for encoder_output in encoder_outputs:
            decoded_seq = self.decode(encoder_output, max_length)
            outputs.append(decoded_seq)

        outputs = torch.stack(outputs, dim=1).transpose(0, 1)

        return outputs

In [7]:
import torch.nn as nn
from torch import Tensor
from typing import Tuple


class DecoderRNNT(TransducerDecoder):
    """
    Decoder of RNN-Transducer
    Args:
        num_classes (int): number of classification
        hidden_state_dim (int, optional): hidden state dimension of decoder (default: 512)
        output_dim (int, optional): output dimension of encoder and decoder (default: 512)
        num_layers (int, optional): number of decoder layers (default: 1)
        rnn_type (str, optional): type of rnn cell (default: lstm)
        sos_id (int, optional): start of sentence identification
        eos_id (int, optional): end of sentence identification
        dropout_p (float, optional): dropout probability of decoder
    Inputs: inputs, input_lengths
        inputs (torch.LongTensor): A target sequence passed to decoder. `IntTensor` of size ``(batch, seq_length)``
        input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        hidden_states (torch.FloatTensor): A previous hidden state of decoder. `FloatTensor` of size
            ``(batch, seq_length, dimension)``
    Returns:
        (Tensor, Tensor):
        * decoder_outputs (torch.FloatTensor): A output sequence of decoder. `FloatTensor` of size
            ``(batch, seq_length, dimension)``
        * hidden_states (torch.FloatTensor): A hidden state of decoder. `FloatTensor` of size
            ``(batch, seq_length, dimension)``
    """
    supported_rnns = {
        'lstm': nn.LSTM,
        'gru': nn.GRU,
        'rnn': nn.RNN,
    }

    def __init__(
            self,
            num_classes: int,
            hidden_state_dim: int,
            output_dim: int,
            num_layers: int,
            rnn_type: str = 'lstm',
            sos_id: int = 1,
            eos_id: int = 2,
            dropout_p: float = 0.2,
    ):
        super(DecoderRNNT, self).__init__()
        self.hidden_state_dim = hidden_state_dim
        self.sos_id = sos_id
        self.eos_id = eos_id
        self.embedding = nn.Embedding(num_classes, hidden_state_dim)
        rnn_cell = self.supported_rnns[rnn_type.lower()]
        self.rnn = rnn_cell(
            input_size=hidden_state_dim,
            hidden_size=hidden_state_dim,
            num_layers=num_layers,
            bias=True,
            batch_first=True,
            dropout=dropout_p,
            bidirectional=False,
        )
        self.out_proj = Linear(hidden_state_dim, output_dim)

    def forward(
            self,
            inputs: Tensor,
            input_lengths: Tensor = None,
            hidden_states: Tensor = None,
    ) -> Tuple[Tensor, Tensor]:
        """
        Forward propage a `inputs` (targets) for training.
        Args:
            inputs (torch.LongTensor): A target sequence passed to decoder. `IntTensor` of size ``(batch, seq_length)``
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
            hidden_states (torch.FloatTensor): A previous hidden state of decoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
        Returns:
            (Tensor, Tensor):
            * decoder_outputs (torch.FloatTensor): A output sequence of decoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
            * hidden_states (torch.FloatTensor): A hidden state of decoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
        """
        embedded = self.embedding(inputs)
        #print("embeded:",embedded.shape,"hidden_states:",hidden_states)
        outputs, hidden_states = self.rnn(embedded, hidden_states)
        outputs = self.out_proj(outputs)
        return outputs, hidden_states
      

In [8]:
# Copyright (c) 2021, Soohwan Kim. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch.nn as nn
from torch import Tensor
from typing import Tuple



class EncoderRNNT(TransducerEncoder):
    """
    Encoder of RNN-Transducer.
    Args:
        input_dim (int): dimension of input vector
        hidden_state_dim (int, optional): hidden state dimension of encoder (default: 320)
        output_dim (int, optional): output dimension of encoder and decoder (default: 512)
        num_layers (int, optional): number of encoder layers (default: 4)
        rnn_type (str, optional): type of rnn cell (default: lstm)
        bidirectional (bool, optional): if True, becomes a bidirectional encoder (default: True)
    Inputs: inputs, input_lengths
        inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
            `FloatTensor` of size ``(batch, seq_length, dimension)``.
        input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
    Returns:
        (Tensor, Tensor)
        * outputs (torch.FloatTensor): A output sequence of encoder. `FloatTensor` of size
            ``(batch, seq_length, dimension)``
        * hidden_states (torch.FloatTensor): A hidden state of encoder. `FloatTensor` of size
            ``(batch, seq_length, dimension)``
    """
    supported_rnns = {
        'lstm': nn.LSTM,
        'gru': nn.GRU,
        'rnn': nn.RNN,
    }

    def __init__(
            self,
            input_dim: int,
            hidden_state_dim: int,
            output_dim: int,
            num_layers: int,
            rnn_type: str = 'lstm',
            dropout_p: float = 0.2,
            bidirectional: bool = True,
    ):
        super(EncoderRNNT, self).__init__()
        self.hidden_state_dim = hidden_state_dim
        rnn_cell = self.supported_rnns[rnn_type.lower()]
        self.rnn = rnn_cell(
            input_size=input_dim,
            hidden_size=hidden_state_dim,
            num_layers=num_layers,
            bias=True,
            batch_first=True,
            dropout=dropout_p,
            bidirectional=bidirectional,
        )
        self.out_proj = Linear(hidden_state_dim << 1 if bidirectional else hidden_state_dim, output_dim)

    def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tuple[Tensor, Tensor]:
        """
        Forward propagate a `inputs` for  encoder training.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        Returns:
            (Tensor, Tensor)
            * outputs (torch.FloatTensor): A output sequence of encoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
            * output_lengths (torch.LongTensor): The length of output tensor. ``(batch)``
        """
        inputs = nn.utils.rnn.pack_padded_sequence(inputs.transpose(0, 1), input_lengths.cpu())
        outputs, hidden_states = self.rnn(inputs)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs)
        outputs = self.out_proj(outputs.transpose(0, 1))
        return outputs, input_lengths

In [9]:
# Copyright (c) 2021, Soohwan Kim. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from torch import Tensor



class RNNTransducer(TransducerModel):
    """
    RNN-Transducer are a form of sequence-to-sequence models that do not employ attention mechanisms.
    Unlike most sequence-to-sequence models, which typically need to process the entire input sequence
    (the waveform in our case) to produce an output (the sentence), the RNN-T continuously processes input samples and
    streams output symbols, a property that is welcome for speech dictation. In our implementation,
    the output symbols are the characters of the alphabet.
    Args:
        num_classes (int): number of classification
        input_dim (int): dimension of input vector
        num_encoder_layers (int, optional): number of encoder layers (default: 4)
        num_decoder_layers (int, optional): number of decoder layers (default: 1)
        encoder_hidden_state_dim (int, optional): hidden state dimension of encoder (default: 320)
        decoder_hidden_state_dim (int, optional): hidden state dimension of decoder (default: 512)
        output_dim (int, optional): output dimension of encoder and decoder (default: 512)
        rnn_type (str, optional): type of rnn cell (default: lstm)
        bidirectional (bool, optional): if True, becomes a bidirectional encoder (default: True)
        encoder_dropout_p (float, optional): dropout probability of encoder
        decoder_dropout_p (float, optional): dropout probability of decoder
        sos_id (int, optional): start of sentence identification
        eos_id (int, optional): end of sentence identification
    Inputs: inputs, input_lengths, targets, target_lengths
        inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
            `FloatTensor` of size ``(batch, seq_length, dimension)``.
        input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
        targets (torch.LongTensr): A target sequence passed to decoder. `IntTensor` of size ``(batch, seq_length)``
        target_lengths (torch.LongTensor): The length of target tensor. ``(batch)``
    Returns:
        * predictions (torch.FloatTensor): Result of model predictions.
    """
    def __init__(
            self,
            num_classes: int,
            input_dim: int,
            num_encoder_layers: int = 4,
            num_decoder_layers: int = 1,
            encoder_hidden_state_dim: int = 320,
            decoder_hidden_state_dim: int = 256,
            output_dim: int = 256,
            rnn_type: str = "lstm",
            bidirectional: bool = True,
            encoder_dropout_p: float = 0.2,
            decoder_dropout_p: float = 0.2,
            sos_id: int = 1,
            eos_id: int = 2,
    ):
        encoder = EncoderRNNT(
            input_dim=input_dim,
            hidden_state_dim=encoder_hidden_state_dim,
            output_dim=output_dim,
            num_layers=num_encoder_layers,
            rnn_type=rnn_type,
            dropout_p=encoder_dropout_p,
            bidirectional=bidirectional,
        )
        decoder = DecoderRNNT(
            num_classes=num_classes,
            hidden_state_dim=decoder_hidden_state_dim,
            output_dim=output_dim,
            num_layers=num_decoder_layers,
            rnn_type=rnn_type,
            sos_id=sos_id,
            eos_id=eos_id,
            dropout_p=decoder_dropout_p,
        )
        super(RNNTransducer, self).__init__(encoder, decoder, output_dim, num_classes)

    def forward(
            self,
            inputs: Tensor,
            input_lengths: Tensor,
            targets: Tensor,
            target_lengths: Tensor
    ) -> Tensor:
        """
        Forward propagate a `inputs` and `targets` pair for training.
        Args:
            inputs (torch.FloatTensor): A input sequence passed to encoder. Typically for inputs this will be a padded
                `FloatTensor` of size ``(batch, seq_length, dimension)``.
            input_lengths (torch.LongTensor): The length of input tensor. ``(batch)``
            targets (torch.LongTensr): A target sequence passed to decoder. `IntTensor` of size ``(batch, seq_length)``
            target_lengths (torch.LongTensor): The length of target tensor. ``(batch)``
        Returns:
            * predictions (torch.FloatTensor): Result of model predictions.
        """
        return super().forward(inputs, input_lengths, targets, target_lengths)

In [10]:
# Copyright (c) 2021, Soohwan Kim. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import torch.nn as nn
import torchaudio


class TransducerLoss(nn.Module):
    """
    Transducer loss module.
    Args:
        blank_id (int): blank symbol id
    """

    def __init__(self, blank_id: int) -> None:
        """Construct an TransLoss object."""
        super().__init__()
        #self._device = torch.device("cuda:0" if(torch.cuda.is_available()) else "cpu")
        """
        try:
            from warp_rnnt import rnnt_loss
        except ImportError:
            raise ImportError("warp-rnnt is not installed. Please re-setup")
        """
        rnnt_loss = torchaudio.transforms.RNNTLoss(blank=blank_id,reduction='sum')
        self.rnnt_loss = rnnt_loss
        self.blank_id = blank_id

    def forward(
            self,
            log_probs: torch.FloatTensor,
            targets: torch.IntTensor,
            input_lengths: torch.IntTensor,
            target_lengths: torch.IntTensor,
    ) -> torch.FloatTensor:
        """
        Compute path-aware regularization transducer loss.
        Args:
            log_probs (torch.FloatTensor): Batch of predicted sequences (batch, maxlen_in, maxlen_out+1, odim)
            targets (torch.IntTensor): Batch of target sequences (batch, maxlen_out)
            input_lengths (torch.IntTensor): batch of lengths of predicted sequences (batch)
            target_lengths (torch.IntTensor): batch of lengths of target sequences (batch)
        Returns:
            loss (torch.FloatTensor): transducer loss
        """
        """
        return self.rnnt_loss(
            log_probs,
            targets,
            input_lengths,
            target_lengths,
            reduction="mean",
            blank=self.blank_id,
            gather=True,
        )
        """
        return self.rnnt_loss(log_probs,targets,input_lengths,target_lengths)

In [11]:
import numpy as np
#import librosa
from glob import glob
import os
import soundfile as sf
from torchaudio import transforms as audioT
import random
from pathlib import Path
class DataLoader:
    def __init__(self,batch_size=1,is_test_set=False):
        super().__init__()
        self.batch_size = batch_size
        self.is_test_set = is_test_set
        common_folder = "D:\\datasets\\LibriSpeech\\train-clean-100" #
        #common_folder = "LibriSpeech/dev-clean"#"LibriSpeech/train-clean-100"
        if(is_test_set==True):
            common_folder = "LibriSpeech/test-clean"
        transcripts_files = glob(os.path.join(common_folder,"*\\*\\*.txt"),recursive=True)
        audio_files = glob(os.path.join(common_folder,"*\\*\\*.flac"),recursive=True)
        if(is_test_set==False):
            audio_files_len = len(audio_files)
            audio_files = audio_files[:audio_files_len//8]
        print("transcripts_files:",len(transcripts_files))
        print("audio_files:",len(audio_files))
        transcripts = {}
        for ts_fpath in transcripts_files:
            with open(ts_fpath,"r") as fp:
                txt = fp.read().split("\n")
                for ln in txt:
                    ln=ln.strip()
                    if(ln==""):
                        continue
                  
                    lines = ln.split(" ",1)
                    fn = lines[0]
                    lbls = lines[1].upper()
                    transcripts[fn] = lbls
        self.audio_files = audio_files
        self.transcript_keys = list(transcripts.keys())
        self.transcript_table = "".join([chr(ord("A")+i) for i in range(26)])+(" .'!?")
        #print("transcript",transcripts)
        #print("transcript keys:",self.transcript_keys)
        
        self.transcripts = transcripts
        self._audio_file_len = len(self.audio_files)
        if(is_test_set==False):
            self.shuffleDataset()

    def __len__(self):
        return len(self.audio_files)//self.batch_size
    def _loadFeature(self,fpath):
        rawinput,sample_rate = torchaudio.load(fpath) #sf.read(fpath)
        audio_transform = audioT.MFCC(sample_rate=sample_rate,n_mfcc=96,log_mels=True)
        mfcc = audio_transform(rawinput)
        mfcc = mfcc.squeeze(dim=0).transpose(1,0)
        return mfcc
    def _convertTranscript2(self,text_list,with_blank=False):
        result = []
        for char in text_list:
            result.append(self.transcript_table.find(char)+3)
            if(with_blank==True):
                result.append(2)
        return result
    def shuffleDataset(self):
        random.shuffle(self.audio_files)
    def token2String(self,token_list,with_visible_tag=True):
        result = ""
        max_len = len(self.transcript_table)+2
        for i in range(len(token_list)):
            token_id = token_list[i]
            if(token_id==1):
                result+='<s>' if(with_visible_tag==True) else ""
            elif(token_id==0):
                result += '</s>' if(with_visible_tag==True) else ""
            elif(token_id==2):
                result += '</b>' if(with_visible_tag==True) else ""
            elif(2<token_id<max_len):
                result+= self.transcript_table[token_id-3]
            else:
                result+='</u?>' if(with_visible_tag==True) else ""
        return result
    
    def __getitem__(self,idx):
        if(idx>=self.__len__()):
            raise StopIteration()
        start_index,end_cond = (idx*self.batch_size,min((idx+1)*self.batch_size,(self._audio_file_len)))
        #print((start_index,end_cond))
        specs = []
        transcripts = []
        transcript_lengths = []
        specs_lengths = []

        for i in range(start_index,end_cond):
            fpath = self.audio_files[i]
            feature = self._loadFeature(fpath)
            fn = Path(fpath).stem
            specs.append(feature)#torch.from_numpy(feature))
            specs_lengths.append(feature.size(0))
            transcript_indices = [0]+self._convertTranscript2(self.transcripts[fn],with_blank=False)#+[2]
            transcripts.append(torch.from_numpy(np.array(transcript_indices)))
            transcript_lengths.append(len(transcript_indices))
        transcript_lengths = np.array(transcript_lengths)
        specs_lengths = np.array(specs_lengths)
        return specs,specs_lengths,transcripts,transcript_lengths

In [12]:

chosung = ("ㄱ", "ㄲ", "ㄴ", "ㄷ", "ㄸ", "ㄹ", "ㅁ", "ㅂ", "ㅃ", "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅉ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ")

jungsung = ("ㅏ", "ㅐ", "ㅑ", "ㅒ", "ㅓ", "ㅔ", "ㅕ", "ㅖ", "ㅗ", "ㅘ", "ㅙ", "ㅚ", "ㅛ", "ㅜ", "ㅝ", "ㅞ", "ㅟ", "ㅠ", "ㅡ", "ㅢ", "ㅣ")

jongsung = ("", "ㄱ", "ㄲ", "ㄳ", "ㄴ", "ㄵ", "ㄶ", "ㄷ", "ㄹ", "ㄺ", "ㄻ", "ㄼ", "ㄽ", "ㄾ", "ㄿ", "ㅀ", "ㅁ", "ㅂ", "ㅄ", "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ")

ENGS = tuple([chr(ord("A")+i) for i in range(26)])

special_chars = tuple(list(" #$!?@~^&*()'\"`_:"))

import re

def replaceBracket(sentence):
    rst = re.sub( "(\(((\S|\s)*)(\)_))","",sentence)
    rst = re.sub( "(\(((\S|\s)*)(\)))","",rst)
    return rst

def getHangeulIndex(one_character):
    for i,comp in enumerate(chosung+jungsung+jongsung+ENGS+special_chars):
        if(one_character.upper()==comp):
            return i+4
    return 3
def convertString2HangeulIndex(text):
    result = []
    for character in text:
        result.append(getHangeulIndex(character))
    return result

def isHangeul(one_character):
    return 0xAC00 <= ord(one_character[:1]) <= 0xD7A3

def hangeulExplode(one_hangeul):
    a = one_hangeul[:1]
    if isHangeul(a) != True:
        return False
    b = ord(a) - 0xAC00
    cho = b // (21*28)
    jung = b % (21*28) // 28
    jong = b % 28
    if jong == 0:
        return (chosung[cho], jungsung[jung])
    else:
        return (chosung[cho], jungsung[jung], jongsung[jong])

def hangeulJoin(inputlist):
    result = ""
    cho, jung, jong = 0, 0, 0
    inputlist.insert(0, "")
    while len(inputlist) > 1:
        if inputlist[-1] in jongsung:
            if inputlist[-2] in jungsung:
                jong = jongsung.index(inputlist.pop())
            
            else:
                result += inputlist.pop()
        elif inputlist[-1] in jungsung:
            if inputlist[-2] in chosung:
                jung = jungsung.index(inputlist.pop())
                cho = chosung.index(inputlist.pop())
                result += chr(0xAC00 + ((cho*21)+jung)*28+jong)
                cho, jung, jong = 0, 0, 0
            else:
                result += inputlist.pop()

        else:
            result += inputlist.pop()
    else:
        return result[::-1]

def pureosseugi(inputtext):
    result = ""
    for i in inputtext:
        if isHangeul(i) == True:
            for j in hangeulExplode(i):
                result += j
        else:
            result += i
    
    return result

def moasseugi(inputtext):
    t1 = []
    for i in inputtext:
        t1.append(i)

    return hangeulJoin(t1)

In [22]:
import numpy as np
import os
from glob import glob
import json
from pathlib import Path
import torch
import torchaudio
from torchaudio import transforms as audioT



class DatasetLoader:
    def __init__(self,is_train_dataset:bool,batch_size,top_dataset_folder = "자유대화 음성(일반남녀)"):
        super().__init__()
        section_folder = "Training"
        if(is_train_dataset==False):
            section_folder = "Validation"
        top_folder = os.path.join(top_dataset_folder,section_folder)
        tmp_audios = glob(os.path.join(top_folder,"*\\*.wav"))
        audios = []
        for aud in tmp_audios:
            ffolder = os.path.dirname(aud)
            fname = os.path.basename(aud)
            fn = Path(fname).stem
            trans_path = os.path.join(ffolder,fn+".json")
            if(os.path.exists(trans_path)==True):
                trans = self.__getTranscription(trans_path)
                trans = replaceBracket(trans)
                if(len(trans)>0):
                    audios.append(aud)
        self.__audios = audios
        self.__audio_len = len(audios)
        self._batch_size = batch_size
    def __len__(self):
        return self.__audio_len//self._batch_size
    def __getTranscription(self,fpath):
        with open(fpath,"r",encoding="UTF-8") as fp:
            data = json.load(fp)["발화정보"]["stt"]
            data = pureosseugi(data)
            #data = jamo.h2j(data)
            #data = jamo.j2hcj(data)
        return data
    def __getRawAudioData(self,fpath,with_pytorch=True):
        if(with_pytorch==True):
            data,sample_rate = torchaudio.load(fpath)
            return data,sample_rate
        else:
            import librosa
            data,sample_rate = librosa.load(fpath)
            return data,sample_rate

    def __getAudioData(self,fpath,as_raw_data:bool = False,with_pytorch=True):
        data,sample_rate = self.__getRawAudioData(fpath,with_pytorch)
        if(as_raw_data==False):
            if(with_pytorch==True):
                mfcc_transforms = audioT.MFCC(sample_rate=sample_rate,n_mfcc=96)
                data = mfcc_transforms(data)
                data = data.transpose(1,2)
                data = data.squeeze(0)
                length = data.size(0)

            else:
                import librosa
                data= librosa.feature.mfcc(y=data,sr=sample_rate).T
                data = torch.from_numpy(data)
                length = data.size(0)
        return data,length
        
        
    def shuffleDataset(self):
        random.shuffle(self.__audios)
    def __getitem__(self,idx):
        st_idx,ed_idx = idx*self._batch_size,min((idx+1)*self._batch_size,self.__audio_len)
        audios = []
        audio_lengths = []
        transcripts = []
        transcript_length = []
        for i in range(st_idx,ed_idx):
            fpath = self.__audios[i]
            fname = os.path.basename(fpath)
            #print(fname)
            ffolder = os.path.dirname(fpath)
            transcript_fpath = os.path.join(ffolder,Path(fname).stem+".json")
            transcription = self.__getTranscription(transcript_fpath)
            audio_data,data_length = self.__getAudioData(fpath)
            audios.append(audio_data)
            audio_lengths.append(data_length)
            #print("transcript:",transcription)
            transcription = replaceBracket(transcription)
            #print("transcript2:",transcription)
            transcription = convertString2HangeulIndex(transcription)
            transcripts.append(torch.from_numpy(np.array(transcription)))

            transcript_length.append(len(transcription))#temp length
        audios = torch.nn.utils.rnn.pad_sequence(audios,batch_first=True)
        audio_lengths = np.array(audio_lengths)
        transcript_length = np.array(transcript_length)
        #transcripts = np.array(transcripts)
        return audios,audio_lengths,transcripts,transcript_length
        
if(__name__=="__main__"):
    debug_dataset = True
    if(debug_dataset==True):
        dataset_loader = DatasetLoader(is_train_dataset=True,batch_size=2,top_dataset_folder="자유대화 음성(일반남녀)")
        for i, (audios,audio_lengths,transcripts,transcript_length) in enumerate(dataset_loader):
            #print("audios:",len(audios),"transcripts:",len(transcripts))
            sample_audio = audios[0]
            sample_transcripts = transcripts[0]
            print(sample_audio.shape)
            print("  ",sample_transcripts)
            labels = nn.utils.rnn.pad_sequence(transcripts,batch_first=True)
            print("label:",labels.shape)
            if(i>10):
                break


torch.Size([666, 96])
   tensor([ 4, 41,  9, 27,  6, 98, 13, 28, 10, 15, 43, 16, 43, 98,  4, 41, 98, 15,
        43, 22, 36, 98, 18, 27,  6, 22, 32, 15, 15, 43, 98, 16, 43,  6, 23,  7,
        23,  4, 23, 98,  6, 24,  9, 29,  4, 15, 41,  9, 98,  7, 41,  7,  4, 31,
        98, 15, 43,  9, 41, 10, 15, 41,  9, 98,  4, 41, 10, 13, 31,  9, 31,  9,
        31, 98, 11, 23,  5, 37, 14,  7, 24], dtype=torch.int32)
label: torch.Size([2, 81])
torch.Size([698, 96])
   tensor([19, 27,  7, 23,  9, 23,  6, 98, 16, 27,  6, 18, 27,  9, 15, 41,  9, 98,
        20, 23,  4, 31, 98, 15, 27,  7, 43,  9, 31,  6,  4, 23, 98,  4, 23,  4,
        31, 98, 15, 43, 14,  6, 41,  6,  7, 28, 98, 18, 23, 15, 11, 23,  5, 15,
        41,  9, 31, 98, 22, 24, 13, 13, 23,  9, 15, 43, 98,  6, 36,  6, 11, 36,
        13, 43,  4, 28, 98, 11, 36, 13, 27, 16, 43,  7, 27,  9, 23],
       dtype=torch.int32)
label: torch.Size([2, 88])
torch.Size([532, 96])
   tensor([ 4, 41,  9, 43,  4, 31, 98, 15, 35,  4, 23, 22, 23,  6, 41,  6, 

In [14]:
def predict(net,dataset):
    net.eval()
    state = None
    with torch.no_grad():
        for (features,feat_lens,labels,label_lens) in dataset:
            #features = torch.from_numpy(features)
            feat_lens = torch.from_numpy(feat_lens)
            #labels = torch.from_numpy(labels)
            label_lens = torch.from_numpy(label_lens)
            features = nn.utils.rnn.pad_sequence(features,batch_first=True)#.unsqueeze(1).transpose(2, 3)
            labels = nn.utils.rnn.pad_sequence(labels,batch_first=True)
            
            features = features.to(device)
            labels = labels.to(device)
            feat_lens = feat_lens.to(device)
            label_lens = label_lens.to(device)
            
            print(features.shape,labels.shape)
            output = net.recognize(features.float(),feat_lens)  # (batch, time, n_class)
            print("output:",output.shape)
            
            #output = output.argmax(dim=-1)
            print("raw output:",output)
            output = output.numpy().tolist()
            
                
            features = features.cpu()
            labels = labels.cpu()
            feat_lens = feat_lens.cpu()
            label_lens = label_lens.cpu()
            print("-"*10)
            print("\toriginal:",labels)
            #with_visible_tag
            ori_string = dataset.token2String(labels.numpy()[0].tolist())
            pred_string = dataset.token2String(output[0])
            print("\tori string: ",ori_string)
            print("\toutput result:",output[0][0])
            print("\topt string: ",pred_string)
            ori_string = dataset.token2String(labels.numpy()[0].tolist(),with_visible_tag=False)
            pred_string = dataset.token2String(output[0],with_visible_tag=False)
            print(f"\topt' string: '{pred_string}'")
            print("\twer:",wer(pred_string,ori_string))
            print("\tcer:",cer(pred_string,ori_string))
  

In [15]:
device = torch.device("cuda:0" if(torch.cuda.is_available()==True) else "cpu")

In [23]:
net = RNNTransducer(num_classes=200,input_dim=96)
net = net.to(device)

In [24]:
from torch import optim
initial_learning_rate = 0.0005
optimizer = optim.Adam(net.parameters(),lr=initial_learning_rate)#,rho=0.95,eps=1e-8,weight_decay=0.0)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer,gamma=0.95) #.CosineAnnealingLR(optimizer,T_max=100,eta_min=0.001)

In [None]:
predict(net,testdataset)

In [None]:
torch.save(net.state_dict(),"rnnt_ko.pth")

In [27]:
if(__name__=="__main__"):
    from tqdm import tqdm
    #initial_learning_rate = 0.0001
    #import nemo.collections.asr as nemo_asr

    max_num_epoch = 3
    #optimizer = optim.Adam(net.parameters(),lr=initial_learning_rate)#,rho=0.95,eps=1e-8,weight_decay=0.0)
    #criterion = audioT.RNNTLoss(blank=0,fused_log_softmax=True,reduction='sum')#,clamp=1.0)#'sum')
    criterion =  TransducerLoss(blank_id=0)#nemo_asr.losses.rnnt.RNNTLoss(num_classes=0)
    traindataset = DatasetLoader(is_train_dataset=True,batch_size=1,top_dataset_folder="자유대화 음성(일반남녀)")
    testdataset = DatasetLoader(is_train_dataset=False,batch_size=1,top_dataset_folder="자유대화 음성(일반남녀)")
    #torch.save(net.parameters(),"rnnt.pth")
    #predict(net,testdataset)
    
    for epoch in range(max_num_epoch):
        index = 0
        traindataset.shuffleDataset()
        losses = []
        net.train()
        for (features,feat_lens,labels,label_lens) in tqdm((traindataset)):
            feat_lens,label_lens = torch.from_numpy(feat_lens),torch.from_numpy(label_lens)

            features = nn.utils.rnn.pad_sequence(features,batch_first=True)#.unsqueeze(1).transpose(2, 3)
            labels = nn.utils.rnn.pad_sequence(labels,batch_first=True)

            #"""
            sorted_lens,indices = torch.sort(feat_lens.view(-1),dim=0,descending=True)
            features = features[indices]
            labels = labels[indices]
            feat_lens = sorted_lens
            label_lens = label_lens[indices]
            #"""
            
            features = features.float()
            features = features.to(device)
            labels = labels.to(device)
            feat_lens = feat_lens.to(device)
            label_lens = label_lens.to(device)
            
            optimizer.zero_grad()
            
            joint_outputs = net(features.float(),feat_lens,labels.int(),label_lens)
            
            outputs = joint_outputs#
            

            loss = criterion(log_probs=outputs,targets=(labels[...,1:]).int(),input_lengths=(feat_lens).int() ,target_lengths=(label_lens-1).int())
            del features,labels,feat_lens,label_lens
            torch.cuda.empty_cache()
            
            loss.backward()
            
            optimizer.step()
            losses.append(loss.item())
            if(index%100==0):
                print(loss.item())
            index+=1
        print(f"ep={epoch}] loss= ",np.mean(losses))
        scheduler.step()
    torch.save(net.state_dict(),"kornnt_ko.pth")
    #predict(net,testdataset)

  0%|                                                                              | 1/46290 [00:00<4:42:29,  2.73it/s]

75.58484649658203


  0%|▏                                                                           | 101/46290 [00:20<2:21:51,  5.43it/s]

82.97039031982422


  0%|▎                                                                           | 202/46290 [00:41<2:06:26,  6.08it/s]

63.96460723876953


  1%|▍                                                                           | 301/46290 [01:01<2:14:25,  5.70it/s]

69.58851623535156


  1%|▋                                                                           | 402/46290 [01:23<2:18:58,  5.50it/s]

49.22011184692383


  1%|▊                                                                           | 502/46290 [01:44<2:19:45,  5.46it/s]

89.96521759033203


  1%|▉                                                                           | 602/46290 [02:05<2:28:08,  5.14it/s]

34.719417572021484


  2%|█▏                                                                          | 701/46290 [02:25<2:46:47,  4.56it/s]

62.309696197509766


  2%|█▎                                                                          | 802/46290 [02:46<1:57:17,  6.46it/s]

31.020946502685547


  2%|█▍                                                                          | 901/46290 [03:05<2:39:46,  4.73it/s]

181.8939208984375


  2%|█▌                                                                         | 1002/46290 [03:26<2:05:26,  6.02it/s]

47.117183685302734


  2%|█▊                                                                         | 1101/46290 [03:47<2:44:01,  4.59it/s]

89.80774688720703


  3%|█▉                                                                         | 1201/46290 [04:07<2:05:24,  5.99it/s]

88.61614227294922


  3%|██                                                                         | 1301/46290 [04:28<2:11:04,  5.72it/s]

77.85064697265625


  3%|██▎                                                                        | 1401/46290 [04:49<2:41:46,  4.62it/s]

113.34015655517578


  3%|██▍                                                                        | 1501/46290 [05:09<2:32:57,  4.88it/s]

106.61524200439453


  3%|██▌                                                                        | 1602/46290 [05:31<2:12:11,  5.63it/s]

74.9659423828125


  4%|██▊                                                                        | 1701/46290 [05:52<2:05:49,  5.91it/s]

148.17340087890625


  4%|██▉                                                                        | 1801/46290 [06:12<2:18:24,  5.36it/s]

19.750892639160156


  4%|███                                                                        | 1901/46290 [06:32<2:04:15,  5.95it/s]

53.57270050048828


  4%|███▏                                                                       | 2002/46290 [06:51<2:06:23,  5.84it/s]

108.65145111083984


  5%|███▍                                                                       | 2102/46290 [07:12<2:34:28,  4.77it/s]

11.792133331298828


  5%|███▌                                                                       | 2201/46290 [07:31<2:10:22,  5.64it/s]

42.82441711425781


  5%|███▋                                                                       | 2301/46290 [07:51<2:30:26,  4.87it/s]

102.06041717529297


  5%|███▉                                                                       | 2402/46290 [08:14<2:30:22,  4.86it/s]

58.88202667236328


  5%|████                                                                       | 2502/46290 [08:35<2:03:05,  5.93it/s]

71.9266357421875


  6%|████▏                                                                      | 2601/46290 [08:55<2:18:04,  5.27it/s]

85.53187561035156


  6%|████▍                                                                      | 2701/46290 [09:16<2:38:31,  4.58it/s]

101.48155975341797


  6%|████▌                                                                      | 2802/46290 [09:37<2:10:12,  5.57it/s]

38.43658447265625


  6%|████▋                                                                      | 2901/46290 [09:57<2:20:37,  5.14it/s]

44.619300842285156


  6%|████▊                                                                      | 3002/46290 [10:19<2:38:30,  4.55it/s]

121.73786926269531


  7%|█████                                                                      | 3102/46290 [10:38<2:12:32,  5.43it/s]

nan


  7%|█████▏                                                                     | 3201/46290 [10:59<2:52:58,  4.15it/s]

nan


  7%|█████▎                                                                     | 3301/46290 [11:20<2:55:02,  4.09it/s]

nan


  7%|█████▌                                                                     | 3402/46290 [11:41<2:03:13,  5.80it/s]

nan


  8%|█████▋                                                                     | 3502/46290 [12:01<1:59:11,  5.98it/s]

nan


  8%|█████▊                                                                     | 3601/46290 [12:21<2:27:46,  4.81it/s]

nan


  8%|█████▉                                                                     | 3701/46290 [12:42<2:13:55,  5.30it/s]

nan


  8%|██████▏                                                                    | 3802/46290 [13:03<2:10:19,  5.43it/s]

nan


  8%|██████▎                                                                    | 3902/46290 [13:23<2:35:34,  4.54it/s]

nan


  9%|██████▍                                                                    | 4001/46290 [13:44<2:16:09,  5.18it/s]

nan


  9%|██████▋                                                                    | 4102/46290 [14:06<2:05:17,  5.61it/s]

nan


  9%|██████▊                                                                    | 4201/46290 [14:26<2:21:07,  4.97it/s]

nan


  9%|██████▉                                                                    | 4301/46290 [14:47<2:08:19,  5.45it/s]

nan


 10%|███████▏                                                                   | 4401/46290 [15:07<2:29:13,  4.68it/s]

nan


 10%|███████▎                                                                   | 4502/46290 [15:28<2:29:24,  4.66it/s]

nan


 10%|███████▍                                                                   | 4601/46290 [15:48<2:25:16,  4.78it/s]

nan


 10%|███████▌                                                                   | 4702/46290 [16:09<2:19:34,  4.97it/s]

nan


 10%|███████▊                                                                   | 4801/46290 [16:29<2:02:42,  5.64it/s]

nan


 11%|███████▉                                                                   | 4902/46290 [16:49<2:17:06,  5.03it/s]

nan


 11%|████████                                                                   | 5002/46290 [17:10<2:01:12,  5.68it/s]

nan


 11%|████████▎                                                                  | 5102/46290 [17:30<2:24:03,  4.77it/s]

nan


 11%|████████▍                                                                  | 5201/46290 [17:50<2:20:25,  4.88it/s]

nan


 11%|████████▌                                                                  | 5301/46290 [18:12<2:38:08,  4.32it/s]

nan


 12%|████████▊                                                                  | 5401/46290 [18:33<2:12:02,  5.16it/s]

nan


 12%|████████▉                                                                  | 5502/46290 [18:52<1:51:31,  6.10it/s]

nan


 12%|█████████                                                                  | 5601/46290 [19:11<2:01:05,  5.60it/s]

nan


 12%|█████████▏                                                                 | 5702/46290 [19:32<1:57:44,  5.75it/s]

nan


 13%|█████████▍                                                                 | 5801/46290 [19:51<2:08:20,  5.26it/s]

nan


 13%|█████████▌                                                                 | 5901/46290 [20:13<2:55:31,  3.84it/s]

nan


 13%|█████████▋                                                                 | 6001/46290 [20:33<2:08:33,  5.22it/s]

nan


 13%|█████████▉                                                                 | 6102/46290 [20:54<2:06:38,  5.29it/s]

nan


 13%|██████████                                                                 | 6202/46290 [21:13<2:12:38,  5.04it/s]

nan


 14%|██████████▏                                                                | 6301/46290 [21:34<2:17:10,  4.86it/s]

nan


 14%|██████████▎                                                                | 6401/46290 [21:54<2:34:54,  4.29it/s]

nan


 14%|██████████▌                                                                | 6501/46290 [22:16<2:16:28,  4.86it/s]

nan


 14%|██████████▋                                                                | 6602/46290 [22:36<1:55:27,  5.73it/s]

nan


 14%|██████████▊                                                                | 6701/46290 [22:56<2:36:17,  4.22it/s]

nan


 15%|███████████                                                                | 6801/46290 [23:17<2:56:55,  3.72it/s]

nan


 15%|███████████▏                                                               | 6902/46290 [23:37<1:50:07,  5.96it/s]

nan


 15%|███████████▎                                                               | 7001/46290 [23:57<2:12:58,  4.92it/s]

nan


 15%|███████████▌                                                               | 7101/46290 [24:17<2:04:05,  5.26it/s]

nan


 16%|███████████▋                                                               | 7201/46290 [24:38<3:05:10,  3.52it/s]

nan


 16%|███████████▊                                                               | 7302/46290 [24:58<2:51:11,  3.80it/s]

nan


 16%|███████████▉                                                               | 7401/46290 [25:18<2:15:57,  4.77it/s]

nan


 16%|████████████▏                                                              | 7501/46290 [25:37<2:41:17,  4.01it/s]

nan


 16%|████████████▎                                                              | 7601/46290 [25:58<2:00:35,  5.35it/s]

nan


 17%|████████████▍                                                              | 7702/46290 [26:19<2:10:53,  4.91it/s]

nan


 17%|████████████▋                                                              | 7801/46290 [26:38<1:49:22,  5.87it/s]

nan


 17%|████████████▊                                                              | 7902/46290 [26:58<1:55:53,  5.52it/s]

nan


 17%|████████████▉                                                              | 8001/46290 [27:18<1:54:58,  5.55it/s]

nan


 18%|█████████████▏                                                             | 8102/46290 [27:39<1:45:29,  6.03it/s]

nan


 18%|█████████████▎                                                             | 8201/46290 [27:59<1:57:43,  5.39it/s]

nan


 18%|█████████████▍                                                             | 8301/46290 [28:19<1:51:19,  5.69it/s]

nan


 18%|█████████████▌                                                             | 8401/46290 [28:40<2:44:59,  3.83it/s]

nan


 18%|█████████████▊                                                             | 8501/46290 [29:01<2:12:32,  4.75it/s]

nan


 19%|█████████████▉                                                             | 8602/46290 [29:22<2:05:56,  4.99it/s]

nan


 19%|██████████████                                                             | 8701/46290 [29:41<2:25:39,  4.30it/s]

nan


 19%|██████████████▎                                                            | 8802/46290 [30:01<1:55:45,  5.40it/s]

nan


 19%|██████████████▍                                                            | 8901/46290 [30:20<1:51:57,  5.57it/s]

nan


 19%|██████████████▌                                                            | 9002/46290 [30:40<2:21:27,  4.39it/s]

nan


 20%|██████████████▋                                                            | 9101/46290 [31:00<2:16:09,  4.55it/s]

nan


 20%|██████████████▉                                                            | 9201/46290 [31:20<1:53:32,  5.44it/s]

nan


 20%|███████████████                                                            | 9302/46290 [31:41<2:11:06,  4.70it/s]

nan


 20%|███████████████▏                                                           | 9401/46290 [32:02<2:35:05,  3.96it/s]

nan


 21%|███████████████▍                                                           | 9502/46290 [32:22<1:36:25,  6.36it/s]

nan


 21%|███████████████▌                                                           | 9601/46290 [32:41<2:00:44,  5.06it/s]

nan


 21%|███████████████▋                                                           | 9702/46290 [33:02<1:52:11,  5.44it/s]

nan


 21%|███████████████▉                                                           | 9802/46290 [33:22<1:49:28,  5.56it/s]

nan


 21%|████████████████                                                           | 9901/46290 [33:42<1:57:19,  5.17it/s]

nan


 22%|███████████████▉                                                          | 10001/46290 [34:03<2:16:09,  4.44it/s]

nan


 22%|████████████████▏                                                         | 10101/46290 [34:23<1:53:11,  5.33it/s]

nan


 22%|████████████████▎                                                         | 10201/46290 [34:44<2:35:51,  3.86it/s]

nan


 22%|████████████████▍                                                         | 10302/46290 [35:05<1:51:01,  5.40it/s]

nan


 22%|████████████████▋                                                         | 10402/46290 [35:26<2:00:14,  4.97it/s]

nan


 23%|████████████████▊                                                         | 10502/46290 [35:46<1:59:18,  5.00it/s]

nan


 23%|████████████████▉                                                         | 10601/46290 [36:06<2:14:58,  4.41it/s]

nan


 23%|█████████████████                                                         | 10701/46290 [36:27<1:56:22,  5.10it/s]

nan


 23%|█████████████████▎                                                        | 10802/46290 [36:48<1:48:47,  5.44it/s]

nan


 24%|█████████████████▍                                                        | 10901/46290 [37:09<1:55:47,  5.09it/s]

nan


 24%|█████████████████▌                                                        | 11002/46290 [37:30<1:49:27,  5.37it/s]

nan


 24%|█████████████████▋                                                        | 11101/46290 [37:50<1:50:06,  5.33it/s]

nan


 24%|█████████████████▉                                                        | 11202/46290 [38:11<2:04:36,  4.69it/s]

nan


 24%|██████████████████                                                        | 11302/46290 [38:31<1:36:36,  6.04it/s]

nan


 25%|██████████████████▏                                                       | 11402/46290 [38:52<1:58:15,  4.92it/s]

nan


 25%|██████████████████▍                                                       | 11501/46290 [39:12<2:16:52,  4.24it/s]

nan


 25%|██████████████████▌                                                       | 11601/46290 [39:33<1:59:36,  4.83it/s]

nan


 25%|██████████████████▋                                                       | 11701/46290 [39:55<2:43:29,  3.53it/s]

nan


 25%|██████████████████▊                                                       | 11801/46290 [40:14<2:08:04,  4.49it/s]

nan


 26%|███████████████████                                                       | 11902/46290 [40:34<1:51:20,  5.15it/s]

nan


 26%|███████████████████▏                                                      | 12002/46290 [40:55<1:40:04,  5.71it/s]

nan


 26%|███████████████████▎                                                      | 12101/46290 [41:16<2:18:17,  4.12it/s]

nan


 26%|███████████████████▌                                                      | 12201/46290 [41:37<1:31:11,  6.23it/s]

nan


 27%|███████████████████▋                                                      | 12301/46290 [41:58<2:07:09,  4.45it/s]

nan


 27%|███████████████████▊                                                      | 12401/46290 [42:18<1:41:43,  5.55it/s]

nan


 27%|███████████████████▉                                                      | 12501/46290 [42:38<1:45:20,  5.35it/s]

nan


 27%|████████████████████▏                                                     | 12601/46290 [42:58<1:56:16,  4.83it/s]

nan


 27%|████████████████████▎                                                     | 12702/46290 [43:17<1:35:18,  5.87it/s]

nan


 28%|████████████████████▍                                                     | 12801/46290 [43:38<2:07:55,  4.36it/s]

nan


 28%|████████████████████▌                                                     | 12901/46290 [43:58<1:44:25,  5.33it/s]

nan


 28%|████████████████████▊                                                     | 13001/46290 [44:18<1:59:38,  4.64it/s]

nan


 28%|████████████████████▉                                                     | 13102/46290 [44:40<1:44:42,  5.28it/s]

nan


 29%|█████████████████████                                                     | 13202/46290 [45:00<1:42:31,  5.38it/s]

nan


 29%|█████████████████████▎                                                    | 13302/46290 [45:21<1:46:25,  5.17it/s]

nan


 29%|█████████████████████▍                                                    | 13402/46290 [45:41<1:48:39,  5.04it/s]

nan


 29%|█████████████████████▌                                                    | 13501/46290 [46:01<1:47:56,  5.06it/s]

nan


 29%|█████████████████████▋                                                    | 13602/46290 [46:22<1:43:34,  5.26it/s]

nan


 30%|█████████████████████▉                                                    | 13702/46290 [46:41<1:37:45,  5.56it/s]

nan


 30%|██████████████████████                                                    | 13801/46290 [47:01<1:28:02,  6.15it/s]

nan


 30%|██████████████████████▏                                                   | 13901/46290 [47:22<1:29:55,  6.00it/s]

nan


 30%|██████████████████████▍                                                   | 14002/46290 [47:43<1:32:54,  5.79it/s]

nan


 30%|██████████████████████▌                                                   | 14102/46290 [48:04<2:04:42,  4.30it/s]

nan


 31%|██████████████████████▋                                                   | 14201/46290 [48:25<1:52:55,  4.74it/s]

nan


 31%|██████████████████████▊                                                   | 14302/46290 [48:46<1:29:06,  5.98it/s]

nan


 31%|███████████████████████                                                   | 14402/46290 [49:06<1:50:20,  4.82it/s]

nan


 31%|███████████████████████▏                                                  | 14501/46290 [49:26<1:31:53,  5.77it/s]

nan


 32%|███████████████████████▎                                                  | 14601/46290 [49:46<1:40:26,  5.26it/s]

nan


 32%|███████████████████████▌                                                  | 14702/46290 [50:06<1:45:21,  5.00it/s]

nan


 32%|███████████████████████▋                                                  | 14801/46290 [50:26<1:29:16,  5.88it/s]

nan


 32%|███████████████████████▊                                                  | 14901/46290 [50:47<1:32:08,  5.68it/s]

nan


 32%|███████████████████████▉                                                  | 15001/46290 [51:06<1:45:44,  4.93it/s]

nan


 33%|████████████████████████▏                                                 | 15101/46290 [51:28<1:36:39,  5.38it/s]

nan


 33%|████████████████████████▎                                                 | 15202/46290 [51:49<1:46:50,  4.85it/s]

nan


 33%|████████████████████████▍                                                 | 15302/46290 [52:10<1:34:52,  5.44it/s]

nan


 33%|████████████████████████▌                                                 | 15401/46290 [52:30<1:46:02,  4.86it/s]

nan


 33%|████████████████████████▊                                                 | 15502/46290 [52:50<1:37:36,  5.26it/s]

nan


 34%|████████████████████████▉                                                 | 15601/46290 [53:13<1:52:54,  4.53it/s]

nan


 34%|█████████████████████████                                                 | 15701/46290 [53:33<1:27:04,  5.85it/s]

nan


 34%|█████████████████████████▎                                                | 15802/46290 [53:54<1:48:45,  4.67it/s]

nan


 34%|█████████████████████████▍                                                | 15902/46290 [54:14<1:40:20,  5.05it/s]

nan


 35%|█████████████████████████▌                                                | 16001/46290 [54:34<1:32:19,  5.47it/s]

nan


 35%|█████████████████████████▋                                                | 16102/46290 [54:55<1:47:27,  4.68it/s]

nan


 35%|█████████████████████████▉                                                | 16201/46290 [55:15<1:40:12,  5.00it/s]

nan


 35%|██████████████████████████                                                | 16301/46290 [55:35<1:37:03,  5.15it/s]

nan


 35%|██████████████████████████▏                                               | 16401/46290 [55:54<1:44:08,  4.78it/s]

nan


 36%|██████████████████████████▍                                               | 16502/46290 [56:14<1:14:24,  6.67it/s]

nan


 36%|██████████████████████████▌                                               | 16602/46290 [56:35<1:48:03,  4.58it/s]

nan


 36%|██████████████████████████▋                                               | 16702/46290 [56:55<1:32:45,  5.32it/s]

nan


 36%|██████████████████████████▊                                               | 16801/46290 [57:16<1:43:32,  4.75it/s]

nan


 37%|███████████████████████████                                               | 16902/46290 [57:36<1:29:23,  5.48it/s]

nan


 37%|███████████████████████████▏                                              | 17001/46290 [57:58<1:39:26,  4.91it/s]

nan


 37%|███████████████████████████▎                                              | 17102/46290 [58:18<1:29:15,  5.45it/s]

nan


 37%|███████████████████████████▍                                              | 17202/46290 [58:38<1:21:19,  5.96it/s]

nan


 37%|███████████████████████████▋                                              | 17301/46290 [58:59<1:34:14,  5.13it/s]

nan


 38%|███████████████████████████▊                                              | 17401/46290 [59:20<1:57:26,  4.10it/s]

nan


 38%|███████████████████████████▉                                              | 17502/46290 [59:40<1:30:47,  5.28it/s]

nan


 38%|████████████████████████████▏                                             | 17602/46290 [59:59<1:22:24,  5.80it/s]

nan


 38%|███████████████████████████▌                                            | 17702/46290 [1:00:21<1:36:51,  4.92it/s]

nan


 38%|███████████████████████████▋                                            | 17801/46290 [1:00:42<1:49:39,  4.33it/s]

nan


 39%|███████████████████████████▊                                            | 17901/46290 [1:01:03<1:31:53,  5.15it/s]

nan


 39%|███████████████████████████▉                                            | 18001/46290 [1:01:24<1:39:13,  4.75it/s]

nan


 39%|████████████████████████████▏                                           | 18101/46290 [1:01:44<1:18:07,  6.01it/s]

nan


 39%|████████████████████████████▎                                           | 18201/46290 [1:02:06<1:46:08,  4.41it/s]

nan


 40%|████████████████████████████▍                                           | 18301/46290 [1:02:26<1:40:42,  4.63it/s]

nan


 40%|████████████████████████████▌                                           | 18401/46290 [1:02:48<1:37:43,  4.76it/s]

nan


 40%|████████████████████████████▊                                           | 18502/46290 [1:03:08<1:22:41,  5.60it/s]

nan


 40%|████████████████████████████▉                                           | 18602/46290 [1:03:28<1:26:56,  5.31it/s]

nan


 40%|█████████████████████████████                                           | 18701/46290 [1:03:49<1:23:53,  5.48it/s]

nan


 41%|█████████████████████████████▏                                          | 18802/46290 [1:04:10<1:30:26,  5.07it/s]

nan


 41%|█████████████████████████████▍                                          | 18901/46290 [1:04:32<1:30:22,  5.05it/s]

nan


 41%|█████████████████████████████▌                                          | 19002/46290 [1:04:53<1:31:48,  4.95it/s]

nan


 41%|█████████████████████████████▋                                          | 19102/46290 [1:05:13<1:33:04,  4.87it/s]

nan


 41%|█████████████████████████████▊                                          | 19202/46290 [1:05:34<1:08:39,  6.58it/s]

nan


 42%|██████████████████████████████                                          | 19301/46290 [1:05:54<1:15:32,  5.95it/s]

nan


 42%|██████████████████████████████▏                                         | 19401/46290 [1:06:15<1:20:49,  5.54it/s]

nan


 42%|██████████████████████████████▎                                         | 19501/46290 [1:06:35<1:38:39,  4.53it/s]

nan


 42%|██████████████████████████████▍                                         | 19602/46290 [1:06:55<1:11:48,  6.19it/s]

nan


 43%|██████████████████████████████▋                                         | 19701/46290 [1:07:19<1:53:08,  3.92it/s]

nan


 43%|██████████████████████████████▊                                         | 19802/46290 [1:07:41<1:27:22,  5.05it/s]

nan


 43%|██████████████████████████████▉                                         | 19901/46290 [1:08:01<1:19:54,  5.50it/s]

nan


 43%|███████████████████████████████                                         | 20001/46290 [1:08:21<1:18:46,  5.56it/s]

nan


 43%|███████████████████████████████▎                                        | 20102/46290 [1:08:42<1:26:28,  5.05it/s]

nan


 44%|███████████████████████████████▍                                        | 20202/46290 [1:09:01<1:30:45,  4.79it/s]

nan


 44%|███████████████████████████████▌                                        | 20302/46290 [1:09:21<1:23:43,  5.17it/s]

nan


 44%|███████████████████████████████▋                                        | 20401/46290 [1:09:42<1:33:20,  4.62it/s]

nan


 44%|███████████████████████████████▉                                        | 20501/46290 [1:10:03<1:18:58,  5.44it/s]

nan


 45%|████████████████████████████████                                        | 20602/46290 [1:10:23<1:14:24,  5.75it/s]

nan


 45%|████████████████████████████████▏                                       | 20702/46290 [1:10:42<1:17:41,  5.49it/s]

nan


 45%|████████████████████████████████▎                                       | 20801/46290 [1:11:02<1:18:59,  5.38it/s]

nan


 45%|████████████████████████████████▌                                       | 20902/46290 [1:11:23<1:26:18,  4.90it/s]

nan


 45%|████████████████████████████████▋                                       | 21001/46290 [1:11:44<1:16:05,  5.54it/s]

nan


 46%|████████████████████████████████▊                                       | 21101/46290 [1:12:07<1:34:59,  4.42it/s]

nan


 46%|████████████████████████████████▉                                       | 21201/46290 [1:12:29<1:17:13,  5.41it/s]

nan


 46%|█████████████████████████████████▏                                      | 21301/46290 [1:12:50<1:46:45,  3.90it/s]

nan


 46%|█████████████████████████████████▎                                      | 21401/46290 [1:13:12<1:30:59,  4.56it/s]

nan


 46%|█████████████████████████████████▍                                      | 21501/46290 [1:13:33<1:05:41,  6.29it/s]

nan


 47%|█████████████████████████████████▌                                      | 21601/46290 [1:13:53<1:31:40,  4.49it/s]

nan


 47%|█████████████████████████████████▊                                      | 21701/46290 [1:14:14<1:39:53,  4.10it/s]

nan


 47%|█████████████████████████████████▉                                      | 21801/46290 [1:14:35<1:11:58,  5.67it/s]

nan


 47%|██████████████████████████████████                                      | 21901/46290 [1:14:55<1:10:55,  5.73it/s]

nan


 48%|██████████████████████████████████▏                                     | 22002/46290 [1:15:17<1:14:52,  5.41it/s]

nan


 48%|██████████████████████████████████▍                                     | 22102/46290 [1:15:38<1:27:57,  4.58it/s]

nan


 48%|██████████████████████████████████▌                                     | 22201/46290 [1:15:59<1:25:40,  4.69it/s]

nan


 48%|██████████████████████████████████▋                                     | 22302/46290 [1:16:20<1:07:43,  5.90it/s]

nan


 48%|██████████████████████████████████▊                                     | 22401/46290 [1:16:40<1:16:18,  5.22it/s]

nan


 49%|██████████████████████████████████▉                                     | 22502/46290 [1:17:01<1:06:33,  5.96it/s]

nan


 49%|███████████████████████████████████▏                                    | 22601/46290 [1:17:21<1:07:20,  5.86it/s]

nan


 49%|███████████████████████████████████▎                                    | 22701/46290 [1:17:41<1:22:34,  4.76it/s]

nan


 49%|███████████████████████████████████▍                                    | 22801/46290 [1:18:03<1:16:52,  5.09it/s]

nan


 49%|███████████████████████████████████▌                                    | 22901/46290 [1:18:23<1:29:15,  4.37it/s]

nan


 50%|███████████████████████████████████▊                                    | 23002/46290 [1:18:44<1:23:03,  4.67it/s]

nan


 50%|███████████████████████████████████▉                                    | 23102/46290 [1:19:05<1:15:38,  5.11it/s]

nan


 50%|████████████████████████████████████                                    | 23201/46290 [1:19:25<1:25:28,  4.50it/s]

nan


 50%|█████████████████████████████████████▎                                    | 23302/46290 [1:19:46<57:03,  6.71it/s]

nan


 51%|████████████████████████████████████▍                                   | 23401/46290 [1:20:06<1:18:48,  4.84it/s]

nan


 51%|████████████████████████████████████▌                                   | 23501/46290 [1:20:27<1:37:37,  3.89it/s]

nan


 51%|████████████████████████████████████▋                                   | 23602/46290 [1:20:47<1:11:24,  5.30it/s]

nan


 51%|████████████████████████████████████▊                                   | 23701/46290 [1:21:08<1:41:39,  3.70it/s]

nan


 51%|█████████████████████████████████████                                   | 23802/46290 [1:21:30<1:14:11,  5.05it/s]

nan


 52%|█████████████████████████████████████▏                                  | 23901/46290 [1:21:50<1:11:34,  5.21it/s]

nan


 52%|█████████████████████████████████████▎                                  | 24002/46290 [1:22:10<1:06:19,  5.60it/s]

nan


 52%|█████████████████████████████████████▍                                  | 24102/46290 [1:22:31<1:09:04,  5.35it/s]

nan


 52%|█████████████████████████████████████▋                                  | 24201/46290 [1:22:51<1:32:06,  4.00it/s]

nan


 52%|█████████████████████████████████████▊                                  | 24301/46290 [1:23:12<1:06:28,  5.51it/s]

nan


 53%|█████████████████████████████████████▉                                  | 24402/46290 [1:23:33<1:07:09,  5.43it/s]

nan


 53%|██████████████████████████████████████                                  | 24502/46290 [1:23:54<1:08:53,  5.27it/s]

nan


 53%|██████████████████████████████████████▎                                 | 24601/46290 [1:24:13<1:04:37,  5.59it/s]

nan


 53%|██████████████████████████████████████▍                                 | 24702/46290 [1:24:35<1:03:13,  5.69it/s]

nan


 54%|███████████████████████████████████████▋                                  | 24802/46290 [1:24:55<49:03,  7.30it/s]

nan


 54%|██████████████████████████████████████▋                                 | 24900/46290 [1:25:16<1:14:28,  4.79it/s]

nan


 54%|██████████████████████████████████████▉                                 | 25001/46290 [1:25:38<1:10:01,  5.07it/s]

nan


 54%|███████████████████████████████████████                                 | 25101/46290 [1:25:59<1:11:30,  4.94it/s]

nan


 54%|███████████████████████████████████████▏                                | 25201/46290 [1:26:20<1:21:50,  4.29it/s]

nan


 55%|███████████████████████████████████████▎                                | 25301/46290 [1:26:41<1:10:36,  4.95it/s]

nan


 55%|███████████████████████████████████████▌                                | 25401/46290 [1:27:01<1:04:34,  5.39it/s]

nan


 55%|███████████████████████████████████████▋                                | 25502/46290 [1:27:22<1:03:08,  5.49it/s]

nan


 55%|███████████████████████████████████████▊                                | 25601/46290 [1:27:42<1:10:30,  4.89it/s]

nan


 56%|█████████████████████████████████████████                                 | 25702/46290 [1:28:02<54:06,  6.34it/s]

nan


 56%|████████████████████████████████████████▏                               | 25801/46290 [1:28:24<1:02:55,  5.43it/s]

nan


 56%|████████████████████████████████████████▎                               | 25901/46290 [1:28:45<1:12:56,  4.66it/s]

nan


 56%|████████████████████████████████████████▍                               | 26001/46290 [1:29:06<1:19:00,  4.28it/s]

nan


 56%|█████████████████████████████████████████▋                                | 26102/46290 [1:29:27<55:23,  6.07it/s]

nan


 57%|████████████████████████████████████████▊                               | 26202/46290 [1:29:48<1:14:32,  4.49it/s]

nan


 57%|████████████████████████████████████████▉                               | 26302/46290 [1:30:09<1:16:09,  4.37it/s]

nan


 57%|█████████████████████████████████████████                               | 26401/46290 [1:30:31<1:18:53,  4.20it/s]

nan


 57%|█████████████████████████████████████████▏                              | 26501/46290 [1:30:52<1:08:39,  4.80it/s]

nan


 57%|█████████████████████████████████████████▍                              | 26601/46290 [1:31:13<1:05:42,  4.99it/s]

nan


 58%|██████████████████████████████████████████▋                               | 26702/46290 [1:31:34<57:28,  5.68it/s]

nan


 58%|█████████████████████████████████████████▋                              | 26801/46290 [1:31:55<1:15:47,  4.29it/s]

nan


 58%|█████████████████████████████████████████▊                              | 26901/46290 [1:32:17<1:03:19,  5.10it/s]

nan


 58%|███████████████████████████████████████████▏                              | 27001/46290 [1:32:37<52:09,  6.16it/s]

nan


 59%|██████████████████████████████████████████▏                             | 27102/46290 [1:32:57<1:05:03,  4.92it/s]

nan


 59%|██████████████████████████████████████████▎                             | 27201/46290 [1:33:18<1:09:28,  4.58it/s]

nan


 59%|██████████████████████████████████████████▍                             | 27302/46290 [1:33:39<1:11:47,  4.41it/s]

nan


 59%|███████████████████████████████████████████▊                              | 27401/46290 [1:34:00<50:26,  6.24it/s]

nan


 59%|███████████████████████████████████████████▉                              | 27501/46290 [1:34:21<58:26,  5.36it/s]

nan


 60%|██████████████████████████████████████████▉                             | 27601/46290 [1:34:41<1:00:32,  5.14it/s]

nan


 60%|███████████████████████████████████████████                             | 27701/46290 [1:35:03<1:02:44,  4.94it/s]

nan


 60%|███████████████████████████████████████████▏                            | 27801/46290 [1:35:24<1:01:40,  5.00it/s]

nan


 60%|████████████████████████████████████████████▌                             | 27902/46290 [1:35:45<50:38,  6.05it/s]

nan


 60%|███████████████████████████████████████████▌                            | 28002/46290 [1:36:06<1:05:53,  4.63it/s]

nan


 61%|████████████████████████████████████████████▉                             | 28101/46290 [1:36:26<52:04,  5.82it/s]

nan


 61%|█████████████████████████████████████████████                             | 28201/46290 [1:36:47<52:13,  5.77it/s]

nan


 61%|████████████████████████████████████████████                            | 28302/46290 [1:37:09<1:04:03,  4.68it/s]

nan


 61%|████████████████████████████████████████████▏                           | 28401/46290 [1:37:29<1:01:15,  4.87it/s]

nan


 62%|█████████████████████████████████████████████▌                            | 28502/46290 [1:37:50<59:44,  4.96it/s]

nan


 62%|█████████████████████████████████████████████▋                            | 28602/46290 [1:38:11<52:57,  5.57it/s]

nan


 62%|█████████████████████████████████████████████▉                            | 28701/46290 [1:38:32<52:45,  5.56it/s]

nan


 62%|██████████████████████████████████████████████                            | 28801/46290 [1:38:54<55:16,  5.27it/s]

nan


 62%|████████████████████████████████████████████▉                           | 28901/46290 [1:39:15<1:12:40,  3.99it/s]

nan


 63%|█████████████████████████████████████████████                           | 29001/46290 [1:39:38<1:05:18,  4.41it/s]

nan


 63%|█████████████████████████████████████████████▎                          | 29101/46290 [1:39:59<1:11:43,  3.99it/s]

nan


 63%|██████████████████████████████████████████████▋                           | 29201/46290 [1:40:20<54:38,  5.21it/s]

nan


 63%|█████████████████████████████████████████████▌                          | 29301/46290 [1:40:42<1:02:24,  4.54it/s]

nan


 64%|█████████████████████████████████████████████▋                          | 29402/46290 [1:41:02<1:03:50,  4.41it/s]

nan


 64%|███████████████████████████████████████████████▏                          | 29502/46290 [1:41:22<40:17,  6.94it/s]

nan


 64%|██████████████████████████████████████████████                          | 29601/46290 [1:41:43<1:02:08,  4.48it/s]

nan


 64%|███████████████████████████████████████████████▍                          | 29702/46290 [1:42:03<55:09,  5.01it/s]

nan


 64%|███████████████████████████████████████████████▋                          | 29802/46290 [1:42:23<52:20,  5.25it/s]

nan


 65%|███████████████████████████████████████████████▊                          | 29902/46290 [1:42:44<50:08,  5.45it/s]

nan


 65%|███████████████████████████████████████████████▉                          | 30001/46290 [1:43:05<54:38,  4.97it/s]

nan


 65%|████████████████████████████████████████████████                          | 30102/46290 [1:43:26<45:54,  5.88it/s]

nan


 65%|████████████████████████████████████████████████▎                         | 30202/46290 [1:43:47<54:21,  4.93it/s]

nan


 65%|████████████████████████████████████████████████▍                         | 30302/46290 [1:44:09<48:46,  5.46it/s]

nan


 66%|████████████████████████████████████████████████▌                         | 30401/46290 [1:44:32<57:44,  4.59it/s]

nan


 66%|████████████████████████████████████████████████▊                         | 30501/46290 [1:44:53<49:24,  5.33it/s]

nan


 66%|████████████████████████████████████████████████▉                         | 30601/46290 [1:45:13<51:36,  5.07it/s]

nan


 66%|█████████████████████████████████████████████████                         | 30701/46290 [1:45:34<47:34,  5.46it/s]

nan


 67%|███████████████████████████████████████████████▉                        | 30801/46290 [1:45:56<1:01:26,  4.20it/s]

nan


 67%|█████████████████████████████████████████████████▍                        | 30901/46290 [1:46:18<59:55,  4.28it/s]

nan


 67%|█████████████████████████████████████████████████▌                        | 31001/46290 [1:46:39<56:09,  4.54it/s]

nan


 67%|█████████████████████████████████████████████████▋                        | 31102/46290 [1:46:59<50:31,  5.01it/s]

nan


 67%|█████████████████████████████████████████████████▉                        | 31202/46290 [1:47:20<56:36,  4.44it/s]

nan


 68%|██████████████████████████████████████████████████                        | 31302/46290 [1:47:42<46:10,  5.41it/s]

nan


 68%|██████████████████████████████████████████████████▏                       | 31401/46290 [1:48:02<59:50,  4.15it/s]

nan


 68%|██████████████████████████████████████████████████▎                       | 31501/46290 [1:48:24<52:15,  4.72it/s]

nan


 68%|██████████████████████████████████████████████████▌                       | 31601/46290 [1:48:44<44:37,  5.49it/s]

nan


 68%|██████████████████████████████████████████████████▋                       | 31702/46290 [1:49:07<40:07,  6.06it/s]

nan


 69%|█████████████████████████████████████████████████▍                      | 31801/46290 [1:49:28<1:02:46,  3.85it/s]

nan


 69%|██████████████████████████████████████████████████▉                       | 31902/46290 [1:49:50<48:42,  4.92it/s]

nan


 69%|███████████████████████████████████████████████████▏                      | 32001/46290 [1:50:11<55:51,  4.26it/s]

nan


 69%|███████████████████████████████████████████████████▎                      | 32101/46290 [1:50:33<41:30,  5.70it/s]

nan


 70%|███████████████████████████████████████████████████▍                      | 32201/46290 [1:50:54<36:17,  6.47it/s]

nan


 70%|███████████████████████████████████████████████████▋                      | 32302/46290 [1:51:16<48:50,  4.77it/s]

nan


 70%|███████████████████████████████████████████████████▊                      | 32401/46290 [1:51:36<46:57,  4.93it/s]

nan


 70%|███████████████████████████████████████████████████▉                      | 32502/46290 [1:51:57<45:08,  5.09it/s]

nan


 70%|████████████████████████████████████████████████████                      | 32602/46290 [1:52:17<38:54,  5.86it/s]

nan


 71%|████████████████████████████████████████████████████▎                     | 32701/46290 [1:52:37<48:26,  4.68it/s]

nan


 71%|████████████████████████████████████████████████████▍                     | 32802/46290 [1:52:57<37:21,  6.02it/s]

nan


 71%|████████████████████████████████████████████████████▌                     | 32901/46290 [1:53:19<56:05,  3.98it/s]

nan


 71%|████████████████████████████████████████████████████▊                     | 33002/46290 [1:53:40<40:30,  5.47it/s]

nan


 72%|████████████████████████████████████████████████████▉                     | 33101/46290 [1:54:02<43:51,  5.01it/s]

nan


 72%|█████████████████████████████████████████████████████                     | 33201/46290 [1:54:22<39:50,  5.48it/s]

nan


 72%|█████████████████████████████████████████████████████▏                    | 33301/46290 [1:54:43<38:14,  5.66it/s]

nan


 72%|█████████████████████████████████████████████████████▍                    | 33401/46290 [1:55:05<45:37,  4.71it/s]

nan


 72%|█████████████████████████████████████████████████████▌                    | 33502/46290 [1:55:25<42:21,  5.03it/s]

nan


 73%|█████████████████████████████████████████████████████▋                    | 33601/46290 [1:55:47<44:09,  4.79it/s]

nan


 73%|█████████████████████████████████████████████████████▉                    | 33701/46290 [1:56:07<37:37,  5.58it/s]

nan


 73%|██████████████████████████████████████████████████████                    | 33802/46290 [1:56:29<42:02,  4.95it/s]

nan


 73%|██████████████████████████████████████████████████████▏                   | 33902/46290 [1:56:51<40:03,  5.15it/s]

nan


 73%|██████████████████████████████████████████████████████▎                   | 34001/46290 [1:57:12<49:36,  4.13it/s]

nan


 74%|██████████████████████████████████████████████████████▌                   | 34101/46290 [1:57:32<43:19,  4.69it/s]

nan


 74%|██████████████████████████████████████████████████████▋                   | 34202/46290 [1:57:53<37:54,  5.31it/s]

nan


 74%|██████████████████████████████████████████████████████▊                   | 34302/46290 [1:58:14<41:38,  4.80it/s]

nan


 74%|██████████████████████████████████████████████████████▉                   | 34401/46290 [1:58:35<44:49,  4.42it/s]

nan


 75%|███████████████████████████████████████████████████████▏                  | 34501/46290 [1:58:58<43:06,  4.56it/s]

nan


 75%|███████████████████████████████████████████████████████▎                  | 34602/46290 [1:59:19<39:28,  4.93it/s]

nan


 75%|███████████████████████████████████████████████████████▍                  | 34701/46290 [1:59:40<41:18,  4.68it/s]

nan


 75%|███████████████████████████████████████████████████████▋                  | 34801/46290 [2:00:01<42:37,  4.49it/s]

nan


 75%|███████████████████████████████████████████████████████▊                  | 34901/46290 [2:00:22<33:37,  5.64it/s]

nan


 76%|███████████████████████████████████████████████████████▉                  | 35001/46290 [2:00:43<38:14,  4.92it/s]

nan


 76%|████████████████████████████████████████████████████████                  | 35102/46290 [2:01:04<42:46,  4.36it/s]

nan


 76%|████████████████████████████████████████████████████████▎                 | 35201/46290 [2:01:24<43:05,  4.29it/s]

nan


 76%|████████████████████████████████████████████████████████▍                 | 35301/46290 [2:01:45<33:18,  5.50it/s]

nan


 76%|████████████████████████████████████████████████████████▌                 | 35401/46290 [2:02:06<32:24,  5.60it/s]

nan


 77%|████████████████████████████████████████████████████████▊                 | 35501/46290 [2:02:28<39:19,  4.57it/s]

nan


 77%|████████████████████████████████████████████████████████▉                 | 35602/46290 [2:02:48<33:47,  5.27it/s]

nan


 77%|█████████████████████████████████████████████████████████                 | 35701/46290 [2:03:11<38:26,  4.59it/s]

nan


 77%|█████████████████████████████████████████████████████████▏                | 35802/46290 [2:03:32<30:26,  5.74it/s]

nan


 78%|█████████████████████████████████████████████████████████▍                | 35901/46290 [2:03:55<34:39,  5.00it/s]

nan


 78%|█████████████████████████████████████████████████████████▌                | 36001/46290 [2:04:17<38:22,  4.47it/s]

nan


 78%|█████████████████████████████████████████████████████████▋                | 36101/46290 [2:04:38<35:22,  4.80it/s]

nan


 78%|█████████████████████████████████████████████████████████▊                | 36202/46290 [2:04:59<34:53,  4.82it/s]

nan


 78%|██████████████████████████████████████████████████████████                | 36301/46290 [2:05:21<41:30,  4.01it/s]

nan


 79%|██████████████████████████████████████████████████████████▏               | 36401/46290 [2:05:42<34:18,  4.80it/s]

nan


 79%|██████████████████████████████████████████████████████████▎               | 36502/46290 [2:06:03<31:59,  5.10it/s]

nan


 79%|██████████████████████████████████████████████████████████▌               | 36602/46290 [2:06:25<32:16,  5.00it/s]

nan


 79%|██████████████████████████████████████████████████████████▋               | 36701/46290 [2:06:45<32:32,  4.91it/s]

nan


 80%|██████████████████████████████████████████████████████████▊               | 36802/46290 [2:07:07<30:25,  5.20it/s]

nan


 80%|██████████████████████████████████████████████████████████▉               | 36902/46290 [2:07:28<30:14,  5.17it/s]

nan


 80%|███████████████████████████████████████████████████████████▏              | 37002/46290 [2:07:49<32:23,  4.78it/s]

nan


 80%|███████████████████████████████████████████████████████████▎              | 37101/46290 [2:08:09<27:44,  5.52it/s]

nan


 80%|███████████████████████████████████████████████████████████▍              | 37202/46290 [2:08:32<32:20,  4.68it/s]

nan


 81%|███████████████████████████████████████████████████████████▋              | 37301/46290 [2:08:52<28:35,  5.24it/s]

nan


 81%|███████████████████████████████████████████████████████████▊              | 37402/46290 [2:09:13<30:18,  4.89it/s]

nan


 81%|███████████████████████████████████████████████████████████▉              | 37502/46290 [2:09:34<29:16,  5.00it/s]

nan


 81%|████████████████████████████████████████████████████████████              | 37602/46290 [2:09:55<27:54,  5.19it/s]

nan


 81%|████████████████████████████████████████████████████████████▎             | 37701/46290 [2:10:15<32:50,  4.36it/s]

nan


 82%|████████████████████████████████████████████████████████████▍             | 37802/46290 [2:10:36<26:01,  5.44it/s]

nan


 82%|████████████████████████████████████████████████████████████▌             | 37902/46290 [2:10:58<28:16,  4.94it/s]

nan


 82%|████████████████████████████████████████████████████████████▊             | 38002/46290 [2:11:19<26:40,  5.18it/s]

nan


 82%|████████████████████████████████████████████████████████████▉             | 38101/46290 [2:11:40<31:11,  4.38it/s]

nan


 83%|█████████████████████████████████████████████████████████████             | 38201/46290 [2:12:00<29:07,  4.63it/s]

nan


 83%|█████████████████████████████████████████████████████████████▏            | 38301/46290 [2:12:21<23:08,  5.75it/s]

nan


 83%|█████████████████████████████████████████████████████████████▍            | 38401/46290 [2:12:44<26:43,  4.92it/s]

nan


 83%|█████████████████████████████████████████████████████████████▌            | 38502/46290 [2:13:05<27:09,  4.78it/s]

nan


 83%|█████████████████████████████████████████████████████████████▋            | 38601/46290 [2:13:24<18:31,  6.91it/s]

nan


 84%|█████████████████████████████████████████████████████████████▊            | 38701/46290 [2:13:46<29:17,  4.32it/s]

nan


 84%|██████████████████████████████████████████████████████████████            | 38802/46290 [2:14:07<19:47,  6.31it/s]

nan


 84%|██████████████████████████████████████████████████████████████▏           | 38902/46290 [2:14:28<22:53,  5.38it/s]

nan


 84%|██████████████████████████████████████████████████████████████▎           | 39001/46290 [2:14:48<18:36,  6.53it/s]

nan


 84%|██████████████████████████████████████████████████████████████▌           | 39101/46290 [2:15:09<27:30,  4.36it/s]

nan


 85%|██████████████████████████████████████████████████████████████▋           | 39201/46290 [2:15:29<24:31,  4.82it/s]

nan


 85%|██████████████████████████████████████████████████████████████▊           | 39301/46290 [2:15:50<29:33,  3.94it/s]

nan


 85%|██████████████████████████████████████████████████████████████▉           | 39401/46290 [2:16:12<26:35,  4.32it/s]

nan


 85%|███████████████████████████████████████████████████████████████▏          | 39501/46290 [2:16:33<20:15,  5.59it/s]

nan


 86%|███████████████████████████████████████████████████████████████▎          | 39602/46290 [2:16:54<23:21,  4.77it/s]

nan


 86%|███████████████████████████████████████████████████████████████▍          | 39701/46290 [2:17:15<25:54,  4.24it/s]

nan


 86%|███████████████████████████████████████████████████████████████▋          | 39802/46290 [2:17:37<21:20,  5.07it/s]

nan


 86%|███████████████████████████████████████████████████████████████▊          | 39901/46290 [2:17:59<28:29,  3.74it/s]

nan


 86%|███████████████████████████████████████████████████████████████▉          | 40001/46290 [2:18:22<21:50,  4.80it/s]

nan


 87%|████████████████████████████████████████████████████████████████          | 40101/46290 [2:18:44<21:01,  4.91it/s]

nan


 87%|████████████████████████████████████████████████████████████████▎         | 40201/46290 [2:19:05<18:44,  5.41it/s]

nan


 87%|████████████████████████████████████████████████████████████████▍         | 40301/46290 [2:19:26<20:25,  4.89it/s]

nan


 87%|████████████████████████████████████████████████████████████████▌         | 40401/46290 [2:19:46<21:14,  4.62it/s]

nan


 87%|████████████████████████████████████████████████████████████████▋         | 40502/46290 [2:20:06<17:32,  5.50it/s]

nan


 88%|████████████████████████████████████████████████████████████████▉         | 40601/46290 [2:20:26<18:46,  5.05it/s]

nan


 88%|█████████████████████████████████████████████████████████████████         | 40701/46290 [2:20:46<17:08,  5.43it/s]

nan


 88%|█████████████████████████████████████████████████████████████████▏        | 40802/46290 [2:21:08<17:57,  5.09it/s]

nan


 88%|█████████████████████████████████████████████████████████████████▍        | 40901/46290 [2:21:29<29:49,  3.01it/s]

nan


 89%|█████████████████████████████████████████████████████████████████▌        | 41002/46290 [2:21:51<18:39,  4.72it/s]

nan


 89%|█████████████████████████████████████████████████████████████████▋        | 41101/46290 [2:22:14<14:49,  5.83it/s]

nan


 89%|█████████████████████████████████████████████████████████████████▊        | 41201/46290 [2:22:34<15:18,  5.54it/s]

nan


 89%|██████████████████████████████████████████████████████████████████        | 41302/46290 [2:22:55<17:15,  4.82it/s]

nan


 89%|██████████████████████████████████████████████████████████████████▏       | 41401/46290 [2:23:15<22:34,  3.61it/s]

nan


 90%|██████████████████████████████████████████████████████████████████▎       | 41501/46290 [2:23:36<15:42,  5.08it/s]

nan


 90%|██████████████████████████████████████████████████████████████████▌       | 41601/46290 [2:23:58<19:21,  4.04it/s]

nan


 90%|██████████████████████████████████████████████████████████████████▋       | 41702/46290 [2:24:19<14:47,  5.17it/s]

nan


 90%|██████████████████████████████████████████████████████████████████▊       | 41801/46290 [2:24:39<17:48,  4.20it/s]

nan


 91%|██████████████████████████████████████████████████████████████████▉       | 41902/46290 [2:24:59<13:12,  5.54it/s]

nan


 91%|███████████████████████████████████████████████████████████████████▏      | 42001/46290 [2:25:21<14:24,  4.96it/s]

nan


 91%|███████████████████████████████████████████████████████████████████▎      | 42102/46290 [2:25:42<13:36,  5.13it/s]

nan


 91%|███████████████████████████████████████████████████████████████████▍      | 42202/46290 [2:26:03<14:10,  4.80it/s]

nan


 91%|███████████████████████████████████████████████████████████████████▌      | 42301/46290 [2:26:24<14:07,  4.70it/s]

nan


 92%|███████████████████████████████████████████████████████████████████▊      | 42401/46290 [2:26:48<13:19,  4.86it/s]

nan


 92%|███████████████████████████████████████████████████████████████████▉      | 42501/46290 [2:27:09<15:22,  4.11it/s]

nan


 92%|████████████████████████████████████████████████████████████████████      | 42602/46290 [2:27:31<12:44,  4.83it/s]

nan


 92%|████████████████████████████████████████████████████████████████████▎     | 42701/46290 [2:27:52<12:38,  4.73it/s]

nan


 92%|████████████████████████████████████████████████████████████████████▍     | 42801/46290 [2:28:13<10:40,  5.45it/s]

nan


 93%|████████████████████████████████████████████████████████████████████▌     | 42901/46290 [2:28:34<09:53,  5.71it/s]

nan


 93%|████████████████████████████████████████████████████████████████████▋     | 43001/46290 [2:28:56<10:47,  5.08it/s]

nan


 93%|████████████████████████████████████████████████████████████████████▉     | 43101/46290 [2:29:17<15:49,  3.36it/s]

nan


 93%|█████████████████████████████████████████████████████████████████████     | 43201/46290 [2:29:38<10:07,  5.08it/s]

nan


 94%|█████████████████████████████████████████████████████████████████████▏    | 43301/46290 [2:29:59<08:20,  5.97it/s]

nan


 94%|█████████████████████████████████████████████████████████████████████▍    | 43401/46290 [2:30:20<11:55,  4.04it/s]

nan


 94%|█████████████████████████████████████████████████████████████████████▌    | 43502/46290 [2:30:39<08:50,  5.26it/s]

nan


 94%|█████████████████████████████████████████████████████████████████████▋    | 43601/46290 [2:31:01<08:51,  5.06it/s]

nan


 94%|█████████████████████████████████████████████████████████████████████▊    | 43701/46290 [2:31:22<09:27,  4.56it/s]

nan


 95%|██████████████████████████████████████████████████████████████████████    | 43801/46290 [2:31:47<08:59,  4.61it/s]

nan


 95%|██████████████████████████████████████████████████████████████████████▏   | 43901/46290 [2:32:09<08:58,  4.44it/s]

nan


 95%|██████████████████████████████████████████████████████████████████████▎   | 44002/46290 [2:32:30<07:44,  4.93it/s]

nan


 95%|██████████████████████████████████████████████████████████████████████▌   | 44101/46290 [2:32:50<06:59,  5.22it/s]

nan


 95%|██████████████████████████████████████████████████████████████████████▋   | 44201/46290 [2:33:12<06:57,  5.00it/s]

nan


 96%|██████████████████████████████████████████████████████████████████████▊   | 44301/46290 [2:33:34<06:33,  5.06it/s]

nan


 96%|██████████████████████████████████████████████████████████████████████▉   | 44402/46290 [2:33:55<06:03,  5.19it/s]

nan


 96%|███████████████████████████████████████████████████████████████████████▏  | 44501/46290 [2:34:17<05:34,  5.36it/s]

nan


 96%|███████████████████████████████████████████████████████████████████████▎  | 44601/46290 [2:34:39<06:52,  4.10it/s]

nan


 97%|███████████████████████████████████████████████████████████████████████▍  | 44701/46290 [2:34:59<05:14,  5.06it/s]

nan


 97%|███████████████████████████████████████████████████████████████████████▌  | 44802/46290 [2:35:21<05:13,  4.74it/s]

nan


 97%|███████████████████████████████████████████████████████████████████████▊  | 44902/46290 [2:35:41<04:35,  5.03it/s]

nan


 97%|███████████████████████████████████████████████████████████████████████▉  | 45001/46290 [2:36:02<05:18,  4.05it/s]

nan


 97%|████████████████████████████████████████████████████████████████████████  | 45101/46290 [2:36:24<04:13,  4.70it/s]

nan


 98%|████████████████████████████████████████████████████████████████████████▎ | 45201/46290 [2:36:45<03:22,  5.39it/s]

nan


 98%|████████████████████████████████████████████████████████████████████████▍ | 45302/46290 [2:37:07<03:21,  4.90it/s]

nan


 98%|████████████████████████████████████████████████████████████████████████▌ | 45401/46290 [2:37:30<03:58,  3.73it/s]

nan


 98%|████████████████████████████████████████████████████████████████████████▋ | 45500/46290 [2:37:51<02:45,  4.78it/s]

nan


 99%|████████████████████████████████████████████████████████████████████████▉ | 45602/46290 [2:38:12<02:02,  5.61it/s]

nan


 99%|█████████████████████████████████████████████████████████████████████████ | 45702/46290 [2:38:34<01:45,  5.58it/s]

nan


 99%|█████████████████████████████████████████████████████████████████████████▏| 45801/46290 [2:38:54<01:32,  5.29it/s]

nan


 99%|█████████████████████████████████████████████████████████████████████████▍| 45902/46290 [2:39:16<01:05,  5.95it/s]

nan


 99%|█████████████████████████████████████████████████████████████████████████▌| 46001/46290 [2:39:38<00:56,  5.11it/s]

nan


100%|█████████████████████████████████████████████████████████████████████████▋| 46102/46290 [2:39:58<00:36,  5.08it/s]

nan


100%|█████████████████████████████████████████████████████████████████████████▊| 46201/46290 [2:40:19<00:19,  4.60it/s]

nan


100%|██████████████████████████████████████████████████████████████████████████| 46290/46290 [2:40:40<00:00,  4.80it/s]


RuntimeError: received an empty list of sequences

In [None]:
predict(net,testdataset)

Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.