In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Toy network: two unpadded 5x5 convolutions, each followed by a ReLU.

    Each convolution trims 4 pixels from both spatial axes, so an input of
    shape ``[B, 1, H, W]`` comes out as ``[B, 20, H-8, W-8]``.
    """

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys, so they stay unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=20, kernel_size=5)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU and return the activations."""
        hidden = self.conv1(x).relu()
        out = self.conv2(hidden).relu()
        return out

# Build the eager-mode model, then compile it to TorchScript with torch.jit.script.
my_model = Model()
my_scripted_model = torch.jit.script(my_model)

In [3]:
# Run the scripted model on a random [1, 1, 9, 9] input and display the output shape.
my_scripted_model(torch.rand(1, 1, 9, 9)).shape

torch.Size([1, 20, 1, 1])

---

In [1]:
import json
import os

from CookieTTS._5_infer.t2s_server.text2speech import T2S

# Location of the T2S server config. The default is the original machine-specific
# path; set the T2S_CONFIG_PATH environment variable to run this cell elsewhere
# without editing the notebook.
T2S_CONFIG_PATH = os.environ.get(
    "T2S_CONFIG_PATH",
    r"G:\TwiBot\CookiePPPTTS\CookieTTS\_5_infer\t2s_server\t2s_config.json",
)

# load T2S config (JSON is UTF-8 by specification; do not rely on the platform default encoding)
with open(T2S_CONFIG_PATH, 'r', encoding='utf-8') as f:
    conf = json.load(f)

# Build the text-to-speech object from the 'workers' section of the config.
t2s = T2S(conf['workers'])

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit


Loading Tacotron... Done
Compiling Tacotron Decoder... Done
This Tacotron model has been trained for 18000 Iterations.
{'yoyo': True, 'yoyo_WN': False, 'n_mel_channels': 160, 'channel_mixing': 'permute', 'mix_first': False, 'n_flows': 6, 'n_group': 20, 'n_early_every': 16, 'n_early_size': 2, 'memory_efficient': 0.0, 'spect_scaling': False, 'upsample_mode': 'normal', 'WN_config': {'n_layers': 8, 'n_channels': 128, 'kernel_size_w': 7, 'kernel_size_h': 7, 'n_layers_dilations_w': None, 'n_layers_dilations_h': 1, 'speaker_embed_dim': 96, 'rezero': False, 'cond_layers': 3, 'cond_activation_func': 'lrelu', 'negative_slope': 0.5, 'cond_hidden_channels': 256, 'cond_padding_mode': 'replicate', 'seperable_conv': True, 'merge_res_skip': False, 'upsample_mode': 'linear', 'cond_kernel_size': 1}, 'speaker_embed': 96, 'cond_layers': 3, 'cond_activation_func': 'lrelu', 'negative_slope': 0.5, 'cond_hidden_channels': 256, 'cond_output_channels': 256, 'cond_residual': True, 'cond_res_rezero': True, 'cond_

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
import torch
# Compile the Tacotron decoder with TorchScript and swap the scripted module in
# on the already-loaded `t2s` object.
# Disabled alternative: script only the decoder's attention layer.
#t2s.tacotron.decoder.attention_layer = torch.jit.script(t2s.tacotron.decoder.attention_layer)
t2s.tacotron.decoder = torch.jit.script(t2s.tacotron.decoder)

In [None]:
class Postnet(nn.Module):
    """Postnet
        - Stack of 1-d convolutions that predicts a residual which is added to its input.
        - Depth, width and kernel size come from hparams
          (postnet_n_convolutions, postnet_embedding_dim, postnet_kernel_size).
        - If hparams.postnet_residual_connections is a non-zero int N, every N-th layer
          (and always the last one) is an "output layer": it projects back to the input
          width and its result is accumulated onto the running residual sum.

    Input and output both have n_mel_channels*(LL_SpectLoss+1) channels.

    NOTE: forward() reads a module-level `drop_rate` that is not defined inside this
          class; it must exist in the enclosing namespace before forward() is called.
    """
    def __init__(self, hparams):
        super(Postnet, self).__init__()
        self.b_res = hparams.postnet_residual_connections if hasattr(hparams, 'postnet_residual_connections') else False
        self.convolutions = nn.ModuleList()

        n_convs = hparams.postnet_n_convolutions
        io_dim = hparams.n_mel_channels*(hparams.LL_SpectLoss+1)  # width of input / residual stream
        hidden_dim = hparams.postnet_embedding_dim
        padding = (hparams.postnet_kernel_size - 1) // 2

        # Width of the tensor that will actually be fed to the next conv. After an
        # output layer forward() continues from the residual stream (io_dim wide),
        # otherwise from the hidden activations, so the next input width is always
        # the previous layer's output width.
        in_dim = io_dim
        for i in range(n_convs):
            is_output_layer = self._is_output_layer(i, n_convs)
            out_dim = io_dim if is_output_layer else hidden_dim
            layers = [ ConvNorm(in_dim, out_dim,
                             kernel_size=hparams.postnet_kernel_size, stride=1,
                             padding=padding,
                             dilation=1, w_init_gain='tanh'), ]
            if not is_output_layer:
                layers.append(nn.BatchNorm1d(hidden_dim))
            self.convolutions.append(nn.Sequential(*layers))
            in_dim = out_dim

    def _is_output_layer(self, i: int, n_convs: int) -> bool:
        """True if layer `i` feeds the residual sum (every b_res-th layer, and the last one)."""
        return bool(self.b_res and (i % self.b_res == 0)) or (i+1 == n_convs)

    def forward(self, x):
        """Return `x` plus the accumulated outputs of all output layers."""
        x_orig = x.clone()
        n_convs = len(self.convolutions)
        for i, conv in enumerate(self.convolutions):
            if self._is_output_layer(i, n_convs):
                x_orig = x_orig + conv(x)
                x = x_orig  # continue from the updated residual stream
            else:
                x = F.dropout(torch.tanh(conv(x)), drop_rate, self.training)

        return x_orig

In [None]:

# Hyperparameters for the adversarial postnet (read by GANPostnet).
# These are plain assignments on purpose: a trailing comma would store a
# 1-tuple such as ``(64,)`` instead of the value itself.
hparams.use_postnet_generator_and_discriminator = False
hparams.adv_postnet_noise_dim = 64
hparams.adv_postnet_embedding_dim = 512
hparams.adv_postnet_kernel_size = 5
hparams.adv_postnet_n_convolutions = 6
hparams.adv_postnet_residual_connections = 2

In [None]:
class GANPostnet(nn.Module):
    """GANPostnet
        - Residual stack of 1-d convolutions that refines a spectrogram, conditioned on
          fresh Gaussian noise concatenated along the channel axis.
        - Depth, width and kernel size come from hparams
          (adv_postnet_n_convolutions, adv_postnet_embedding_dim, adv_postnet_kernel_size).
        - If hparams.adv_postnet_residual_connections is a non-zero int N, every N-th layer
          (and always the last one) is an "output layer" whose result is accumulated onto
          the residual stream.

    The residual stream is n_mel_channels*(LL_SpectLoss+1) + adv_postnet_noise_dim channels
    wide; only its first n_mel_channels channels are returned.

    NOTE: forward() reads a module-level `drop_rate` that is not defined inside this
          class; it must exist in the enclosing namespace before forward() is called.
    """
    def __init__(self, hparams):
        super(GANPostnet, self).__init__()
        self.b_res = hparams.adv_postnet_residual_connections if hasattr(hparams, 'adv_postnet_residual_connections') else False
        self.convolutions = nn.ModuleList()
        self.noise_dim = hparams.adv_postnet_noise_dim
        self.n_mel_channels = hparams.n_mel_channels

        n_convs = hparams.adv_postnet_n_convolutions
        # Width of the residual stream: spectrogram (+logvar) channels plus noise channels.
        io_dim = (hparams.n_mel_channels*(hparams.LL_SpectLoss+1)) + self.noise_dim
        hidden_dim = hparams.adv_postnet_embedding_dim
        padding = (hparams.adv_postnet_kernel_size - 1) // 2

        # Output layers must emit io_dim channels so they can be added to the residual
        # stream, and the layer after an output layer consumes that stream again, so
        # each layer's input width is the previous layer's output width.
        in_dim = io_dim
        for i in range(n_convs):
            is_output_layer = self._is_output_layer(i, n_convs)
            out_dim = io_dim if is_output_layer else hidden_dim
            layers = [ ConvNorm(
                             in_dim,
                             out_dim,
                             kernel_size=hparams.adv_postnet_kernel_size, stride=1,
                             padding=padding,
                             dilation=1, w_init_gain='tanh'), ]
            if not is_output_layer:
                layers.append(nn.BatchNorm1d(hidden_dim))
            self.convolutions.append(nn.Sequential(*layers))
            in_dim = out_dim

    def _is_output_layer(self, i: int, n_convs: int) -> bool:
        """True if layer `i` feeds the residual sum (every b_res-th layer, and the last one)."""
        return bool(self.b_res and (i % self.b_res == 0)) or (i+1 == n_convs)

    def forward(self, x):
        """Refine `x` ([B, n_mel+logvar, dec_T]) and return [B, n_mel, dec_T]."""
        B, _, dec_T = x.shape# [B, n_mel+logvar, dec_T]
        rand_noise = torch.randn(B, self.noise_dim, dec_T, device=x.device, dtype=x.dtype)# -> [B, noise, dec_T]
        # The first conv is sized for the noise-augmented tensor, so it must be the conv input too.
        x = x_orig = torch.cat((x, rand_noise), dim=1)# [B, n_mel+logvar, dec_T] -> [B, n_mel+logvar+noise, dec_T]
        n_convs = len(self.convolutions)
        for i, conv in enumerate(self.convolutions):
            if self._is_output_layer(i, n_convs):
                x = x_orig = x_orig + conv(x)# [B, n_mel+logvar+noise, dec_T] + [B, n_mel+logvar+noise, dec_T]
            else:
                x = F.dropout(torch.tanh(conv(x)), drop_rate, self.training)# -> [B, conv_dim, dec_T]

        return x_orig[:, :self.n_mel_channels]# [B, n_mel+logvar+noise, dec_T] -> [B, n_mel, dec_T]

In [None]:
# (Adversarial Postnet Discriminator) - Learns the difference between real and fake spectrograms, teaches the postnet generator how to make convincing looking outputs.
# Stored on `hparams` (where GANDiscriminator looks them up) as plain values;
# a trailing comma would turn each one into a 1-tuple.
hparams.dis_postnet_embedding_dim = 128
hparams.dis_postnet_kernel_size = 5
hparams.dis_postnet_n_convolutions = 6
hparams.dis_postnet_residual_connections = 2

In [3]:
# Slicing a range with [1:-1] drops its first and last element.
list( range(10)[1:-1] )

[1, 2, 3, 4, 5, 6, 7, 8]

In [54]:

#@torch.jit.script
def scale_grads(input, scale: float):
    """
    Change gradient magnitudes.

    Returns a new tensor whose forward value equals `input` exactly, while gradients
    flowing back into `input` are multiplied by `scale`.

    `scale` may be 0.0 (the gradient is blocked, the value is still passed through);
    no division by `scale` is performed. Assumes `input * scale` is finite.

    Note: Do not use @torch.jit.script on pytorch <= 1.6 with this function!
          no_grad() and detach() do not work correctly till version 1.7 with JIT script.
    """
    scaled = input * scale  # the only term autograd sees -> d(out)/d(input) == scale
    # (scaled - scaled.detach()) is exactly zero in the forward pass, so the value
    # is input + 0, while the backward pass only follows `scaled`.
    return input.detach() + (scaled - scaled.detach())

# Sanity check for scale_grads: the forward value should be unchanged while the
# gradient reaching `x` should equal `scale`.
x = torch.ones(5, requires_grad=True)
scale = 0.1

output = scale_grads(x, scale)
output.sum().backward()  # d(sum)/d(output) is 1 everywhere, so x.grad shows the applied scale

print("Correct Output Value?")
print(output == x)
print("\nCorrect Output Gradients?")
print(x.grad == scale)

Correct Output Value?
tensor([True, True, True, True, True])

Correct Output Gradients?
tensor([True, True, True, True, True])


In [None]:
class GANDiscriminator(nn.Module):
    """GANDiscriminator
        - Residual stack of 1-d convolutions over a spectrogram, followed by a
          single-channel convolution whose output is averaged over time and passed
          through a sigmoid to give one score per batch item.
        - Depth, width and kernel size come from hparams
          (dis_postnet_n_convolutions, dis_postnet_embedding_dim, dis_postnet_kernel_size).
        - If hparams.dis_postnet_residual_connections is a non-zero int N, every N-th layer
          (and always the last one) is accumulated onto the residual stream.

    NOTE: forward() reads a module-level `drop_rate` that is not defined inside this
          class; it must exist in the enclosing namespace before forward() is called.
    """
    def __init__(self, hparams):
        super(GANDiscriminator, self).__init__()
        self.b_res = hparams.dis_postnet_residual_connections if hasattr(hparams, 'dis_postnet_residual_connections') else False
        self.convolutions = nn.ModuleList()
        self.n_mel_channels = hparams.n_mel_channels
        
        for i in range(hparams.dis_postnet_n_convolutions):
            is_output_layer = (self.b_res and bool( i % self.b_res == 0 )) or (i+1 == hparams.dis_postnet_n_convolutions)
            layers = [ ConvNorm(
                             hparams.n_mel_channels if i == 0 else hparams.dis_postnet_embedding_dim,
                             hparams.dis_postnet_embedding_dim,
                             kernel_size=hparams.dis_postnet_kernel_size, stride=1,
                             padding=int((hparams.dis_postnet_kernel_size - 1) / 2),
                             dilation=1, w_init_gain='tanh'), ]
            if not is_output_layer:
                layers.append(nn.BatchNorm1d(hparams.dis_postnet_embedding_dim))
            self.convolutions.append(nn.Sequential(*layers))
        
        # Collapses the feature channels to a single score channel (kernel 3, no padding).
        self.end_conv = ConvNorm( hparams.dis_postnet_embedding_dim, 1, kernel_size=3, padding=0, w_init_gain='tanh')
    
    def forward(self, x):
        """Score a spectrogram batch `x` ([B, n_mel, dec_T]); returns a [B] tensor in [0, 1]."""
        len_convs = len(self.convolutions)
        for i, conv in enumerate(self.convolutions):
            if i==0:
                # First layer starts the residual stream.
                x = x_res = conv(x)# [B, n_mel, dec_T] -> [B, conv_dim, dec_T]
            elif (self.b_res and (i % self.b_res == 0)) or (i+1 == len_convs):
                x = x_res = x_res + conv(x)# [B, conv_dim, dec_T] + [B, conv_dim, dec_T] -> [B, conv_dim, dec_T]
            else:
                x = F.dropout(torch.tanh(conv(x)), drop_rate, self.training)# [B, conv_dim, dec_T] -> [B, conv_dim, dec_T]
        
        pred_fakeness = self.end_conv(x_res)# [B, conv_dim, dec_T] -> [B, 1, dec_T-2]
        # mean(dim=2) already drops the time axis, so exactly one squeeze is needed.
        pred_fakeness = pred_fakeness.mean(dim=2).squeeze(1)# [B, 1, dec_T-2] -> [B, 1] -> [B]
        
        return pred_fakeness.sigmoid()# [B]

In [1]:
import torch
# squeeze(1) removes the size-1 axis at index 1: [5, 1, 2] -> [5, 2].
torch.rand(5, 1, 2).squeeze(1).shape

torch.Size([5, 2])