In [2]:
# ---- Training hyperparameters ----
batch_size = 16
num_epochs = 10
learning_rate = 2e-4
# NOTE(review): `generate` has its own `S=100` default and does not read this
# global in the code visible here -- presumably it is passed in by a later cell.
S = 100

# ---- Data ----
# Images are resized to img_size x img_size by the dataset transform.
img_size = 64
img_channels = 3

# ---- Network ----
# `num_groups` is read as a global by `Normalize` (GroupNorm group count).
num_groups = 8
first_conv_channels = 64
channel_multiplier = [1, 2, 4, 8]
# Evaluates to [256, 512], i.e. channel counts.
# NOTE(review): `Model` tests `curr_res in attn_resolutions`, where `curr_res`
# is a spatial resolution -- confirm what is actually passed as attn_resolutions.
has_attention = [first_conv_channels * 4, first_conv_channels * 8]
num_res_blocks = 2  # Number of residual blocks

# ---- Diffusion process ----
total_timesteps = 1_000

# Linear beta schedule endpoints and length. The keys match the parameter names
# of `calc_diffusion_hyperparams`, which `generate` calls with
# `**diffusion_config`; `get_VAR_noise` and the `GaussianDiffusion` defaults
# read the same dict.
diffusion_config = {
    "beta_0": 0.0001,
    "beta_T": 0.02,
    "T": total_timesteps,
}

In [3]:
from torchvision.datasets import LFWPeople
import torchvision.transforms as transforms
import torch

# PIL image -> tensor conversion (no value scaling is done by this step).
pil_transformer = transforms.Compose([
    transforms.PILToTensor()
])


def _resize_and_scale(pil):
    """Resize a PIL image to (img_size, img_size), convert it to a tensor and divide by 255."""
    resized = pil.resize((img_size, img_size))
    return pil_transformer(resized) / 255


def _load_lfw_split(split):
    """Build the LFWPeople dataset for one split ('train' or 'test') with the shared transform."""
    return LFWPeople('/content/drive/MyDrive', split=split, download=True,
                     image_set='original', transform=_resize_and_scale)


train_ds = _load_lfw_split('train')
test_ds = _load_lfw_split('test')

# Train on every available face: both official splits are concatenated.
ds = torch.utils.data.ConcatDataset([train_ds, test_ds])
# ds = torch.utils.data.Subset(train_ds, list(range(30)))

print(len(ds))

Files already downloaded and verified
Files already downloaded and verified
13233


In [4]:
import math
import torch
import torch.nn as nn
from torchvision.datasets import LFWPeople
import torchvision.transforms as transforms
import torch

def get_timestep_embedding(timesteps, embedding_dim):
    """
    Build sinusoidal timestep embeddings.

    The first half of each embedding holds sin(t * f_k) and the second half
    cos(t * f_k), with frequencies f_k = 10000 ** (-k / (half_dim - 1)) for
    k = 0 .. half_dim - 1. As the original note says, this follows the
    tensor2tensor / Fairseq layout used by the DDPM code and differs slightly
    from Section 3.5 of "Attention Is All You Need".

    Parameters:
    timesteps (torch.Tensor):   1-D tensor of timesteps (asserted); cast to float
    embedding_dim (int):        width of the returned embedding

    Returns:
    torch.Tensor of shape (len(timesteps), embedding_dim) on the device of
    `timesteps`; for odd `embedding_dim` the last column is zero padding.
    """
    assert len(timesteps.shape) == 1

    half_dim = embedding_dim // 2
    # With half_dim == 1 (embedding_dim of 2 or 3) `half_dim - 1` is 0 and the
    # division used to raise ZeroDivisionError. The only frequency index is then
    # k = 0, whose frequency is exp(0) = 1 whatever the step is, so any non-zero
    # denominator yields the correct result.
    log_step = math.log(10000) / max(half_dim - 1, 1)
    freqs = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -log_step)
    freqs = freqs.to(device=timesteps.device)
    angles = timesteps.float()[:, None] * freqs[None, :]
    emb = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)

    if embedding_dim % 2 == 1:  # zero pad
        emb = torch.nn.functional.pad(emb, (0,1,0,0))
    return emb

def nonlinearity(x):
    """Swish activation: multiply the input element-wise by its own sigmoid."""
    gate = torch.sigmoid(x)
    return x * gate


def Normalize(in_channels):
    """Return an affine GroupNorm over `in_channels` channels, split into the notebook-level `num_groups` groups."""
    return torch.nn.GroupNorm(
        num_groups,
        in_channels,
        eps=1e-6,
        affine=True,
    )


class Upsample(nn.Module):
    """Nearest-neighbour 2x upsampling, optionally followed by a 3x3 convolution.

    The convolution (created only when `with_conv` is truthy) keeps the channel
    count and, with stride 1 / padding 1, the spatial size.
    """

    def __init__(self, in_channels, with_conv):
        super().__init__()
        self.with_conv = with_conv
        if not with_conv:
            return
        self.conv = nn.Conv2d(in_channels, in_channels,
                              kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        upsampled = nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
        return self.conv(upsampled) if self.with_conv else upsampled


class Downsample(nn.Module):
    """Halve the spatial size, either with a strided 3x3 convolution or with 2x2 average pooling."""

    def __init__(self, in_channels, with_conv):
        super().__init__()
        self.with_conv = with_conv
        if not with_conv:
            return
        # torch convolutions cannot pad asymmetrically, so the conv itself uses
        # no padding and forward() pads the input by hand.
        self.conv = nn.Conv2d(in_channels, in_channels,
                              kernel_size=3, stride=2, padding=0)

    def forward(self, x):
        if not self.with_conv:
            return nn.functional.avg_pool2d(x, kernel_size=2, stride=2)
        # one zero column on the right and one zero row at the bottom
        padded = nn.functional.pad(x, (0, 1, 0, 1), mode="constant", value=0)
        return self.conv(padded)


class ResnetBlock(nn.Module):
    """Residual block conditioned on a timestep embedding.

    Main path: norm -> swish -> 3x3 conv -> add projected embedding ->
    norm -> swish -> dropout -> 3x3 conv. The input is added back at the end;
    when the channel count changes, it first goes through a 3x3 conv
    (`conv_shortcut=True`) or a 1x1 conv (default).
    """

    def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False,
                 dropout, temb_channels=512):
        super().__init__()
        if out_channels is None:
            out_channels = in_channels
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.use_conv_shortcut = conv_shortcut

        def conv3x3(c_in, c_out):
            # size-preserving 3x3 convolution
            return nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1)

        # Sub-modules are created in the same order as before so that seeded
        # initialisation and state_dict ordering do not change.
        self.norm1 = Normalize(in_channels)
        self.conv1 = conv3x3(in_channels, out_channels)
        self.temb_proj = nn.Linear(temb_channels, out_channels)
        self.norm2 = Normalize(out_channels)
        self.dropout = nn.Dropout(dropout)
        self.conv2 = conv3x3(out_channels, out_channels)

        if self.in_channels == self.out_channels:
            return  # identity shortcut, nothing else to build
        if self.use_conv_shortcut:
            self.conv_shortcut = conv3x3(in_channels, out_channels)
        else:
            self.nin_shortcut = nn.Conv2d(in_channels, out_channels,
                                          kernel_size=1, stride=1, padding=0)

    def forward(self, x, temb):
        h = self.conv1(nonlinearity(self.norm1(x)))

        # broadcast the per-sample embedding over the two spatial axes
        h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None]

        h = self.conv2(self.dropout(nonlinearity(self.norm2(h))))

        if self.in_channels == self.out_channels:
            shortcut = x
        elif self.use_conv_shortcut:
            shortcut = self.conv_shortcut(x)
        else:
            shortcut = self.nin_shortcut(x)

        return shortcut + h


class AttnBlock(nn.Module):
    """Single-head self-attention over all spatial positions of a feature map, with a residual connection."""

    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = in_channels

        def conv1x1():
            # per-position linear map that keeps the channel count
            return nn.Conv2d(in_channels, in_channels,
                             kernel_size=1, stride=1, padding=0)

        self.norm = Normalize(in_channels)
        self.q = conv1x1()
        self.k = conv1x1()
        self.v = conv1x1()
        self.proj_out = conv1x1()

    def forward(self, x):
        normed = self.norm(x)
        query = self.q(normed)
        key = self.k(normed)
        value = self.v(normed)

        batch, channels, height, width = query.shape
        n_pos = height * width

        # attention weights: scores[b, i, j] = sum_c query[b, i, c] * key[b, c, j]
        query = query.reshape(batch, channels, n_pos).permute(0, 2, 1)  # b, hw, c
        key = key.reshape(batch, channels, n_pos)                       # b, c, hw
        scores = torch.bmm(query, key) * (int(channels) ** (-0.5))      # b, hw, hw
        weights = torch.nn.functional.softmax(scores, dim=2)

        # weighted sum of values: out[b, c, j] = sum_i value[b, c, i] * weights[b, j, i]
        value = value.reshape(batch, channels, n_pos)
        attended = torch.bmm(value, weights.permute(0, 2, 1))           # b, c, hw
        attended = attended.reshape(batch, channels, height, width)

        return x + self.proj_out(attended)

class Model(nn.Module):
    """U-Net-style network: a down path, a middle section and an up path that
    concatenates the stored down-path activations as skip connections. Every
    ResnetBlock is conditioned on an embedding of the timestep `t`."""

    def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks,
                 attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels,
                 resolution):
        """
        ch: base channel count; `conv_in` maps the input to `ch` channels and
            resolution level i works with `ch * ch_mult[i]` channels
        out_ch: number of channels produced by the final convolution
        ch_mult: per-level channel multipliers (any sequence); its length is the
            number of resolution levels
        num_res_blocks: ResnetBlocks per level on the way down (the way up uses
            one more per level)
        attn_resolutions: collection of spatial resolutions; at a level whose
            current resolution is in it, an AttnBlock follows every ResnetBlock
        dropout: dropout probability handed to every ResnetBlock
        resamp_with_conv: passed as `with_conv` to Downsample / Upsample
        in_channels: channels of the input images
        resolution: height and width of the (square) input; checked in forward()
        """

        super().__init__()
        # `(1,) + ch_mult` below needs a tuple; coerce so a list (such as the
        # notebook's `channel_multiplier`) no longer raises TypeError.
        ch_mult = tuple(ch_mult)
        self.ch = ch
        self.temb_ch = self.ch*4
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.in_channels = in_channels

        # timestep embedding
        self.temb = nn.Module()
        self.temb.dense = nn.ModuleList([
            torch.nn.Linear(self.ch,
                            self.temb_ch),
            torch.nn.Linear(self.temb_ch,
                            self.temb_ch),
        ])

        # downsampling
        self.conv_in = torch.nn.Conv2d(in_channels,
                                       self.ch,
                                       kernel_size=3,
                                       stride=1,
                                       padding=1)

        curr_res = resolution
        # input multiplier of level i is the output multiplier of level i-1
        in_ch_mult = (1,)+ch_mult

        self.down = nn.ModuleList()
        for i_level in range(self.num_resolutions):
            block = nn.ModuleList()
            attn = nn.ModuleList()
            block_in = ch*in_ch_mult[i_level]
            block_out = ch*ch_mult[i_level]
            for i_block in range(self.num_res_blocks):
                block.append(ResnetBlock(in_channels=block_in,
                                         out_channels=block_out,
                                         temb_channels=self.temb_ch,
                                         dropout=dropout))
                block_in = block_out
                if curr_res in attn_resolutions:
                    attn.append(AttnBlock(block_in))
            down = nn.Module()
            down.block = block
            down.attn = attn
            # every level except the last halves the resolution
            if i_level != self.num_resolutions-1:
                down.downsample = Downsample(block_in, resamp_with_conv)
                curr_res = curr_res // 2
            self.down.append(down)

        # middle
        self.mid = nn.Module()
        self.mid.block_1 = ResnetBlock(in_channels=block_in,
                                       out_channels=block_in,
                                       temb_channels=self.temb_ch,
                                       dropout=dropout)
        self.mid.attn_1 = AttnBlock(block_in)
        self.mid.block_2 = ResnetBlock(in_channels=block_in,
                                       out_channels=block_in,
                                       temb_channels=self.temb_ch,
                                       dropout=dropout)

        # upsampling
        self.up = nn.ModuleList()

        for i_level in reversed(range(self.num_resolutions)):
            block = nn.ModuleList()
            attn = nn.ModuleList()
            block_out = ch*ch_mult[i_level]
            # channel count of the skip tensor concatenated in forward()
            skip_in = ch*ch_mult[i_level]
            for i_block in range(self.num_res_blocks+1):
                # the last block of a level consumes the tensor that entered
                # this level on the way down, which has the previous level's width
                if i_block == self.num_res_blocks:
                    skip_in = ch*in_ch_mult[i_level]
                block.append(ResnetBlock(in_channels=block_in+skip_in,
                                         out_channels=block_out,
                                         temb_channels=self.temb_ch,
                                         dropout=dropout))
                block_in = block_out
                if curr_res in attn_resolutions:
                    attn.append(AttnBlock(block_in))

            up = nn.Module()
            up.block = block
            up.attn = attn
            if i_level != 0:
                up.upsample = Upsample(block_in, resamp_with_conv)
                curr_res = curr_res * 2
            self.up.insert(0, up) # prepend to get consistent order

        # end
        self.norm_out = Normalize(block_in)
        self.conv_out = torch.nn.Conv2d(block_in,
                                        out_ch,
                                        kernel_size=3,
                                        stride=1,
                                        padding=1)


    def forward(self, x, t):
        """Run the network on images `x` (square, side `self.resolution`) and a
        1-D tensor of timesteps `t`; returns a tensor with `out_ch` channels and
        the same spatial size as `x`."""
        assert x.shape[2] == x.shape[3] == self.resolution

        # timestep embedding
        temb = get_timestep_embedding(t, self.ch)
        temb = self.temb.dense[0](temb)
        temb = nonlinearity(temb)
        temb = self.temb.dense[1](temb)

        # downsampling; `hs` collects every activation for the skip connections
        hs = [self.conv_in(x)]
        for i_level in range(self.num_resolutions):
            for i_block in range(self.num_res_blocks):
                h = self.down[i_level].block[i_block](hs[-1], temb)
                # `attn` is either empty or holds one AttnBlock per ResnetBlock
                if len(self.down[i_level].attn) > 0:
                    h = self.down[i_level].attn[i_block](h)
                hs.append(h)
            if i_level != self.num_resolutions-1:
                hs.append(self.down[i_level].downsample(hs[-1]))

        # middle
        h = hs[-1]
        h = self.mid.block_1(h, temb)
        h = self.mid.attn_1(h)
        h = self.mid.block_2(h, temb)

        # upsampling; activations are consumed in reverse order of creation
        for i_level in reversed(range(self.num_resolutions)):
            for i_block in range(self.num_res_blocks+1):
                h = self.up[i_level].block[i_block](
                    torch.cat([h, hs.pop()], dim=1), temb)
                if len(self.up[i_level].attn) > 0:
                    h = self.up[i_level].attn[i_block](h)
            if i_level != 0:
                h = self.up[i_level].upsample(h)

        # end
        h = self.norm_out(h)
        h = nonlinearity(h)
        h = self.conv_out(h)
        return h

In [5]:
import os
from tqdm import tqdm
import numpy as np
import torch
from torchvision.utils import save_image, make_grid

# Fix the seeds of the PyTorch and NumPy global random number generators.
torch.manual_seed(0)
np.random.seed(0)

def _map_gpu(gpu):
    if gpu == 'cuda':
        return lambda x: x.cuda()
    else:
        return lambda x: x.to(torch.device('cuda:'+gpu))

map_gpu = _map_gpu('cuda')

def rescale(X, batch=True):
    """
    Min-max rescale to the range [0, 1].

    Parameters:
    X:              tensor to rescale
    batch (bool):   if True, every X[i] is rescaled independently and written
                    back into X (X is modified in place and returned);
                    if False, X as a whole is rescaled and a new tensor is returned

    Returns:
    the rescaled tensor. An input whose values are all equal maps to zeros
    instead of the NaNs produced by dividing 0 by 0.
    """
    if not batch:
        lowest = X.min()
        span = X.max() - lowest
        if span == 0:
            # constant input: (X - lowest) is already all zeros, so dividing by
            # 1 keeps it that way and avoids 0 / 0
            span = 1
        return (X - lowest) / span

    for i in range(X.shape[0]):
        X[i] = rescale(X[i], batch=False)
    return X


def std_normal(size):
    """Draw a tensor of shape `size` from N(0, 1) and pass it through `map_gpu`."""
    noise = torch.normal(0, 1, size=size)
    return map_gpu(noise)

def print_size(net):
    """
    Print how many trainable parameters a network has, in millions.
    Nothing is printed when `net` is None or is not a torch.nn.Module.
    """
    if net is None or not isinstance(net, torch.nn.Module):
        return
    n_trainable = sum(np.prod(p.size()) for p in net.parameters() if p.requires_grad)
    print("{} Parameters: {:.6f}M".format(
        net.__class__.__name__, n_trainable / 1e6), flush=True)


def calc_diffusion_hyperparams(T, beta_0, beta_T):
    """
    Compute the hyperparameters of the diffusion process.

    Parameters:
    T (int):                    number of diffusion steps
    beta_0 and beta_T (float):  first and last value of the beta schedule;
                                the values in between are linearly interpolated

    Returns:
    a dictionary with the keys "T" (int) and "Beta", "Alpha", "Alpha_bar",
    "Sigma" (tensors of shape (T, ), created on the CPU), where
        Alpha     = 1 - Beta
        Alpha_bar = running product of Alpha
        Sigma     = sqrt(Beta_tilde), with
                    Beta_tilde[t] = Beta[t] * (1 - Alpha_bar[t-1]) / (1 - Alpha_bar[t])
                    for t >= 1 and Beta_tilde[0] = Beta[0]
    """
    Beta = torch.linspace(beta_0, beta_T, T)
    Alpha = 1 - Beta

    # independent copies, updated element by element below
    Alpha_bar = Alpha.clone()
    Beta_tilde = Beta.clone()
    for step in range(1, T):
        previous_bar = Alpha_bar[step - 1]
        Alpha_bar[step] *= previous_bar
        Beta_tilde[step] *= (1 - previous_bar) / (1 - Alpha_bar[step])

    return {
        "T": T,
        "Beta": Beta,
        "Alpha": Alpha,
        "Alpha_bar": Alpha_bar,
        "Sigma": torch.sqrt(Beta_tilde),
    }


def bisearch(f, domain, target, eps=1e-8):
    """
    Bisection search for an x whose f(x) lies just at or above `target`.

    The interval is shrunk from the right when f(x) is below the target and
    from the left when it is too far above it, so f is expected to be
    non-increasing on the domain.

    Parameters:
    f (function):               function of one float
    domain (tuple):             search interval (left, right)
    target (float):             target value
    eps (float):                relative tolerance on the side away from zero

    Returns:
    x (float) with target <= f(x) <= (1 + sign(target) * eps) * target, or the
    last midpoint tried if that is not reached within 1000 iterations
    """
    sign = -1 if target < 0 else 1
    upper_bound = (1 + sign * eps) * target
    left, right = domain
    for _ in range(1000):
        x = (left + right) / 2
        fx = f(x)  # evaluate once; f may be expensive
        if fx < target:
            right = x
        elif fx > upper_bound:
            left = x
        else:
            break
    return x


def get_VAR_noise(S, schedule='linear'):
    """
    Compute the noise levels of a shortened, S-step schedule.

    The free parameter of the chosen schedule shape is found with `bisearch`
    so that the product of (1 - noise level) over the S steps matches, within a
    relative tolerance of 1e-4, the same product over the full schedule
    described by `diffusion_config`.

    Parameters:
    S (int):            length of the approximate diffusion process
    schedule (str):     'linear' or 'quadratic'

    Returns:
    np array of noise levels, size = (S, )
    """
    beta_0 = diffusion_config["beta_0"]
    full_schedule = np.linspace(beta_0, diffusion_config["beta_T"], diffusion_config["T"])
    target = np.prod(1 - full_schedule)

    if schedule == 'linear':
        # x is the last noise level of the short schedule
        def g(x):
            return np.linspace(beta_0, x, S)
        domain = (beta_0, 0.99)
    elif schedule == 'quadratic':
        # x controls how fast the levels grow: beta_0 * (1 + i * x) ** 2
        def g(x):
            return np.array([beta_0 * (1+i*x) ** 2 for i in range(S)])
        domain = (0.0, 0.95 / np.sqrt(beta_0) / S)
    else:
        raise NotImplementedError

    def f(x):
        return np.prod(1 - g(x))

    largest_var = bisearch(f, domain, target, eps=1e-4)
    return g(largest_var)


def _log_gamma(x):
    # Gamma(x+1) ~= sqrt(2\pi x) * (x/e)^x  (1 + 1 / 12x)
    y = x - 1
    return np.log(2 * np.pi * y) / 2 + y * (np.log(y) - 1) + np.log(1 + 1 / (12 * y))


def _log_cont_noise(t, beta_0, beta_T, T):
    """Log of the cumulative product of (1 - beta) for a linear schedule, at a real-valued step `t`.

    With delta = (beta_T - beta_0) / (T - 1) and c = (1 - beta_0) / delta, the
    product over steps 0..t equals delta ** (t + 1) * Gamma(c + 1) / Gamma(c - t).
    Writing it with `_log_gamma` lets t be non-integer. The aim is that
    _log_cont_noise(T - 1, ...) ~= np.log(Alpha_bar[-1].numpy()).
    """
    step = (beta_T - beta_0) / (T - 1)
    ratio = (1.0 - beta_0) / step
    n_terms = t + 1
    log_scale = n_terms * np.log(step)
    log_upper = _log_gamma(ratio + 1)
    log_lower = _log_gamma(ratio - n_terms + 1)
    return log_scale + log_upper - log_lower

# VAR
def _precompute_VAR_steps(diffusion_hyperparams, user_defined_eta):
    """
    Map every step of a user-defined noise schedule to a real-valued step of
    the original schedule.

    For each user step the cumulative product Gamma_bar[t] is located between
    two neighbouring Alpha_bar values, and `bisearch` then solves
    _log_cont_noise(step) = log(Gamma_bar[t]) within that bracket.

    Parameters:
    diffusion_hyperparams (dict):   as returned by calc_diffusion_hyperparams
    user_defined_eta (np.array):    user defined noise levels

    Returns:
    list with one entry per user step, ordered from the last user step to the
    first (so the values are meant to be decreasing). T - 1 is used when no
    bracket is found.
    """
    _dh = diffusion_hyperparams
    T, Alpha_bar, Beta = _dh["T"], _dh["Alpha_bar"], _dh["Beta"]
    assert len(Alpha_bar) == T

    # compute diffusion hyperparameters for user defined noise
    T_user = len(user_defined_eta)
    Beta_tilde = map_gpu(torch.from_numpy(user_defined_eta)).to(torch.float32)
    Gamma_bar = 1 - Beta_tilde
    for t in range(1, T_user):
        Gamma_bar[t] *= Gamma_bar[t-1]

    assert Gamma_bar[0] <= Alpha_bar[0] and Gamma_bar[-1] >= Alpha_bar[-1]

    # Everything below only reads values, so bring them to the host once
    # instead of once per comparison / per bisection step.
    alpha_bar_host = Alpha_bar.cpu()
    gamma_bar_host = Gamma_bar.cpu()
    beta_first = Beta[0].cpu().numpy()
    beta_last = Beta[-1].cpu().numpy()

    def log_noise_at(_t):
        return _log_cont_noise(_t, beta_first, beta_last, T)

    continuous_steps = []
    with torch.no_grad():
        for t in range(T_user-1, -1, -1):
            t_adapted = None
            for i in range(T - 1):
                if alpha_bar_host[i] >= gamma_bar_host[t] > alpha_bar_host[i+1]:
                    # search slightly beyond [i, i+1] to tolerate the
                    # approximation error of _log_cont_noise
                    t_adapted = bisearch(f=log_noise_at,
                                         domain=(i-0.01, i+1.01),
                                         target=np.log(gamma_bar_host[t].numpy()))
                    break
            if t_adapted is None:
                t_adapted = T - 1
            continuous_steps.append(t_adapted)  # must be decreasing
    return continuous_steps


def VAR_sampling(net, size, diffusion_hyperparams, user_defined_eta, kappa, continuous_steps):
    """
    Perform the complete sampling step according to user defined variances

    Parameters:
    net (torch network):            the model, called as net(x, diffusion_steps)
    size (tuple):                   size of tensor to be generated; must have
                                    4 entries (asserted) -- `generate` passes
                                    (batchsize, C, H, W)
    diffusion_hyperparams (dict):   dictionary of diffusion hyperparameters returned by calc_diffusion_hyperparams
                                    note, the tensors need to be cuda tensors
    user_defined_eta (np.array):    User defined noise
    kappa (float):                  factor multiplied over sigma, between 0 and 1 (asserted)
    continuous_steps (list):        continuous steps computed from user_defined_eta,
                                    ordered from the last user step to the first

    Returns:
    the generated images in torch.tensor, shape=size
    """

    _dh = diffusion_hyperparams
    T, Alpha, Alpha_bar, Beta = _dh["T"], _dh["Alpha"], _dh["Alpha_bar"], _dh["Beta"]
    assert len(Alpha_bar) == T
    assert len(size) == 4
    assert 0.0 <= kappa <= 1.0

    # compute diffusion hyperparameters for user defined noise
    # (Gamma_bar is the running product of 1 - user_defined_eta)
    T_user = len(user_defined_eta)
    Beta_tilde = map_gpu(torch.from_numpy(user_defined_eta)).to(torch.float32)
    Gamma_bar = 1 - Beta_tilde
    for t in range(1, T_user):
        Gamma_bar[t] *= Gamma_bar[t-1]

    # the user schedule must stay within the noise range of the original one
    assert Gamma_bar[0] <= Alpha_bar[0] and Gamma_bar[-1] >= Alpha_bar[-1]

    # print('begin sampling, total number of reverse steps = %s' % T_user)

    # start from pure noise and walk the user schedule backwards;
    # iteration i handles user step T_user-1-i
    x = std_normal(size)
    with torch.no_grad():
        for i, tau in enumerate(continuous_steps):
            # same (real-valued) step for every sample of the batch
            diffusion_steps = tau * map_gpu(torch.ones(size[0]))
            epsilon_theta = net(x, diffusion_steps)
            if i == T_user - 1:  # the next step is to generate x_0
                assert abs(tau) < 0.1
                # final step: no noise left in the target and none injected
                alpha_next = torch.tensor(1.0)
                sigma = torch.tensor(0.0)
            else:
                # cumulative product at the step we are moving to
                alpha_next = Gamma_bar[T_user-1-i - 1]
                sigma = kappa * torch.sqrt((1-alpha_next) / (1-Gamma_bar[T_user-1-i]) * (1 - Gamma_bar[T_user-1-i] / alpha_next))
            # x <- sqrt(alpha_next / alpha_cur) * x + c * epsilon_theta + sigma * z
            # (x is updated in place)
            x *= torch.sqrt(alpha_next / Gamma_bar[T_user-1-i])
            c = torch.sqrt(1 - alpha_next - sigma ** 2) - torch.sqrt(1 - Gamma_bar[T_user-1-i]) * torch.sqrt(alpha_next / Gamma_bar[T_user-1-i])
            x += c * epsilon_theta + sigma * std_normal(size)

    return x


def generate(net, model_config,
             n_generate, batchsize,
             kappa=1.0,
             S=100,
             output_name=None):
    """
    Sample images from the model with a shortened, S-step noise schedule.

    Parameters:
    net (object):                   torch model
    model_config (dic):             dic of model config; "in_channels" and
                                    "resolution" are read
    n_generate (int):               total number of generated samples; must be
                                    a multiple of batchsize
    batchsize (int):                samples drawn per sampling pass; lowered to
                                    n_generate if larger
    kappa (float):                  passed on to VAR_sampling
    S (int):                        length of the shortened schedule, passed on
                                    to get_VAR_noise
    output_name (str):              if given, every sample is saved as
                                    generated/<output_name>/<index>.jpg and a
                                    grid of the first 64 as
                                    generated/<output_name>.jpg

    Returns:
    Generated images (tensor):      (n_generate, C, H, W). When output_name is
                                    given the images are passed through
                                    rescale(), which writes its result back into
                                    the batch, so the returned tensor then holds
                                    the rescaled images.
    """
    if batchsize > n_generate:
        batchsize = n_generate
    assert n_generate % batchsize == 0

    if output_name is not None:
        # creates 'generated' as well and tolerates existing directories
        os.makedirs(os.path.join('generated', output_name), exist_ok=True)

    # map diffusion hyperparameters to gpu
    diffusion_hyperparams = calc_diffusion_hyperparams(**diffusion_config)
    for key in diffusion_hyperparams:
        if key != "T":
            diffusion_hyperparams[key] = map_gpu(diffusion_hyperparams[key])

    print_size(net)

    # sampling
    C, H, W = model_config["in_channels"], model_config["resolution"], model_config["resolution"]

    # the schedule does not depend on the batch, so it is computed once
    user_defined_eta = get_VAR_noise(S)
    continuous_steps = _precompute_VAR_steps(diffusion_hyperparams, user_defined_eta)

    # n_generate used to be validated but ignored: only a single batch was ever
    # sampled. Draw as many batches as needed to reach n_generate samples.
    batches = []
    for i_batch in range(n_generate // batchsize):
        Xi = VAR_sampling(net, (batchsize, C, H, W),
                          diffusion_hyperparams,
                          user_defined_eta,
                          kappa=kappa,
                          continuous_steps=continuous_steps)

        if output_name is not None:
            # save image
            for j, x in enumerate(rescale(Xi)):
                index = i_batch * batchsize + j
                save_image(x, fp=os.path.join('generated', output_name, '{}.jpg'.format(index)))
        batches.append(Xi)

    # a single batch is returned as is, exactly as before
    samples = batches[0] if len(batches) == 1 else torch.cat(batches, dim=0)

    if output_name is not None:
        # rescale() works per image, so rescaling the first 64 is the same as
        # taking the first 64 rescaled images
        save_image(make_grid(rescale(samples[:64])), fp=os.path.join('generated', '{}.jpg'.format(output_name)))

    return samples

In [6]:
import tensorflow as tf

    # "beta_0": 0.0001,
    # "beta_T": 0.02,
    # "T": total_timesteps,

class GaussianDiffusion:
    """Gaussian diffusion utility.

    Precomputes, as TensorFlow float32 constants, the per-timestep coefficients
    of a linear beta schedule and offers the forward (q_*) and reverse (p_*)
    sampling helpers that use them.

    Args:
        beta_start: Start value of the scheduled variance
        beta_end: End value of the scheduled variance
        timesteps: Number of time steps in the forward process
        clip_min: Lower bound used when clipping the reconstructed sample
        clip_max: Upper bound used when clipping the reconstructed sample

    Note: the default values are read from `diffusion_config` once, when the
    class is defined; later changes to that dict do not affect them.
    """

    def __init__(
        self,
        beta_start=diffusion_config['beta_0'],
        beta_end=diffusion_config['beta_T'],
        timesteps=diffusion_config['T'],
        clip_min=-1.0,
        clip_max=1.0,
    ):
        self.beta_start = beta_start
        self.beta_end = beta_end
        self.timesteps = timesteps
        self.clip_min = clip_min
        self.clip_max = clip_max

        # Define the linear variance schedule
        self.betas = betas = np.linspace(
            beta_start,
            beta_end,
            timesteps,
            dtype=np.float32,  # NOTE(review): this comment used to say "float64 for better precision", but the dtype is float32 -- confirm which one is intended
        )
        self.num_timesteps = int(timesteps)

        alphas = 1.0 - betas
        alphas_cumprod = np.cumprod(alphas, axis=0)
        # cumulative product shifted by one step, with 1.0 for "before step 0"
        alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1])

        # every stored coefficient is a float32 TensorFlow constant
        self.betas = tf.constant(betas, dtype=tf.float32)
        self.alphas_cumprod = tf.constant(alphas_cumprod, dtype=tf.float32)
        self.alphas_cumprod_prev = tf.constant(alphas_cumprod_prev, dtype=tf.float32)

        # Calculations for diffusion q(x_t | x_{t-1}) and others
        self.sqrt_alphas_cumprod = tf.constant(
            np.sqrt(alphas_cumprod), dtype=tf.float32
        )

        self.sqrt_one_minus_alphas_cumprod = tf.constant(
            np.sqrt(1.0 - alphas_cumprod), dtype=tf.float32
        )

        self.log_one_minus_alphas_cumprod = tf.constant(
            np.log(1.0 - alphas_cumprod), dtype=tf.float32
        )

        self.sqrt_recip_alphas_cumprod = tf.constant(
            np.sqrt(1.0 / alphas_cumprod), dtype=tf.float32
        )
        self.sqrt_recipm1_alphas_cumprod = tf.constant(
            np.sqrt(1.0 / alphas_cumprod - 1), dtype=tf.float32
        )

        # Calculations for posterior q(x_{t-1} | x_t, x_0)
        posterior_variance = (
            betas * (1.0 - alphas_cumprod_prev) / (1.0 - alphas_cumprod)
        )
        self.posterior_variance = tf.constant(posterior_variance, dtype=tf.float32)

        # Log calculation clipped because the posterior variance is 0 at the beginning
        # of the diffusion chain
        self.posterior_log_variance_clipped = tf.constant(
            np.log(np.maximum(posterior_variance, 1e-20)), dtype=tf.float32
        )

        # weight of x_0 in the posterior mean
        self.posterior_mean_coef1 = tf.constant(
            betas * np.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod),
            dtype=tf.float32,
        )

        # weight of x_t in the posterior mean
        self.posterior_mean_coef2 = tf.constant(
            (1.0 - alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - alphas_cumprod),
            dtype=tf.float32,
        )

    def _extract(self, a, t, x_shape):
        """Extract some coefficients at specified timesteps,
        then reshape to [batch_size, 1, 1, 1] for broadcasting purposes.

        The target shape is fixed to four dimensions, so the result broadcasts
        against 4-D batches only.

        Args:
            a: Tensor to extract from
            t: Timestep for which the coefficients are to be extracted
            x_shape: Shape of the current batched samples
        """
        batch_size = x_shape[0]
        out = tf.gather(a, t)
        return tf.reshape(out, [batch_size, 1, 1, 1])

    def q_mean_variance(self, x_start, t):
        """Extracts the mean, and the variance at current timestep.

        Args:
            x_start: Initial sample (before the first diffusion step)
            t: Current timestep
        Returns:
            Mean, variance and log-variance of q(x_t | x_0)
        """
        x_start_shape = tf.shape(x_start)
        mean = self._extract(self.sqrt_alphas_cumprod, t, x_start_shape) * x_start
        variance = self._extract(1.0 - self.alphas_cumprod, t, x_start_shape)
        log_variance = self._extract(
            self.log_one_minus_alphas_cumprod, t, x_start_shape
        )
        return mean, variance, log_variance

    def q_sample(self, x_start, t, noise):
        """Diffuse the data.

        Args:
            x_start: Initial sample (before the first diffusion step)
            t: Current timestep
            noise: Gaussian noise to be added at the current timestep
        Returns:
            Diffused samples at timestep `t`
        """

        # Layout reminder: the torch side of this notebook uses B, C, H, W
        # while the TensorFlow side uses B, H, W, C.

        x_start_shape = tf.shape(x_start)
        result = (
            self._extract(self.sqrt_alphas_cumprod, t, tf.shape(x_start)) * x_start
            + self._extract(self.sqrt_one_minus_alphas_cumprod, t, x_start_shape)
            * noise
        )

        return result

    def predict_start_from_noise(self, x_t, t, noise):
        """Invert q_sample: estimate x_0 from the noisy sample `x_t` at timestep
        `t` and the noise that is assumed to have been added."""
        x_t_shape = tf.shape(x_t)
        return (
            self._extract(self.sqrt_recip_alphas_cumprod, t, x_t_shape) * x_t
            - self._extract(self.sqrt_recipm1_alphas_cumprod, t, x_t_shape) * noise
        )

    def q_posterior(self, x_start, x_t, t):
        """Compute the mean and variance of the diffusion
        posterior q(x_{t-1} | x_t, x_0).

        Args:
            x_start: Starting point (sample) for the posterior computation
            x_t: Sample at timestep `t`
            t: Current timestep
        Returns:
            Posterior mean, variance and clipped log-variance at current timestep
        """
        x_t_shape = tf.shape(x_t)
        posterior_mean = (
            self._extract(self.posterior_mean_coef1, t, x_t_shape) * x_start
            + self._extract(self.posterior_mean_coef2, t, x_t_shape) * x_t
        )
        posterior_variance = self._extract(self.posterior_variance, t, x_t_shape)
        posterior_log_variance_clipped = self._extract(
            self.posterior_log_variance_clipped, t, x_t_shape
        )
        return posterior_mean, posterior_variance, posterior_log_variance_clipped

    def p_mean_variance(self, pred_noise, x, t, clip_denoised=True):
        """Mean, variance and log-variance of the reverse step at timestep `t`.

        x_0 is reconstructed from `x` and the predicted noise, optionally
        clipped to [clip_min, clip_max], and then fed to q_posterior.
        """
        x_recon = self.predict_start_from_noise(x, t=t, noise=pred_noise)
        if clip_denoised:
            x_recon = tf.clip_by_value(x_recon, self.clip_min, self.clip_max)

        model_mean, posterior_variance, posterior_log_variance = self.q_posterior(
            x_start=x_recon, x_t=x, t=t
        )
        return model_mean, posterior_variance, posterior_log_variance

    def p_sample(self, pred_noise, x, t, clip_denoised=True):
        """Sample from the diffusion model.

        Args:
            pred_noise: Noise predicted by the diffusion model
            x: Samples at a given timestep for which the noise was predicted
            t: Current timestep
            clip_denoised (bool): Whether to clip the reconstructed sample
                (the x_0 estimate) to [clip_min, clip_max] or not.
        """
        model_mean, _, model_log_variance = self.p_mean_variance(
            pred_noise, x=x, t=t, clip_denoised=clip_denoised
        )
        noise = tf.random.normal(shape=x.shape, dtype=x.dtype)
        # No noise when t == 0
        nonzero_mask = tf.reshape(
            1 - tf.cast(tf.equal(t, 0), tf.float32), [tf.shape(x)[0], 1, 1, 1]
        )
        # exp(0.5 * log variance) is the standard deviation
        return model_mean + nonzero_mask * tf.exp(0.5 * model_log_variance) * noise

In [7]:
def torch_to_tf_tensor(torch_tensor):
    """Convert a torch tensor to a TensorFlow tensor by way of NumPy.

    The tensor is detached and copied to the CPU first, because
    `Tensor.numpy()` refuses tensors that track gradients or live on a GPU.
    For a plain CPU tensor both calls are no-ops.
    """
    np_format = torch_tensor.detach().cpu().numpy()
    return tf.convert_to_tensor(np_format)

def tf_to_torch_tensor(tf_tensor):
    """Convert a TensorFlow tensor to a torch tensor by way of its `.numpy()` array."""
    return torch.from_numpy(tf_tensor.numpy())

# Axis layouts:  torch uses B, C, H, W  --  TensorFlow uses B, H, W, C
def torch_to_tf_dim(torch_tensor):
    """Move the channel axis (1) to position 3 and convert the result to a TensorFlow tensor."""
    channels_last = torch.movedim(torch_tensor, 1, 3)
    return torch_to_tf_tensor(channels_last)

def tf_to_torch_dim(tf_tensor):
  """Convert a TensorFlow batch to torch and reorder it to the torch layout.

  The input is converted with `tf_to_torch_tensor`, then axis 3 is moved to
  position 1, turning (B, H, W, C) into (B, C, H, W).

  Args:
    tf_tensor: Tensor laid out as (B, H, W, C).

  Returns:
    PyTorch tensor laid out as (B, C, H, W).
  """
  channels_last = tf_to_torch_tensor(tf_tensor)
  return torch.movedim(channels_last, 3, 1)

In [7]:
# gdf_util = GaussianDiffusion(timesteps=total_timesteps)

In [8]:
# from torch.utils.tensorboard.writer import SummaryWriter
# from datetime import datetime
# from tqdm import tqdm
# import time

# class ModelTrainer():
#   def __init__(self, model, loss, optimizer, time_steps=100):
#     self.model = model
#     self.loss = loss
#     self.optimizer = optimizer
#     self.time_steps = time_steps

#   def fit(self, ds, epochs=1, callbacks=[], **kwargs):
#     print(f'Starting fitting with {self.time_steps}')

#     running_loss = 0.
#     last_loss = 0.

#     i = 0
#     data_size = len(ds.dataset)

#     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
#     writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))
#     epoch_number = 0

#     start = time.time()

#     for epoch in range(epochs):
#       print('EPOCH {}:'.format(epoch + 1))
#       model.train()

#       i = 0
#       bar = tqdm(ds)
#       for data in bar:
#         inputs, _ = data

#         current_batch_size = inputs.size()[0]

#         self.optimizer.zero_grad()
#         t = torch.Tensor.to(
#             torch.floor(torch.rand(size=(current_batch_size,)) * self.time_steps),
#             dtype=torch.int32)

#         noise = torch.randn(size=inputs.size())

#         inputs = torch_to_tf_dim(inputs)
#         tf_t = torch_to_tf_tensor(t)
#         tf_noise = torch_to_tf_dim(noise)

#         noised_t = gdf_util.q_sample(inputs, tf_t, tf_noise) # 3 torch tensors -> converted -> processed by tensorflow -> 1 torch tensor
#         noised_t = tf_to_torch_dim(noised_t)

#         outputs = self.model(noised_t.to('cuda'), t.to('cuda'))
#         loss = self.loss(outputs.to('cuda'), noise.to('cuda'))
#         loss.backward()

#         self.optimizer.step()
#         running_loss += loss.item()
#         if i % 16 == 15:
#             tb_x = epoch * current_batch_size + i + 1
#             writer.add_scalar('Loss/train', last_loss, tb_x)
#             last_loss = running_loss / current_batch_size # loss per batch
#             bar.set_description_str(f'batch {i + 1}/{data_size // current_batch_size} loss: {last_loss:.4f}')

#             running_loss = 0.

#         i += 1

#       for callback in callbacks:
#         callback(model)

#       print(f'Run for {time.time() - start}s after {epoch} epochs!')

#     return last_loss

In [8]:
from matplotlib import pyplot as plt
import cv2
from sklearn.preprocessing import MinMaxScaler

# Preview grid: `plot_counts` generated images laid out over `num_rows` rows.
plot_counts, num_rows = 16, 2
num_cols = plot_counts // num_rows

# Image settings passed to `generate` alongside the network.
model_config = dict(in_channels=img_channels, resolution=img_size)

# File that `save_model` writes the model's state dict to.
save_path = '/content/drive/MyDrive/FastDPM/model.torch'

def plot(net):
    """Generate `plot_counts` samples with `net` and display them in a grid.

    The grid has `num_rows` x `num_cols` cells. Each image is moved to the
    CPU, reordered from (C, H, W) to (H, W, C) and min-max scaled per channel
    before being drawn. Cells without an image stay blank; images beyond the
    number of cells are not drawn.

    Args:
        net: Network passed to `generate` to synthesise the images.
    """
    # Pass the configured `S` so the run matches the message printed below.
    images = generate(net,
                      model_config,
                      plot_counts,
                      batch_size,
                      S=S,
                      kappa=1.0)

    # squeeze=False always yields a 2-D array of axes, so single-row and
    # single-cell grids need no special indexing.
    _, axes = plt.subplots(num_rows, num_cols, figsize=(10, 10), squeeze=False)
    for axis in axes.flat:
        axis.axis("off")

    print(f'Plotting... Approximation using S={S}')
    for axis, image in zip(axes.flat, images):
        # (C, H, W) -> (H, W, C), the layout imshow expects.
        hwc_image = image.detach().cpu().permute(1, 2, 0).numpy()
        height, width, channels = hwc_image.shape

        # MinMaxScaler works column-wise, so flatten the pixels into rows to
        # rescale every channel independently, then restore the image shape.
        scaled = MinMaxScaler().fit_transform(
            hwc_image.reshape(height * width, channels)
        ).reshape(hwc_image.shape)

        axis.imshow(scaled)

    plt.tight_layout()
    plt.show()

def save_model(net):
  """Write the network's state dict to `save_path`.

  Parallel wrappers are unwrapped first, so the stored keys carry no
  `module.` prefix and load straight into a bare model. Models that are not
  wrapped are saved as they are.

  Args:
    net: The model to save, optionally wrapped in `nn.DataParallel` or
      `nn.parallel.DistributedDataParallel`.
  """
  parallel_wrappers = (nn.DataParallel, nn.parallel.DistributedDataParallel)
  bare_model = net.module if isinstance(net, parallel_wrappers) else net
  with open(save_path, 'wb') as f:
    torch.save(bare_model.state_dict(), f)

In [10]:
# from torch.nn import MSELoss
# from torch.optim import Adam

# print('Training info')
# print(f'bs: {batch_size}')
# print(f'n_epochs: {num_epochs}')
# print(f'time steps: {total_timesteps}')
# print(f'lr = {learning_rate}')
# print(f'img_size: {img_size}')

# """
#   ch: Starting curve-down filters of first unit
#   out_ch: Output channels num
#   num_res_blocks: number of residuals in each unit(width of each encode-decode unit)
#   attn_resolutions: specify which resolution to apply attention
#   in_channels:  first conv filters
#   resolution: images resolution
# """

# loader = torch.utils.data.DataLoader(
#              ds,
#              batch_size=batch_size,
#              shuffle=True,
#              num_workers=112,
#              persistent_workers=True)

# model = Model(
#   ch=first_conv_channels,
#   out_ch=img_channels,
#   num_res_blocks=num_res_blocks,
#   attn_resolutions=has_attention,
#   in_channels=3,
#   resolution=img_size)

# model = nn.DataParallel(model)
# model = model.to('cuda')

# trainer = ModelTrainer(model, MSELoss(),
#                        Adam(model.parameters(),
#                             lr=learning_rate), time_steps=total_timesteps)

# # plot(model)
# trainer.fit(loader, epochs=6, callbacks=[plot, save_model])

In [9]:
import os

# Rebuild the network with the training-time architecture and restore the
# weights written by `save_model`.
loaded_model = Model(
  ch=first_conv_channels,
  out_ch=img_channels,
  num_res_blocks=num_res_blocks,
  attn_resolutions=has_attention,
  in_channels=3,
  resolution=img_size)

# Load onto the CPU first; the model is moved to the GPU afterwards.
loaded_model.load_state_dict(torch.load(save_path, map_location='cpu'))

loaded = nn.DataParallel(loaded_model)
loaded = loaded.to('cuda')

total_fake_images = 10_000
fake_dir = '/content/drive/MyDrive/FastDPM/fake'
# cv2.imwrite returns False instead of raising when the directory is missing.
os.makedirs(fake_dir, exist_ok=True)

cnt = 0
# True division, so a partial final batch still gets its own iteration.
for outer in tqdm(range(math.ceil(total_fake_images / batch_size))):
  images = generate(loaded,
                    model_config,
                    plot_counts,
                    batch_size,
                    S=S,
                    kappa=1.0)
  for i, image in enumerate(images):
    if cnt >= total_fake_images:
      break

    # (C, H, W) -> (H, W, C)
    cpu_image = image.detach().cpu().permute(1, 2, 0).numpy()
    ori_shape = cpu_image.shape
    # Rescale every channel independently to [0, 1].
    cpu_image = MinMaxScaler().fit_transform(cpu_image.reshape((
        ori_shape[0] * ori_shape[1], ori_shape[2]
    ))).reshape(ori_shape)

    cnt += 1

    pixels = (cpu_image * 255).astype(np.uint8)
    # cv2.imwrite expects BGR channel order; the generated images are assumed
    # to be RGB like the PIL-derived training tensors, so reverse the channels.
    bgr_pixels = np.ascontiguousarray(pixels[..., ::-1])
    out_path = f'{fake_dir}/{cnt}.png'
    if not cv2.imwrite(out_path, bgr_pixels):
      raise IOError(f'Failed to write {out_path}')

  # >= rather than == so an overshoot can never skip the stop condition.
  if cnt >= total_fake_images:
    break

# trainer = ModelTrainer(loaded, MSELoss(),
#                        Adam(loaded.parameters(),
#                             lr=learning_rate), time_steps=total_timesteps)

# loader = torch.utils.data.DataLoader(
#              ds,
#              batch_size=batch_size,
#              shuffle=True,
#              num_workers=56,
#              persistent_workers=True)
# trainer.fit(loader, epochs=15, callbacks=[plot, save_model])

  0%|          | 0/625 [00:00<?, ?it/s]

DataParallel Parameters: 56.574595M


  0%|          | 1/625 [00:47<8:13:10, 47.42s/it]

DataParallel Parameters: 56.574595M


  0%|          | 2/625 [01:03<5:00:49, 28.97s/it]

DataParallel Parameters: 56.574595M


  0%|          | 3/625 [01:18<3:55:40, 22.73s/it]

DataParallel Parameters: 56.574595M


  1%|          | 4/625 [01:34<3:25:11, 19.82s/it]

DataParallel Parameters: 56.574595M


  1%|          | 5/625 [01:49<3:08:16, 18.22s/it]

DataParallel Parameters: 56.574595M


  1%|          | 6/625 [02:05<2:59:05, 17.36s/it]

DataParallel Parameters: 56.574595M


  1%|          | 7/625 [02:21<2:55:45, 17.06s/it]

DataParallel Parameters: 56.574595M


  1%|▏         | 8/625 [02:37<2:50:24, 16.57s/it]

DataParallel Parameters: 56.574595M


  1%|▏         | 9/625 [02:53<2:48:03, 16.37s/it]

DataParallel Parameters: 56.574595M


  2%|▏         | 10/625 [03:09<2:47:37, 16.35s/it]

DataParallel Parameters: 56.574595M


  2%|▏         | 11/625 [03:25<2:45:10, 16.14s/it]

DataParallel Parameters: 56.574595M


  2%|▏         | 12/625 [03:41<2:45:01, 16.15s/it]

DataParallel Parameters: 56.574595M


  2%|▏         | 13/625 [03:56<2:42:34, 15.94s/it]

DataParallel Parameters: 56.574595M


  2%|▏         | 14/625 [04:12<2:41:04, 15.82s/it]

DataParallel Parameters: 56.574595M


  2%|▏         | 15/625 [04:28<2:41:19, 15.87s/it]

DataParallel Parameters: 56.574595M


  3%|▎         | 16/625 [04:43<2:39:20, 15.70s/it]

DataParallel Parameters: 56.574595M


  3%|▎         | 17/625 [04:59<2:39:30, 15.74s/it]

DataParallel Parameters: 56.574595M


  3%|▎         | 18/625 [05:15<2:39:31, 15.77s/it]

DataParallel Parameters: 56.574595M


  3%|▎         | 19/625 [05:30<2:39:12, 15.76s/it]

DataParallel Parameters: 56.574595M


  3%|▎         | 20/625 [05:46<2:37:47, 15.65s/it]

DataParallel Parameters: 56.574595M


  3%|▎         | 21/625 [06:01<2:37:00, 15.60s/it]

DataParallel Parameters: 56.574595M


  4%|▎         | 22/625 [06:17<2:37:31, 15.67s/it]

DataParallel Parameters: 56.574595M


  4%|▎         | 23/625 [06:33<2:39:08, 15.86s/it]

DataParallel Parameters: 56.574595M


  4%|▍         | 24/625 [06:49<2:39:12, 15.89s/it]

DataParallel Parameters: 56.574595M


  4%|▍         | 25/625 [07:05<2:39:08, 15.91s/it]

DataParallel Parameters: 56.574595M


  4%|▍         | 26/625 [07:21<2:39:02, 15.93s/it]

DataParallel Parameters: 56.574595M


  4%|▍         | 27/625 [07:37<2:37:43, 15.83s/it]

DataParallel Parameters: 56.574595M


  4%|▍         | 28/625 [07:53<2:38:45, 15.96s/it]

DataParallel Parameters: 56.574595M


  5%|▍         | 29/625 [08:09<2:36:46, 15.78s/it]

DataParallel Parameters: 56.574595M


  5%|▍         | 30/625 [08:24<2:35:27, 15.68s/it]

DataParallel Parameters: 56.574595M


  5%|▍         | 31/625 [08:40<2:34:37, 15.62s/it]

DataParallel Parameters: 56.574595M


  5%|▌         | 32/625 [08:55<2:33:47, 15.56s/it]

DataParallel Parameters: 56.574595M


  5%|▌         | 33/625 [09:11<2:33:55, 15.60s/it]

DataParallel Parameters: 56.574595M


  5%|▌         | 34/625 [09:27<2:35:30, 15.79s/it]

DataParallel Parameters: 56.574595M


  6%|▌         | 35/625 [09:42<2:34:28, 15.71s/it]

DataParallel Parameters: 56.574595M


  6%|▌         | 36/625 [09:58<2:33:46, 15.66s/it]

DataParallel Parameters: 56.574595M


  6%|▌         | 37/625 [10:13<2:32:50, 15.60s/it]

DataParallel Parameters: 56.574595M


  6%|▌         | 38/625 [10:29<2:32:55, 15.63s/it]

DataParallel Parameters: 56.574595M


  6%|▌         | 39/625 [10:45<2:34:14, 15.79s/it]

DataParallel Parameters: 56.574595M


  6%|▋         | 40/625 [11:01<2:33:11, 15.71s/it]

DataParallel Parameters: 56.574595M


  7%|▋         | 41/625 [11:16<2:32:17, 15.65s/it]

DataParallel Parameters: 56.574595M


  7%|▋         | 42/625 [11:31<2:30:46, 15.52s/it]

DataParallel Parameters: 56.574595M


  7%|▋         | 43/625 [11:47<2:30:14, 15.49s/it]

DataParallel Parameters: 56.574595M


  7%|▋         | 44/625 [12:03<2:31:44, 15.67s/it]

DataParallel Parameters: 56.574595M


  7%|▋         | 45/625 [12:19<2:32:32, 15.78s/it]

DataParallel Parameters: 56.574595M


  7%|▋         | 46/625 [12:35<2:31:51, 15.74s/it]

DataParallel Parameters: 56.574595M


  8%|▊         | 47/625 [12:50<2:30:11, 15.59s/it]

DataParallel Parameters: 56.574595M


  8%|▊         | 48/625 [13:06<2:30:37, 15.66s/it]

DataParallel Parameters: 56.574595M


  8%|▊         | 49/625 [13:21<2:30:34, 15.68s/it]

DataParallel Parameters: 56.574595M


  8%|▊         | 50/625 [13:38<2:31:55, 15.85s/it]

DataParallel Parameters: 56.574595M


  8%|▊         | 51/625 [13:53<2:30:07, 15.69s/it]

DataParallel Parameters: 56.574595M


  8%|▊         | 52/625 [14:08<2:29:08, 15.62s/it]

DataParallel Parameters: 56.574595M


  8%|▊         | 53/625 [14:24<2:28:03, 15.53s/it]

DataParallel Parameters: 56.574595M


  9%|▊         | 54/625 [14:40<2:28:17, 15.58s/it]

DataParallel Parameters: 56.574595M


  9%|▉         | 55/625 [14:56<2:31:25, 15.94s/it]

DataParallel Parameters: 56.574595M


  9%|▉         | 56/625 [15:12<2:30:16, 15.85s/it]

DataParallel Parameters: 56.574595M


  9%|▉         | 57/625 [15:27<2:29:05, 15.75s/it]

DataParallel Parameters: 56.574595M


  9%|▉         | 58/625 [15:43<2:28:26, 15.71s/it]

DataParallel Parameters: 56.574595M


  9%|▉         | 59/625 [15:59<2:28:18, 15.72s/it]

DataParallel Parameters: 56.574595M


 10%|▉         | 60/625 [16:15<2:28:50, 15.81s/it]

DataParallel Parameters: 56.574595M


 10%|▉         | 61/625 [16:31<2:29:04, 15.86s/it]

DataParallel Parameters: 56.574595M


 10%|▉         | 62/625 [16:46<2:27:14, 15.69s/it]

DataParallel Parameters: 56.574595M


 10%|█         | 63/625 [17:02<2:26:31, 15.64s/it]

DataParallel Parameters: 56.574595M


 10%|█         | 64/625 [17:17<2:26:26, 15.66s/it]

DataParallel Parameters: 56.574595M


 10%|█         | 65/625 [17:33<2:27:23, 15.79s/it]

DataParallel Parameters: 56.574595M


 11%|█         | 66/625 [17:50<2:28:23, 15.93s/it]

DataParallel Parameters: 56.574595M


 11%|█         | 67/625 [18:05<2:27:15, 15.83s/it]

DataParallel Parameters: 56.574595M


 11%|█         | 68/625 [18:21<2:25:42, 15.70s/it]

DataParallel Parameters: 56.574595M


 11%|█         | 69/625 [18:36<2:24:25, 15.59s/it]

DataParallel Parameters: 56.574595M


 11%|█         | 70/625 [18:52<2:25:41, 15.75s/it]

DataParallel Parameters: 56.574595M


 11%|█▏        | 71/625 [19:08<2:26:58, 15.92s/it]

DataParallel Parameters: 56.574595M


 12%|█▏        | 72/625 [19:24<2:26:10, 15.86s/it]

DataParallel Parameters: 56.574595M


 12%|█▏        | 73/625 [19:40<2:24:41, 15.73s/it]

DataParallel Parameters: 56.574595M


 12%|█▏        | 74/625 [19:55<2:24:12, 15.70s/it]

DataParallel Parameters: 56.574595M


 12%|█▏        | 75/625 [20:11<2:23:38, 15.67s/it]

DataParallel Parameters: 56.574595M


 12%|█▏        | 76/625 [20:27<2:26:02, 15.96s/it]

DataParallel Parameters: 56.574595M


 12%|█▏        | 77/625 [20:43<2:25:15, 15.90s/it]

DataParallel Parameters: 56.574595M


 12%|█▏        | 78/625 [20:59<2:25:56, 16.01s/it]

DataParallel Parameters: 56.574595M


 13%|█▎        | 79/625 [21:15<2:25:35, 16.00s/it]

DataParallel Parameters: 56.574595M


 13%|█▎        | 80/625 [21:31<2:25:10, 15.98s/it]

DataParallel Parameters: 56.574595M


 13%|█▎        | 81/625 [21:47<2:24:18, 15.92s/it]

DataParallel Parameters: 56.574595M


 13%|█▎        | 82/625 [22:03<2:23:09, 15.82s/it]

DataParallel Parameters: 56.574595M


 13%|█▎        | 83/625 [22:19<2:23:19, 15.87s/it]

DataParallel Parameters: 56.574595M


 13%|█▎        | 84/625 [22:35<2:23:33, 15.92s/it]

DataParallel Parameters: 56.574595M


 14%|█▎        | 85/625 [22:51<2:25:09, 16.13s/it]

DataParallel Parameters: 56.574595M


 14%|█▍        | 86/625 [23:07<2:23:31, 15.98s/it]

DataParallel Parameters: 56.574595M


 14%|█▍        | 87/625 [23:23<2:22:06, 15.85s/it]

DataParallel Parameters: 56.574595M


 14%|█▍        | 88/625 [23:38<2:20:57, 15.75s/it]

DataParallel Parameters: 56.574595M


 14%|█▍        | 89/625 [23:54<2:20:19, 15.71s/it]

DataParallel Parameters: 56.574595M


 14%|█▍        | 90/625 [24:10<2:21:41, 15.89s/it]

DataParallel Parameters: 56.574595M


 15%|█▍        | 91/625 [24:26<2:20:45, 15.82s/it]

DataParallel Parameters: 56.574595M


 15%|█▍        | 92/625 [24:41<2:19:58, 15.76s/it]

DataParallel Parameters: 56.574595M


 15%|█▍        | 93/625 [25:00<2:27:14, 16.61s/it]

DataParallel Parameters: 56.574595M


 15%|█▌        | 94/625 [25:23<2:45:01, 18.65s/it]

DataParallel Parameters: 56.574595M


 15%|█▌        | 95/625 [25:40<2:39:13, 18.03s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 15%|█▌        | 96/625 [25:56<2:34:22, 17.51s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 16%|█▌        | 97/625 [26:12<2:29:38, 17.00s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 16%|█▌        | 98/625 [26:28<2:27:04, 16.75s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 16%|█▌        | 99/625 [26:44<2:23:12, 16.34s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 16%|█▌        | 100/625 [27:00<2:22:52, 16.33s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 16%|█▌        | 101/625 [27:17<2:23:51, 16.47s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 16%|█▋        | 102/625 [27:32<2:20:30, 16.12s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 16%|█▋        | 103/625 [27:47<2:18:15, 15.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 17%|█▋        | 104/625 [28:04<2:19:45, 16.10s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 17%|█▋        | 105/625 [28:20<2:18:32, 15.99s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 17%|█▋        | 106/625 [28:35<2:17:58, 15.95s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 17%|█▋        | 107/625 [28:51<2:16:12, 15.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 17%|█▋        | 108/625 [29:07<2:16:21, 15.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 17%|█▋        | 109/625 [29:22<2:14:39, 15.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 18%|█▊        | 110/625 [29:37<2:13:45, 15.58s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 18%|█▊        | 111/625 [29:54<2:14:53, 15.75s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 18%|█▊        | 112/625 [30:09<2:15:04, 15.80s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 18%|█▊        | 113/625 [30:25<2:14:02, 15.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 18%|█▊        | 114/625 [30:40<2:12:52, 15.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 18%|█▊        | 115/625 [30:56<2:12:44, 15.62s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 19%|█▊        | 116/625 [31:11<2:12:05, 15.57s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 19%|█▊        | 117/625 [31:28<2:14:41, 15.91s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 19%|█▉        | 118/625 [31:45<2:15:43, 16.06s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 19%|█▉        | 119/625 [32:00<2:13:53, 15.88s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 19%|█▉        | 120/625 [32:16<2:12:48, 15.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 19%|█▉        | 121/625 [32:31<2:12:05, 15.73s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 20%|█▉        | 122/625 [32:47<2:12:49, 15.84s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 20%|█▉        | 123/625 [33:04<2:14:21, 16.06s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 20%|█▉        | 124/625 [33:19<2:12:39, 15.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 20%|██        | 125/625 [33:35<2:11:00, 15.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 20%|██        | 126/625 [33:51<2:11:43, 15.84s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 20%|██        | 127/625 [34:06<2:10:53, 15.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 20%|██        | 128/625 [34:23<2:11:50, 15.92s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 21%|██        | 129/625 [34:38<2:10:32, 15.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 21%|██        | 130/625 [34:54<2:10:31, 15.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 21%|██        | 131/625 [35:10<2:09:23, 15.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 21%|██        | 132/625 [35:25<2:08:18, 15.62s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 21%|██▏       | 133/625 [35:41<2:09:17, 15.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 21%|██▏       | 134/625 [35:57<2:09:41, 15.85s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 22%|██▏       | 135/625 [36:13<2:09:14, 15.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 22%|██▏       | 136/625 [36:28<2:08:17, 15.74s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 22%|██▏       | 137/625 [36:44<2:06:42, 15.58s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 22%|██▏       | 138/625 [37:00<2:07:25, 15.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 22%|██▏       | 139/625 [37:15<2:07:39, 15.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 22%|██▏       | 140/625 [37:31<2:07:34, 15.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 23%|██▎       | 141/625 [37:46<2:05:45, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 23%|██▎       | 142/625 [38:02<2:05:01, 15.53s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 23%|██▎       | 143/625 [38:17<2:05:01, 15.56s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 23%|██▎       | 144/625 [38:34<2:06:33, 15.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 23%|██▎       | 145/625 [38:49<2:05:35, 15.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 23%|██▎       | 146/625 [39:05<2:04:51, 15.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 24%|██▎       | 147/625 [39:20<2:04:13, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 24%|██▎       | 148/625 [39:36<2:04:18, 15.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 24%|██▍       | 149/625 [39:52<2:05:09, 15.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 24%|██▍       | 150/625 [40:07<2:03:37, 15.62s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 24%|██▍       | 151/625 [40:23<2:03:39, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 24%|██▍       | 152/625 [40:39<2:03:00, 15.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 24%|██▍       | 153/625 [40:54<2:02:43, 15.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 25%|██▍       | 154/625 [41:10<2:04:10, 15.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 25%|██▍       | 155/625 [41:26<2:02:36, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 25%|██▍       | 156/625 [41:42<2:02:41, 15.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 25%|██▌       | 157/625 [41:57<2:02:55, 15.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 25%|██▌       | 158/625 [42:13<2:02:06, 15.69s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 25%|██▌       | 159/625 [42:30<2:04:29, 16.03s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 26%|██▌       | 160/625 [42:45<2:02:43, 15.84s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 26%|██▌       | 161/625 [43:01<2:02:20, 15.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 26%|██▌       | 162/625 [43:17<2:01:35, 15.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 26%|██▌       | 163/625 [43:33<2:02:16, 15.88s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 26%|██▌       | 164/625 [43:48<2:01:06, 15.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 26%|██▋       | 165/625 [44:04<2:00:16, 15.69s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 27%|██▋       | 166/625 [44:19<1:58:44, 15.52s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 27%|██▋       | 167/625 [44:34<1:57:55, 15.45s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 27%|██▋       | 168/625 [44:50<1:59:15, 15.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 27%|██▋       | 169/625 [45:06<1:59:58, 15.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 27%|██▋       | 170/625 [45:22<1:58:29, 15.63s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 27%|██▋       | 171/625 [45:37<1:57:03, 15.47s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 28%|██▊       | 172/625 [45:52<1:56:40, 15.45s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 28%|██▊       | 173/625 [46:08<1:56:13, 15.43s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 28%|██▊       | 174/625 [46:23<1:56:07, 15.45s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 28%|██▊       | 175/625 [46:39<1:57:22, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 28%|██▊       | 176/625 [46:55<1:56:44, 15.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 28%|██▊       | 177/625 [47:10<1:55:36, 15.48s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 28%|██▊       | 178/625 [47:25<1:55:32, 15.51s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 29%|██▊       | 179/625 [47:41<1:55:41, 15.56s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 29%|██▉       | 180/625 [47:57<1:55:54, 15.63s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 29%|██▉       | 181/625 [48:13<1:56:24, 15.73s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 29%|██▉       | 182/625 [48:28<1:54:55, 15.57s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 29%|██▉       | 183/625 [48:44<1:54:27, 15.54s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 29%|██▉       | 184/625 [48:59<1:54:05, 15.52s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 30%|██▉       | 185/625 [49:15<1:53:51, 15.53s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 30%|██▉       | 186/625 [49:31<1:55:49, 15.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 30%|██▉       | 187/625 [49:47<1:54:43, 15.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 30%|███       | 188/625 [50:02<1:54:21, 15.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 30%|███       | 189/625 [50:18<1:53:37, 15.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 30%|███       | 190/625 [50:33<1:53:46, 15.69s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 31%|███       | 191/625 [50:50<1:54:53, 15.88s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 31%|███       | 192/625 [51:05<1:53:08, 15.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 31%|███       | 193/625 [51:21<1:53:00, 15.69s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 31%|███       | 194/625 [51:36<1:51:48, 15.56s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 31%|███       | 195/625 [51:52<1:52:09, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 31%|███▏      | 196/625 [52:08<1:52:18, 15.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 32%|███▏      | 197/625 [52:24<1:52:46, 15.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 32%|███▏      | 198/625 [52:39<1:51:50, 15.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 32%|███▏      | 199/625 [52:55<1:51:32, 15.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 32%|███▏      | 200/625 [53:10<1:50:12, 15.56s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 32%|███▏      | 201/625 [53:26<1:50:37, 15.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 32%|███▏      | 202/625 [53:42<1:51:32, 15.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 32%|███▏      | 203/625 [53:58<1:50:42, 15.74s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 33%|███▎      | 204/625 [54:13<1:49:37, 15.62s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 33%|███▎      | 205/625 [54:29<1:49:33, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 33%|███▎      | 206/625 [54:44<1:48:46, 15.58s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 33%|███▎      | 207/625 [55:00<1:49:32, 15.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 33%|███▎      | 208/625 [55:16<1:49:28, 15.75s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 33%|███▎      | 209/625 [55:31<1:47:56, 15.57s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 34%|███▎      | 210/625 [55:47<1:47:47, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 34%|███▍      | 211/625 [56:02<1:46:57, 15.50s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 34%|███▍      | 212/625 [56:18<1:46:54, 15.53s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 34%|███▍      | 213/625 [56:35<1:49:19, 15.92s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 34%|███▍      | 214/625 [56:50<1:48:51, 15.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 34%|███▍      | 215/625 [57:06<1:48:36, 15.90s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 35%|███▍      | 216/625 [57:22<1:47:31, 15.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 35%|███▍      | 217/625 [57:38<1:47:38, 15.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 35%|███▍      | 218/625 [57:54<1:48:11, 15.95s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 35%|███▌      | 219/625 [58:09<1:46:41, 15.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 35%|███▌      | 220/625 [58:25<1:45:17, 15.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 35%|███▌      | 221/625 [58:40<1:44:39, 15.54s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 36%|███▌      | 222/625 [58:55<1:44:09, 15.51s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 36%|███▌      | 223/625 [59:11<1:44:27, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 36%|███▌      | 224/625 [59:27<1:44:11, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 36%|███▌      | 225/625 [59:42<1:43:41, 15.55s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 36%|███▌      | 226/625 [59:59<1:44:58, 15.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 36%|███▋      | 227/625 [1:00:14<1:44:09, 15.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 36%|███▋      | 228/625 [1:00:30<1:44:22, 15.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 37%|███▋      | 229/625 [1:00:46<1:44:30, 15.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 37%|███▋      | 230/625 [1:01:02<1:44:37, 15.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 37%|███▋      | 231/625 [1:01:18<1:44:52, 15.97s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 37%|███▋      | 232/625 [1:01:34<1:43:31, 15.80s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 37%|███▋      | 233/625 [1:01:50<1:43:31, 15.85s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 37%|███▋      | 234/625 [1:02:06<1:44:02, 15.97s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 38%|███▊      | 235/625 [1:02:21<1:43:08, 15.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 38%|███▊      | 236/625 [1:02:37<1:42:07, 15.75s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 38%|███▊      | 237/625 [1:02:52<1:41:07, 15.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 38%|███▊      | 238/625 [1:03:09<1:42:12, 15.85s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 38%|███▊      | 239/625 [1:03:25<1:42:09, 15.88s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 38%|███▊      | 240/625 [1:03:40<1:41:10, 15.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 39%|███▊      | 241/625 [1:03:56<1:40:29, 15.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 39%|███▊      | 242/625 [1:04:11<1:39:40, 15.61s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 39%|███▉      | 243/625 [1:04:27<1:39:30, 15.63s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 39%|███▉      | 244/625 [1:04:42<1:39:18, 15.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 39%|███▉      | 245/625 [1:04:59<1:40:00, 15.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 39%|███▉      | 246/625 [1:05:15<1:40:21, 15.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 40%|███▉      | 247/625 [1:05:30<1:39:27, 15.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 40%|███▉      | 248/625 [1:05:46<1:38:32, 15.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 40%|███▉      | 249/625 [1:06:02<1:39:35, 15.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 40%|████      | 250/625 [1:06:17<1:38:27, 15.75s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 40%|████      | 251/625 [1:06:33<1:37:54, 15.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 40%|████      | 252/625 [1:06:48<1:36:36, 15.54s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 40%|████      | 253/625 [1:07:03<1:35:48, 15.45s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 41%|████      | 254/625 [1:07:19<1:36:33, 15.62s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 41%|████      | 255/625 [1:07:36<1:37:37, 15.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 41%|████      | 256/625 [1:07:51<1:36:34, 15.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 41%|████      | 257/625 [1:08:07<1:36:48, 15.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 41%|████▏     | 258/625 [1:08:23<1:36:06, 15.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 41%|████▏     | 259/625 [1:08:39<1:36:40, 15.85s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 42%|████▏     | 260/625 [1:08:55<1:36:50, 15.92s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 42%|████▏     | 261/625 [1:09:10<1:35:39, 15.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 42%|████▏     | 262/625 [1:09:26<1:34:24, 15.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 42%|████▏     | 263/625 [1:09:41<1:33:40, 15.53s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 42%|████▏     | 264/625 [1:09:56<1:33:19, 15.51s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 42%|████▏     | 265/625 [1:10:12<1:34:05, 15.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 43%|████▎     | 266/625 [1:10:28<1:33:41, 15.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 43%|████▎     | 267/625 [1:10:44<1:33:02, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 43%|████▎     | 268/625 [1:10:59<1:32:24, 15.53s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 43%|████▎     | 269/625 [1:11:14<1:31:40, 15.45s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 43%|████▎     | 270/625 [1:11:30<1:31:48, 15.52s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 43%|████▎     | 271/625 [1:11:46<1:33:15, 15.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 44%|████▎     | 272/625 [1:12:02<1:32:55, 15.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 44%|████▎     | 273/625 [1:12:18<1:32:40, 15.80s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 44%|████▍     | 274/625 [1:12:34<1:32:04, 15.74s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 44%|████▍     | 275/625 [1:12:49<1:31:55, 15.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 44%|████▍     | 276/625 [1:13:06<1:32:36, 15.92s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 44%|████▍     | 277/625 [1:13:21<1:31:20, 15.75s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 44%|████▍     | 278/625 [1:13:36<1:30:22, 15.63s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 45%|████▍     | 279/625 [1:13:52<1:29:44, 15.56s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 45%|████▍     | 280/625 [1:14:07<1:29:42, 15.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 45%|████▍     | 281/625 [1:14:24<1:30:22, 15.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 45%|████▌     | 282/625 [1:14:39<1:29:52, 15.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 45%|████▌     | 283/625 [1:14:55<1:29:16, 15.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 45%|████▌     | 284/625 [1:15:10<1:28:32, 15.58s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 46%|████▌     | 285/625 [1:15:25<1:27:44, 15.48s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 46%|████▌     | 286/625 [1:15:41<1:27:52, 15.55s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 46%|████▌     | 287/625 [1:15:57<1:29:04, 15.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 46%|████▌     | 288/625 [1:16:13<1:27:49, 15.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 46%|████▌     | 289/625 [1:16:28<1:27:17, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 46%|████▋     | 290/625 [1:16:43<1:26:24, 15.48s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 47%|████▋     | 291/625 [1:16:59<1:26:07, 15.47s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 47%|████▋     | 292/625 [1:17:14<1:25:52, 15.47s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 47%|████▋     | 293/625 [1:17:30<1:26:24, 15.61s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 47%|████▋     | 294/625 [1:17:46<1:25:53, 15.57s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 47%|████▋     | 295/625 [1:18:01<1:25:20, 15.52s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 47%|████▋     | 296/625 [1:18:17<1:25:14, 15.55s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 48%|████▊     | 297/625 [1:18:32<1:24:54, 15.53s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 48%|████▊     | 298/625 [1:18:48<1:24:52, 15.57s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 48%|████▊     | 299/625 [1:19:04<1:25:02, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 48%|████▊     | 300/625 [1:19:19<1:24:04, 15.52s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 48%|████▊     | 301/625 [1:19:35<1:23:52, 15.53s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 48%|████▊     | 302/625 [1:19:50<1:22:57, 15.41s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 48%|████▊     | 303/625 [1:20:06<1:24:38, 15.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 49%|████▊     | 304/625 [1:20:23<1:25:20, 15.95s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 49%|████▉     | 305/625 [1:20:38<1:24:13, 15.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 49%|████▉     | 306/625 [1:20:53<1:22:47, 15.57s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 49%|████▉     | 307/625 [1:21:08<1:22:01, 15.48s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 49%|████▉     | 308/625 [1:21:24<1:21:40, 15.46s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 49%|████▉     | 309/625 [1:21:39<1:21:22, 15.45s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 50%|████▉     | 310/625 [1:21:55<1:22:16, 15.67s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 50%|████▉     | 311/625 [1:22:12<1:22:50, 15.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 50%|████▉     | 312/625 [1:22:27<1:22:28, 15.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 50%|█████     | 313/625 [1:22:43<1:22:03, 15.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 50%|█████     | 314/625 [1:22:59<1:21:25, 15.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 50%|█████     | 315/625 [1:23:15<1:21:47, 15.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 51%|█████     | 316/625 [1:23:30<1:21:26, 15.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 51%|█████     | 317/625 [1:23:46<1:20:12, 15.62s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 51%|█████     | 318/625 [1:24:01<1:19:54, 15.62s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 51%|█████     | 319/625 [1:24:17<1:20:08, 15.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 51%|█████     | 320/625 [1:24:33<1:19:27, 15.63s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 51%|█████▏    | 321/625 [1:24:49<1:19:38, 15.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 52%|█████▏    | 322/625 [1:25:05<1:20:06, 15.86s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 52%|█████▏    | 323/625 [1:25:20<1:19:33, 15.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 52%|█████▏    | 324/625 [1:25:36<1:19:20, 15.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 52%|█████▏    | 325/625 [1:25:52<1:18:36, 15.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 52%|█████▏    | 326/625 [1:26:08<1:18:27, 15.74s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 52%|█████▏    | 327/625 [1:26:24<1:18:45, 15.86s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 52%|█████▏    | 328/625 [1:26:39<1:17:28, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 53%|█████▎    | 329/625 [1:26:54<1:16:53, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 53%|█████▎    | 330/625 [1:27:10<1:16:18, 15.52s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 53%|█████▎    | 331/625 [1:27:26<1:17:00, 15.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 53%|█████▎    | 332/625 [1:27:42<1:17:13, 15.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 53%|█████▎    | 333/625 [1:27:58<1:16:52, 15.80s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 53%|█████▎    | 334/625 [1:28:13<1:15:38, 15.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 54%|█████▎    | 335/625 [1:28:28<1:14:41, 15.45s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 54%|█████▍    | 336/625 [1:28:43<1:14:06, 15.38s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 54%|█████▍    | 337/625 [1:28:59<1:14:15, 15.47s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 54%|█████▍    | 338/625 [1:29:15<1:15:22, 15.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 54%|█████▍    | 339/625 [1:29:31<1:14:35, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 54%|█████▍    | 340/625 [1:29:46<1:13:47, 15.54s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 55%|█████▍    | 341/625 [1:30:02<1:13:40, 15.56s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 55%|█████▍    | 342/625 [1:31:08<2:25:41, 30.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 55%|█████▍    | 343/625 [1:31:24<2:03:28, 26.27s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 55%|█████▌    | 344/625 [1:31:39<1:47:53, 23.04s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 55%|█████▌    | 345/625 [1:31:55<1:37:20, 20.86s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 55%|█████▌    | 346/625 [1:32:10<1:29:11, 19.18s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 56%|█████▌    | 347/625 [1:32:26<1:24:30, 18.24s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 56%|█████▌    | 348/625 [1:32:42<1:20:49, 17.51s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 56%|█████▌    | 349/625 [1:32:58<1:18:09, 16.99s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 56%|█████▌    | 350/625 [1:33:13<1:15:45, 16.53s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 56%|█████▌    | 351/625 [1:33:29<1:13:50, 16.17s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 56%|█████▋    | 352/625 [1:33:44<1:13:08, 16.07s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 56%|█████▋    | 353/625 [1:34:00<1:12:28, 15.99s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 57%|█████▋    | 354/625 [1:34:16<1:11:45, 15.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 57%|█████▋    | 355/625 [1:34:31<1:10:43, 15.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 57%|█████▋    | 356/625 [1:34:47<1:10:09, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 57%|█████▋    | 357/625 [1:35:02<1:09:36, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 57%|█████▋    | 358/625 [1:35:18<1:09:49, 15.69s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 57%|█████▋    | 359/625 [1:35:34<1:09:42, 15.73s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 58%|█████▊    | 360/625 [1:35:49<1:09:12, 15.67s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 58%|█████▊    | 361/625 [1:36:05<1:08:36, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 58%|█████▊    | 362/625 [1:36:20<1:08:01, 15.52s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 58%|█████▊    | 363/625 [1:36:36<1:08:10, 15.61s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 58%|█████▊    | 364/625 [1:36:52<1:08:18, 15.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 58%|█████▊    | 365/625 [1:37:08<1:07:54, 15.67s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 59%|█████▊    | 366/625 [1:37:23<1:07:15, 15.58s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 59%|█████▊    | 367/625 [1:37:38<1:06:48, 15.54s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 59%|█████▉    | 368/625 [1:37:54<1:06:32, 15.54s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 59%|█████▉    | 369/625 [1:38:10<1:06:51, 15.67s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 59%|█████▉    | 370/625 [1:38:26<1:06:49, 15.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 59%|█████▉    | 371/625 [1:38:41<1:05:34, 15.49s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 60%|█████▉    | 372/625 [1:38:56<1:05:05, 15.43s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 60%|█████▉    | 373/625 [1:39:11<1:04:37, 15.39s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 60%|█████▉    | 374/625 [1:39:27<1:04:27, 15.41s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 60%|██████    | 375/625 [1:39:43<1:04:56, 15.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 60%|██████    | 376/625 [1:39:58<1:04:56, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 60%|██████    | 377/625 [1:40:14<1:04:17, 15.55s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 60%|██████    | 378/625 [1:40:29<1:03:47, 15.50s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 61%|██████    | 379/625 [1:40:44<1:03:17, 15.44s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 61%|██████    | 380/625 [1:41:00<1:03:00, 15.43s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 61%|██████    | 381/625 [1:41:16<1:03:35, 15.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 61%|██████    | 382/625 [1:41:32<1:04:10, 15.84s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 61%|██████▏   | 383/625 [1:41:48<1:03:08, 15.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 61%|██████▏   | 384/625 [1:42:03<1:02:22, 15.53s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 62%|██████▏   | 385/625 [1:42:19<1:02:36, 15.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 62%|██████▏   | 386/625 [1:42:35<1:03:04, 15.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 62%|██████▏   | 387/625 [1:42:50<1:02:02, 15.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 62%|██████▏   | 388/625 [1:43:01<55:52, 14.15s/it]  

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 62%|██████▏   | 389/625 [1:43:11<51:27, 13.08s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 62%|██████▏   | 390/625 [1:43:23<49:16, 12.58s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 63%|██████▎   | 391/625 [1:43:34<46:57, 12.04s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 63%|██████▎   | 392/625 [1:43:44<45:09, 11.63s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 63%|██████▎   | 393/625 [1:43:56<44:35, 11.53s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 63%|██████▎   | 394/625 [1:44:06<43:14, 11.23s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 63%|██████▎   | 395/625 [1:44:18<43:13, 11.28s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 63%|██████▎   | 396/625 [1:44:28<42:25, 11.12s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 64%|██████▎   | 397/625 [1:44:39<41:32, 10.93s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 64%|██████▎   | 398/625 [1:44:49<40:51, 10.80s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 64%|██████▍   | 399/625 [1:45:00<40:36, 10.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 64%|██████▍   | 400/625 [1:45:11<41:05, 10.96s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 64%|██████▍   | 401/625 [1:45:22<40:33, 10.86s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 64%|██████▍   | 402/625 [1:45:32<39:53, 10.73s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 64%|██████▍   | 403/625 [1:45:43<39:24, 10.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 65%|██████▍   | 404/625 [1:45:54<39:37, 10.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 65%|██████▍   | 405/625 [1:46:05<39:51, 10.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 65%|██████▍   | 406/625 [1:46:15<39:13, 10.75s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 65%|██████▌   | 407/625 [1:46:26<38:44, 10.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 65%|██████▌   | 408/625 [1:46:36<38:19, 10.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 65%|██████▌   | 409/625 [1:46:48<38:51, 10.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 66%|██████▌   | 410/625 [1:46:58<38:34, 10.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 66%|██████▌   | 411/625 [1:47:09<38:06, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 66%|██████▌   | 412/625 [1:47:19<37:40, 10.61s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 66%|██████▌   | 413/625 [1:47:30<37:37, 10.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 66%|██████▌   | 414/625 [1:47:41<38:13, 10.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 66%|██████▋   | 415/625 [1:47:53<38:51, 11.10s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 67%|██████▋   | 416/625 [1:48:04<38:02, 10.92s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 67%|██████▋   | 417/625 [1:48:14<37:23, 10.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 67%|██████▋   | 418/625 [1:48:25<37:11, 10.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 67%|██████▋   | 419/625 [1:48:36<37:34, 10.94s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 67%|██████▋   | 420/625 [1:48:47<37:03, 10.84s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 67%|██████▋   | 421/625 [1:48:57<36:27, 10.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 68%|██████▊   | 422/625 [1:49:08<36:01, 10.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 68%|██████▊   | 423/625 [1:49:19<36:12, 10.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 68%|██████▊   | 424/625 [1:49:30<36:19, 10.85s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 68%|██████▊   | 425/625 [1:49:40<35:44, 10.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 68%|██████▊   | 426/625 [1:49:51<35:17, 10.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 68%|██████▊   | 427/625 [1:50:01<34:53, 10.57s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 68%|██████▊   | 428/625 [1:50:12<35:25, 10.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 69%|██████▊   | 429/625 [1:50:23<35:10, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 69%|██████▉   | 430/625 [1:50:33<34:42, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 69%|██████▉   | 431/625 [1:50:44<34:20, 10.62s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 69%|██████▉   | 432/625 [1:50:55<34:07, 10.61s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 69%|██████▉   | 433/625 [1:51:06<34:40, 10.84s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 69%|██████▉   | 434/625 [1:51:17<34:17, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 70%|██████▉   | 435/625 [1:51:27<33:46, 10.67s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 70%|██████▉   | 436/625 [1:51:37<33:22, 10.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 70%|██████▉   | 437/625 [1:51:49<34:18, 10.95s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 70%|███████   | 438/625 [1:52:00<34:19, 11.01s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 70%|███████   | 439/625 [1:52:11<33:36, 10.84s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 70%|███████   | 440/625 [1:52:21<33:06, 10.74s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 71%|███████   | 441/625 [1:52:32<32:38, 10.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 71%|███████   | 442/625 [1:52:43<32:55, 10.80s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 71%|███████   | 443/625 [1:52:54<32:48, 10.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 71%|███████   | 444/625 [1:53:04<32:15, 10.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 71%|███████   | 445/625 [1:53:15<31:54, 10.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 71%|███████▏  | 446/625 [1:53:25<31:36, 10.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 72%|███████▏  | 447/625 [1:53:36<32:01, 10.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 72%|███████▏  | 448/625 [1:53:47<31:45, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 72%|███████▏  | 449/625 [1:53:58<31:26, 10.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 72%|███████▏  | 450/625 [1:54:08<31:02, 10.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 72%|███████▏  | 451/625 [1:54:19<30:58, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 72%|███████▏  | 452/625 [1:54:30<31:33, 10.95s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 72%|███████▏  | 453/625 [1:54:42<31:29, 10.99s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 73%|███████▎  | 454/625 [1:54:52<30:59, 10.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 73%|███████▎  | 455/625 [1:55:03<30:26, 10.74s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 73%|███████▎  | 456/625 [1:55:14<30:26, 10.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 73%|███████▎  | 457/625 [1:55:25<30:40, 10.96s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 73%|███████▎  | 458/625 [1:55:36<30:14, 10.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 73%|███████▎  | 459/625 [1:55:47<30:17, 10.95s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 74%|███████▎  | 460/625 [1:55:57<29:43, 10.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 74%|███████▍  | 461/625 [1:56:08<29:53, 10.94s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 74%|███████▍  | 462/625 [1:56:20<29:52, 11.00s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 74%|███████▍  | 463/625 [1:56:30<29:16, 10.84s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 74%|███████▍  | 464/625 [1:56:41<28:50, 10.75s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 74%|███████▍  | 465/625 [1:56:52<28:53, 10.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 75%|███████▍  | 466/625 [1:57:03<29:15, 11.04s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 75%|███████▍  | 467/625 [1:57:14<28:53, 10.97s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 75%|███████▍  | 468/625 [1:57:24<28:20, 10.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 75%|███████▌  | 469/625 [1:57:35<28:01, 10.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 75%|███████▌  | 470/625 [1:57:46<28:03, 10.86s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 75%|███████▌  | 471/625 [1:57:58<28:25, 11.08s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 76%|███████▌  | 472/625 [1:58:08<27:53, 10.94s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 76%|███████▌  | 473/625 [1:58:19<27:22, 10.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 76%|███████▌  | 474/625 [1:58:29<26:53, 10.69s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 76%|███████▌  | 475/625 [1:58:41<27:10, 10.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 76%|███████▌  | 476/625 [1:58:52<27:08, 10.93s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 76%|███████▋  | 477/625 [1:59:02<26:40, 10.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 76%|███████▋  | 478/625 [1:59:13<26:37, 10.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 77%|███████▋  | 479/625 [1:59:24<26:10, 10.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 77%|███████▋  | 480/625 [1:59:35<26:29, 10.96s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 77%|███████▋  | 481/625 [1:59:46<26:31, 11.05s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 77%|███████▋  | 482/625 [1:59:57<25:56, 10.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 77%|███████▋  | 483/625 [2:00:08<25:36, 10.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 77%|███████▋  | 484/625 [2:00:19<26:06, 11.11s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 78%|███████▊  | 485/625 [2:00:30<25:38, 10.99s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 78%|███████▊  | 486/625 [2:00:41<25:08, 10.85s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 78%|███████▊  | 487/625 [2:00:51<24:44, 10.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 78%|███████▊  | 488/625 [2:01:02<24:47, 10.86s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 78%|███████▊  | 489/625 [2:01:14<25:06, 11.08s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 78%|███████▊  | 490/625 [2:01:24<24:31, 10.90s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 79%|███████▊  | 491/625 [2:01:35<24:02, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 79%|███████▊  | 492/625 [2:01:45<23:40, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 79%|███████▉  | 493/625 [2:01:57<24:02, 10.93s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 79%|███████▉  | 494/625 [2:02:07<23:44, 10.88s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 79%|███████▉  | 495/625 [2:02:18<23:33, 10.88s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 79%|███████▉  | 496/625 [2:02:29<23:09, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 80%|███████▉  | 497/625 [2:02:39<22:51, 10.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 80%|███████▉  | 498/625 [2:02:51<23:04, 10.90s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 80%|███████▉  | 499/625 [2:03:02<22:49, 10.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 80%|████████  | 500/625 [2:03:12<22:25, 10.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 80%|████████  | 501/625 [2:03:23<22:03, 10.67s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 80%|████████  | 502/625 [2:03:33<21:54, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 80%|████████  | 503/625 [2:03:45<22:37, 11.13s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 81%|████████  | 504/625 [2:03:56<22:03, 10.93s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 81%|████████  | 505/625 [2:04:06<21:38, 10.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 81%|████████  | 506/625 [2:04:17<21:16, 10.73s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 81%|████████  | 507/625 [2:04:28<21:25, 10.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 81%|████████▏ | 508/625 [2:04:39<21:10, 10.86s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 81%|████████▏ | 509/625 [2:04:49<20:46, 10.75s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 82%|████████▏ | 510/625 [2:05:00<20:25, 10.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 82%|████████▏ | 511/625 [2:05:11<20:15, 10.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 82%|████████▏ | 512/625 [2:05:22<20:32, 10.90s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 82%|████████▏ | 513/625 [2:05:33<20:12, 10.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 82%|████████▏ | 514/625 [2:05:43<19:49, 10.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 82%|████████▏ | 515/625 [2:05:54<19:31, 10.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 83%|████████▎ | 516/625 [2:06:04<19:23, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 83%|████████▎ | 517/625 [2:06:16<19:36, 10.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 83%|████████▎ | 518/625 [2:06:26<19:17, 10.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 83%|████████▎ | 519/625 [2:06:37<18:55, 10.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 83%|████████▎ | 520/625 [2:06:47<18:37, 10.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 83%|████████▎ | 521/625 [2:06:58<18:38, 10.75s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 84%|████████▎ | 522/625 [2:07:10<18:41, 10.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 84%|████████▎ | 523/625 [2:07:20<18:22, 10.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 84%|████████▍ | 524/625 [2:07:31<18:01, 10.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 84%|████████▍ | 525/625 [2:07:42<18:11, 10.91s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 84%|████████▍ | 526/625 [2:07:53<18:00, 10.91s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 84%|████████▍ | 527/625 [2:08:04<18:01, 11.04s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 84%|████████▍ | 528/625 [2:08:15<17:39, 10.92s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 85%|████████▍ | 529/625 [2:08:25<17:15, 10.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 85%|████████▍ | 530/625 [2:08:36<16:54, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 85%|████████▍ | 531/625 [2:08:47<16:53, 10.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 85%|████████▌ | 532/625 [2:08:58<16:54, 10.91s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 85%|████████▌ | 533/625 [2:09:09<16:31, 10.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 85%|████████▌ | 534/625 [2:09:19<16:13, 10.69s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 86%|████████▌ | 535/625 [2:09:30<15:56, 10.63s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 86%|████████▌ | 536/625 [2:09:41<15:58, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 86%|████████▌ | 537/625 [2:09:52<15:53, 10.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 86%|████████▌ | 538/625 [2:10:02<15:33, 10.73s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 86%|████████▌ | 539/625 [2:10:13<15:16, 10.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 86%|████████▋ | 540/625 [2:10:23<15:00, 10.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 87%|████████▋ | 541/625 [2:10:34<15:04, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 87%|████████▋ | 542/625 [2:10:45<14:54, 10.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 87%|████████▋ | 543/625 [2:10:56<14:36, 10.69s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 87%|████████▋ | 544/625 [2:11:06<14:20, 10.62s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 87%|████████▋ | 545/625 [2:11:17<14:09, 10.62s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 87%|████████▋ | 546/625 [2:11:28<14:15, 10.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 88%|████████▊ | 547/625 [2:11:39<14:21, 11.05s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 88%|████████▊ | 548/625 [2:11:50<13:58, 10.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 88%|████████▊ | 549/625 [2:12:00<13:38, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 88%|████████▊ | 550/625 [2:12:11<13:23, 10.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 88%|████████▊ | 551/625 [2:12:23<13:28, 10.93s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 88%|████████▊ | 552/625 [2:12:33<13:13, 10.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 88%|████████▊ | 553/625 [2:12:44<12:54, 10.76s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 89%|████████▊ | 554/625 [2:12:54<12:38, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 89%|████████▉ | 555/625 [2:13:05<12:31, 10.74s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 89%|████████▉ | 556/625 [2:13:16<12:33, 10.92s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 89%|████████▉ | 557/625 [2:13:27<12:13, 10.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 89%|████████▉ | 558/625 [2:13:37<11:57, 10.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 89%|████████▉ | 559/625 [2:13:48<11:42, 10.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 90%|████████▉ | 560/625 [2:13:59<11:40, 10.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 90%|████████▉ | 561/625 [2:14:10<11:33, 10.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 90%|████████▉ | 562/625 [2:14:21<11:16, 10.73s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 90%|█████████ | 563/625 [2:14:31<11:01, 10.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 90%|█████████ | 564/625 [2:14:41<10:46, 10.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 90%|█████████ | 565/625 [2:14:53<10:48, 10.82s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 91%|█████████ | 566/625 [2:15:04<10:36, 10.80s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 91%|█████████ | 567/625 [2:15:14<10:21, 10.71s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 91%|█████████ | 568/625 [2:15:25<10:08, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 91%|█████████ | 569/625 [2:15:36<10:09, 10.88s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 91%|█████████ | 570/625 [2:15:47<10:02, 10.96s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 91%|█████████▏| 571/625 [2:15:58<09:44, 10.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 92%|█████████▏| 572/625 [2:16:08<09:29, 10.74s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 92%|█████████▏| 573/625 [2:16:19<09:14, 10.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 92%|█████████▏| 574/625 [2:16:30<09:12, 10.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 92%|█████████▏| 575/625 [2:16:41<09:01, 10.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 92%|█████████▏| 576/625 [2:16:51<08:46, 10.74s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 92%|█████████▏| 577/625 [2:17:02<08:31, 10.66s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 92%|█████████▏| 578/625 [2:17:12<08:19, 10.63s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 93%|█████████▎| 579/625 [2:17:24<08:20, 10.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 93%|█████████▎| 580/625 [2:17:34<08:05, 10.79s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 93%|█████████▎| 581/625 [2:17:45<07:50, 10.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 93%|█████████▎| 582/625 [2:17:55<07:37, 10.63s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 93%|█████████▎| 583/625 [2:18:06<07:26, 10.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 93%|█████████▎| 584/625 [2:18:17<07:25, 10.87s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 94%|█████████▎| 585/625 [2:18:28<07:11, 10.80s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 94%|█████████▍| 586/625 [2:18:38<06:57, 10.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 94%|█████████▍| 587/625 [2:18:49<06:44, 10.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 94%|█████████▍| 588/625 [2:19:00<06:35, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 94%|█████████▍| 589/625 [2:19:11<06:31, 10.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 94%|█████████▍| 590/625 [2:19:22<06:17, 10.80s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 95%|█████████▍| 591/625 [2:19:32<06:03, 10.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 95%|█████████▍| 592/625 [2:19:43<05:55, 10.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 95%|█████████▍| 593/625 [2:19:54<05:48, 10.90s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 95%|█████████▌| 594/625 [2:20:05<05:33, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 95%|█████████▌| 595/625 [2:20:15<05:20, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 95%|█████████▌| 596/625 [2:20:26<05:07, 10.61s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 96%|█████████▌| 597/625 [2:20:37<05:00, 10.73s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 96%|█████████▌| 598/625 [2:20:48<04:52, 10.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 96%|█████████▌| 599/625 [2:20:58<04:38, 10.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 96%|█████████▌| 600/625 [2:21:09<04:26, 10.65s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 96%|█████████▌| 601/625 [2:21:19<04:14, 10.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 96%|█████████▋| 602/625 [2:21:30<04:07, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 96%|█████████▋| 603/625 [2:21:41<03:56, 10.77s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 97%|█████████▋| 604/625 [2:21:52<03:44, 10.68s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 97%|█████████▋| 605/625 [2:22:02<03:32, 10.63s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 97%|█████████▋| 606/625 [2:22:13<03:21, 10.59s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 97%|█████████▋| 607/625 [2:22:24<03:14, 10.80s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 97%|█████████▋| 608/625 [2:22:35<03:03, 10.78s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 97%|█████████▋| 609/625 [2:22:45<02:50, 10.67s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 98%|█████████▊| 610/625 [2:22:55<02:39, 10.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 98%|█████████▊| 611/625 [2:23:06<02:28, 10.60s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 98%|█████████▊| 612/625 [2:23:18<02:23, 11.01s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 98%|█████████▊| 613/625 [2:23:28<02:10, 10.83s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 98%|█████████▊| 614/625 [2:23:39<01:57, 10.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 98%|█████████▊| 615/625 [2:23:50<01:46, 10.69s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 99%|█████████▊| 616/625 [2:24:01<01:38, 10.89s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 99%|█████████▊| 617/625 [2:24:12<01:26, 10.81s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 99%|█████████▉| 618/625 [2:24:22<01:14, 10.70s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 99%|█████████▉| 619/625 [2:24:32<01:03, 10.61s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 99%|█████████▉| 620/625 [2:24:43<00:53, 10.69s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


 99%|█████████▉| 621/625 [2:24:54<00:43, 10.84s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


100%|█████████▉| 622/625 [2:25:05<00:32, 10.72s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


100%|█████████▉| 623/625 [2:25:15<00:21, 10.64s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


100%|█████████▉| 624/625 [2:25:26<00:10, 10.58s/it]

DataParallel Parameters: 56.574595M
DataParallel Parameters: 56.574595M


100%|█████████▉| 624/625 [2:25:37<00:14, 14.00s/it]

