Ref:

Most of the functions to preprocess data is from (Diffpose). And the structure of the denoiser is based from (Diffpose). Our team improved the beta scheduling in the diffusion generative model. And we modified and implemented the idea from (EDM) instead of the traditional (DDIM) to generate steps in the diffusion model.

(Diffpose) https://github.com/GONGJIA0208/Diffpose

(EDM) https://github.com/NVlabs/edm/tree/main

(DDIM) https://github.com/ermongroup/ddim

In [1]:
import os
import logging
import time
import glob
import argparse

import os.path as path
import numpy as np
import tqdm
import torch
import torch.nn as nn
import torch.utils.data as data
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import scipy.sparse as sp
import copy, math
import torch.optim as optim
from torch.nn import init
from torch.nn.parameter import Parameter
from torch.utils.data import Dataset
from functools import reduce

In [2]:
from google.colab import drive
drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/E533-3D-Pose-Estimates-and-Diffusion-Method')

Mounted at /content/drive


In [3]:
from common.h36m_dataset import Human36mDataset
from common.data_utils import fetch_me, read_3d_data_me, create_2d_data
from common.loss import mpjpe, p_mpjpe
from common.utils import test_calculation, define_error_list, print_error

# Define parameters

In [4]:
# Model:
config_model = {"hid_dim": 96,
    "emd_dim": 96,
    "coords_dim": [5,5],
    "num_layer": 5,
    "n_head": 4,
    "dropout": 0.25,
    "n_pts": 17}

In [5]:
# Training:
batch_size = 1024
n_epochs = 30
num_workers = 32

In [6]:
# Testing:
test_times = 1
test_timesteps = 2
test_num_diffusion_timesteps = 24

In [7]:
# Optimizer:
lr = 0.00002
lr_gamma = 0.9
eps = 0.00000001
amsgrad = False
decay = 60
grad_clip = 1.0

In [8]:
# diffusion:
beta_schedule_1 = "linear"
beta_schedule_2 = "cosine"
beta_start = 0.0001
beta_end = 0.001
num_diffusion_timesteps = 51

In [9]:
eta = 0.0   # eta used to control the variances of sigma

In [10]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Create adjacent matrix

In [11]:
def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx

In [12]:
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

In [13]:
def adj_mx_from_edges(num_pts, edges, sparse=True):
    edges = np.array(edges, dtype=np.int32)
    data, i, j = np.ones(edges.shape[0]), edges[:, 0], edges[:, 1]
    adj_mx = sp.coo_matrix((data, (i, j)), shape=(num_pts, num_pts), dtype=np.float32)

    # build symmetric adjacency matrix
    adj_mx = adj_mx + adj_mx.T.multiply(adj_mx.T > adj_mx) - adj_mx.multiply(adj_mx.T > adj_mx)
    adj_mx = normalize(adj_mx + sp.eye(adj_mx.shape[0]))
    if sparse:
        adj_mx = sparse_mx_to_torch_sparse_tensor(adj_mx)
    else:
        adj_mx = torch.tensor(adj_mx.todense(), dtype=torch.float)
    return adj_mx

# GCN diffusion model

## ChebNet convolution

In [14]:
class ChebConv(nn.Module):
    """
    The ChebNet convolution operation.

    :param in_c: int, number of input channels.
    :param out_c: int, number of output channels.
    :param K: int, the order of Chebyshev Polynomial.
    """
    def __init__(self, in_c, out_c, K, bias=True, normalize=True):
        super(ChebConv, self).__init__()
        self.normalize = normalize

        self.weight = nn.Parameter(torch.Tensor(K + 1, 1, in_c, out_c))  # [K+1, 1, in_c, out_c]
        init.xavier_normal_(self.weight)

        if bias:
            self.bias = nn.Parameter(torch.Tensor(1, 1, out_c))
            init.zeros_(self.bias)
        else:
            self.register_parameter("bias", None)

        self.K = K + 1

    def forward(self, inputs, graph):
        """
        :param inputs: the input data, [B, N, C]
        :param graph: the graph structure, [N, N]
        :return: convolution result, [B, N, D]
        """
        L = ChebConv.get_laplacian(graph, self.normalize)  # [N, N]
        mul_L = self.cheb_polynomial(L).unsqueeze(1)   # [K, 1, N, N]

        result = torch.matmul(mul_L, inputs)  # [K, B, N, C]

        result = torch.matmul(result, self.weight)  # [K, B, N, D]
        result = torch.sum(result, dim=0) + self.bias  # [B, N, D]

        return result

    def cheb_polynomial(self, laplacian):
        """
        Compute the Chebyshev Polynomial, according to the graph laplacian.

        :param laplacian: the graph laplacian, [N, N].
        :return: the multi order Chebyshev laplacian, [K, N, N].
        """
        N = laplacian.size(0)  # [N, N]
        multi_order_laplacian = torch.zeros([self.K, N, N], device=laplacian.device, dtype=torch.float)  # [K, N, N]
        multi_order_laplacian[0] = torch.eye(N, device=laplacian.device, dtype=torch.float)

        if self.K == 1:
            return multi_order_laplacian
        else:
            multi_order_laplacian[1] = laplacian
            if self.K == 2:
                return multi_order_laplacian
            else:
                for k in range(2, self.K):
                    multi_order_laplacian[k] = 2 * torch.mm(laplacian, multi_order_laplacian[k-1]) - \
                                               multi_order_laplacian[k-2]

        return multi_order_laplacian

    @staticmethod
    def get_laplacian(graph, normalize):
        """
        return the laplacian of the graph.

        :param graph: the graph structure without self loop, [N, N].
        :param normalize: whether to used the normalized laplacian.
        :return: graph laplacian.
        """
        if normalize:

            D = torch.diag(torch.sum(graph, dim=-1) ** (-1 / 2))
            L = torch.eye(graph.size(0), device=graph.device, dtype=graph.dtype) - torch.mm(torch.mm(D, graph), D)
        else:
            D = torch.diag(torch.sum(graph, dim=-1))
            L = D - graph
        return L

## Define Multiheaded attention

In [15]:
class MultiHeadedAttention(nn.Module):

    def __init__(self, h, d_model, dropout=0.1):
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        self.d_k = d_model // h
        self.h = h
        self.linears = nn.ModuleList([copy.deepcopy(nn.Linear(d_model, d_model)) for _ in range(4)])
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, query, key, value, mask=None):

        if mask is not None:
            mask = mask.unsqueeze(1)
        nbatches = query.size(0)

        Q, K, V = [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
                   for l, x in zip(self.linears, (query, key, value))]

        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
        scores = scores.masked_fill(mask == 0, -1e9)
        p_attn = F.softmax(scores, dim=-1)
        p_attn = self.dropout(p_attn)
        x = torch.matmul(p_attn, V)
        self.attn = p_attn

        x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_k)

        return self.linears[-1](x)

## Define GCN layer

In [16]:
class LAM_Gconv(nn.Module):

    def __init__(self, in_features, out_features, activation=nn.ReLU(inplace=True)):
        super(LAM_Gconv, self).__init__()
        self.fc = nn.Linear(in_features=in_features, out_features=out_features)
        self.activation = activation

    def laplacian(self, A_hat):
        D_hat = (torch.sum(A_hat, 0) + 1e-5) ** (-0.5)
        L = D_hat * A_hat * D_hat
        return L

    def laplacian_batch(self, A_hat):
        batch, N = A_hat.shape[:2]
        D_hat = (torch.sum(A_hat, 1) + 1e-5) ** (-0.5)
        L = D_hat.view(batch, N, 1) * A_hat * D_hat.view(batch, 1, N)
        return L

    def forward(self, X, A):
        batch = X.size(0)
        A_hat = A.unsqueeze(0).repeat(batch, 1, 1)
        X = self.fc(torch.bmm(self.laplacian_batch(A_hat), X))
        if self.activation is not None:
            X = self.activation(X)
        return X

## Define graphnet

In [17]:
class GraphNet(nn.Module):

    def __init__(self, in_features=2, out_features=2, n_pts=21):
        super(GraphNet, self).__init__()

        self.A_hat = Parameter(torch.eye(n_pts).float(), requires_grad=True)
        self.gconv1 = LAM_Gconv(in_features, in_features * 2)
        self.gconv2 = LAM_Gconv(in_features * 2, out_features, activation=None)

    def forward(self, X):
        X_0 = self.gconv1(X, self.A_hat)
        X_1 = self.gconv2(X_0, self.A_hat)
        return X_1

## Define residual differentiation

### Define _GraphConv

In [18]:
class _GraphConv(nn.Module):
    def __init__(self, input_dim, output_dim, p_dropout=None):
        super(_GraphConv, self).__init__()

        self.gconv = ChebConv(input_dim, output_dim, K=2)
        self.relu = nn.ReLU()

        if p_dropout is not None:
            self.dropout = nn.Dropout(p_dropout)
        else:
            self.dropout = None

    def forward(self, x, adj):
        x = self.gconv(x, adj)
        if self.dropout is not None:
            x = self.dropout(self.relu(x))

        x = self.relu(x)
        return x

In [19]:
class _ResChebGC_diff(nn.Module):
    def __init__(self, adj, input_dim, output_dim, emd_dim, hid_dim, p_dropout):
        super(_ResChebGC_diff, self).__init__()
        self.adj = adj
        self.gconv1 = _GraphConv(input_dim, hid_dim, p_dropout)
        self.gconv2 = _GraphConv(hid_dim, output_dim, p_dropout)
        ### time embedding ###
        self.temb_proj = torch.nn.Linear(emd_dim,hid_dim)

    def forward(self, x, temb):
        residual = x
        out = self.gconv1(x, self.adj)
        out = out + self.temb_proj(temb*torch.sigmoid(temb))[:, None, :]
        out = self.gconv2(out, self.adj)
        return residual + out

## Define encoder for self-attention

In [20]:
class LayerNorm(nn.Module):

    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        # features=layer.size=512
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2

In [21]:
class SublayerConnection(nn.Module):

    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        return x + self.dropout(sublayer(self.norm(x)))

In [22]:
class GraAttenLayer(nn.Module):
    "Encoder is made up of self-attn and feed forward (defined below)"

    def __init__(self, size, self_attn, feed_forward, dropout):
        super(GraAttenLayer, self).__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.sublayer = nn.ModuleList([copy.deepcopy(SublayerConnection(size, dropout)) for _ in range(2)])
        self.size = size

    def forward(self, x, mask):
        x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, mask))
        return self.sublayer[1](x, self.feed_forward)

## Define timestep embedding

In [23]:
def SinusoidalEmbeddings(timesteps, embedding_dim):

    half_dim = embedding_dim // 2
    emb = math.log(10000) / (half_dim - 1)
    emb = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb)
    emb = emb.to(device=timesteps.device)
    emb = timesteps.float()[:, None] * emb[None, :]
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
    return emb


# Build GCN diffusion model

In [24]:
class GCNdiff(nn.Module):
    def __init__(self, adj, config):
        super(GCNdiff, self).__init__()

        self.adj = adj
        self.config = config
        ### load gcn configuration ###
        self.hid_dim, self.emd_dim, self.coords_dim, num_layers, n_head, dropout, n_pts = \
            config['hid_dim'], config['emd_dim'], config['coords_dim'], \
                config['num_layer'], config['n_head'], config['dropout'], config['n_pts']

        self.hid_dim = self.hid_dim
        self.emd_dim = self.hid_dim*4

        ### Generate Graphformer  ###
        self.n_layers = num_layers

        _gconv_input = ChebConv(in_c=self.coords_dim[0], out_c=self.hid_dim, K=2)
        _gconv_layers = []
        _attention_layer = []

        dim_model = self.hid_dim
        c = copy.deepcopy
        attn = MultiHeadedAttention(n_head, dim_model)
        gcn = GraphNet(in_features=dim_model, out_features=dim_model, n_pts=n_pts)

        for i in range(num_layers):
            _gconv_layers.append(_ResChebGC_diff(adj=adj, input_dim=self.hid_dim, output_dim=self.hid_dim,
                emd_dim=self.emd_dim, hid_dim=self.hid_dim, p_dropout=0.1))
            _attention_layer.append(GraAttenLayer(dim_model, c(attn), c(gcn), dropout))

        self.gconv_input = _gconv_input
        self.gconv_layers = nn.ModuleList(_gconv_layers)
        self.atten_layers = nn.ModuleList(_attention_layer)
        self.gconv_output = ChebConv(in_c=dim_model, out_c=self.coords_dim[1], K=2)


        ### diffusion configuration  ###
        self.temb = nn.Module()
        self.temb.dense = nn.ModuleList([
            torch.nn.Linear(self.hid_dim,self.emd_dim),
            torch.nn.Linear(self.emd_dim,self.emd_dim),
        ])


    def forward(self, x, mask, t, cemd):
        # timestep embedding
        temb = SinusoidalEmbeddings(t, self.hid_dim)
        temb = self.temb.dense[0](temb)
        temb = temb*torch.sigmoid(temb)
        temb = self.temb.dense[1](temb)

        out = self.gconv_input(x, self.adj)
        for i in range(self.n_layers):
            out = self.atten_layers[i](out, mask)
            out = self.gconv_layers[i](out, temb)
        out = self.gconv_output(out, self.adj)
        return out

# Create diffusion model

In [25]:
def create_diffusion_model():
    edges = torch.tensor([[0, 1], [1, 2], [2, 3],
                        [0, 4], [4, 5], [5, 6],
                        [0, 7], [7, 8], [8, 9], [9,10],
                        [8, 11], [11, 12], [12, 13],
                        [8, 14], [14, 15], [15, 16]], dtype=torch.long)
    adj = adj_mx_from_edges(num_pts=17, edges=edges, sparse=False)
    model_diff = GCNdiff(adj.to(device), config_model).to(device)
    model_diff = torch.nn.DataParallel(model_diff)
    return model_diff

In [26]:
model_diff = create_diffusion_model()

# GCN Pose model

In [27]:
class _ResChebGC(nn.Module):
    def __init__(self, adj, input_dim, output_dim, hid_dim, p_dropout):
        super(_ResChebGC, self).__init__()
        self.adj = adj
        self.gconv1 = _GraphConv(input_dim, hid_dim, p_dropout)
        self.gconv2 = _GraphConv(hid_dim, output_dim, p_dropout)

    def forward(self, x):
        residual = x
        out = self.gconv1(x, self.adj)
        out = self.gconv2(out, self.adj)
        return residual + out

In [28]:
class GCNpose(nn.Module):
    def __init__(self, adj, config):
        super(GCNpose, self).__init__()

        self.adj = adj
        self.config = config
        ### load gcn configuration ###
        self.hid_dim, self.emd_dim, self.coords_dim, num_layers, n_head, dropout, n_pts = \
            config['hid_dim'], config['emd_dim'], config['coords_dim'], \
                config['num_layer'], config['n_head'], config['dropout'], config['n_pts']

        self.hid_dim = self.hid_dim
        self.emd_dim = self.hid_dim*4

        ### Generate Graphformer  ###
        self.n_layers = num_layers

        _gconv_input = ChebConv(in_c=self.coords_dim[0], out_c=self.hid_dim, K=2)
        _gconv_layers = []
        _attention_layer = []

        dim_model = self.hid_dim
        c = copy.deepcopy
        attn = MultiHeadedAttention(n_head, dim_model)
        gcn = GraphNet(in_features=dim_model, out_features=dim_model, n_pts=n_pts)

        for i in range(num_layers):
            _gconv_layers.append(_ResChebGC(adj=adj, input_dim=self.hid_dim, output_dim=self.hid_dim,
                                            hid_dim=self.hid_dim, p_dropout=0.1))
            _attention_layer.append(GraAttenLayer(dim_model, c(attn), c(gcn), dropout))

        self.gconv_input = _gconv_input
        self.gconv_layers = nn.ModuleList(_gconv_layers)
        self.atten_layers = nn.ModuleList(_attention_layer)
        self.gconv_output = ChebConv(in_c=dim_model, out_c=3, K=2)


        ### diffusion configuration  ###
        self.temb = nn.Module()
        self.temb.dense = nn.ModuleList([
            torch.nn.Linear(self.hid_dim,self.emd_dim),
            torch.nn.Linear(self.emd_dim,self.emd_dim),
        ])


    def forward(self, x, mask):
        out = self.gconv_input(x, self.adj)
        for i in range(self.n_layers):
            out = self.atten_layers[i](out, mask)
            out = self.gconv_layers[i](out)
        out = self.gconv_output(out, self.adj)
        return out

# Create pose model

In [29]:
def create_pose_model():

        # [input dimension u v, output dimension x y z]
        config_model['coords_dim'] = [2,3]
        edges = torch.tensor([[0, 1], [1, 2], [2, 3],
                            [0, 4], [4, 5], [5, 6],
                            [0, 7], [7, 8], [8, 9], [9,10],
                            [8, 11], [11, 12], [12, 13],
                            [8, 14], [14, 15], [15, 16]], dtype=torch.long)
        adj = adj_mx_from_edges(num_pts=17, edges=edges, sparse=False)
        model_pose = GCNpose(adj.to(device), config_model).to(device)
        model_pose = torch.nn.DataParallel(model_pose)
        logging.info('initialize model randomly')
        return model_pose

In [30]:
model_pose = create_pose_model()

# Prepare data

In [31]:
dataset = Human36mDataset("./data/data_3d_h36m.npz")
subjects_train = ['S1', 'S5', 'S6', 'S7', 'S8']
subjects_test = ['S9', 'S11']
dataset = read_3d_data_me(dataset)
keypoints = create_2d_data("./data/data_2d_h36m_cpn_ft_h36m_dbb_gmm.npz", dataset)

# Train the network

## Generate pose

In [32]:
class PoseGenerator_gmm(Dataset):
    def __init__(self, poses_3d, poses_2d_gmm, actions, camerapara):
        assert poses_3d is not None

        self._poses_3d = np.concatenate(poses_3d)
        self._poses_2d_gmm = np.concatenate(poses_2d_gmm)
        self._actions = reduce(lambda x, y: x + y, actions)
        self._camerapara = np.concatenate(camerapara)
        self._kernel_n = self._poses_2d_gmm.shape[2]

        self._poses_3d[:,:,:] = self._poses_3d[:,:,:]-self._poses_3d[:,:1,:]

        assert self._poses_3d.shape[0] == self._poses_2d_gmm.shape[0] and self._poses_3d.shape[0] == len(self._actions)
        print('Generating {} poses...'.format(len(self._actions)))

    def __getitem__(self, index):
        out_pose_3d = self._poses_3d[index]
        out_pose_2d_gmm = self._poses_2d_gmm[index]
        out_action = self._actions[index]
        out_camerapara = self._camerapara[index]

        # randomly select a kernel from gmm
        out_pose_2d_kernel = np.zeros([out_pose_2d_gmm.shape[0],out_pose_2d_gmm.shape[2]])
        for i in range(out_pose_2d_gmm.shape[0]):
            out_pose_2d_kernel[i] = out_pose_2d_gmm[i,np.random.choice(self._kernel_n, 1, p=out_pose_2d_gmm[i,:,0]).item()]

        # generate uvxyz and uvxyz noise scale
        kernel_mean = out_pose_2d_kernel[:,1:3]
        kernel_variance = out_pose_2d_kernel[:,3:]

        out_pose_uvxyz = np.concatenate((kernel_mean,out_pose_3d),axis=1)
        out_pose_noise_scale = np.concatenate((kernel_variance,np.ones(out_pose_3d.shape)),axis=1)

        out_pose_uvxyz = torch.from_numpy(out_pose_uvxyz).float()
        out_pose_noise_scale = torch.from_numpy(out_pose_noise_scale).float()
        out_pose_2d = torch.from_numpy(kernel_mean).float()
        out_pose_3d = torch.from_numpy(out_pose_3d).float()
        out_camerapara = torch.from_numpy(out_camerapara).float()

        return out_pose_uvxyz, out_pose_noise_scale, out_pose_2d, out_pose_3d, out_action, out_camerapara

    def __len__(self):
        return len(self._actions)

In [33]:
cudnn.benchmark = True
best_p1, best_epoch = 1000, 0
stride = 1

In [34]:
poses_train, poses_train_2d, actions_train, camerapara_train\
            = fetch_me(subjects_train, dataset, keypoints, None, stride)

In [35]:
data_loader = train_loader = data.DataLoader(
            PoseGenerator_gmm(poses_train, poses_train_2d, actions_train, camerapara_train),
            batch_size=batch_size, shuffle=True,\
                num_workers=num_workers, pin_memory=True)

Generating 1559752 poses...




## Implement exponential moving average(EMA) in the DDIM model

In [36]:
class EMAHelper(object):
    def __init__(self, mu=0.999):
        self.mu = mu
        self.shadow = {}

    def register(self, module):
        if isinstance(module, nn.DataParallel):
            module = module.module
        for name, param in module.named_parameters():
            if param.requires_grad:
                self.shadow[name] = param.data.clone()

    def update(self, module):
        if isinstance(module, nn.DataParallel):
            module = module.module
        for name, param in module.named_parameters():
            if param.requires_grad:
                self.shadow[name].data = (
                    1. - self.mu) * param.data + self.mu * self.shadow[name].data

    def ema(self, module):
        if isinstance(module, nn.DataParallel):
            module = module.module
        for name, param in module.named_parameters():
            if param.requires_grad:
                param.data.copy_(self.shadow[name].data)

    def ema_copy(self, module):
        if isinstance(module, nn.DataParallel):
            inner_module = module.module
            module_copy = type(inner_module)(
                inner_module.config).to(inner_module.config.device)
            module_copy.load_state_dict(inner_module.state_dict())
            module_copy = nn.DataParallel(module_copy)
        else:
            module_copy = type(module)(module.config).to(module.config.device)
            module_copy.load_state_dict(module.state_dict())
        # module_copy = copy.deepcopy(module)
        self.ema(module_copy)
        return module_copy

    def state_dict(self):
        return self.shadow

    def load_state_dict(self, state_dict):
        self.shadow = state_dict

In [37]:
ema_helper = EMAHelper()
ema_helper.register(model_diff)

In [38]:
optimizer = optim.Adam(model_diff.parameters(), lr=lr, weight_decay=0.000,
                          betas=(0.9, 0.999), amsgrad=amsgrad,
                          eps=eps)

In [39]:
start_epoch, step = 0, 0
lr_init, decay, gamma = lr, decay, lr_gamma

## Computes and stores the average and current value

In [40]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def lr_decay(optimizer, step, lr, decay_step, gamma):
    lr = lr * gamma ** (step / decay_step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

## Generate Diffusion sequence parameters

In [46]:
def get_beta_schedule(beta_schedule, *, beta_start, beta_end, num_diffusion_timesteps):


    if beta_schedule == "cosine":
        x = np.linspace(beta_start, beta_end, num_diffusion_timesteps+1, dtype=np.float64)
        alphas = np.cos(((x / num_diffusion_timesteps) + 0.008) / (1 + 0.008) * math.pi * 0.5) ** 2
        alphas = alphas / alphas[0]
        betas = 1 - (alphas[1:] / alphas[:-1])
        betas = np.clip(betas, 0.01, 0.99)
    elif beta_schedule == "linear":
        betas = np.linspace(
            beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64
        )
    assert betas.shape == (num_diffusion_timesteps,)
    return betas

In [45]:
# Generate Diffusion sequence parameters using linear beta schedule
betas_1 = get_beta_schedule(
    beta_schedule=beta_schedule_2,
    beta_start=beta_start,
    beta_end=beta_end,
    num_diffusion_timesteps=num_diffusion_timesteps,
)
betas_1 = torch.from_numpy(betas_1).float().to(device)
num_timesteps = betas_1.shape[0]

TypeError: ignored

In [47]:
# Generate Diffusion sequence parameters using cosine beta schedule
betas_2 = get_beta_schedule(
    beta_schedule=beta_schedule_2,
    beta_start=beta_start,
    beta_end=beta_end,
    num_diffusion_timesteps=num_diffusion_timesteps,
)
betas_2 = torch.from_numpy(betas_2).float().to(device)
num_timesteps = betas_2.shape[0]

In [48]:
src_mask = torch.tensor([[[True, True, True, True, True, True, True, True, True, True,
                                True, True, True, True, True, True, True]]]).to(device)

## Generalized steps

In [49]:
def compute_alpha(beta, t):
    beta = torch.cat([torch.zeros(1).to(beta.device), beta], dim=0)
    a = (1 - beta).cumprod(dim=0).index_select(0, t + 1).view(-1, 1, 1)
    return a

In [50]:
def generalized_steps(x, src_mask, seq, model, b, **kwargs):
    with torch.no_grad():
        n = x.size(0)
        seq_next = [-1] + list(seq[:-1])
        x0_preds = []
        xs = [x]
        for i, j in zip(reversed(seq), reversed(seq_next)):
            t = (torch.ones(n) * i).to(device)
            next_t = (torch.ones(n) * j).to(device)
            at = compute_alpha(b, t.long())
            at_next = compute_alpha(b, next_t.long())
            xt = xs[-1]
            et = model(xt, src_mask, t.float(), 0)
            x0_t = (xt - et * (1 - at).sqrt()) / at.sqrt()
            x0_preds.append(x0_t)
            c1 = (
                kwargs.get("eta", 0) * ((1 - at / at_next) * (1 - at_next) / (1 - at)).sqrt()
            )
            c2 = ((1 - at_next) - c1 ** 2).sqrt()
            xt_next = at_next.sqrt() * x0_t + c1 * torch.randn_like(x) + c2 * et
            xs.append(xt_next)

    return xs, x0_preds

## Use second order method to generate steps

In [51]:
def generalized_steps_edm(x, src_mask, seq, model, b):
    with torch.no_grad():
        n = x.size(0)
        seq_next = [-1] + list(seq[:-1])
        x0_preds = []
        xs = [x]
        for i, j in zip(reversed(seq), reversed(seq_next)):
            t = (torch.ones(n) * i).to(device)
            gamma = min(40 / num_diffusion_timesteps, np.sqrt(2) - 1) if 0.003 <= i <= 0.007 else 0
            t_hat = i + gamma * i
            next_t = (torch.ones(n) * j).to(device)
            at = compute_alpha(b, t.long())
            at_next = compute_alpha(b, next_t.long())
            xt = xs[-1]
            x_hat = xt + np.sqrt(t_hat ** 2 - i ** 2) * 1.003 * torch.randn_like(x)

            denoised = model(x_hat, src_mask, t.float(), 0)

            d_cur = (xt - denoised * (1 - at).sqrt())
            a_ratio = at_next / at
            x_next = a_ratio.sqrt() * d_cur

            # Apply 2nd order correction.
            if i < num_diffusion_timesteps - 1:
                denoised_2 = model(x_next, src_mask, next_t.float(), 0)
                eps = ((1-at_next+at)/(1-at)-1/(a_ratio*(1-at))).sqrt()
                drift = (1-at_next-(1-at_next+at)/(1-at)+1/(a_ratio*(1-at))).sqrt()
                x_next += eps * torch.randn_like(x) + drift*denoised_2

            xs.append(x_next)

    return xs, x0_preds

In [52]:
def generalized_steps_edm1(x, src_mask, seq, model, b):
    with torch.no_grad():
        n = x.size(0)
        seq_next = [-1] + list(seq[:-1])
        x0_preds = []
        xs = [x]
        for i, j in zip(reversed(seq), reversed(seq_next)):
            t = (torch.ones(n) * i).to(device)
            gamma = min(40 / num_diffusion_timesteps, np.sqrt(2) - 1) if 0.003 <= i <= 0.007 else 0
            t_hat = i + gamma * i
            next_t = (torch.ones(n) * j).to(device)
            at = compute_alpha(b, t.long())
            at_next = compute_alpha(b, next_t.long())
            xt = xs[-1]
            x_hat = xt + np.sqrt(t_hat ** 2 - i ** 2) * 1.003 * torch.randn_like(x)


            denoised = model(x_hat, src_mask, t.float(), 0)
            x_diff = (xt - denoised) / t_hat
            x_next = x_hat + (j - t_hat) * x_diff

            # Apply 2nd order correction.
            if i != seq[1] and i != seq[0]:
                denoised_2 = model(x_next, src_mask, next_t.float(), 0)
                d_prime = (x_next - denoised_2) / j
                x_next = x_hat + (j - t_hat) * (0.5 * x_diff + 0.5 * d_prime)

            xs.append(x_next)

    return xs, x0_preds

## Define mpjpe

In [53]:
def mpjpe(predicted, target):
    """
    Mean per-joint position error (i.e. mean Euclidean distance),
    often referred to as "Protocol #1" in many papers.
    """
    return torch.mean(torch.norm(predicted - target, dim=len(target.shape) - 1))

In [54]:
def p_mpjpe(predicted, target):
    """
    Pose error: MPJPE after rigid alignment (scale, rotation, and translation),
    often referred to as "Protocol #2" in many papers.
    """
    assert predicted.shape == target.shape

    muX = np.mean(target, axis=1, keepdims=True)
    muY = np.mean(predicted, axis=1, keepdims=True)

    X0 = target - muX
    Y0 = predicted - muY

    normX = np.sqrt(np.sum(X0 ** 2, axis=(1, 2), keepdims=True))
    normY = np.sqrt(np.sum(Y0 ** 2, axis=(1, 2), keepdims=True))

    X0 /= normX
    Y0 /= normY

    H = np.matmul(X0.transpose(0, 2, 1), Y0)
    U, s, Vt = np.linalg.svd(H)
    V = Vt.transpose(0, 2, 1)
    R = np.matmul(V, U.transpose(0, 2, 1))

    # Avoid improper rotations (reflections), i.e. rotations with det(R) = -1
    sign_detR = np.sign(np.expand_dims(np.linalg.det(R), axis=1))
    V[:, :, -1] *= sign_detR
    s[:, -1] *= sign_detR.flatten()
    R = np.matmul(V, U.transpose(0, 2, 1))  # Rotation

    tr = np.expand_dims(np.sum(s, axis=1, keepdims=True), axis=2)

    a = tr * normX / normY  # Scale
    t = muX - a * np.matmul(muY, R)  # Translation

    # Perform rigid transformation on the input
    predicted_aligned = a * np.matmul(predicted, R) + t

    # Return MPJPE
    return np.mean(np.linalg.norm(predicted_aligned - target, axis=len(target.shape) - 1))

## Find the best epoch with best MPJPE

In [55]:
def test_hyber(betas = betas_1, test_times=1, test_timesteps=2, test_num_diffusion_timesteps=24, device=device, is_train=False, edm=False, edm_1=False):
    cudnn.benchmark = True

    test_times, test_timesteps, test_num_diffusion_timesteps, stride = \
        test_times, test_timesteps, test_num_diffusion_timesteps, 1

    poses_valid, poses_valid_2d, actions_valid, camerapara_valid = \
        fetch_me(subjects_test, dataset, keypoints, None, stride)
    data_loader = valid_loader = data.DataLoader(
        PoseGenerator_gmm(poses_valid, poses_valid_2d, actions_valid, camerapara_valid),
        batch_size=batch_size, shuffle=False,
        num_workers=num_workers, pin_memory=True)


    data_start = time.time()
    data_time = 0

    # Switch to test mode
    torch.set_grad_enabled(False)
    model_diff.eval()
    model_pose.eval()

    skip = test_num_diffusion_timesteps // test_timesteps
    seq = range(0, test_num_diffusion_timesteps, skip)

    epoch_loss_3d_pos = AverageMeter()
    epoch_loss_3d_pos_procrustes = AverageMeter()
    test_action_list = ['Directions','Discussion','Eating','Greeting','Phoning','Photo','Posing','Purchases','Sitting',\
        'SittingDown','Smoking','Waiting','WalkDog','Walking','WalkTogether']
    action_error_sum = define_error_list(test_action_list)

    for i, (_, input_noise_scale, input_2d, targets_3d, input_action, camera_para) in enumerate(data_loader):
        data_time += time.time() - data_start

        input_noise_scale, input_2d, targets_3d = \
            input_noise_scale.to(device), input_2d.to(device), targets_3d.to(device)

        # build uvxyz
        inputs_xyz = model_pose(input_2d, src_mask)
        inputs_xyz[:, :, :] -= inputs_xyz[:, :1, :]
        input_uvxyz = torch.cat([input_2d,inputs_xyz],dim=2)

        # generate distribution
        input_uvxyz = input_uvxyz.repeat(test_times,1,1)
        input_noise_scale = input_noise_scale.repeat(test_times,1,1)
        # select diffusion step
        t = torch.ones(input_uvxyz.size(0)).type(torch.LongTensor).to(device)*test_num_diffusion_timesteps

        # prepare the diffusion parameters
        x = input_uvxyz.clone()
        e = torch.randn_like(input_uvxyz)
        b = betas
        e = e*input_noise_scale
        a = (1-b).cumprod(dim=0).index_select(0, t).view(-1, 1, 1)
        # x = x * a.sqrt() + e * (1.0 - a).sqrt()

        if edm:
          output_uvxyz = generalized_steps_edm(x, src_mask, seq, model_diff, betas, eta=eta)
        elif edm_1:
          output_uvxyz = generalized_steps_edm1(x, src_mask, seq, model_diff, betas, eta=eta)
        else:
          output_uvxyz = generalized_steps(x, src_mask, seq, model_diff, betas, eta=eta)
        output_uvxyz = output_uvxyz[0][-1]
        output_uvxyz = torch.mean(output_uvxyz.reshape(test_times,-1,17,5),0)
        output_xyz = output_uvxyz[:,:,2:]
        output_xyz[:, :, :] -= output_xyz[:, :1, :]
        targets_3d[:, :, :] -= targets_3d[:, :1, :]
        epoch_loss_3d_pos.update(mpjpe(output_xyz, targets_3d).item() * 100.0, targets_3d.size(0))
        epoch_loss_3d_pos_procrustes.update(p_mpjpe(output_xyz.cpu().numpy(), targets_3d.cpu().numpy()).item() * 100.0, targets_3d.size(0))\

        data_start = time.time()

        action_error_sum = test_calculation(output_xyz, targets_3d, input_action, action_error_sum, None, None)

        if i%100 == 0 and i != 0:
            print('({batch}/{size}) Data: {data:.6f}s | MPJPE: {e1: .4f} | P-MPJPE: {e2: .4f}'\
                    .format(batch=i + 1, size=len(data_loader), data=data_time, e1=epoch_loss_3d_pos.avg,\
                        e2=epoch_loss_3d_pos_procrustes.avg))
            print('sum ({batch}/{size}) Data: {data:.6f}s | MPJPE: {e1: .4f} | P-MPJPE: {e2: .4f}'\
            .format(batch=i + 1, size=len(data_loader), data=data_time, e1=epoch_loss_3d_pos.avg,\
                e2=epoch_loss_3d_pos_procrustes.avg))

    p1, p2 = print_error(None, action_error_sum, is_train)

    return p1, p2

### Model 1(linear beta schedule)

In [None]:
'''
loss_list_beta_1 = []
p1_list_beta_1 = []
p2_list_beta_1 = []
'''

In [None]:
'''
for epoch in range(start_epoch, n_epochs):
    data_start = time.time()
    data_time = 0

    # Switch to train mode
    torch.set_grad_enabled(True)
    model_diff.train()

    epoch_loss_diff = AverageMeter()

    for i, (targets_uvxyz, targets_noise_scale, _, targets_3d, _, _) in enumerate(data_loader):
        data_time += time.time() - data_start
        step += 1

        # to cuda
        targets_uvxyz, targets_noise_scale, targets_3d = \
            targets_uvxyz.to(device), targets_noise_scale.to(device), targets_3d.to(device)

        # generate nosiy sample based on seleted time t and beta
        n = targets_3d.size(0)
        x = targets_uvxyz
        e = torch.randn_like(x)
        b = betas_1
        t = torch.randint(low=0, high=num_timesteps,
                          size=(n // 2 + 1,)).to(device)
        t = torch.cat([t, num_timesteps - t - 1], dim=0)[:n]
        e = e*(targets_noise_scale)
        a = (1-b).cumprod(dim=0).index_select(0, t).view(-1, 1, 1)
        # generate x_t (refer to DDIM equation)
        x = x * a.sqrt() + e * (1.0 - a).sqrt()

        # predict noise
        output_noise = model_diff(x, src_mask, t.float(), 0)
        loss_diff = (e - output_noise).square().sum(dim=(1, 2)).mean(dim=0)

        optimizer.zero_grad()
        loss_diff.backward()

        torch.nn.utils.clip_grad_norm_(
            model_diff.parameters(), grad_clip)
        optimizer.step()

        epoch_loss_diff.update(loss_diff.item(), n)

        ema_helper.update(model_diff)

        if i%100 == 0 and i != 0:
            print('| Epoch{:0>4d}: {:0>4d}/{:0>4d} | Step {:0>6d} | Data: {:.6f} | Loss: {:.6f} |'\
                .format(epoch, i+1, len(data_loader), step, data_time, epoch_loss_diff.avg))

    data_start = time.time()

    if epoch % decay == 0:
        lr_now = lr_decay(optimizer, epoch, lr_init, decay, gamma)



    print('test the performance of current model')

    p1, p2 = test_hyber(is_train=True)

    if p1 < best_p1:
        best_p1 = p1
        best_epoch = epoch
    print('| Best Epoch: {:0>4d} MPJPE: {:.2f} | Epoch: {:0>4d} MPJEPE: {:.2f} PA-MPJPE: {:.2f} |'\
        .format(best_epoch, best_p1, epoch, p1, p2))

    loss_list_beta_1.append(epoch_loss_diff.avg)
    p1_list_beta_1.append(p1)
    p2_list_beta_1.append(p2)
'''

Process Process-30:
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d4e3c372950>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1442, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/usr/lib/python3.10/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/usr/lib/python3.10/multiprocessing/popen_fork.py", line 40, in wait
    if not wait([self.sentinel], timeout):
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/usr/lib/python3.10/selectors.py", line 416, in select
    fd_event_list = self._selector.poll(timeout)
KeyboardInterrupt: 


KeyboardInterrupt: ignored

In [None]:
'''
with open('loss1.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in loss_list_beta_1))
with open('p1_1.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in p1_list_beta_1))
with open('p2_1.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in p2_list_beta_1))
'''

### Model 2(cosine beta schedule)

In [56]:
loss_list_beta_2 = []
p1_list_beta_2 = []
p2_list_beta_2 = []

In [None]:
for epoch in range(start_epoch, n_epochs):
    data_start = time.time()
    data_time = 0

    # Switch to train mode
    torch.set_grad_enabled(True)
    model_diff.train()

    epoch_loss_diff = AverageMeter()

    for i, (targets_uvxyz, targets_noise_scale, _, targets_3d, _, _) in enumerate(data_loader):
        data_time += time.time() - data_start
        step += 1

        # to cuda
        targets_uvxyz, targets_noise_scale, targets_3d = \
            targets_uvxyz.to(device), targets_noise_scale.to(device), targets_3d.to(device)

        # generate nosiy sample based on seleted time t and beta
        n = targets_3d.size(0)
        x = targets_uvxyz
        e = torch.randn_like(x)
        b = betas_1
        t = torch.randint(low=0, high=num_timesteps,
                          size=(n // 2 + 1,)).to(device)
        t = torch.cat([t, num_timesteps - t - 1], dim=0)[:n]
        e = e*(targets_noise_scale)
        a = (1-b).cumprod(dim=0).index_select(0, t).view(-1, 1, 1)
        # generate x_t (refer to DDIM equation)
        x = x * a.sqrt() + e * (1.0 - a).sqrt()

        # predict noise
        output_noise = model_diff(x, src_mask, t.float(), 0)
        loss_diff = (e - output_noise).square().sum(dim=(1, 2)).mean(dim=0)

        optimizer.zero_grad()
        loss_diff.backward()

        torch.nn.utils.clip_grad_norm_(
            model_diff.parameters(), grad_clip)
        optimizer.step()

        epoch_loss_diff.update(loss_diff.item(), n)

        ema_helper.update(model_diff)

        if i%100 == 0 and i != 0:
            print('| Epoch{:0>4d}: {:0>4d}/{:0>4d} | Step {:0>6d} | Data: {:.6f} | Loss: {:.6f} |'\
                .format(epoch, i+1, len(data_loader), step, data_time, epoch_loss_diff.avg))

    data_start = time.time()

    if epoch % decay == 0:
        lr_now = lr_decay(optimizer, epoch, lr_init, decay, gamma)



    print('test the performance of current model')

    p1, p2 = test_hyber(is_train=True)

    if p1 < best_p1:
        best_p1 = p1
        best_epoch = epoch
    print('| Best Epoch: {:0>4d} MPJPE: {:.2f} | Epoch: {:0>4d} MPJEPE: {:.2f} PA-MPJPE: {:.2f} |'\
        .format(best_epoch, best_p1, epoch, p1, p2))

    loss_list_beta_2.append(epoch_loss_diff.avg)
    p1_list_beta_2.append(p1)
    p2_list_beta_2.append(p2)




| Epoch0000: 0101/1524 | Step 000101 | Data: 2055.982456 | Loss: 61.300082 |
| Epoch0000: 0201/1524 | Step 000201 | Data: 5925.267459 | Loss: 52.932366 |
| Epoch0000: 0301/1524 | Step 000301 | Data: 11616.311369 | Loss: 47.880242 |
| Epoch0000: 0401/1524 | Step 000401 | Data: 19180.343270 | Loss: 44.329915 |
| Epoch0000: 0501/1524 | Step 000501 | Data: 28575.217030 | Loss: 41.695045 |
| Epoch0000: 0601/1524 | Step 000601 | Data: 39768.241064 | Loss: 39.659411 |
| Epoch0000: 0701/1524 | Step 000701 | Data: 52749.220656 | Loss: 38.046888 |
| Epoch0000: 0801/1524 | Step 000801 | Data: 67519.959991 | Loss: 36.734080 |
| Epoch0000: 0901/1524 | Step 000901 | Data: 84096.649071 | Loss: 35.646042 |
| Epoch0000: 1001/1524 | Step 001001 | Data: 102475.016578 | Loss: 34.709231 |
| Epoch0000: 1101/1524 | Step 001101 | Data: 122656.485221 | Loss: 33.892972 |
| Epoch0000: 1201/1524 | Step 001201 | Data: 144631.592710 | Loss: 33.176843 |
| Epoch0000: 1301/1524 | Step 001301 | Data: 168454.569002 | Lo

In [None]:
with open('loss2.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in loss_list_beta_2))
with open('p1_2.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in p1_list_beta_2))
with open('p2_2.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in p2_list_beta_2))

### Model 3(Use edm to replace DDIM)

In [None]:
'''
loss_list_beta_1_edm = []
p1_list_beta_1_edm = []
p2_list_beta_1_edm = []
'''

In [None]:
'''
for epoch in range(start_epoch, n_epochs):
    data_start = time.time()
    data_time = 0

    # Switch to train mode
    torch.set_grad_enabled(True)
    model_diff.train()

    epoch_loss_diff = AverageMeter()

    for i, (targets_uvxyz, targets_noise_scale, _, targets_3d, _, _) in enumerate(data_loader):
        data_time += time.time() - data_start
        step += 1

        # to cuda
        targets_uvxyz, targets_noise_scale, targets_3d = \
            targets_uvxyz.to(device), targets_noise_scale.to(device), targets_3d.to(device)

        # generate nosiy sample based on seleted time t and beta
        n = targets_3d.size(0)
        x = targets_uvxyz
        e = torch.randn_like(x)
        b = betas_1
        t = torch.randint(low=0, high=num_timesteps,
                          size=(n // 2 + 1,)).to(device)
        t = torch.cat([t, num_timesteps - t - 1], dim=0)[:n]
        e = e*(targets_noise_scale)
        a = (1-b).cumprod(dim=0).index_select(0, t).view(-1, 1, 1)
        # generate x_t (refer to DDIM equation)
        x = x * a.sqrt() + e * (1.0 - a).sqrt()

        # predict noise
        output_noise = model_diff(x, src_mask, t.float(), 0)
        loss_diff = (e - output_noise).square().sum(dim=(1, 2)).mean(dim=0)

        optimizer.zero_grad()
        loss_diff.backward()

        torch.nn.utils.clip_grad_norm_(
            model_diff.parameters(), grad_clip)
        optimizer.step()

        epoch_loss_diff.update(loss_diff.item(), n)

        ema_helper.update(model_diff)

        if i%100 == 0 and i != 0:
            print('| Epoch{:0>4d}: {:0>4d}/{:0>4d} | Step {:0>6d} | Data: {:.6f} | Loss: {:.6f} |'\
                .format(epoch, i+1, len(data_loader), step, data_time, epoch_loss_diff.avg))

    data_start = time.time()

    if epoch % decay == 0:
        lr_now = lr_decay(optimizer, epoch, lr_init, decay, gamma)



    print('test the performance of current model')

    p1, p2 = test_hyber(edm=True, is_train=True)

    if p1 < best_p1:
        best_p1 = p1
        best_epoch = epoch
    print('| Best Epoch: {:0>4d} MPJPE: {:.2f} | Epoch: {:0>4d} MPJEPE: {:.2f} PA-MPJPE: {:.2f} |'\
        .format(best_epoch, best_p1, epoch, p1, p2))

    loss_list_beta_2.append(epoch_loss_diff.avg)
    p1_list_beta_2.append(p1)
    p2_list_beta_2.append(p2)
'''

In [None]:
'''
with open('loss3.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in loss_list_beta_1_edm))
with open('p1_3.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in p1_list_beta_1_edm))
with open('p2_3.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in p2_list_beta_1_edm))
'''

### Model 4(Use edm to replace DDIM substitute test, not recorded)

In [56]:
'''
loss_list_beta_1_edm1 = []
p1_list_beta_1_edm1 = []
p2_list_beta_1_edm1 = []
'''

In [57]:
'''
for epoch in range(start_epoch, n_epochs):
    data_start = time.time()
    data_time = 0

    # Switch to train mode
    torch.set_grad_enabled(True)
    model_diff.train()

    epoch_loss_diff = AverageMeter()

    for i, (targets_uvxyz, targets_noise_scale, _, targets_3d, _, _) in enumerate(data_loader):
        data_time += time.time() - data_start
        step += 1

        # to cuda
        targets_uvxyz, targets_noise_scale, targets_3d = \
            targets_uvxyz.to(device), targets_noise_scale.to(device), targets_3d.to(device)

        # generate nosiy sample based on seleted time t and beta
        n = targets_3d.size(0)
        x = targets_uvxyz
        e = torch.randn_like(x)
        b = betas_1
        t = torch.randint(low=0, high=num_timesteps,
                          size=(n // 2 + 1,)).to(device)
        t = torch.cat([t, num_timesteps - t - 1], dim=0)[:n]
        e = e*(targets_noise_scale)
        a = (1-b).cumprod(dim=0).index_select(0, t).view(-1, 1, 1)
        # generate x_t (refer to DDIM equation)
        x = x * a.sqrt() + e * (1.0 - a).sqrt()

        # predict noise
        output_noise = model_diff(x, src_mask, t.float(), 0)
        loss_diff = (e - output_noise).square().sum(dim=(1, 2)).mean(dim=0)

        optimizer.zero_grad()
        loss_diff.backward()

        torch.nn.utils.clip_grad_norm_(
            model_diff.parameters(), grad_clip)
        optimizer.step()

        epoch_loss_diff.update(loss_diff.item(), n)

        ema_helper.update(model_diff)

        if i%100 == 0 and i != 0:
            print('| Epoch{:0>4d}: {:0>4d}/{:0>4d} | Step {:0>6d} | Data: {:.6f} | Loss: {:.6f} |'\
                .format(epoch, i+1, len(data_loader), step, data_time, epoch_loss_diff.avg))

    data_start = time.time()

    if epoch % decay == 0:
        lr_now = lr_decay(optimizer, epoch, lr_init, decay, gamma)



    print('test the performance of current model')

    p1, p2 = test_hyber(edm_1=True, is_train=True)

    if p1 < best_p1:
        best_p1 = p1
        best_epoch = epoch
    print('| Best Epoch: {:0>4d} MPJPE: {:.2f} | Epoch: {:0>4d} MPJEPE: {:.2f} PA-MPJPE: {:.2f} |'\
        .format(best_epoch, best_p1, epoch, p1, p2))

    loss_list_beta_2.append(epoch_loss_diff.avg)
    p1_list_beta_2.append(p1)
    p2_list_beta_2.append(p2)
'''



| Epoch0000: 0101/1524 | Step 000101 | Data: 7921.032238 | Loss: 60.319590 |
| Epoch0000: 0201/1524 | Step 000201 | Data: 28929.829143 | Loss: 52.301847 |
| Epoch0000: 0301/1524 | Step 000301 | Data: 63085.757681 | Loss: 47.434076 |
| Epoch0000: 0401/1524 | Step 000401 | Data: 110074.526793 | Loss: 44.078708 |
| Epoch0000: 0501/1524 | Step 000501 | Data: 169961.596749 | Loss: 41.506972 |
| Epoch0000: 0601/1524 | Step 000601 | Data: 243180.954394 | Loss: 39.503878 |
| Epoch0000: 0701/1524 | Step 000701 | Data: 329524.776556 | Loss: 37.908922 |
| Epoch0000: 0801/1524 | Step 000801 | Data: 429253.489155 | Loss: 36.595162 |
| Epoch0000: 0901/1524 | Step 000901 | Data: 541940.036229 | Loss: 35.498611 |
| Epoch0000: 1001/1524 | Step 001001 | Data: 668121.819482 | Loss: 34.555452 |
| Epoch0000: 1101/1524 | Step 001101 | Data: 807183.029080 | Loss: 33.732240 |
| Epoch0000: 1201/1524 | Step 001201 | Data: 959304.238593 | Loss: 33.004564 |
| Epoch0000: 1301/1524 | Step 001301 | Data: 1124643.471

LinAlgError: ignored

In [None]:
'''
with open('loss4.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in loss_list_beta_1_edm1))
with open('p1_4.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in p1_list_beta_1_edm1))
with open('p2_4.txt', 'w') as file:
    file.write('\n'.join(str(l) for l in p2_list_beta_1_edm1))
'''