In [1]:
# 读取一个 h5 图片数据集，便于后面使用
# liyi，2019/5/19

import os
import h5py
import numpy as np

# Directory holding the per-sample HDF5 files, named by zero-padded index.
path = '/home/liyi/video-pred/pytorch_video_pred/data/comma/train/'
# print(sorted(os.listdir(path))[0])
batch_size = 6
idx = range(batch_size)
files = ['%010d.h5'% i for i in idx]
inputs = []
for file_name in files:
    # The context manager closes the file even if reading or casting fails.
    with h5py.File(os.path.join(path, file_name), 'r') as f:
        # `Dataset.value` was removed in h5py 3.0; `[()]` reads the whole
        # dataset into memory on both old and new h5py versions.
        sample = f['image'][()].astype(np.float32)
    inputs.append(sample)

# Stack the per-file samples along a new leading (batch) axis.
inputs = np.array(inputs)
print(inputs.shape)  # NDHWC 5/22

(6, 30, 160, 320, 3)


  from ._conv import register_converters as _register_converters


In [2]:
# 获取超参数，方便后面调试
# from base_model.py
# liyi，2019/5/19

from tensorflow.contrib.training import HParams

def get_hparams(hparams_dict=None):
    """Return an ``HParams`` built from the base defaults plus optional overrides.

    Args:
        hparams_dict: optional overrides; entries replace defaults with the
            same name and add any new names. ``None`` or an empty value
            leaves the defaults untouched.

    Returns:
        An ``HParams`` instance holding the merged values.
    """
    merged = {
        'context_frames': -1,
        'sequence_length': -1,
        'repeat': 1,
    }
    if hparams_dict:
        merged.update(hparams_dict)
    return HParams(**merged)

# Display the defaults as a quick sanity check.
get_hparams()

HParams([('context_frames', -1), ('repeat', 1), ('sequence_length', -1)])

In [3]:
# Hyperparameters prepared for the posterior test below 5/22
# NOTE(review): `dataset` is 'bair' while `input_dir` points at a comma_m
# directory — confirm this combination is intended.
hparams = dict(
    dataset='bair',
    input_dir='/home/liyi/video-pred/video_prediction/data/comma_m/train',
    model='savp',
    model_hparams_dict='hparams/bair_action_free/ours_savp/model_hparams.json',
    
    l1_weight=1.0,
    l2_weight=0.0,
    n_layers=3,  # NOTE(review): original note reads "changed 3 to 5 (5/21)" but the value is 3 — confirm
    ndf=32,
    norm_layer='instance',
    use_same_discriminator=False,
    ngf=32,
    downsample_layer='conv_pool2d',
    upsample_layer='upsample_conv2d',
    activation_layer='relu',  # for generator only
    transformation='cdna',
    kernel_size=(5, 5),
    dilation_rate=(1, 1),
    where_add='all',
    use_tile_concat=True,
    learn_initial_state=False,
    rnn='lstm',
    conv_rnn='lstm',
    conv_rnn_norm_layer='instance',
    num_transformed_images=4,
    last_frames=1,
    prev_image_background=True,
    first_image_background=True,
    last_image_background=False,
    last_context_image_background=False,
    context_images_background=False,
    generate_scratch_image=True,
    dependent_mask=True,
    schedule_sampling='inverse_sigmoid',
    schedule_sampling_k=900.0,
    schedule_sampling_steps=(0, 100000),
    use_e_rnn=False,
    learn_prior=False,
    nz=8,
    num_samples=8,
    nef=64,   ### NOTE(review): original note reads "changed 64 to 32 (5/21)" but the value is 64 — confirm
    use_rnn_z=True,
    ablation_conv_rnn_norm=False,
    ablation_rnn=False,
)

# Merge the overrides above into the base defaults via get_hparams.
parsed_hparams = get_hparams(hparams_dict=hparams)

In [None]:
# 测试 prior
# from module.py
# liyi,2019/5/22

import functools
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from video_prediction.utils.max_sv import spectral_normed_weight
from video_prediction.layers.conv import Conv2d, Conv3d
from video_prediction.models.modules import Encoder,Dense

class Prior(nn.Module):
    """Prior network over the per-step latent z (port of savp_model.py prior_fn, 5/16).

    Consecutive context frames are concatenated along the channel axis,
    encoded, zero-padded along time up to ``sequence_length - 1`` steps,
    and passed through dense -> RNN -> two dense heads.

    Args:
        input_shape: shape of the image tensor, assumed to be
            (D, N, C, H, W) as in the Posterior class — TODO confirm.
        hparams: must provide ``nef``, ``n_layers``, ``rnn`` ('lstm' or
            'gru'), ``nz``, ``context_frames`` and ``sequence_length``.

    Raises:
        NotImplementedError: if ``hparams.rnn`` is neither 'lstm' nor 'gru'.

    NOTE(review): ``Dense`` is project code not visible here. It is built
    with ``input_shape=[rows, features]`` and applied to 2-D tensors, the
    same way the Posterior cell uses it — confirm against modules.py.
    """
    def __init__(self, input_shape, hparams):
        super(Prior, self).__init__()
        self.hparams = hparams
        self.input_shape = list(input_shape)
        rnn_units = hparams.nef * 4
        # Feature width of the encoder's pooled output.
        enc_features = hparams.nef * min(4, 2**(hparams.n_layers-1))
        rows = self.input_shape[0] * self.input_shape[1]
        # Frame pairs are concatenated on the channel axis, so the encoder
        # sees 2*C input channels.
        self.encoder = Encoder(input_shape=[rows, self.input_shape[-3]*2] + self.input_shape[-2:],
                               nef=hparams.nef, n_layers=hparams.n_layers)
        self.dense0 = Dense(input_shape=[rows, enc_features], units=rnn_units)
        # nn.LSTM / nn.GRU require input_size; it equals dense0's output width.
        if hparams.rnn == 'lstm':
            self.rnn = nn.LSTM(input_size=rnn_units, hidden_size=rnn_units)
        elif hparams.rnn == 'gru':
            self.rnn = nn.GRU(input_size=rnn_units, hidden_size=rnn_units)
        else:
            raise NotImplementedError
        self.dense1 = Dense(input_shape=[rows, rnn_units], units=hparams.nz)
        self.dense2 = Dense(input_shape=[rows, rnn_units], units=hparams.nz)

    def forward(self, inputs):
        """Compute the prior's mean and log-variance.

        Args:
            inputs: image tensor indexed as (D, N, C, H, W); only the first
                ``context_frames`` frames are used.

        Returns:
            dict with 'z_mu' and 'z_log_sigma_sq', each reshaped to
            (steps, N, -1); 'z_log_sigma_sq' is clamped to [-10, 10].

        Raises:
            ValueError: if ``context_frames < 2`` or
                ``sequence_length < context_frames`` (e.g. the -1
                placeholder defaults were never overridden).
        """
        context_frames = self.hparams.context_frames
        sequence_length = self.hparams.sequence_length
        if context_frames < 2 or sequence_length < context_frames:
            raise ValueError(
                'Prior needs context_frames >= 2 and sequence_length >= context_frames, '
                'got context_frames=%d, sequence_length=%d' % (context_frames, sequence_length))
        outputs = {}
        # Concatenate each pair of consecutive context frames on the channel axis.
        pairs = torch.cat([inputs[:context_frames - 1], inputs[1:context_frames]], dim=-3)
        n_steps, batch = pairs.shape[0], pairs.shape[1]
        ### adding actions is not implemented yet
        # Fold (time, batch) into one axis so the encoder sees NCHW.
        h = self.encoder(pairs.reshape([-1] + list(pairs.shape[-3:])))['output']
        h = h.reshape([n_steps, batch, -1])
        # Pad the time axis with zeros for the frames beyond the context;
        # new_zeros keeps the dtype and device of h.
        h_zeros = h.new_zeros([sequence_length - context_frames] + list(h.shape[1:]))
        h = torch.cat([h, h_zeros], dim=0)
        total_steps = h.shape[0]
        h = self.dense0(h.reshape([total_steps * batch, -1]))
        # The RNN takes (seq, batch, features) and returns (output, state);
        # only the per-step output is used.
        h, _ = self.rnn(h.reshape([total_steps, batch, -1]))
        h = h.reshape([total_steps * batch, -1])
        z_mu = self.dense1(h).reshape([total_steps, batch, -1])
        outputs['z_mu'] = z_mu
        z_log_sigma_sq = self.dense2(h).reshape([total_steps, batch, -1])
        z_log_sigma_sq = torch.clamp(z_log_sigma_sq, -10, 10)
        outputs['z_log_sigma_sq'] = z_log_sigma_sq
        return outputs
        

In [6]:
# 测试 posterior
# from modules.py
# liyi,2019/5/22
# input 为 DNCHW 5/22
# output 为 D-1,N,nz[=8] 5/22

import functools
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from video_prediction.utils.max_sv import spectral_normed_weight
from video_prediction.layers.conv import Conv2d, Conv3d
from video_prediction.models.modules import Encoder,Dense

class Posterior(nn.Module):
    """Posterior network over the per-step latent z (port of savp_model.py posterior_fn, 5/15).

    Input is indexed as (D, N, C, H, W) (5/19); each output is reshaped to
    (D-1, N, -1), i.e. (D-1, N, nz) with nz = 8 in the recorded run (5/21).

    Args:
        input_shape: shape of the image tensor, (D, N, C, H, W).
        hparams: must provide ``use_e_rnn``, ``nef``, ``n_layers`` and
            ``nz``; ``rnn`` ('lstm' or 'gru') is read only when
            ``use_e_rnn`` is true.

    Raises:
        NotImplementedError: if ``use_e_rnn`` is true and ``hparams.rnn`` is
            neither 'lstm' nor 'gru'.
    """
    def __init__(self, input_shape, hparams):
        super(Posterior, self).__init__()
        self.input_shape = list(input_shape)
        self.use_e_rnn = hparams.use_e_rnn  ### False in this notebook's hparams 5/19

        # Frame pairs are concatenated on the channel axis, hence 2*C channels.
        self.encoder = Encoder(input_shape=[np.prod(self.input_shape[0:2])]+[self.input_shape[-3]*2]+self.input_shape[-2:],
                               nef=hparams.nef, n_layers=hparams.n_layers)
        # NOTE(review): the leading entry is D*N, but forward() feeds (D-1)*N
        # rows; the recorded run worked anyway, so Dense presumably ignores
        # that entry — confirm.
        out_shape = [np.prod(self.input_shape[0:2]),
                     hparams.nef * min(4, 2**(hparams.n_layers-1))]
        print(out_shape)
        if self.use_e_rnn:
            # forward() uses dense0 and rnn on this path, so they must exist.
            rnn_units = hparams.nef * 4
            self.dense0 = Dense(input_shape=out_shape, units=rnn_units)
            if hparams.rnn == 'lstm':
                self.rnn = nn.LSTM(input_size=rnn_units, hidden_size=rnn_units)
            elif hparams.rnn == 'gru':
                self.rnn = nn.GRU(input_size=rnn_units, hidden_size=rnn_units)
            else:
                raise NotImplementedError
            # The output heads now consume the RNN output width.
            out_shape = [out_shape[0], rnn_units]
        self.dense1 = Dense(input_shape=out_shape, units=hparams.nz)
        self.dense2 = Dense(input_shape=out_shape, units=hparams.nz)

    def forward(self, inputs):
        """Compute the posterior's mean and log-variance.

        Args:
            inputs: image tensor indexed as (D, N, C, H, W).

        Returns:
            dict with 'z_mu' and 'z_log_sigma_sq', each reshaped to
            (D-1, N, -1); 'z_log_sigma_sq' is clamped to [-10, 10].
        """
        outputs = {}
        # Concatenate each pair of consecutive frames on the channel axis 5/16
        inputs = torch.cat([inputs[:-1], inputs[1:]], dim=-3)
        # Read the sizes from the actual input rather than the shape given at
        # construction, so a different D or N still reshapes correctly.
        n_steps, batch = inputs.shape[0], inputs.shape[1]
        inputs = inputs.reshape([-1]+list(inputs.shape[-3:]))  ### now NCHW 5/22
        print(inputs.shape)
        ### adding actions is not implemented yet
        h = self.encoder(inputs)['output']
        if self.use_e_rnn:
            h = self.dense0(h)
            # The RNN takes (seq, batch, features) and returns (output, state).
            h, _ = self.rnn(h.reshape([n_steps, batch, -1]))
            h = h.reshape([n_steps * batch, -1])
        z_mu = self.dense1(h).reshape([n_steps, batch, -1])
        outputs['z_mu'] = z_mu
        z_log_sigma_sq = self.dense2(h).reshape([n_steps, batch, -1])
        z_log_sigma_sq = torch.clamp(z_log_sigma_sq, -10, 10)
        outputs['z_log_sigma_sq'] = z_log_sigma_sq
        return outputs
    
# The axis order [1,0,4,2,3] maps NDHWC to DNCHW 5/22, so this cell relies on
# `inputs` still being the NDHWC batch from the loading cell.
images = torch.tensor(np.transpose(inputs,[1,0,4,2,3]))
net = Posterior(input_shape=images.shape, hparams=parsed_hparams)
# Call the module itself rather than .forward() so nn.Module hooks run.
output = net(images)
for k, v in output.items():
    print(k, v.shape)


[180, 256]
torch.Size([174, 6, 160, 320])
z_mu torch.Size([29, 6, 8])
z_log_sigma_sq torch.Size([29, 6, 8])


In [3]:
# 测试encoder
# from modules.py
# liyi，2019/5/22

import functools
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from video_prediction.utils.max_sv import spectral_normed_weight
from video_prediction.layers.conv import Conv2d, Conv3d

class Encoder(nn.Module):
    """Strided-conv image encoder that average-pools to one vector per sample.

    Layer 0 is conv -> leaky_relu; each later layer is
    conv -> instance norm -> leaky_relu, with ``nef * min(2**i, 4)`` output
    channels for layer i.

    Args:
        input_shape: shape whose third-from-last entry is the number of
            input channels, e.g. (ND or DN, C, H, W) 5/22.
        nef: number of filters in the first conv layer 5/9.
        n_layers: total number of conv layers, including the first.

    ``forward`` takes an NCHW tensor (nn.Conv2d requires it) and returns a
    dict holding each layer's activation under 'encoder_<i>' (for
    visualization 5/8) and the pooled (N, channels) result under 'output'.

    ### whether in_channels should be 3 is still an open question 5/8
    """
    def __init__(self, input_shape, nef=64, n_layers=3):
        super(Encoder, self).__init__()
        self.input_shape = input_shape
        # Unused by this class; kept so existing attribute access still works.
        self.conv = {}
        self.norm = {}
        self.conv0 = nn.Conv2d(in_channels=self.input_shape[-3], 
                               out_channels=nef, kernel_size=4, stride=2, padding=(1,1))
        def make_sequence(in_channel, i):
            """Return ([conv, instance_norm], out_channel) for layer i."""
            out_channel = nef * min(2**i, 4)
            return [nn.Conv2d(
                        in_channels=in_channel,
                        out_channels=out_channel, 
                        kernel_size=4, stride=2,
                        padding=(1,1)),
                      nn.InstanceNorm2d(
                        num_features=out_channel,
                        eps=1e-6)], out_channel
        
        # Flat list [conv1, norm1, conv2, norm2, ...]; kept flat so parameter
        # names in state_dict stay the same.
        self.model_list = nn.ModuleList()
        in_channel = nef
        for i in range(1, n_layers):
            sequence, in_channel = make_sequence(in_channel, i)
            self.model_list += sequence
        
    def forward(self, inputs):
        """Encode an NCHW batch; see the class docstring for the returned dict."""
        outputs = {}
        output = self.conv0(inputs)
        output = F.leaky_relu(output, negative_slope=0.2)
        outputs['encoder_0'] = output     ### for visualization 5/8
        # Walk the flat list as (conv, norm) pairs so the activation is
        # applied once per layer, after the norm, and each layer yields
        # exactly one 'encoder_<i>' entry.
        modules = list(self.model_list)
        for n, (conv, norm) in enumerate(zip(modules[0::2], modules[1::2]), start=1):
            output = F.leaky_relu(norm(conv(output)), negative_slope=0.2)
            outputs['encoder_%d'%n] = output
        print('conv: ',output.shape)
        # Global average pool over the whole spatial extent.
        output = F.avg_pool2d(output, output.shape[2:])
        print('pool: ',output.shape)
        # Drop the two size-1 spatial axes -> (N, channels).
        output = output.squeeze(-1).squeeze(-1)
        outputs['output'] = output
        return outputs
    
# NDHWC to NDCHW 5/22. The result gets its own name: rebinding `inputs` made
# this cell non-idempotent (a second run transposed the already-transposed
# tensor) and broke later cells that expect `inputs` to be NDHWC.
images_ndchw = torch.tensor(np.transpose(inputs,axes=[0,1,4,2,3]))
print(images_ndchw.shape)
# Encode the frames of the first sample only.
net = Encoder(input_shape=images_ndchw[0].shape)
outputs = net(images_ndchw[0])
print(outputs['output'].shape)

torch.Size([6, 30, 320, 3, 160])
conv:  torch.Size([30, 256, 1, 20])
pool:  torch.Size([30, 256, 1, 1])
torch.Size([30, 256])




In [9]:
import tensorflow as tf
# Experiment: build a "[-1, <last ndims-1 dims>]" shape tensor and compare its
# static shape before and after set_shape.
ndims = 4
x = tf.zeros([2,12,160,320,3])

sess = tf.Session()
# The print of x1.get_shape() that used to sit here ran before x1 was
# defined and only worked with a leftover x1 in the kernel; the same value
# is printed right after x1 is created below.

shape = tf.shape(x)
print(sess.run(shape))

# -1 followed by the trailing (ndims-1) entries of x's dynamic shape.
x1 = tf.concat([[-1], shape[-(ndims-1):]], axis=0)
print(sess.run(x1))
print(x1.get_shape())

x1.set_shape([ndims])
print(sess.run(x1))
print(x1.get_shape())

#sess = tf.Session()
#print(sess.run(tf.shape(x1), feed_dict={x1:[0,1,2,3]}))
#x1.set_shape([2,2])
#print(sess.run(tf.shape(x1)))
#print(sess.run(tf.shape(x1), feed_dict={x1:[[0,1],[2,3]]}))

(4,)
[  2  12 160 320   3]
[ -1 160 320   3]
(4,)
[ -1 160 320   3]
(4,)


In [10]:
import torch
# Demo: reshape vs. view on a small 2x3 integer tensor.
a = torch.tensor([[1, 2, 4],
                  [2, 3, 4]])
b = a.reshape(-1, 2)  # row count inferred from the 6 elements
print(a, b, b.shape, sep='\n')
b = a.view(1, 6)      # all six elements in a single row
print(b)

tensor([[1, 2, 4],
        [2, 3, 4]])
tensor([[1, 2],
        [4, 2],
        [3, 4]])
torch.Size([3, 2])
tensor([[1, 2, 4, 2, 3, 4]])
