# Frequency of saving output images during training.
image_save_iter: 5
# Frequency of saving trained models (this setting is commented out).
# snapshot_save_epoch: 5
# Frequency of logging the training statistics.
logging_iter: 100
# Total number of training epochs.
max_epoch: 80
# Number of epochs spent training the single-frame generator.
single_frame_epoch: 0
# Interval at which the number of training frames in each clip is doubled.
num_epochs_temporal_step: 20

# Trainer options.
trainer:
  type: imaginaire.trainers.vid2vid
  amp: O1
  model_average: True
  model_average_beta: 0.999
  model_average_start_iteration: 500
  model_average_batch_norm_estimation_iteration: 0
  num_videos_to_test: 64
  num_frames_per_video: 10
  gan_mode: hinge
  gan_relativistic: False
  perceptual_loss:
    mode: 'vgg19'
    # One weight per layer, listed in the same order.
    layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1']
    weights: [0.03125, 0.0625, 0.125, 0.25, 1.0]
    # NOTE(review): assumed to belong to perceptual_loss rather than to
    # trainer itself -- confirm against the trainer's expected schema.
    num_scales: 3
  # Relative weight of each loss term.
  loss_weight:
    gan: 1.0
    feature_matching: 10.0
    temporal_gan: 0.0
    perceptual: 10.0
    flow: 10.0
  # NOTE(review): assumed to be nested under trainer -- confirm.
  init:
    type: xavier
    gain: 0.02

# Optimization options (generator and discriminator use the same values).
gen_opt:
  type: adam
  lr: 0.0002
  adam_beta1: 0.5
  adam_beta2: 0.999
  lr_policy:
    iteration_mode: False
    type: step
    step_size: 60
    gamma: 0.1
dis_opt:
  type: adam
  lr: 0.0002
  adam_beta1: 0.5
  adam_beta2: 0.999
  lr_policy:
    iteration_mode: False
    type: step
    step_size: 60
    gamma: 0.1

# Model options.
# Generator.
# NOTE(review): the bare numbers in trailing comments (e.g. "# 1024") look
# like earlier values of the setting, like the "# was N" notes -- confirm.
gen:
  type: imaginaire.generators.vid2vid
  num_layers: 7
  num_downsamples_img: 4
  num_filters: 8  # was 32
  max_num_filters: 256  # 1024
  kernel_size: 3
  activation_norm_type: spatially_adaptive
  activation_norm_params:
    activation_norm_type: instance
    num_filters: 0
    kernel_size: 1
  # NOTE(review): assumed to be a generator-level key rather than part of
  # activation_norm_params -- confirm.
  weight_norm_type: spectral
  style_dims: 256
  use_segmap_as_input: True
  # Settings of the generator's "flow" sub-section.
  flow:
    num_filters: 8  # was 32
    max_num_filters: 256  # 1024
    num_downsamples: 5
    num_res_blocks: 6
    activation_norm_type: instance
    weight_norm_type: spectral
    flow_output_multiplier: 40
    generate_raw_output: False
    multi_spade_combine:
      num_layers: 3
      embed:
        arch: unet
        num_filters: 8  # was 32
        num_downsamples: 5
        kernel_size: 3
        weight_norm_type: spectral
  # NOTE(review): assumed to sit directly under gen (it cannot share a
  # mapping with multi_spade_combine's own "embed" key) -- confirm.
  embed:
    use_embed: True
    arch: encoderdecoder
    num_filters: 8  # was 32
    num_downsamples: 5
    kernel_size: 3
    weight_norm_type: spectral

# Discriminator.
dis:
  type: imaginaire.discriminators.fs_vid2vid
  image:
    num_filters: 16  # was 64
    max_num_filters: 64  # 512
    num_discriminators: 2
    num_layers: 3
    # The plain scalar "none" is a string in YAML, not a null.
    weight_norm_type: none
    activation_norm_type: instance

flow_network:
  type: imaginaire.third_party.flow_net.flow_net

# Data options.
data:
  name: 'kitti'
  type: imaginaire.datasets.paired_videos
  num_frames_G: 3
  num_frames_D: 3
  num_workers: 4
  # One single-key mapping per input type.
  input_types:
    - images:
        ext: png
        num_channels: 3
        interpolator: BILINEAR
        normalize: True
    - seg_maps:
        ext: png
        num_channels: 35
        interpolator: NEAREST
        normalize: False
  # Both lists refer to names declared in input_types above.
  input_image:
    - images
  input_labels:
    - seg_maps
  train:
    roots:
      - datasets/kitti/lmdb/train
    batch_size: 2
    initial_sequence_length: 2  # was 4
    max_sequence_length: 8  # was 4
    augmentations:
      resize_smallest_side: 512
      random_scale_limit: 0.2
      horizontal_flip: True
      # Unquoted and unbracketed, so YAML loads this as the single string
      # "512, 1024" rather than as a list.
      random_crop_h_w: 512, 1024
  val:
    roots:
      - datasets/kitti/lmdb/val
    batch_size: 1
    augmentations:
      # Loaded as the single string "512, 1024" (see random_crop_h_w).
      resize_h_w: 512, 1024
      horizontal_flip: False

# Inference options.
# Pretrained weight setting (commented out).
# pretrained_weight: 1b2M5rU740vBurLQ9iDP2kb4sP5HAb-Jx

# Test data options.
test_data:
  name: 'cityscapes'
  type: imaginaire.datasets.paired_videos
  num_workers: 4
  paired: True
  # One single-key mapping per input type.
  input_types:
    - seg_maps:
        ext: png
        num_channels: 35
        interpolator: NEAREST
        normalize: False
    - images:
        ext: png
        num_channels: 3
        interpolator: BILINEAR
        normalize: True
  # Both lists refer to names declared in input_types above.
  input_image:
    - images
  input_labels:
    - seg_maps
  test:
    is_lmdb: False
    roots:
      - projects/vid2vid/test_data/cityscapes
    batch_size: 1
    augmentations:
      # Unquoted and unbracketed, so YAML loads this as the single string
      # "512, 1024" rather than as a list.
      resize_h_w: 512, 1024
      horizontal_flip: False