# Interval between saves of output images during training (the `_iter`
# suffix suggests the unit is iterations - confirm against the trainer).
image_save_iter: 5
# Interval between saves of trained models. The key below is commented out,
# so the visible part of this file sets no value for it.
#snapshot_save_epoch: 5
# Interval between logging of the training stats.
logging_iter: 100
# Number of training epochs.
max_epoch: 80
# Number of epochs spent training the single-frame generator.
single_frame_epoch: 0
# Number of epochs between each doubling of the number of training frames
# per clip.
num_epochs_temporal_step: 20

# Trainer options.
trainer:
    type: imaginaire.trainers.vid2vid
    # Letter "O" followed by the digit 1; quoted to make clear it is a string.
    amp: 'O1'
    # Model-averaging settings.
    model_average: True
    model_average_beta: 0.999
    model_average_start_iteration: 500
    model_average_batch_norm_estimation_iteration: 0
    # Number of videos to test and number of frames per video.
    num_videos_to_test: 64
    num_frames_per_video: 10

    # GAN objective.
    gan_mode: hinge
    gan_relativistic: False
    # Perceptual loss: five layers and five weights, presumably paired by
    # position - confirm against the loss implementation.
    perceptual_loss:
        mode: 'vgg19'
        layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1']
        weights: [0.03125, 0.0625, 0.125, 0.25, 1.0]
        num_scales: 3
    # Weight of each loss term. temporal_gan is set to 0.0 here.
    loss_weight:
        gan: 1.0
        feature_matching: 10.0
        temporal_gan: 0.0
        perceptual: 10.0
        flow: 10.0
    # Weight initialization.
    init:
        type: xavier
        gain: 0.02

# Optimization options.
# Optimizer settings for the generator.
gen_opt:
    type: adam
    lr: 0.0002
    adam_beta1: 0.5
    adam_beta2: 0.999
    # Learning-rate schedule. With iteration_mode set to False, step_size is
    # presumably counted in epochs rather than iterations - confirm.
    lr_policy:
        iteration_mode: False
        type: step
        step_size: 60
        gamma: 0.1
# Optimizer settings for the discriminator.
dis_opt:
    type: adam
    lr: 0.0002
    adam_beta1: 0.5
    adam_beta2: 0.999
    # Learning-rate schedule. With iteration_mode set to False, step_size is
    # presumably counted in epochs rather than iterations - confirm.
    lr_policy:
        iteration_mode: False
        type: step
        step_size: 60
        gamma: 0.1


# Model options.
# Generator. Inline "was N" and bare-number notes presumably record the
# earlier (larger) values of the settings they sit next to.
gen:
    type: imaginaire.generators.vid2vid
    num_layers: 7
    num_downsamples_img: 4
    num_filters: 8  # was 32
    max_num_filters: 256  # 1024
    kernel_size: 3
    activation_norm_type: spatially_adaptive
    # Parameters for the activation norm selected above.
    activation_norm_params:
        activation_norm_type: instance
        num_filters: 0
        kernel_size: 1
    weight_norm_type: spectral
    style_dims: 256
    use_segmap_as_input: True
    # Flow sub-network settings.
    flow:
        num_filters: 8  # was 32
        max_num_filters: 256  # 1024
        num_downsamples: 5
        num_res_blocks: 6
        activation_norm_type: instance
        weight_norm_type: spectral
        flow_output_multiplier: 40
        generate_raw_output: False
        multi_spade_combine:
            num_layers: 3
            embed:
                arch: unet
                num_filters: 8  # was 32
                num_downsamples: 5
                kernel_size: 3
                weight_norm_type: spectral
    # Embedding sub-network settings.
    embed:
        use_embed: True
        arch: encoderdecoder
        num_filters: 8  # was 32
        num_downsamples: 5
        kernel_size: 3
        weight_norm_type: spectral
     
# Discriminator. Only an image discriminator section is configured here.
dis:
    type: imaginaire.discriminators.fs_vid2vid
    image:
        num_filters: 16  # was 64
        max_num_filters: 64  # 512
        num_discriminators: 2
        num_layers: 3
        # Quoted: this is the literal string "none", not a YAML null.
        weight_norm_type: 'none'
        activation_norm_type: instance
# Flow network, given as a dotted path (presumably resolved to a Python
# module by the framework - confirm).
flow_network:
    type: imaginaire.third_party.flow_net.flow_net

# Training and validation data.
data:
    name: 'kitti'
    type: imaginaire.datasets.paired_videos
    num_frames_G: 3
    num_frames_D: 3

    num_workers: 4
    # Data types in the dataset, one list entry per type. The order of the
    # entries is kept exactly as it was.
    input_types:
        - images:
            ext: png
            num_channels: 3
            interpolator: BILINEAR
            normalize: True
        - seg_maps:
            ext: png
            num_channels: 35
            interpolator: NEAREST
            normalize: False

    # Which of the input types above serve as images and which as labels.
    input_image:
        - images
    input_labels:
        - seg_maps

    train:
        roots:
            - datasets/kitti/lmdb/train
        batch_size: 2
        initial_sequence_length: 2  # was 4
        max_sequence_length: 8  # was 4
        augmentations:
            resize_smallest_side: 512
            random_scale_limit: 0.2
            horizontal_flip: True
            # A single string holding two comma-separated numbers, not a list;
            # quoted to make that explicit.
            random_crop_h_w: '512, 1024'
    val:
        roots:
            - datasets/kitti/lmdb/val
        batch_size: 1
        augmentations:
            # A single string holding two comma-separated numbers, not a list.
            resize_h_w: '512, 1024'
            horizontal_flip: False

# Inference options.
#pretrained_weight: 1b2M5rU740vBurLQ9iDP2kb4sP5HAb-Jx

# Data used at inference time.
# NOTE(review): the name and root below point at cityscapes, while the
# training `data` section in this file is named 'kitti' - confirm this is
# intentional.
test_data:
    name: 'cityscapes'
    type: imaginaire.datasets.paired_videos
    num_workers: 4
    paired: True
    # Data types in the dataset, one list entry per type. The order of the
    # entries is kept exactly as it was (seg_maps first).
    input_types:
        - seg_maps:
            ext: png
            num_channels: 35
            interpolator: NEAREST
            normalize: False
        - images:
            ext: png
            num_channels: 3
            interpolator: BILINEAR
            normalize: True

    # Which of the input types above serve as images and which as labels.
    input_image:
        - images
    input_labels:
        - seg_maps

    test:
        is_lmdb: False
        roots:
            - projects/vid2vid/test_data/cityscapes
        batch_size: 1
        augmentations:
            # A single string holding two comma-separated numbers, not a list;
            # quoted to make that explicit.
            resize_h_w: '512, 1024'
            horizontal_flip: False