## RESULTS GENERATED ON A GTX GeForce 1060 6gb version (no fp16 support)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import os
import pix2pix
import cv2
import time
from pix2pix import models
import numpy as np
import tqdm
from torch2trt import torch2trt
from pix2pix import config
from skimage.metrics import structural_similarity, peak_signal_noise_ratio

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
gt_path = 'videos/input_video_gt.mp4'
noisy_path = 'videos/input_video_noisy.mp4'

In [5]:
class img_dataset(torch.utils.data.Dataset):
    def __init__(self, imgs):
        self.imgs = imgs

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.to_list()
        
        inp = torch.Tensor(self.imgs[idx])
        inp = (inp / (255 / 2)) - 1
        inp = inp.permute((2, 0, 1))
        
        return inp    

In [6]:
def load_model_as_trt(model_state_dict_path, max_bs, num_init_filters):
    model_param_dict = dict(input_nc=3, output_nc=3, 
                            num_init_filters=num_init_filters)
    model = models.unet(norm_layer=torch.nn.InstanceNorm2d, **model_param_dict)
    model = torch.nn.DataParallel(model)
    model.load_state_dict(torch.load(model_state_dict_path))
    model = model.module
    model.eval()
    
    print ('number of params in model: ', sum(p.numel() for p in model.parameters()))
    print ('converting model to trt, this will take a bit of time')
    
    model_trt = torch2trt(model, [torch.ones(1, 3, 256, 256).to(device)], max_batch_size=max_bs)    
    print ('done')
    
    return model_trt

In [7]:
def get_frames_for_vid(vid_path):
    vid = cv2.VideoCapture(vid_path)
    frame_list = []
    
    while vid.isOpened():
        ret, frame = vid.read()

        if ret == False:
            break

        frame_list.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 
    
    vid.release()    
    
    return frame_list

def get_metrics_save_comparison_vid(gt_path, predicted_path, noisy_path, op_save_path):
    gt_frames = get_frames_for_vid(gt_path)
    pred_frames = get_frames_for_vid(predicted_path)
    noisy_frames = get_frames_for_vid(noisy_path)
    
    fourcc = cv2.VideoWriter_fourcc(*'mp4v') 
    output_vid_writer = cv2.VideoWriter(op_save_path, fourcc, fps, (width * 3, height))
        
    # print (len(pred_frames), len(gt_frames), len(noisy_frames))
    
    ssim_list = []
    psnr_list = []
    
    for fp, fg, fi in tqdm.tqdm(zip(pred_frames, gt_frames, noisy_frames), total = len(pred_frames)):
        cv2.putText(fi, "INPUT", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0], 2)
        cv2.putText(fp, "PREDICTED", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0], 2)
        cv2.putText(fg, "GROUD TRUTH", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0], 2)
        
        ssim_list.append(structural_similarity(fp, fg, multichannel=True, dynamic_range=255))
        psnr_list.append(peak_signal_noise_ratio(fp, fg, data_range=255))
        
        combined = np.hstack([fi, fp, fg])
        output_vid_writer.write(cv2.cvtColor(combined, cv2.COLOR_RGB2BGR))  
        
    output_vid_writer.release()
    print ('comparison video written to', op_save_path)
    
    return np.mean(ssim_list), np.mean(psnr_list)

In [8]:
def predict_on_video(model_trt, input_vid_reader, output_vid_writer, batch_size):
    frame_list = []
    
    while input_vid_reader.isOpened():
        ret, frame = input_vid_reader.read()

        if ret == False:
            break

        frame_list.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 
    
    with torch.no_grad():
        img_dataloader = torch.utils.data.DataLoader(img_dataset(frame_list), 
                                                 batch_size = batch_size,
                                                 shuffle = False,
                                                num_workers = 1)

        preds_list = []
    
        for ip in tqdm.tqdm(img_dataloader, total = len(img_dataloader)):    
            ip = ip.to(device)
            preds = model_trt(ip).detach().cpu()

            preds_list.extend(list(preds))
            torch.cuda.empty_cache()
    
    for pred in preds_list:
        pred = ((pred + 1) / 2).permute(1, 2, 0)
        pred = (pred.numpy() * 255).astype('uint8')

        output_vid_writer.write(cv2.cvtColor(pred, cv2.COLOR_RGB2BGR))  
    
    output_vid_writer.release()

# model with num_init_filters = 32

In [9]:
state_dict_path = '../../models/pix2pix_32_13112021_183033/checkpoints/gen_epoch_12_pix2pix_32_13112021_183033.pt'
num_init_filters = 32
# with max_bs > 256, I get errors when converting to a tensort model on my GPU
max_bs = 256

model = load_model_as_trt(model_state_dict_path=state_dict_path,
          num_init_filters=num_init_filters,
          max_bs = max_bs)

number of params in model:  13603328
converting model to trt, this will take a bit of time
done


In [15]:
time_list = []
benchmark_num = 5
output_path = 'videos/predicted_final_32.mp4'

for i in range(0, benchmark_num):
    input_vid_reader = cv2.VideoCapture(noisy_path)
    fps = input_vid_reader.get(cv2.CAP_PROP_FPS)
    width = int(input_vid_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(input_vid_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v') 
    output_vid_writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    
    
    start = time.time()
    predict_on_video(model_trt=model, input_vid_reader=input_vid_reader, 
                     output_vid_writer=output_vid_writer, batch_size = 128)
    end = time.time()

    time_list.append(end - start)
    torch.cuda.empty_cache()

print (f'wrote final predicted video to {output_path}')
mean_ssim, mean_psnr = get_metrics_save_comparison_vid(gt_path=gt_path, predicted_path=output_path, noisy_path=noisy_path, op_save_path = 'videos/predicted_final_32_comparison_vid.mp4')

100%|██████████| 12/12 [00:04<00:00,  2.83it/s]
100%|██████████| 12/12 [00:04<00:00,  2.88it/s]
100%|██████████| 12/12 [00:04<00:00,  2.80it/s]
100%|██████████| 12/12 [00:04<00:00,  2.85it/s]
100%|██████████| 12/12 [00:04<00:00,  2.71it/s]


wrote final predicted video to videos/predicted_final_32.mp4


100%|██████████| 1500/1500 [00:23<00:00, 63.43it/s]


comparison video written to videos/predicted_final_32_comparison_vid.mp4


In [16]:
print (f'Avg time taken ({benchmark_num} runs) for a video of 1 min @ 25fps(256 pix by 256 pix): {np.mean(time_list):.3f} seconds')
print (f'mean SSIM: {mean_ssim:.3f} mean PSNR: {mean_psnr:.3f}')

Avg time taken (5 runs) for a video of 1 min @ 25fps(256 pix by 256 pix): 6.360 seconds
mean SSIM: 0.744 mean PSNR: 22.692


# model with num_init_filters = 64

In [17]:
del model
torch.cuda.empty_cache()

In [18]:
state_dict_path = '../../models/pix2pix_64_11112021_115558/checkpoints/gen_epoch_2_pix2pix_64_11112021_115558.pt'
num_init_filters = 64
# with max_bs > 128, I get errors when converting to a tensort model on my GPU
max_bs = 128

model = load_model_as_trt(model_state_dict_path=state_dict_path,
          num_init_filters=num_init_filters,
          max_bs = max_bs)

number of params in model:  54404096
converting model to trt, this will take a bit of time
done


In [19]:
time_list = []
benchmark_num = 5
output_path = 'videos/predicted_final_64.mp4'

for i in range(0, benchmark_num):
    input_vid_reader = cv2.VideoCapture(noisy_path)
    fps = input_vid_reader.get(cv2.CAP_PROP_FPS)
    width = int(input_vid_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(input_vid_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v') 
    output_vid_writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    
    
    start = time.time()
    predict_on_video(model_trt=model, input_vid_reader=input_vid_reader, 
                     output_vid_writer=output_vid_writer, batch_size = 128)
    end = time.time()

    time_list.append(end - start)
    torch.cuda.empty_cache()

print (f'wrote final predicted video to {output_path}')
mean_ssim, mean_psnr = get_metrics_save_comparison_vid(gt_path=gt_path, predicted_path=output_path, noisy_path=noisy_path, op_save_path = 'videos/predicted_final_64_comparison_vid.mp4')

100%|██████████| 12/12 [00:10<00:00,  1.11it/s]
100%|██████████| 12/12 [00:10<00:00,  1.12it/s]
100%|██████████| 12/12 [00:10<00:00,  1.12it/s]
100%|██████████| 12/12 [00:10<00:00,  1.12it/s]
100%|██████████| 12/12 [00:10<00:00,  1.12it/s]


wrote final predicted video to videos/predicted_final_64.mp4


100%|██████████| 1500/1500 [00:24<00:00, 62.21it/s]

comparison video written to videos/predicted_final_64_comparison_vid.mp4





In [20]:
print (f'Avg time taken ({benchmark_num} runs) for a video of 1 min @ 25fps(256 pix by 256 pix): {np.mean(time_list):.3f} seconds')
print (f'mean SSIM: {mean_ssim:.3f} mean PSNR: {mean_psnr:.3f}')

Avg time taken (5 runs) for a video of 1 min @ 25fps(256 pix by 256 pix): 12.729 seconds
mean SSIM: 0.739 mean PSNR: 22.543
