### CUDA info

In [None]:
import torch
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device, end='\n\n')

# Extra details are only reported for a CUDA device (device index 0).
if device.type == 'cuda':
    bytes_per_gb = 1024**3
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    for label, memory_stat in (('Allocated:', torch.cuda.memory_allocated),
                               ('Cached:   ', torch.cuda.memory_reserved)):
        print(label, round(memory_stat(0)/bytes_per_gb, 1), 'GB')

### Cloning repo

In [1]:
# NOTE(review): the clone URL embeds credentials; PASSWORD is a placeholder.
# Supply a real token only at run time and never save it in the notebook —
# git also keeps the full URL as the remote of the cloned repository.
! git clone https://BertyWooster:PASSWORD@github.com/BertyWooster/PSFRGAN_COPY.git

Cloning into 'PSFRGAN_COPY'...
remote: Enumerating objects: 213, done.[K
remote: Counting objects: 100% (213/213), done.[K
remote: Compressing objects: 100% (164/164), done.[K
remote: Total 213 (delta 40), reused 208 (delta 35), pack-reused 0[K
Receiving objects: 100% (213/213), 2.80 MiB | 10.73 MiB/s, done.
Resolving deltas: 100% (40/40), done.


In [2]:
# Work from the cloned repository root so that relative paths such as
# requirements.txt and ./pretrain_models/ resolve.
%cd /content/PSFRGAN_COPY/

/content/PSFRGAN_COPY


In [None]:
! pip install -r requirements.txt

### Loading weights and data

In [None]:
from google_drive_downloader import GoogleDriveDownloader as gdd

# Google Drive file id -> file name under pretrain_models/ for every artifact
# the pipeline loads. Files are stored as downloaded (unzip=False).
pretrain_dir = '/content/PSFRGAN_COPY/pretrain_models/'
pretrain_files = (
    ('1UhzMSORSul88iVfSYQYuEal83lEoNN7l', 'mmod_human_face_detector.dat'),
    ('1UiHeUHVNKiEq_s5dcBv6jez6TqrAduMg', 'shape_predictor_5_face_landmarks.dat'),
    ('1UcTeI_-YmQeNypHW98VRuIF13xEPmPWm', 'FFHQ_template.npy'),
    ('1Uxb-nw04fyjABEaZCgeYgGrR4fIu7VUf', 'psfrgan_latest_net_G.pth'),
    ('1Unn2eznkclqUhK_o50YUyaV7eoTE4Su3', 'parse_multi_iter_90000.pth'),
)

for drive_id, file_name in pretrain_files:
    gdd.download_file_from_google_drive(file_id=drive_id,
                                        dest_path=pretrain_dir + file_name,
                                        unzip=False)


In [None]:
# load test data

! mkdir /content/test_data/
! mkdir /content/test_data/vid/

# Google Drive ids of the test clips.
mkt_file_ID = "1oYSyd_6CCZrmQoZ15WRjEQoJtiQY8YPo"
mkt_fo_static_file_ID = "1w2kVvWLpQD6CkjeINxxt_WUv4oYVC3MB"
mkt_fo_active_file_ID = "1zTQ8t-xyGEaDuOm6Z1Pc4_ezMIhS6wZ2"
mkt_fo_active_file_512_ID = "1wBAcpnYrYStpUfi1A_pV0nJvlQmKmdLQ"
start_ID = "1q0P6o7ucKYBjSNK8P5_6QIPM5iXD_tJl"

# Each clip is stored under /content/test_data/vid/ exactly as downloaded.
test_clips = (
    (mkt_file_ID, 'mkt.mp4'),
    (mkt_fo_static_file_ID, 'mkt_fo_static.mp4'),
    (mkt_fo_active_file_ID, 'mkt_fo_active.mp4'),
    (mkt_fo_active_file_512_ID, 'mkt_fo_active_512.mp4'),
    (start_ID, 'start.mp4'),
)

for clip_id, clip_name in test_clips:
    gdd.download_file_from_google_drive(file_id=clip_id,
                                        dest_path='/content/test_data/vid/' + clip_name,
                                        unzip=False)

### Краткий пайплайн покадровой обработки - по скорости не оптимизирован

In [8]:
# NOTE(review): the `utils`, `options` and `models` imports below are presumably
# packages of the cloned repository, hence the change of directory — confirm.
%cd /content/PSFRGAN_COPY/
from tqdm import tqdm
import dlib
import os
import cv2
import numpy as np 
from tqdm import tqdm
from skimage import transform as trans
from skimage import io

import time
import torch
from utils import utils
from options.test_options import TestOptions
from models import create_model

/content/PSFRGAN_COPY


In [23]:
# functions 

def detect_and_align_faces(img, face_detector, lmk_predictor, template_path, template_scale=2, size_threshold=999):
    """Detect faces in ``img`` and warp each one onto the reference landmark template.

    Args:
        img: image passed to the detector, the landmark predictor and ``trans.warp``.
        face_detector: callable invoked as ``face_detector(img, 1)``. For a
            ``dlib.cnn_face_detection_model_v1`` the box is read from ``.rect``
            of each detection; for any other detector the detection itself is
            used as the box.
        lmk_predictor: callable invoked as ``lmk_predictor(img, box)``; its result
            must expose ``part(i)`` with ``x``/``y`` for ``i`` in 0..4.
        template_path: ``.npy`` file holding the reference landmark coordinates.
        template_scale: divisor applied to the loaded template coordinates.
        size_threshold: a box wider or taller than this ends the scan; faces that
            come after it in the detection list are not processed.

    Returns:
        ``[aligned_faces, tform_params]`` — the 512x512 warped crops (multiplied
        by 255) and the ``SimilarityTransform`` estimated for each of them.

    Raises:
        AssertionError: if the detector finds no face.
    """
    out_shape = (512, 512)
    template_pts = np.load(template_path) / template_scale

    detections = face_detector(img, 1)
    assert len(detections) > 0, 'No faces detected'

    # Only the CNN detector wraps its rectangle inside the detection object.
    uses_cnn_detector = isinstance(face_detector, dlib.cnn_face_detection_model_v1)

    crops, transforms = [], []
    for detection in detections:
        box = detection.rect if uses_cnn_detector else detection
        if max(box.width(), box.height()) > size_threshold:
            print('Face is too large')
            break

        shape = lmk_predictor(img, box)
        landmarks = np.array([[shape.part(k).x, shape.part(k).y] for k in range(5)])

        # Similarity transform taking the detected landmarks onto the template.
        similarity = trans.SimilarityTransform()
        similarity.estimate(landmarks, template_pts)

        warped = trans.warp(img, similarity.inverse, output_shape=out_shape, order=3)
        crops.append(warped*255)
        transforms.append(similarity)
    return [crops, transforms]

class Opt:
    """Plain options object handed to ``create_model`` by ``def_models``.

    Every setting is stored as an instance attribute with a fixed value.
    """

    def __init__(self):
        settings = {
            "model": "enhance",
            "device": "cuda",
            "gpu_ids": [0],
            "isTrain": False,
            "checkpoints_dir": "./check_points",
            "name": "experiment_name",
            "Pnorm": "bn",
            "data_device": "cuda",
            "Gin_size": 512,
            "Gout_size": 512,
            "Gnorm": 'spade',
            "D_num": 3,
            # Weight files, relative to the current working directory.
            "parse_net_weight": "./pretrain_models/parse_multi_iter_90000.pth",
            "psfr_net_weight": "./pretrain_models/psfrgan_latest_net_G.pth",
        }
        for key, value in settings.items():
            setattr(self, key, value)


def def_models():
    """Create the model from a fresh ``Opt``, load its pretrained weights and
    move the ``netP`` and ``netG`` networks to the configured device.

    Returns:
        The object returned by ``create_model``.
    """
    options = Opt()
    enhancer = create_model(options)
    enhancer.load_pretrain_models()
    for net_name in ("netP", "netG"):
        getattr(enhancer, net_name).to(options.device)
    return enhancer


def enhance_faces(LQ_faces, model):
    """Run the parsing network and the generator on every low-quality face.

    Args:
        LQ_faces: iterable of 3-D arrays whose last axis is moved to the front
            before inference (presumably H x W x C on a 0-255 scale — confirm
            against the caller).
        model: object exposing ``netP``, ``netG`` and ``device``.

    Returns:
        ``(hq_faces, lq_parse_maps)``: for each input face, the generator output
        converted by ``utils.tensor_to_img`` and the first item returned by
        ``utils.color_parse_map`` for its one-hot parse map.
    """
    restored, parse_previews = [], []
    for face in tqdm(LQ_faces):
        with torch.no_grad():
            channels_first = torch.tensor(face.transpose(2, 0, 1))
            # Map the 0..255 scale onto -1..1 and add a batch axis.
            normalized = channels_first / 255. * 2 - 1
            batch = normalized.unsqueeze(0).float().to(model.device)

            parse_scores, _ = model.netP(batch)
            # One-hot encode along dim 1: 1.0 wherever a value equals the maximum.
            top_scores, _ = parse_scores.max(dim=1, keepdim=True)
            onehot = (parse_scores == top_scores).float()

            _, generated = model.netG(batch, onehot)
        restored.append(utils.tensor_to_img(generated))
        parse_previews.append(utils.color_parse_map(onehot)[0])
    return restored, parse_previews


def past_faces_back(img, hq_faces, tform_params, upscale=1):
    """Paste enhanced faces back into (an optionally upscaled copy of) the frame.

    Args:
        img: the original frame; it is resized by ``upscale`` before pasting.
        hq_faces: enhanced face crops on a 0-255 scale, one per transform.
        tform_params: the transform produced for each crop during alignment.
            The transforms are NOT modified: rescaling for ``upscale`` is done
            on a private copy, so the same list can be reused across calls.
        upscale: scale factor of the output frame relative to ``img``.

    Returns:
        The composited frame as a ``uint8`` array.
    """
    import copy  # local import: the notebook's import cell does not provide it

    h, w = img.shape[:2]
    out_h, out_w = int(h*upscale), int(w*upscale)
    img = cv2.resize(img, (out_w, out_h), interpolation=cv2.INTER_CUBIC)
    for hq_img, tform in tqdm(zip(hq_faces, tform_params), total=len(hq_faces)):
        # warp() uses the transform to look up, for every output pixel, the
        # matching position in the face crop. Output pixels live in the upscaled
        # frame, so the linear part is divided by `upscale`. Doing this on the
        # caller's object would compound the scaling on every further call.
        scaled_tform = copy.deepcopy(tform)
        scaled_tform.params[0:2,0:2] /= upscale
        back_img = trans.warp(hq_img/255., scaled_tform, output_shape=[out_h, out_w], order=3) * 255

        # blur mask to avoid border artifacts: pixels that are exactly zero in
        # the warped face keep the original frame, and the 5x5 blur widens that
        # region by a couple of pixels around the face border.
        mask = (back_img == 0)
        mask = cv2.blur(mask.astype(np.float32), (5,5))
        mask = (mask > 0)
        img = img * mask + (1 - mask) * back_img
    return img.astype(np.uint8)


def save_imgs(img_list, save_dir):
    """Write each image of ``img_list`` into ``save_dir`` as ``000.jpg``, ``001.jpg``, ...

    Images are cast to ``uint8`` before being saved; ``save_dir`` must already exist.
    """
    for position, image in enumerate(img_list):
        file_name = f'{position:03d}.jpg'
        target = os.path.join(save_dir, file_name)
        io.imsave(target, image.astype(np.uint8))

class VideoProcesser:
    """Frame-by-frame face enhancement of a video, driven by ffmpeg.

    Intended call order: ``read_vid`` -> ``preprocess`` -> ``process``.

    ``working_dir`` holds the copied video together with its extracted frames
    (``1.jpg``, ``2.jpg``, ...) and audio track; ``res_dir`` receives
    ``p_<name>`` (enhanced video without sound) and ``fp_<name>`` (enhanced
    video with the original audio).

    External commands are run without a shell and from argument lists, so paths
    containing spaces or shell metacharacters are handled correctly. A failing
    command is reported on stdout instead of raising.
    """

    # Frame rate used when the rate of the source video cannot be read.
    FALLBACK_FPS = 30

    def __init__(self):
        self.working_dir = "/content/w_dir/"
        self.res_dir = "/content/results/"
        # Model files are addressed relative to the current working directory.
        self.face_detector = dlib.cnn_face_detection_model_v1('./pretrain_models/mmod_human_face_detector.dat')
        self.lmk_predictor = dlib.shape_predictor('./pretrain_models/shape_predictor_5_face_landmarks.dat')
        self.template_path = './pretrain_models/FFHQ_template.npy'
        self.enhance_model = def_models()
        os.makedirs(self.working_dir, exist_ok=True)
        os.makedirs(self.res_dir, exist_ok=True)

    @staticmethod
    def _run(args):
        """Run an external command given as an argument list; return its exit code.

        Failures (non-zero exit code or missing executable) are printed, not raised.
        """
        import subprocess  # local import: the notebook's import cell does not provide it

        try:
            returncode = subprocess.run(args).returncode
        except OSError as error:
            print("Could not start command:", " ".join(args), "-", error)
            return 127
        if returncode != 0:
            print("Command failed with exit code", returncode, "-", " ".join(args))
        return returncode

    @staticmethod
    def _frame_names(directory):
        """Return the ``<number>.jpg`` file names found in ``directory``, in numeric order."""
        names = []
        for name in os.listdir(directory):
            stem, ext = os.path.splitext(name)
            if ext == ".jpg" and stem.isascii() and stem.isdigit():
                names.append(name)
        return sorted(names, key=lambda name: int(os.path.splitext(name)[0]))

    def _clear_frames(self, directory):
        """Delete every ``<number>.jpg`` frame in ``directory``."""
        for name in self._frame_names(directory):
            os.remove(os.path.join(directory, name))

    def _video_path(self):
        """Path of the working copy of the current video."""
        return os.path.join(self.working_dir, self.v_name)

    def _audio_path(self):
        """Path of the WAV file extracted from the current video."""
        return os.path.join(self.working_dir, os.path.splitext(self.v_name)[0] + ".wav")

    def _source_fps(self):
        """Frame rate of the working copy, or ``FALLBACK_FPS`` when it cannot be read."""
        capture = cv2.VideoCapture(self._video_path())
        try:
            fps = capture.get(cv2.CAP_PROP_FPS)
        finally:
            capture.release()
        # An unreadable file can report 0 (or NaN); both fail the `> 0` test.
        return fps if fps and fps > 0 else self.FALLBACK_FPS

    def read_vid(self, vid_path, to_512=False):
        """Copy ``vid_path`` into the working directory and remember its file name.

        Args:
            vid_path: path of the source video.
            to_512: when true, the working copy is rescaled to 512x512 with
                ffmpeg. If the rescale fails, the unscaled copy is kept.
        """
        import shutil  # local import: the notebook's import cell does not provide it

        self.v_name = os.path.basename(vid_path)
        local_path = self._video_path()
        try:
            shutil.copy(vid_path, local_path)
        except shutil.SameFileError:
            # The source already is the working copy; nothing to do.
            pass

        if to_512:
            tmp_path = os.path.join(self.working_dir, "tmp.mp4")
            scale_cmd = ["ffmpeg", "-y", "-i", local_path, "-vf", "scale=512:512", tmp_path]
            # Replace the working copy only when ffmpeg actually produced the result.
            if self._run(scale_cmd) == 0:
                os.replace(tmp_path, local_path)

    def preprocess(self):
        """Split the working copy into numbered JPEG frames and a WAV audio track."""
        # Frames or audio left over from a previous video must not leak into this run.
        self._clear_frames(self.working_dir)
        audio_path = self._audio_path()
        if os.path.exists(audio_path):
            os.remove(audio_path)

        video_path = self._video_path()
        self._run(["ffmpeg", "-y", "-i", video_path, os.path.join(self.working_dir, "%d.jpg")])
        self._run(["ffmpeg", "-y", "-i", video_path, audio_path])

    def process(self):
        """Enhance every extracted frame and assemble the result videos.

        Frames in which no face is detected are written through unchanged.
        Afterwards the frames are encoded at the source frame rate into
        ``p_<name>``; if an audio track was extracted it is added to produce
        ``fp_<name>``. Timing statistics for alignment and enhancement are printed.
        """
        frame_names = self._frame_names(self.working_dir)
        if not frame_names:
            print("No frames found in", self.working_dir, "- call read_vid() and preprocess() first")
            return

        # Frames left in res_dir by an interrupted run would end up in the new video.
        self._clear_frames(self.res_dir)

        align_time = []
        enhance_time = []
        frames_without_faces = 0
        for im_name in tqdm(frame_names):
            img = dlib.load_rgb_image(os.path.join(self.working_dir, im_name))

            t_al1 = time.time()
            try:
                aligned_faces, tform_params = detect_and_align_faces(
                    img, self.face_detector, self.lmk_predictor, self.template_path)
            except AssertionError:
                # detect_and_align_faces asserts that at least one face was found.
                # One face-less frame must not abort the whole video, so the frame
                # goes through the rest of the pipeline with no faces to replace.
                aligned_faces, tform_params = [], []
                frames_without_faces += 1
            align_time.append(time.time() - t_al1)

            t_en1 = time.time()
            hq_faces, _ = enhance_faces(aligned_faces, self.enhance_model)
            enhance_time.append(time.time() - t_en1)

            hq_img = past_faces_back(img, hq_faces, tform_params, upscale=1)
            io.imsave(os.path.join(self.res_dir, im_name), hq_img)

        silent_path = os.path.join(self.res_dir, "p_" + self.v_name)
        final_path = os.path.join(self.res_dir, "fp_" + self.v_name)
        audio_path = self._audio_path()

        # Encode at the frame rate of the source so the video stays in sync with its audio.
        fps = self._source_fps()
        self._run(["ffmpeg", "-y", "-framerate", str(fps),
                   "-i", os.path.join(self.res_dir, "%d.jpg"), silent_path])
        if os.path.exists(audio_path):
            self._run(["ffmpeg", "-y", "-i", silent_path, "-i", audio_path, final_path])
        else:
            print("No audio track was extracted; skipping", final_path)
        self._clear_frames(self.res_dir)

        if frames_without_faces:
            print("frames without detected faces:", frames_without_faces)
        align_mean = np.mean(np.array(align_time))
        align_sum = np.sum(np.array(align_time))
        enhance_mean = np.mean(np.array(enhance_time))
        enhance_sum = np.sum(np.array(enhance_time))
        print("align fps ±", 1./align_mean)
        print("total align is", align_sum)
        print("enhance fps ±", 1./enhance_mean)
        print("total enhance is", enhance_sum)


In [25]:
%%time
# ! rm -rf /content/w_dir
# ! rm -rf /content/results

CPU times: user 4.22 ms, sys: 52.6 ms, total: 56.8 ms
Wall time: 316 ms


### Set the video path and process it!

In [None]:
# VideoProcesser loads its model files from ./pretrain_models/, so run from the repo root.
%cd /content/PSFRGAN_COPY/
processor = VideoProcesser()
# Copy the clip into the working directory and rescale it to 512x512.
processor.read_vid('/content/test_data/vid/start.mp4', to_512=True)
# Split the clip into numbered JPEG frames and a WAV audio track.
processor.preprocess()
# Enhance every frame, re-encode the video and add the audio back.
processor.process()