In [1]:
%load_ext autoreload
%autoreload 2

import sys

sys.path.append('../')

import src.models as models
import src.transform as transform
import src.video as video
import os
import logging
from tqdm import tqdm
import numpy as np

  warn(


In [15]:
# Init logging
logger = logging.getLogger('styleTransfer')
logger.setLevel(logging.INFO)
logger.addHandler(logging.FileHandler("my_log.log", mode='w'))

# Video Extraction

In [9]:
# Clean the stuff from the last run
video.clean_dir('../res/atla/')
n_frames, fps = video.extract_video('../res/atla.mp4', '../res/atla/', n_frames=6000)

100%|██████████| 6000/6000 [02:28<00:00, 40.44it/s]


Successfully read and saved  5999 frames


# Running Style Transfer

In [3]:
vgg19 = models.vgg19(weights=models.vgg_19_default_weights).features.eval()
mean = models.vgg19_normalization_mean
std = models.vgg19_normalization_std

In [4]:
# Greatest entry with frame < end_frame is the current step. frame
sigmoid = lambda t, f: 1 - 1 / (1 + np.exp(-(10*t/f - 5)))
linear = lambda t, f: 1 - t/f


res_path = '../res/style'
keypoints = [
    # start_frame, End Frame, path to style image 1,   path to style image 2,             interpolation function
    (0, 500,      'starry_night.jpg',        'mona_lisa.jpg',        sigmoid),
    (500, 1000,      'mona_lisa.jpg',           'mosaic1.jpg',          sigmoid),
    (1000, 1800,      'mosaic1.jpg',             'picasso.jpg',          sigmoid),
    (1800, 2600,      'picasso.jpg',             'the_scream.jpg',       sigmoid),
    (2600, 4000,      'the_scream.jpg',          'stained_glass.jpg',    sigmoid),
    (4000, 4500,      'stained_glass.jpg',       'mosaic2.jpg', sigmoid),
    (4500, 5000,       'mosaic2.jpg', None, linear) # Until the end
]

In [13]:
video.clean_dir('../res/output')

In [16]:
frame = 0
for img in tqdm(os.listdir('../res/atla')):

    # Only consider the jpg files output by the video decoder
    if not img.endswith('.jpg'):
        continue

    img_num = int(img[5:-4])
    # Skip the first 1850
    if img_num < 1850:
        continue

    # Every other frame is identical so dont waste computation time (still advance the frame counter tho)
    if img_num % 2 == 0:
        frame += 1
        continue

    # End the video after zuko passes tf out
    if img_num > 5159:
        break

    # Find the relevant keypoint
    kp = keypoints[0]
    for k in keypoints:
        if k[0] > frame:
            break
        kp = k

    start_frame, end_frame, img1, img2, f = kp
    t = frame - start_frame
    lamb = f(t, end_frame - start_frame)

    logger.info(f'Doing Frame {frame} with lamb={lamb} on styles {img1}, {img2}')

    assert lamb >= 0 and lamb <= 1

    content_img = transform.load_image(os.path.join('../res/atla', img))
    style_img_1 = transform.load_image(os.path.join(res_path, img1))
    style_img_2 = None
    if img2 is not None:
        style_img_2 = transform.load_image(os.path.join(res_path, img2))
        assert style_img_1.size() == style_img_2.size()
    assert content_img.size() == style_img_1.size()

    output = transform.run_style_transfer(vgg19, 
                             mean,
                             std,
                             content_img,
                             style_img_1,
                             style_img_2,
                             lamb=lamb)
    output_img = transform.to_PIL(output)
    output_img.save(f'../res/output/frame{frame:04d}.png')
    frame +=1

 38%|███▊      | 2264/6000 [1:14:50<11:10:24, 10.77s/it]

# Video Encoding

In [None]:
video.encode_video('../res/output', '../output.mp4', fps=fps/2)

In [None]:
# Don't need to store the video in a lossy format 3 times
video.clean_dir('../res/atla')
video.clean_dir('../res/output')