# Dynamic optimization

## Intro

![title](res/FLOWCHART_videodynopt_v0.1.png)

## Init

In [1]:
# imports
import os #to access system folders
import subprocess #to access ffmpeg in the system
import shutil #to remove directories
import numpy as np #easy vector operations
import math #for operation with infinite
from scipy.optimize import curve_fit #fittin of the curve
import json #to handle json files
import matplotlib.pyplot as pl #to display plots
import tkinter as tk #to import file
from tkinter import filedialog #to open import dialog

#constants
#per-codec settings: "crfs" is the number of CRF slots in the results matrix,
#"starting_range" the two CRFs the search encodes first, "lib" the ffmpeg
#encoder, "container" the output file extension and "add_param" extra
#arguments appended to the encode command
PARAM_AVC = {"crfs": 52, "starting_range": [17,28], "lib": "libx264", "container": "mp4", "add_param": ""}
PARAM_HEVC = {"crfs": 52, "starting_range": [22,33], "lib": "libx265", "container": "mp4", "add_param": ""}
PARAM_VP9 = {"crfs": 64, "starting_range": [15,35], "lib": "libvpx-vp9", "container": "webm", "add_param": "-b:v 0"}

#variables
codec = "avc" #values with a parameter set above: "avc", "hevc", "vp9" ("av1" and "vvc" are not implemented yet)
raw_width = 1280
raw_height = 720
raw_fps = 24


def _clear_directory(path):
    """Create `path` if it is missing and delete everything it contains."""
    os.makedirs(path, exist_ok=True)
    for entry in os.listdir(path):
        entry_path = path + entry
        #rmtree only works on real directories, plain files and links need os.remove
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            os.remove(entry_path)


#input file
root = tk.Tk()
root.withdraw()
selected_file = filedialog.askopenfilename()
root.destroy() #release the hidden Tk window once the dialog has been closed
if not selected_file: #the dialog was cancelled
    raise ValueError("no input file selected")
source_path = os.path.relpath(selected_file)
#splitext keeps dots that are part of the name ("a.b.y4m" -> "a.b")
source_name = os.path.splitext(os.path.basename(source_path))[0]
REF_PATH = "test_vids/tempRAW_refs/" #raw files for each shot
_clear_directory(REF_PATH) #clean temp_refs folder
DIST_PATH = "test_vids/temp_encoded/" #encoded files for each shot
_clear_directory(DIST_PATH) #clean temp_encoded folder

#assessment files path
tm_file = "rd_results/template.json"
rd_file = "rd_results/" + source_name + ".json"
if not os.path.isfile(rd_file):
    with open(rd_file, 'w') as f: #create an empty results file
        pass
VMAF_LOGS = "rd_results/vmaf_logs"

#shot detection
TIME_LOGS = "shot_detection.log"
#NOTE(review): ffmpeg documents the scene score as a value between 0 and 1, so
#with a threshold of 1 "gt(scene,1)" can never be true and the whole input is
#handled as a single shot - lower the value to actually split the video
shot_th = 1 #change shot threshold
num_scenes = 0
tot_duration = 0.0

#output file
file_list = ""
FILE_LIST_PATH = "shot_list.txt"
OUT_PATH = "test_vids/OPT_vids/"
os.makedirs(OUT_PATH, exist_ok=True) #ffmpeg does not create the output folder

current_point = None #the current optimum point
new_point = 0 #new value to compare with current_point
#all computed points, by row: crf, bitrate, vmaf, psnr
res_matrix = {"crf": None, "bitrate": None, "vmaf": None, "psnr": None} 

flag_target = True #values True (quality) or False (bitrate)
quality_metric = "vmaf" #values "vmaf", "psnr", "ssim", "mssim"
#TODO implement more quality metrics
target_bitrate = [12000000]
target_quality = [96]

print("init done")

init done


## Shot change detection

In [2]:
#reset the shared results structure so it can be filled with computed values
def init_res_matrix(x):
    """Fill the global res_matrix with x empty slots, one per CRF value.

    "crf" becomes [0, 1, ..., x-1]; "bitrate", "vmaf" and "psnr" each become
    an independent list of x infinities, so that a slot which has not been
    computed yet is never mistaken for a real (zero) measurement.
    """
    res_matrix["crf"] = np.arange(0,x,1).tolist()
    for metric in ("bitrate", "vmaf", "psnr"):
        #a fresh list per metric, the lists must not share storage
        res_matrix[metric] = np.full(x, math.inf).tolist()
    
#detect shot changes in the scene and split it into shots
def shot_change_detection(p):
    """Split the source video into one raw file per detected shot.

    ffmpeg scene detection is run on `p` with threshold shot_th and its log is
    written to TIME_LOGS. The total duration is read with ffprobe so that the
    last shot ends at the end of the video. For every cut a folder
    DIST_PATH/<index> is created and the shot is written as
    REF_PATH/scene<index>.yuv in yuv420p.

    The commands are passed to subprocess as argument lists (no shell), so a
    source path containing spaces or shell metacharacters is handled correctly.

    p: path of the source video
    returns: the number of shots
    """
    start_t = 0.0
    #return when the shot changes
    scene_filter = f"select='gt(scene,{shot_th})',metadata=print:file={TIME_LOGS}"
    det = ["ffmpeg", "-i", p, "-filter_complex:v", scene_filter, "-f", "null", "-"]
    subprocess.call(det)
    #get the total duration for the last cut
    idu = ["ffprobe", "-v", "error", "-select_streams", "v:0",
           "-show_entries", "format:stream", "-print_format", "json", p]
    dta = json.loads(subprocess.run(idu, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout)
    duration = dta['format']['duration']
    #read the same file the detection command wrote to; only every other line
    #of the log is kept, those are the ones parsed for "pts_time:" below
    with open(TIME_LOGS, 'r') as r:
        tm_log = r.read().splitlines()[::2]
    tm_log.append("end pts_time:" + str(duration))
    n = len(tm_log)
    for i,l in enumerate(tm_log): #for each cut
        #create a folder for each scene
        new_dir = str(i)
        new_path = os.path.join(DIST_PATH, new_dir)
        os.mkdir(new_path)
        
        #cut the video
        end_t = l.split("pts_time:",1)[1].strip() #no stray whitespace in the argument
        cut = ["ffmpeg", "-y", "-ss", str(start_t), "-to", end_t, "-i", p,
               "-pix_fmt", "yuv420p", f"{REF_PATH}scene{str(i).zfill(7)}.yuv"]
        subprocess.call(cut)
        start_t = end_t #the next shot starts where this one ends
    return n

In [3]:
struct_points = [] #structure of target points for json file
struct_shots = [] #structure of shots for json file

#the configuration is validated before the (slow) shot detection starts.
#errors are raised instead of calling exit(): inside a notebook kernel exit()
#does not stop the cell at that line, so the code below would still be run
if source_path.endswith(".yuv"):
    print("yuv input")
elif source_path.endswith(".y4m"):
    print("y4m input")
else:
    raise ValueError("No such an input type")

#init values based on the selected output codec
codec_params = {"avc": PARAM_AVC, "hevc": PARAM_HEVC, "vp9": PARAM_VP9}
if codec not in codec_params:
    raise ValueError("No such an codec")
s_cod = codec_params[codec]

num_scenes = shot_change_detection(source_path)

init_res_matrix(s_cod["crfs"])

min_range_crf = s_cod["starting_range"][0]
max_range_crf = s_cod["starting_range"][1]

with open(tm_file, 'r') as f:
    o_data = json.load(f)

#add source name and results matrix
version = o_data["versions"][0]
o_data["content"] = source_name
version["codec"] = codec
version["width"] = raw_width
version["height"] = raw_height
version["fps"] = raw_fps
version["shots"][0]["assessment"] = res_matrix

#add empty target points: one copy of the template point for each target
base_point = version["shots"][0]["opt_points"][0]
targets = target_quality if flag_target else target_bitrate
for tgt in targets:
    base_point["target"] = tgt
    struct_points.append(base_point.copy())
version["shots"][0]["opt_points"] = struct_points

#add empty shots: one copy of the template shot for each detected scene
base_shot = version["shots"][0]
for i in range(0, num_scenes):
    base_shot["index"] = i #assign index to shots in json file
    struct_shots.append(base_shot.copy())
version["shots"] = struct_shots

with open(rd_file, 'w') as w:
    json.dump(o_data, w, separators=(',',': '))

y4m input


ffmpeg version N-106635-g83e1a1de88 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/home/ubuntu/ffmpeg_build --pkg-config-flags=--static --extra-cflags=-I/home/ubuntu/ffmpeg_build/include --extra-ldflags=-L/home/ubuntu/ffmpeg_build/lib --extra-libs='-lpthread -lm' --ld=g++ --bindir=/home/ubuntu/bin --enable-gpl --enable-gnutls --enable-libaom --enable-libass --enable-libfdk-aac --enable-libfreetype --enable-libmp3lame --enable-libopus --enable-libsvtav1 --enable-libdav1d --enable-libvorbis --enable-libvpx --enable-libx264 --enable-libx265 --enable-libvmaf --enable-version3 --enable-nonfree
  libavutil      57. 24.101 / 57. 24.101
  libavcodec     59. 26.100 / 59. 26.100
  libavformat    59. 22.100 / 59. 22.100
  libavdevice    59.  6.100 / 59.  6.100
  libavfilter     8. 33.100 /  8. 33.100
  libswscale      6.  6.100 /  6.  6.100
  libswresample   4.  6.100 /  4.  6.100
  libpostproc    56.  5.100 / 56.  5.100

## Optimization
For each shot, find the CRF whose encode has the quality or bitrate closest to the target.

In [4]:
#store the quality and rate results for each shot at each encoded crf
def save_results(index, crf, bitrate, vmaf, psnr):
    """Write the measurements of one encode into the results file.

    index: position of the shot in the json file
    crf: encoded CRF, also used as the slot inside every assessment list
    bitrate, vmaf, psnr: values measured for this encode
    returns: the updated assessment of the shot (crf, bitrate, vmaf, psnr lists)
    """
    with open(rd_file, 'r') as src:
        o_data = json.load(src)
    assessment = o_data["versions"][0]["shots"][index]["assessment"]
    #progress trace: the new values and the crf currently stored in the slot
    print(crf, bitrate, vmaf, psnr)
    print(assessment["crf"][crf])
    print("------")
    measured = (("crf", crf), ("bitrate", bitrate), ("vmaf", vmaf), ("psnr", psnr))
    for key, value in measured:
        assessment[key][crf] = value
    with open(rd_file, 'w') as dst:
        json.dump(o_data, dst, separators=(',',': '))
    return assessment

def save_opt(index, target, opt):
    """Store the optimal CRF of a shot in the results file.

    index: position of the shot in the json file
    target: position of the target point inside the shot's "opt_points"
    opt: optimal CRF, converted to a plain int before it is written
    """
    with open(rd_file, 'r') as src:
        o_data = json.load(src)
    opt_point = o_data["versions"][0]["shots"][index]["opt_points"][target]
    opt_point["crf"] = int(opt)
    with open(rd_file, 'w') as dst:
        json.dump(o_data, dst, separators=(',',': '))

#linear interpolation of the target and the weight alpha between sx and dx
def interpolate(mat, sx, dx):
    """Estimate the CRF at which the metric reaches the target.

    A straight line is drawn through the two computed points sx and dx and the
    CRF where it crosses the target is returned, rounded to an integer. The
    metric name and the target value are read from the module-level names
    target_name and target. The result can lie outside [sx, dx] when the
    target is not between the two metric values.

    mat: assessment of the shot (lists indexed by crf)
    sx, dx: indices of the two computed points
    returns: the interpolated CRF
    """
    span = mat[target_name][sx] - mat[target_name][dx]
    if span == 0:
        #black sequences may have same quality values: no slope to follow, so
        #keep the higher crf, as the search loop does for two equal points
        return max(mat["crf"][sx], mat["crf"][dx])
    alpha = (mat[target_name][sx] - target) / span
    new_point = round(mat["crf"][sx] - alpha * (mat["crf"][sx] - mat["crf"][dx]))
    return new_point

In [5]:
shot_index = 0
point_index = 0
same_val = False

#the optimisation target does not change between shots, so it is resolved once;
#interpolate() reads target_name and target as module-level names
if flag_target:
    target_name = "vmaf"
    target = target_quality[0] #TODO: support more targets
else:
    target_name = "bitrate"
    target = target_bitrate[0]

last_crf = s_cod["crfs"] - 1 #highest crf slot available in the results lists

for shot in sorted(os.listdir(REF_PATH)): #for each shot
    print("init computing -scene" + str(shot_index))
    #every shot starts from a clean search state, whatever a previous
    #(possibly interrupted) run of this cell left behind
    current_point = None
    new_point = 0
    point_index = 0
    while not current_point == new_point: #if no convergence
        if point_index == 0: #if there are no points to compare
            new_point = max_range_crf #encode at the upper value of the starting range
            
        #encoding (-y: never stop on an overwrite prompt for an existing file)
        add_info = s_cod["add_param"]
        lib = s_cod["lib"]
        out = DIST_PATH + str(shot_index) + "/" + str(new_point) + "_" + codec.upper() + \
                "." + s_cod["container"]
        enc = f"ffmpeg -y -f rawvideo -video_size {raw_width}x{raw_height} \
            -r {raw_fps} -pixel_format yuv420p -i {REF_PATH+shot} -c:v {lib} \
            -crf {new_point} {add_info} {out}"
        subprocess.call(enc, shell=True)
        
        #quality assessment
        c_vmaf = f"ffmpeg -f rawvideo -r {raw_fps} -video_size {raw_width}x{raw_height} -i {REF_PATH+shot} \
            -i {out} \
            -lavfi \"[0:v]setpts=PTS-STARTPTS[ref];\
                     [1:v]scale={raw_width}x{raw_height}:flags=bicubic, setpts=PTS-STARTPTS[dist];\
                     [dist][ref]libvmaf=feature=name=psnr:log_path={VMAF_LOGS}:log_fmt=json\" \
            -f null -" #|name=float_ssim|name=float_ms_ssim to compute the other metrics
        subprocess.call(c_vmaf, shell=True)
        
        #extract quality and rate values
        with open(VMAF_LOGS, 'r') as r:
            i_data = json.load(r)
        vmaf = i_data["pooled_metrics"]["vmaf"]["mean"]
        #weighted psnr: luma counts six times, each chroma plane once
        psnr = (6*i_data["pooled_metrics"]["psnr_y"]["mean"] + \
                i_data["pooled_metrics"]["psnr_cb"]["mean"] + i_data["pooled_metrics"]["psnr_cr"]["mean"])/8
        info = f"ffprobe -v error -select_streams v:0 -show_entries format:stream -print_format json {out}"
        cout = json.loads(subprocess.run(info.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout)
        bitrate = int(cout['format']['bit_rate'])
    
        #TODO: results must be weighted based on duration (?)
        print(shot_index, new_point)
        res_matrix = save_results(shot_index, new_point, bitrate, vmaf, psnr)
            
        if point_index == 0: #if there are no points to compare (first loop)
            current_point = max_range_crf #the current optimal point is the first one
            new_point = min_range_crf #in the next loop encode at the lower end of the starting range
        else:
            #element-wise difference between the metric and its target value
            #(slots that were never computed hold inf and are never selected)
            difference = np.asarray(abs(np.asarray(res_matrix[target_name]) - target))
            #the minimum difference = the element with the index closer to the target
            i_first_min = np.argmin(difference)
            nd_diff = difference.copy()
            nd_diff[i_first_min] = np.inf #replace the minimum with inf
            i_second_min = np.argmin(nd_diff) #find the second minimum
            current_point = i_first_min #the index of the point closer to the target
            
            first_val = res_matrix[target_name][i_first_min]
            second_val = res_matrix[target_name][i_second_min]
            if first_val == second_val:
                current_point = max(i_first_min,i_second_min)
                new_point = current_point
            else:
                #sx is the end with the bigger metric value, dx the one with the smaller
                if first_val > second_val:
                    sx_end, dx_end = first_val, second_val
                    i_sx_end, i_dx_end = i_first_min, i_second_min
                else:
                    sx_end, dx_end = second_val, first_val
                    i_sx_end, i_dx_end = i_second_min, i_first_min
                
                if target < sx_end and target > dx_end: #if the target is in the range
                    new_point = interpolate(res_matrix, i_sx_end, i_dx_end)
                elif target > sx_end: #if the target is out of the range in the left side
                    new_point = 0 #if no other points in this direction had been stored encode at the min crf
                    i = i_sx_end - 1
                    while new_point == 0 and i>= 0:
                        #the first point you find is the new lower end of the range
                        if not res_matrix[target_name][i] == math.inf:
                            new_point = interpolate(res_matrix, i, i_sx_end)
                        i -= 1
                elif target < dx_end: #if the target is out of the range in the right side
                    new_point = last_crf #if there's no other points, encode at the max crf
                    i = i_dx_end + 1
                    while new_point == last_crf and i <= last_crf:
                        #the first point you find is the new upper end of the range
                        if not res_matrix[target_name][i] == math.inf:
                            new_point = interpolate(res_matrix, i_dx_end, i)
                        i += 1
                else:
                    #the target was hit exactly by the closest point: stop here
                    #instead of encoding the previous point again
                    print("target == computed val")
                    new_point = current_point

            #an extrapolated crf can leave the valid range; a negative value
            #would silently index the result lists from the end
            new_point = int(min(max(new_point, 0), last_crf))
            #a crf that was already encoded cannot add information: encoding it
            #again would give the same matrix and the same proposal forever,
            #so the closest computed point is accepted as the optimum
            if new_point != current_point and res_matrix[target_name][new_point] != math.inf:
                new_point = current_point
        point_index += 1
    
    save_opt(shot_index, 0, current_point)
    shot_index += 1

init computing -scene0


ffmpeg version N-106635-g83e1a1de88 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/home/ubuntu/ffmpeg_build --pkg-config-flags=--static --extra-cflags=-I/home/ubuntu/ffmpeg_build/include --extra-ldflags=-L/home/ubuntu/ffmpeg_build/lib --extra-libs='-lpthread -lm' --ld=g++ --bindir=/home/ubuntu/bin --enable-gpl --enable-gnutls --enable-libaom --enable-libass --enable-libfdk-aac --enable-libfreetype --enable-libmp3lame --enable-libopus --enable-libsvtav1 --enable-libdav1d --enable-libvorbis --enable-libvpx --enable-libx264 --enable-libx265 --enable-libvmaf --enable-version3 --enable-nonfree
  libavutil      57. 24.101 / 57. 24.101
  libavcodec     59. 26.100 / 59. 26.100
  libavformat    59. 22.100 / 59. 22.100
  libavdevice    59.  6.100 / 59.  6.100
  libavfilter     8. 33.100 /  8. 33.100
  libswscale      6.  6.100 /  6.  6.100
  libswresample   4.  6.100 /  4.  6.100
  libpostproc    56.  5.100 / 56.  5.100

0 28
28 856127 91.749778 41.672702875000006
28
------


ffmpeg version N-106635-g83e1a1de88 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/home/ubuntu/ffmpeg_build --pkg-config-flags=--static --extra-cflags=-I/home/ubuntu/ffmpeg_build/include --extra-ldflags=-L/home/ubuntu/ffmpeg_build/lib --extra-libs='-lpthread -lm' --ld=g++ --bindir=/home/ubuntu/bin --enable-gpl --enable-gnutls --enable-libaom --enable-libass --enable-libfdk-aac --enable-libfreetype --enable-libmp3lame --enable-libopus --enable-libsvtav1 --enable-libdav1d --enable-libvorbis --enable-libvpx --enable-libx264 --enable-libx265 --enable-libvmaf --enable-version3 --enable-nonfree
  libavutil      57. 24.101 / 57. 24.101
  libavcodec     59. 26.100 / 59. 26.100
  libavformat    59. 22.100 / 59. 22.100
  libavdevice    59.  6.100 / 59.  6.100
  libavfilter     8. 33.100 /  8. 33.100
  libswscale      6.  6.100 /  6.  6.100
  libswresample   4.  6.100 /  4.  6.100
  libpostproc    56.  5.100 / 56.  5.100

0 17
17 3565672 97.872308 47.77079775
17
------


ffmpeg version N-106635-g83e1a1de88 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/home/ubuntu/ffmpeg_build --pkg-config-flags=--static --extra-cflags=-I/home/ubuntu/ffmpeg_build/include --extra-ldflags=-L/home/ubuntu/ffmpeg_build/lib --extra-libs='-lpthread -lm' --ld=g++ --bindir=/home/ubuntu/bin --enable-gpl --enable-gnutls --enable-libaom --enable-libass --enable-libfdk-aac --enable-libfreetype --enable-libmp3lame --enable-libopus --enable-libsvtav1 --enable-libdav1d --enable-libvorbis --enable-libvpx --enable-libx264 --enable-libx265 --enable-libvmaf --enable-version3 --enable-nonfree
  libavutil      57. 24.101 / 57. 24.101
  libavcodec     59. 26.100 / 59. 26.100
  libavformat    59. 22.100 / 59. 22.100
  libavdevice    59.  6.100 / 59.  6.100
  libavfilter     8. 33.100 /  8. 33.100
  libswscale      6.  6.100 /  6.  6.100
  libswresample   4.  6.100 /  4.  6.100
  libpostproc    56.  5.100 / 56.  5.100

0 20
20 2407793 97.106049 46.11791375
20
------


ffmpeg version N-106635-g83e1a1de88 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/home/ubuntu/ffmpeg_build --pkg-config-flags=--static --extra-cflags=-I/home/ubuntu/ffmpeg_build/include --extra-ldflags=-L/home/ubuntu/ffmpeg_build/lib --extra-libs='-lpthread -lm' --ld=g++ --bindir=/home/ubuntu/bin --enable-gpl --enable-gnutls --enable-libaom --enable-libass --enable-libfdk-aac --enable-libfreetype --enable-libmp3lame --enable-libopus --enable-libsvtav1 --enable-libdav1d --enable-libvorbis --enable-libvpx --enable-libx264 --enable-libx265 --enable-libvmaf --enable-version3 --enable-nonfree
  libavutil      57. 24.101 / 57. 24.101
  libavcodec     59. 26.100 / 59. 26.100
  libavformat    59. 22.100 / 59. 22.100
  libavdevice    59.  6.100 / 59.  6.100
  libavfilter     8. 33.100 /  8. 33.100
  libswscale      6.  6.100 /  6.  6.100
  libswresample   4.  6.100 /  4.  6.100
  libpostproc    56.  5.100 / 56.  5.100

0 22
22 1856342 96.340484 45.007554875
22
------


## Encode opt video
Concatenate the individually encoded shots, each taken at its optimal CRF, into a single output video.

In [6]:
with open(rd_file, 'r') as f:
    o_data = json.load(f)
#the list is rebuilt from scratch, so running this cell again does not append
#the same shots a second time
file_list = ""
for i in range(0,num_scenes):
    #each shot contributes the file encoded at its optimal crf
    opt_crf = o_data["versions"][0]["shots"][i]["opt_points"][0]["crf"]
    file_list = file_list + "file '" + DIST_PATH + str(i) + "/" \
    + str(opt_crf) + "_" + codec.upper() + "." + s_cod["container"] + "' \n"
with open(FILE_LIST_PATH, 'w') as w:
    w.write(file_list)
#argument list instead of a shell string: a source name with spaces stays one
#argument; -y replaces the output of a previous run instead of stopping
mux = ["ffmpeg", "-y", "-f", "concat", "-i", FILE_LIST_PATH, "-c", "copy",
       OUT_PATH + source_name + "." + s_cod["container"]]
subprocess.call(mux)

ffmpeg version N-106635-g83e1a1de88 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/home/ubuntu/ffmpeg_build --pkg-config-flags=--static --extra-cflags=-I/home/ubuntu/ffmpeg_build/include --extra-ldflags=-L/home/ubuntu/ffmpeg_build/lib --extra-libs='-lpthread -lm' --ld=g++ --bindir=/home/ubuntu/bin --enable-gpl --enable-gnutls --enable-libaom --enable-libass --enable-libfdk-aac --enable-libfreetype --enable-libmp3lame --enable-libopus --enable-libsvtav1 --enable-libdav1d --enable-libvorbis --enable-libvpx --enable-libx264 --enable-libx265 --enable-libvmaf --enable-version3 --enable-nonfree
  libavutil      57. 24.101 / 57. 24.101
  libavcodec     59. 26.100 / 59. 26.100
  libavformat    59. 22.100 / 59. 22.100
  libavdevice    59.  6.100 / 59.  6.100
  libavfilter     8. 33.100 /  8. 33.100
  libswscale      6.  6.100 /  6.  6.100
  libswresample   4.  6.100 /  4.  6.100
  libpostproc    56.  5.100 / 56.  5.100

0

## TODO: Curve fitting
When the search above has tested 3 points, use these 3 RQ points to find the polynomial or logarithmic function that describes their trend. Repeat this whenever a new point is computed. Measure the error between the approximation and the actual implementation (the Lagrangian search above) and assess whether and when it may be useful for speeding up the search process, by reducing the number of test encodes needed before reaching the optimum.