# getting ready
安装相关依赖

step1 下载onnx模型与测试视频


In [None]:
# Download the model archive and the sample-file archive from the BMF GitHub
# release page, then unpack both into the current working directory.
!gdown --fuzzy https://github.com/BabitMF/bmf/releases/download/files/models.tar.gz
!gdown --fuzzy https://github.com/BabitMF/bmf/releases/download/files/files.tar.gz
# Later cells read models/aes_transonnx_update3.onnx and files/bbb_360_20s.mp4;
# presumably these archives provide them — verify after extraction.
!tar xzvf models.tar.gz
!tar xzvf files.tar.gz

step2 安装BMF、onnxruntime-module

In [None]:
!pip install BabitMF
!pip3 install onnxruntime


step3 获取BMF源码，找到demo模块，测试modules和model文件可以正常使用

In [None]:
# Clone the BMF repository into ./bmf; the next cell copies the demo modules out of it.
!git clone https://github.com/BabitMF/bmf.git

In [None]:
!cp /content/bmf/bmf/demo/aesthetic_assessment/*.py .

In [None]:
# Smoke test: the imports show that BMF and the copied demo modules are
# importable, and building an inference session shows that the model file loads.
import bmf
import sys
import onnxruntime as ort
from module_utils import SyncModule
import aesmod_module
import os.path as osp

# The model is looked up in ./models relative to the current working directory.
model_dir = osp.abspath('models')
aesmod_ort_model_path = osp.realpath(osp.join(model_dir, 'aes_transonnx_update3.onnx'))
print(aesmod_ort_model_path)
ort_session = ort.InferenceSession(aesmod_ort_model_path)

# source code

## aesmod_module.py


*   func get_logger()
*   func flex_resize_aesv2()
*   class Aesmod
*   class BMFAesmod


## module_utils.py


*   class SyncModule


## main.py
Main program that calls the BMF API and visualizes the output.
*   func segment_decode_ticks()
*   func get_duration()



In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
### aesmod_module.py

from module_utils import SyncModule
import os
import time
import json
import pdb
import os.path as osp
import numpy as np

os.environ["OMP_NUM_THREADS"] = "8"
import onnxruntime as ort
import torch
import logging
import cv2


def get_logger():
    """Return the logger registered under the name "main"."""
    main_logger = logging.getLogger("main")
    return main_logger


# Module-wide logger handle used by the classes below.
LOGGER = get_logger()


def flex_resize_aesv2(img, desired_size=[448, 672], pad_color=[0, 0, 0]):
    """Rotate, scale and center-crop ``img`` to ``desired_size``.

    The image is turned by 90 degrees when its orientation (landscape vs.
    portrait) disagrees with the orientation of ``desired_size``. It is then
    scaled with one factor for both axes, chosen from the side that would
    otherwise end up too small, and finally cropped symmetrically.

    Args:
        img: Image array indexed as (height, width, channels).
        desired_size: Target size as [height, width].
        pad_color: Not used by this implementation.

    Returns:
        The rotated, resized and cropped image array.
    """
    target_h = desired_size[0]
    target_w = desired_size[1]

    src_h, src_w = img.shape[:2]  # shape is (height, width, ...)
    if target_h >= target_w:
        # Portrait (or square) target: a horizontal frame has to be turned.
        needs_turn = src_h < src_w
    else:
        # Landscape target: a vertical frame has to be turned.
        needs_turn = src_h > src_w
    if needs_turn:
        img = np.rot90(img, k=1, axes=(1, 0))
        src_h, src_w = img.shape[:2]

    # A frame wider than the target aspect is fitted by height, otherwise by width.
    wider_than_target = src_w / src_h > (target_w / target_h)
    scale = target_h / src_h if wider_than_target else target_w / src_w
    img = cv2.resize(img, None, fx=scale, fy=scale)

    scaled_h, scaled_w, _ = img.shape
    top = (scaled_h - target_h) // 2
    left = (scaled_w - target_w) // 2
    return img[top:top + target_h, left:left + target_w, :]


class Aesmod:
    """Frame-level aesthetic scorer backed by an ONNX Runtime session.

    Each call to :meth:`process` scores the first frame it receives and stores
    the score; :meth:`clean` averages the stored scores and writes the result
    to a JSON file.
    """

    def __init__(self, model_path, model_version, output_path):
        """Load the ONNX model and reset the score state.

        Args:
            model_path: Path of the ONNX model handed to ONNX Runtime.
            model_version: Version label copied into the JSON result.
            output_path: File that :meth:`clean` writes the JSON result to.
        """
        self._frm_idx = 0
        self._frm_scores = []
        self._model_version = model_version
        self._output_path = output_path

        # Only the ONNX Runtime path is set up here. The GPU branch in
        # process() needs a model assigned to ``trt_model`` by the caller.
        self.use_gpu = False
        self.trt_model = None
        aesmod_ort_model_path = model_path
        print(aesmod_ort_model_path)
        LOGGER.info("loading aesthetic ort inference session")
        self.ort_session = ort.InferenceSession(aesmod_ort_model_path)

        self.resize_reso = [672, 448]

    def preprocess(self, frame):
        """Turn one image into a normalized float32 array of shape (1, C, H, W).

        The frame is resized/cropped by ``flex_resize_aesv2``, scaled to
        [0, 1], normalized per channel with fixed mean/std constants, moved to
        channel-first layout and given a leading batch axis.
        """
        frame = flex_resize_aesv2(frame)
        mean = np.array([0.485, 0.456, 0.406], dtype="float32")
        std = np.array([0.229, 0.224, 0.225], dtype="float32")
        frame = (frame.astype(np.float32) / 255.0 - mean) / std
        frame = np.transpose(frame, (2, 0, 1))
        frame = np.expand_dims(frame, 0)
        return frame

    @staticmethod
    def tensor_to_list(tensor):
        """Flatten a torch tensor into a plain Python list on the CPU."""
        # detach() is valid whether or not the tensor requires grad.
        return tensor.detach().cpu().flatten().tolist()

    @staticmethod
    def score_pred_mapping(raw_scores, raw_min=2.60, raw_max=7.42):
        """Map raw model output to a score in [0, 100].

        Entry ``i`` of ``raw_scores`` is weighted by ``i + 1`` and the weighted
        sum is clipped to ``[raw_min, raw_max]``. The clipped value is rescaled
        to [0, 1], square-rooted and multiplied by 100.

        Args:
            raw_scores: Iterable of per-bucket weights (presumably the model's
                score distribution — confirm against the model).
            raw_min: Lower clip bound of the weighted sum.
            raw_max: Upper clip bound of the weighted sum.

        Returns:
            The mapped score as a Python float.
        """
        pred_score = np.clip(
            np.sum([x * (i + 1) for i, x in enumerate(raw_scores)]), raw_min,
            raw_max)
        pred_score = np.sqrt((pred_score - raw_min) / (raw_max - raw_min)) * 100
        return float(np.clip(pred_score, 0, 100.0))

    def process(self, frames):
        """Score the first frame of ``frames`` and remember the score.

        Args:
            frames: Sequence of image arrays; only ``frames[0]`` is scored.

        Returns:
            ``frames[0]`` (made C-contiguous if it was not), unmodified.

        Raises:
            RuntimeError: If ``use_gpu`` is set but no ``trt_model`` exists.
        """
        frames = [
            frame
            if frame.flags["C_CONTIGUOUS"] else np.ascontiguousarray(frame)
            for frame in frames
        ]
        frame = self.preprocess(frames[0])
        print("after preprocess shape", frame.shape)
        # The transpose in preprocess() leaves a non-contiguous view.
        if not frame.flags["C_CONTIGUOUS"]:
            frame = np.ascontiguousarray(frame, dtype=np.float32)

        t1 = time.time()
        if self.use_gpu:
            trt_model = getattr(self, "trt_model", None)
            if trt_model is None:
                raise RuntimeError(
                    "use_gpu is set but no trt_model has been assigned")
            with torch.no_grad():
                input_batch = torch.from_numpy(frame).contiguous().cuda()
                preds, _ = trt_model(input_batch)
                raw_score = self.tensor_to_list(preds)
        else:
            raw_score = self.ort_session.run(None, {"input": frame})
            # First output, first (and only) batch entry.
            raw_score = raw_score[0][0]
        score = self.score_pred_mapping(raw_score)
        self._frm_scores.append(score)
        self._frm_idx += 1
        t2 = time.time()
        LOGGER.info(f"[Aesmod] inference time: {(t2 - t1) * 1000:0.1f} ms")
        return frames[0]

    def clean(self):
        """Write the mean of all stored scores to ``output_path`` as JSON.

        When no frame has been scored, ``"aesthetic"`` is written as ``null``
        rather than ``NaN``, which is not valid JSON.
        """
        if self._frm_scores:
            nr_score = round(float(np.mean(self._frm_scores)), 2)
        else:
            LOGGER.warning("no frame was scored; writing null aesthetic score")
            nr_score = None
        results = {
            "aesthetic": nr_score,
            "aesthetic_version": self._model_version
        }
        LOGGER.info(f"overall prediction {json.dumps(results)}")
        with open(self._output_path, "w") as outfile:
            json.dump(results, outfile, indent=4, ensure_ascii=False)


class BMFAesmod(SyncModule):
    """BMF module wrapper that hands incoming frames to an :class:`Aesmod`."""

    # Used when the option dict carries no usable "output_path". The previous
    # default of 0 made open() operate on file descriptor 0 instead of a file.
    DEFAULT_OUTPUT_PATH = "aesmod_result.json"

    def __init__(self, node=None, option=None):
        """Create the scorer and register the module with one rgb24 input.

        Args:
            node: Passed through to ``SyncModule.__init__``.
            option: Optional dict with the keys ``output_path`` (JSON result
                file), ``model_version`` (default ``"v1.0"``) and
                ``model_path`` (default
                ``"./models/aes_transonnx_update3.onnx"``).
        """
        # The signature allows option=None; treat it as "no options given".
        if option is None:
            option = {}
        output_path = option.get("output_path") or self.DEFAULT_OUTPUT_PATH
        model_version = option.get("model_version", "v1.0")
        model_path = option.get("model_path",
                                "./models/aes_transonnx_update3.onnx")
        self._nrp = Aesmod(model_path, model_version, output_path)
        SyncModule.__init__(self,
                            node,
                            nb_in=1,
                            in_fmt="rgb24",
                            out_fmt="rgb24")

    def core_process(self, frames):
        """Score the incoming frames and return what ``Aesmod.process`` returns."""
        return self._nrp.process(frames)

    def clean(self):
        """Flush the aggregated score to the output file."""
        self._nrp.clean()

In [None]:
# Print the source of module_utils.py, the file SyncModule is imported from.
!cat module_utils.py

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import bmf
import cv2, os, sys

def get_duration(video_path):
    """Return the duration of a video in seconds (frame count / frame rate).

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        The duration in seconds as a float.

    Raises:
        ValueError: If the video cannot be opened or reports no positive
            frame rate, in which case the duration cannot be computed.
    """
    capture = cv2.VideoCapture(video_path)
    try:
        if not capture.isOpened():
            raise ValueError(f"cannot open video: {video_path!r}")
        fps = capture.get(cv2.CAP_PROP_FPS)  # OpenCV 2.x named this "CV_CAP_PROP_FPS"
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        if not fps or fps <= 0:
            raise ValueError(f"video reports no valid frame rate: {video_path!r}")
        return frame_count / fps
    finally:
        # Release the capture on every path, including the error paths.
        capture.release()

def segment_decode_ticks(video_path, seg_dur=4.0, lv1_dur_thres=24.0, max_dur=1000):
    '''
        Build the BMF decoder "durations" option used for segmented decoding.

        With the default arguments:
        - duration < 24s: return an empty dict, so no "durations" entry is set.
        - 24s <= duration <= 600s: 6 segments of 4s each, spread evenly over
          the video.
        - duration > 600s: 8 segments of 4s each, spread evenly over the first
          ``max_dur`` seconds (1000s by default).
        If the gap between two segments would be shorter than 0.5s, one single
        range covering the whole (capped) span is returned instead. This
        applies to both branches, so segments never overlap.

        Args:
            video_path: Video file whose duration is read via get_duration().
            seg_dur: Length of each segment in seconds.
            lv1_dur_thres: Videos shorter than this are not segmented.
            max_dur: Maximum span in seconds considered for long videos.

        Returns:
            ``{}`` for short videos, otherwise ``{'durations': [start0, end0,
            start1, end1, ...]}`` with values in seconds.
    '''
    duration = get_duration(video_path)
    if duration < lv1_dur_thres:
        return dict()

    if duration <= 600:  # medium duration
        seg_num, span = 6, duration
    else:  # long duration, capped at max_dur
        seg_num, span = 8, min(duration, max_dur)

    # Gap between the end of one segment and the start of the next.
    seg_intev = (span - seg_num * seg_dur) / (seg_num - 1)
    if seg_intev < 0.5:
        # Segments would touch or overlap: decode the span as one range.
        return {'durations': [0, span]}

    duration_ticks = []
    for s_i in range(seg_num):
        seg_init = s_i * (seg_dur + seg_intev)
        seg_end = seg_init + seg_dur
        duration_ticks.extend([round(seg_init, 3), round(seg_end, 3)])
    return {'durations': duration_ticks}


if __name__ == "__main__":
    input_path = "files/bbb_360_20s.mp4"
    outp_path = 'res2.json'

    # Options handed to the aesthetic module; it writes its JSON result here.
    option = dict()
    option['output_path'] = outp_path
    print('option', option)

    # Extract frames at 1 fps; longer videos also get a "durations" entry.
    duration_segs = segment_decode_ticks(input_path)
    decode_params = {'input_path': input_path, 'video_params': {'extract_frames': {'fps': 1}}}
    decode_params.update(duration_segs)
    print('decode_params', decode_params)

    # module process
    # The module path is the current working directory. The earlier
    # dirname('__file__') was applied to a string literal, so it resolved to
    # the working directory only by accident.
    py_module_path = os.getcwd()
    # Entry of the form "<module>.<class>"; presumably BMF resolves it to the
    # BMFAesmod class defined in this session — confirm against the BMF docs.
    py_entry = '__main__.BMFAesmod'
    print(py_module_path, py_entry)

    streams = bmf.graph().decode(decode_params)
    video_stream = streams['video'].module('aesmod_module',
                                           option,
                                           py_module_path,
                                           py_entry)
    video_stream.run()



In [None]:
# Show the JSON result written by the aesthetic module (output_path is res2.json).
!cat res2.json

res2.json 中的 aesthetic 字段是对视频的美学打分。评分区间映射至 [0, 100]，分数越高代表画面的美学水平越高；其中在 AVA 数据集上好坏分类的阈值 5 分映射为 70.5 分。
在对画面的分析中，美学是一个重要的维度。对于画面客观质量相似的画面，美学维度的评价能够进一步丰富对人类感知的拟合。对于图像美感的评价综合了色彩相关（亮度/饱和度/色彩丰富度...）、构图相关（三分线构图/对称性/前景背景对比）、语义相关（主题是否明确）、画质相关（纹理是否丰富清晰）等多个维度，同时除了摄影经验之外，图像的审美质量还受到情感和个人偏好的影响，例如对不同内容类型或风格的偏好。综上实际上的预测精度与主观感受仍然相差较大。