In [2]:
import os
import pickle

def load_videos(folder, version):
    path = os.path.join(folder, 'videos-v{}.pkl'.format(version))
    with open(path, 'rb') as f:
        return pickle.load(f)
    raise IOError('No Video Data File', folder)


def save_videos(folder, videos, version):
    # 整体存储
    path = os.path.join(folder, 'videos-v{}.pkl'.format(version))
    f = open(path, 'wb')
    pickle.dump(videos, f)
    f.close()

In [1]:
from tools.cut_editor import load_train_model
net = load_train_model()
net

ConcatDropoutNet(
  (conv1): Conv2d(10, 12, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(12, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=1568, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=1, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (dropout2): Dropout(p=0.25, inplace=False)
)

In [76]:
import os
import math
import numpy as np

view_map = {
    0: 0, # long-shot
    1: 1, # full-shot
    2: 1, # full-shot
    3: 2, # mid-shot
    4: 2, # mid-shot
    5: 2, # mid-shot
    6: 3, # close-shot
    7: 3, # close-shot
    8: 3, # close-shot
    9: 5  # none
}

direct_map = {
    0: 0, # left
    1: 1, # half-left
    2: 2, # center
    3: 3, # half-right
    4: 4, # right
    5: 6, # back
    6: 8  # none
}

class VShot:

    def __init__(self, video, start, end):
        self.video = video
        self.start = start
        self.end = end
        self.len = end - start
        self.vi = -1
        self.playable = True
        self.calc_features()

    def calc_features(self):
        frames = [frame for frame in self.video.frames[self.start:self.end]]
        self.view = np.array([view_map[frame.view] for frame in frames])
        self.view_mean = self.view.mean()
        self.direct = np.array([direct_map[frame.direction] for frame in frames])
        self.direct_mean = self.direct.mean()
        # self.pose = [frame.pose for frame in frames]
        # self.motion = [frame.motion for frame in frames]
        self.roi = [frame.roi for frame in frames]
        self.roi_mean = self.calc_smooth_rois().mean()
        self.flow = [frame.flow for frame in frames]
        self.flow_crop = self.calc_flow_crop()
        self.flow_row = [frame.flow_row for frame in frames]
        self.data = [frame.data for frame in frames]

    def view_match(self, view):
        return abs(view - self.view_mean) <= 1

    def view_match(self, pvshot):
        view_diff = np.absolute(self.view - pvshot.view)
        view_diff_mean = view_diff.mean()
        return view_diff_mean <= 1

    def direct_match(self, direct):
        return abs(direct - self.direct_mean) <= 1

    def direct_match(self, pvshot):
        direct_diff = np.absolute(self.direct - pvshot.direct)
        direct_diff_mean = direct_diff.mean()
        return direct_diff_mean <= 1

    def calc_smooth_rois(self):
        w = self.video.width
        rois = np.array([(roi[1]+roi[3])/2/w for roi in self.roi])
        rois = np.where(rois > 0.0, rois, 0.5)
        N = 7
        hN = N // 2
        weights = np.hanning(N)
        smooth_rois = np.convolve(weights/weights.sum(), rois, mode='full')[hN:-hN]
        smooth_rois[0:hN] = rois[0:hN]
        smooth_rois[-hN:] = rois[-hN:]
        return smooth_rois

    def calc_flow_crop(self):
        flow_crop = []
        for flow in self.flow:
            mid = math.floor(self.roi_mean * flow.shape[1])
            if flow.shape[1] <= 11:
                flow_crop.append((mid, flow))
                continue
            if (mid - 5) < 0:
                start = 0
                end = start + 11
            elif (mid + 5) >= flow.shape[1]:
                end = flow.shape[1]
                start = end - 11
                mid = mid - start
            else:
                start = mid - 5
                end = mid + 5 + 1
                mid = mid - start
            flow_crop.append((mid, flow[:, start:end]))
        return flow_crop

    def calc_flow_diff(self, pvshot):
        diff = 0.0
        for (v1_mid, v1_flow), (v2_mid, v2_flow) in zip(self.flow_crop, pvshot.flow_crop):
            # v1_mid, v1_flow = VShot.calc_frame_flow(frame1, mroi_1)
            # v2_mid, v2_flow = VShot.calc_frame_flow(frame2, mroi_2)
        #     print(v1_mid, v1_flow.shape)
        #     print(v2_mid, v2_flow.shape)
            if v1_flow.shape[1] < v2_flow.shape[1]:
                radius = v1_flow.shape[1] // 2
                if (v2_mid - radius) < 0:
                    v2_start = 0
                    v2_end = v2_start + v1_flow.shape[1]
                elif (v2_mid + radius) >= v2_flow.shape[1]:
                    v2_end = v2_flow.shape[1]
                    v2_start = v2_end - v1_flow.shape[1]
                else:
                    v2_start = v2_mid - radius
                    v2_end = v2_start + v1_flow.shape[1]
                v2_flow = v2_flow[:, v2_start:v2_end]
        #         print("New v2_flow:", v2_start, v2_end)
            elif v1_flow.shape[1] > v2_flow.shape[1]:
                radius = v2_flow.shape[1] // 2
                if (v1_mid - radius) < 0:
                    v1_start = 0
                    v1_end = v1_start + v2_flow.shape[1]
                elif (v1_mid + radius) >= v1_flow.shape[1]:
                    v1_end = v1_flow.shape[1]
                    v1_start = v1_end - v2_flow.shape[1]
                else:
                    v1_start = v1_mid - radius
                    v1_end = v1_start + v2_flow.shape[1]
                v1_flow = v1_flow[:, v1_start:v1_end]
        #         print("New v1_flow:", v1_start, v1_end)
            x_diff = v1_flow[:,:,0] - v2_flow[:,:,0]
            y_diff = v1_flow[:,:,1] - v2_flow[:,:,1]
            diff += np.mean(np.sqrt((x_diff * x_diff + y_diff * y_diff)))
        return diff

    @staticmethod
    def calc_flow_diff_of_pair(vshots):
        diff = 0.0
        for i, ivshot in enumerate(vshots):
            for j, jvshot in enumerate(vshots[i+1:]):
                diff += ivshot.calc_flow_diff(jvshot)
        return diff

    def get_valid_head_tail_frame(self):
        frames = self.video.frames[self.start:self.end]
        if len(frames) == 1:
            return (None, None)
        elif len(frames) == 2:
            return (frames[1], frames[1])
        elif len(frames) <= 8:
            mid = len(frames) // 2
            return (frames[mid], frames[mid])
        else:
            return (frames[3], frames[-4])

    def cross_with(self, other):
        if self.video != other.video:
            return False
        else:
            if self.end < other.start or self.start > other.end:
                return False
            else:
                return True

    def __eq__(self, other):
        video_eq = self.video == other.video
        start_eq = self.start == other.start
        end_eq = self.end == other.end
        return video_eq and start_eq and end_eq

    def __str__(self):
        return 'VShot {} {} {}-{}'.format(self.video.name, self.len, self.start, self.end)

In [90]:
import cv2
import torch
# from model.cut_editor.test_tools import get_test_loader
from tools.cut_editor import load_train_model

class VShotGroup:

    def __init__(self, videos):
        self.videos = videos
        self.vshots = []
        self.calc_vshots_of_video()
        self.cut_scores = []
        # self.calc_cut_scores_of_vshots()

    def calc_vshots_of_video(self):
        shot_lens = [1.5, 2, 3, 4, 5]
        jump = 0.5
        for video in self.videos:
            ratio = round(1 / video.sample_time_interval)
            vshots = []
            for slen in shot_lens:
                vs = self.calc_vshots_of_len(video, round(slen * ratio), round(jump * ratio))
                vshots.extend(vs)
            print(video.name + ":" + str(len(vshots)))
            # self.vshots.append(vshots)
            self.vshots.extend(vshots)
        for vi, vshots in enumerate(self.vshots):
            vshots.vi = vi

    def calc_vshots_of_len(self, video, slen, jump):
        vshots = []
        if len(video.frames) < slen:
            return vshots
        for i in range(0, len(video.frames) - slen + 1, jump):
            vshots.append(VShot(video, i, i + slen))
        return vshots

    def calc_cut_scores_of_vshots(self):
        data = []
        for i, ivshot in enumerate(self.vshots):
            _, prev_t = ivshot.get_valid_head_tail_frame()
            for j, jvshot in enumerate(self.vshots):
                curr_h, _ = jvshot.get_valid_head_tail_frame()
                data.append([prev_t.data, curr_h.data, prev_t.flow_row, curr_h.flow_row])
        hsv_data = []
        for pdata, cdata, pflow, cflow in data:
            pdata = cv2.cvtColor(pdata, cv2.COLOR_RGB2HSV)
            cdata = cv2.cvtColor(cdata, cv2.COLOR_RGB2HSV)
            hsv_data.append([pdata, cdata, pflow, cflow])
        print(len(hsv_data))
        loader = get_test_loader(hsv_data)
        net = load_train_model()
        net.eval()
        net_results = None
        with torch.no_grad():
            for i, batch in enumerate(loader):
                pdata, cdata, pflow, cflow = batch
                outputs = net(pdata, cdata, pflow, cflow)
                results = torch.sigmoid(outputs)
                if net_results is None:
                    net_results = results
                else:
                    net_results = torch.cat((net_results, results))
        assert net_results.shape[0] == len(hsv_data)
        results = net_results.view(len(self.vshots), len(self.vshots)).tolist()
        self.cut_scores = results

    def set_unplayable_vshots(self, cvshot):
        for vshot in self.vshots:
            if vshot.cross_with(cvshot):
                vshot.playable = False

    def reset_playable_vshots(self):
        for vshot in self.vshots:
            vshot.playable = True

In [93]:
import cv2
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
# from skimage import io, transform

class CutEditorTestDataset(Dataset):
    """Cut editor dataset."""

    def __init__(self, data, transform=None):
        data = [(pdata.astype(np.float32), cdata.astype(np.float32), pflow.astype(np.float32), cflow.astype(np.float32)) for
                        (pdata, cdata, pflow, cflow) in data]
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        pdata, cdata, pflow, cflow = self.data[idx]

        if self.transform:
            pdata, cdata, pflow, cflow = self.transform((pdata, cdata, pflow, cflow))
        return pdata, cdata, pflow, cflow

class Rescale(object):
    """Rescale the image in a sample to a given size.
    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, data):
        pdata, cdata, pflow, cflow = data
        h, w = pdata.shape[:2]
        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        pdata = cv2.resize(pdata, (new_h, new_w), cv2.INTER_AREA)
        cdata = cv2.resize(cdata, (new_h, new_w), cv2.INTER_AREA)
        pflow = cv2.resize(pflow, (new_h, new_w), cv2.INTER_AREA)
        cflow = cv2.resize(cflow, (new_h, new_w), cv2.INTER_AREA)

        return pdata, cdata, pflow, cflow

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, data):
        pdata, cdata, pflow, cflow = data

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        pdata = pdata.transpose((2, 0, 1))
        cdata = cdata.transpose((2, 0, 1))
        pflow = pflow.transpose((2, 0, 1))
        cflow = cflow.transpose((2, 0, 1))

        return pdata, cdata, pflow, cflow


def get_test_loader(data):
    dataset = CutEditorTestDataset(data,
                                transform=transforms.Compose([
                                Rescale((76, 76)),
                                ToTensor()
                            ]))
    loader = DataLoader(dataset, batch_size=64)
    return loader

In [94]:
folder = "templates/taobao-{}".format(0)
videos = load_videos(folder, 8)
vshot_group = VShotGroup(videos)
print(len(vshot_group.vshots))
for vshot in vshot_group.vshots:
    print(vshot)

00.mp4:11
01.mp4:11
02.mp4:15
03.mp4:5
04.mp4:1
05.mp4:0
06.mp4:1
07.mp4:3
08.mp4:0
09.mp4:1
10.mp4:3
11.mp4:1
12.mp4:1
13.mp4:8
14.mp4:8
15.mp4:1
16.mp4:3
17.mp4:1
74
VShot 00.mp4 12 0-12
VShot 00.mp4 12 4-16
VShot 00.mp4 12 8-20
VShot 00.mp4 12 12-24
VShot 00.mp4 12 16-28
VShot 00.mp4 16 0-16
VShot 00.mp4 16 4-20
VShot 00.mp4 16 8-24
VShot 00.mp4 16 12-28
VShot 00.mp4 24 0-24
VShot 00.mp4 24 4-28
VShot 01.mp4 12 0-12
VShot 01.mp4 12 4-16
VShot 01.mp4 12 8-20
VShot 01.mp4 12 12-24
VShot 01.mp4 12 16-28
VShot 01.mp4 16 0-16
VShot 01.mp4 16 4-20
VShot 01.mp4 16 8-24
VShot 01.mp4 16 12-28
VShot 01.mp4 24 0-24
VShot 01.mp4 24 4-28
VShot 02.mp4 12 0-12
VShot 02.mp4 12 4-16
VShot 02.mp4 12 8-20
VShot 02.mp4 12 12-24
VShot 02.mp4 12 16-28
VShot 02.mp4 12 20-32
VShot 02.mp4 16 0-16
VShot 02.mp4 16 4-20
VShot 02.mp4 16 8-24
VShot 02.mp4 16 12-28
VShot 02.mp4 16 16-32
VShot 02.mp4 24 0-24
VShot 02.mp4 24 4-28
VShot 02.mp4 24 8-32
VShot 02.mp4 32 0-32
VShot 03.mp4 12 0-12
VShot 03.mp4 12 4-16
VS

In [97]:
vshot_group.calc_cut_scores_of_vshots()
print(vshot_group.cut_scores)

5476
[[0.000967636180575937, 1.9583640588921299e-19, 1.2554544467284316e-14, 5.382181100799244e-08, 7.829867740838381e-07, 0.000967636180575937, 1.9583640588921299e-19, 1.2554544467284316e-14, 5.382181100799244e-08, 0.000967636180575937, 1.9583640588921299e-19, 0.6449031233787537, 0.9963397979736328, 0.9989392161369324, 0.9988466501235962, 0.9981594681739807, 0.6449031233787537, 0.9963397979736328, 0.9989392161369324, 0.9988466501235962, 0.6449031233787537, 0.9963397979736328, 0.9215624332427979, 0.8828437924385071, 0.09585504978895187, 0.7467941641807556, 0.9960313439369202, 0.9274219870567322, 0.9215624332427979, 0.8828437924385071, 0.09585504978895187, 0.7467941641807556, 0.9960313439369202, 0.9215624332427979, 0.8828437924385071, 0.09585504978895187, 0.9215624332427979, 0.8342573642730713, 0.9809649586677551, 0.1542287915945053, 0.8342573642730713, 0.9809649586677551, 0.907369077205658, 0.1270100325345993, 0.6140738725662231, 0.6600825786590576, 0.6140738725662231, 0.92180913686752

In [6]:
def get_valid_head_tail_frame(vshot):
    frames = vshot.video.frames[vshot.start:vshot.end]
    if len(frames) == 1:
        return (None, None)
    elif len(frames) == 2:
        return (frames[1], frames[1])
    elif len(frames) <= 8:
        mid = len(frames) // 2
        return (frames[mid], frames[mid])
    else:
        return (frames[3], frames[-4])

In [7]:
%%time
data = []
for i, ivshot in enumerate(vshot_group.vshots):
    _, prev_t = get_valid_head_tail_frame(ivshot)
    for j, jvshot in enumerate(vshot_group.vshots):
        curr_h, _ = get_valid_head_tail_frame(jvshot)
        data.append([prev_t.data, curr_h.data, prev_t.flow_row, curr_h.flow_row])

print(len(data))

5476
CPU times: user 16.7 ms, sys: 105 µs, total: 16.8 ms
Wall time: 16.2 ms


In [9]:
import cv2
def rgb_2_hsv(data):
    new_data = []
    for pdata, cdata, pflow, cflow in data:
        pdata = cv2.cvtColor(pdata, cv2.COLOR_RGB2HSV)
        cdata = cv2.cvtColor(cdata, cv2.COLOR_RGB2HSV)
#         pdata = rgb_to_hsv(pdata)
#         cdata = rgb_to_hsv(cdata)
        new_data.append([pdata, cdata, pflow, cflow])
    return new_data

In [10]:
%%time
hsv_data = rgb_2_hsv(data)

CPU times: user 794 ms, sys: 72.1 ms, total: 866 ms
Wall time: 924 ms


In [71]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from skimage import io, transform

class CutEditorTestDataset(Dataset):
    """Cut editor dataset."""

    def __init__(self, data, transform=None):
        data = [(pdata.astype(np.float32), cdata.astype(np.float32), pflow.astype(np.float32), cflow.astype(np.float32)) for
                        (pdata, cdata, pflow, cflow) in data]
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        pdata, cdata, pflow, cflow = self.data[idx]

        if self.transform:
            pdata, cdata, pflow, cflow = self.transform((pdata, cdata, pflow, cflow))
        return pdata, cdata, pflow, cflow

class Rescale(object):
    """Rescale the image in a sample to a given size.
    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, data):
        pdata, cdata, pflow, cflow = data
        h, w = pdata.shape[:2]
        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        pdata = cv2.resize(pdata, (new_h, new_w), cv2.INTER_AREA)
        cdata = cv2.resize(cdata, (new_h, new_w), cv2.INTER_AREA)
        pflow = cv2.resize(pflow, (new_h, new_w), cv2.INTER_AREA)
        cflow = cv2.resize(cflow, (new_h, new_w), cv2.INTER_AREA)

        return pdata, cdata, pflow, cflow

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, data):
        pdata, cdata, pflow, cflow = data

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        pdata = pdata.transpose((2, 0, 1))
        cdata = cdata.transpose((2, 0, 1))
        pflow = pflow.transpose((2, 0, 1))
        cflow = cflow.transpose((2, 0, 1))

        return pdata, cdata, pflow, cflow

In [72]:
%%time
dataset = CutEditorTestDataset(hsv_data,
                               transform=transforms.Compose([
                               Rescale((76, 76)),
                               ToTensor()
                           ]))

CPU times: user 202 ms, sys: 0 ns, total: 202 ms
Wall time: 201 ms


In [73]:
print(len(dataset))

5476


In [74]:
loader = DataLoader(dataset, batch_size=256)

In [75]:
%%time
all_results = None
net.eval()
with torch.no_grad():
    for i, batch in enumerate(loader):
        pdata, cdata, pflow, cflow = batch
        outputs = net(pdata, cdata, pflow, cflow)
        results = torch.sigmoid(outputs)
    #     ones = torch.ones(results.shape)
    #     zeros = torch.zeros(results.shape)
    #     results = torch.where(results >= 0.5, ones, zeros)
        if all_results is None:
            all_results = results
        else:
            all_results = torch.cat((all_results, results))
print(all_results.shape)
print(all_results)

torch.Size([5476, 1])
tensor([[9.6764e-04],
        [1.9584e-19],
        [1.2555e-14],
        ...,
        [9.9835e-01],
        [9.9136e-01],
        [4.4049e-02]])
CPU times: user 24.2 s, sys: 2.25 s, total: 26.5 s
Wall time: 2.74 s


In [58]:
# print(all_results[0:10])
results = all_results.view(len(vshot_group.vshots), len(vshot_group.vshots)).tolist()
# print(results[0])
# print(results.shape)
# print(results.tolist())
vshot_group

tensor([[9.6764e-04],
        [1.9584e-19],
        [1.2555e-14],
        [5.3822e-08],
        [7.8299e-07],
        [9.6764e-04],
        [1.9584e-19],
        [1.2555e-14],
        [5.3822e-08],
        [9.6764e-04]])
tensor([9.6764e-04, 1.9584e-19, 1.2555e-14, 5.3822e-08, 7.8299e-07, 9.6764e-04,
        1.9584e-19, 1.2555e-14, 5.3822e-08, 9.6764e-04, 1.9584e-19, 6.4490e-01,
        9.9634e-01, 9.9894e-01, 9.9885e-01, 9.9816e-01, 6.4490e-01, 9.9634e-01,
        9.9894e-01, 9.9885e-01, 6.4490e-01, 9.9634e-01, 9.2156e-01, 8.8284e-01,
        9.5855e-02, 7.4679e-01, 9.9603e-01, 9.2742e-01, 9.2156e-01, 8.8284e-01,
        9.5855e-02, 7.4679e-01, 9.9603e-01, 9.2156e-01, 8.8284e-01, 9.5855e-02,
        9.2156e-01, 8.3426e-01, 9.8096e-01, 1.5423e-01, 8.3426e-01, 9.8096e-01,
        9.0737e-01, 1.2701e-01, 6.1407e-01, 6.6008e-01, 6.1407e-01, 9.2181e-01,
        1.1838e-02, 8.4829e-05, 1.1838e-02, 6.9355e-01, 9.8749e-01, 9.9999e-01,
        1.0000e+00, 1.0000e+00, 9.9999e-01, 9.9999e-01, 1.0

In [None]:
from matplotlib import pyplot as plt

for i, ivshot in enumerate(vshot_group.vshots):
    _, prev_t = get_valid_head_tail_frame(ivshot)
    for j, jvshot in enumerate(vshot_group.vshots):
        if ivshot.cross_with(jvshot):
            continue
        curr_h, _ = get_valid_head_tail_frame(jvshot)
#         data.append([prev_t.data, curr_h.data, prev_t.flow_row, curr_h.flow_row])
        index = i * len(vshot_group.vshots) + j
        if results[index] == 1.0:
            print(ivshot, jvshot)
            plt.figure(figsize=(8,4))
            ax1 = plt.subplot(1,2,1)
            plt.imshow(prev_t.data)
            ax1 = plt.subplot(1,2,2)
            plt.imshow(curr_h.data)
            plt.show()