In [1]:
import numpy as np
import imageio
from PIL import Image
import os

输入的数据：
- 视频(640x480)若干个
- 每一帧作为一个样本
- 同一个视频中的所有帧都是同一个标签

In [2]:
# Videos to process, each paired with the label shared by all of its frames
# (the file names suggest 1 = like, 0 = dislike).
dir = 'video'
labelled_videos = [
    ('like_lh.mp4', 1),
    ('like_rh.mp4', 1),
    ('dislike_lh.mp4', 0),
    ('dislike_rh.mp4', 0),
]
files = [video for video, _label in labelled_videos]
file_labels = [label for _video, label in labelled_videos]
nfile = len(files)
print('总共有{}个视频文件'.format(nfile))

总共有4个视频文件


最终需要得到的数据结果：
- images: (None, 320, 240, 3)，即 (帧数, 高, 宽, 通道数)
- labels: (None, )

需要**打乱顺序**且保持image与label对应，然后存入到一个.npz文件中

In [3]:
def cvtVid2arr(filepath, targetsize=(320,240)):
    '''
    Load every frame of a video into one numpy.ndarray, resized to targetsize.

    Arguments
    filepath: string, path of the video handed to imageio.get_reader
    targetsize: tuple (int, int), size of each output frame as (height, width)

    Returns
    a np.ndarray of (nframe, height, width, 3); the trailing 3 assumes the
    reader yields RGB frames. A video that yields no frames gives an empty
    uint8 array of shape (0, height, width, 3).
    '''
    HEIGHT, WIDTH = targetsize
    images = []
    with imageio.get_reader(filepath) as reader:
        for frame in reader:
            # Image.resize takes (width, height), the reverse of the
            # (height, width) layout of the array produced below.
            resized = Image.fromarray(frame).resize((WIDTH, HEIGHT))
            images.append(np.array(resized))
    if not images:
        # np.array([]) would be 1-D with shape (0,), which cannot be unpacked
        # as a 4-D shape or concatenated with the frames of other videos.
        # uint8 is assumed to match the dtype of decoded frames.
        return np.empty((0, HEIGHT, WIDTH, 3), dtype=np.uint8)
    return np.array(images)

In [4]:
def prepVid(filepath, label, targetsize=(320,240)):
    '''
    Read one video and pair each of its frames with the same label.

    Arguments
    filepath: string, passed through to cvtVid2arr
    label: int, value repeated once per frame
    targetsize: tuple (int, int), passed through to cvtVid2arr

    Returns
    images: np.ndarray (nframe, HEIGHT, WIDTH, 3)
    labels: np.ndarray (nframe,)
    '''
    frames = cvtVid2arr(filepath, targetsize)
    # Unpacking all four axes also rejects any array that is not 4-D.
    nframe, _height, _width, _channels = frames.shape
    ones = np.ones(nframe, dtype='int8')
    return frames, ones * label

In [5]:
def prepAll(dir, files, file_labels, targetsize=(320,240)):
    '''
    Load several videos and stack their frames and labels.

    Arguments
    dir: string, directory joined in front of every entry of files
         (the name shadows the builtin; kept so existing callers still work)
    files: list of string, video file names
    file_labels: list of int, one label per entry of files
    targetsize: tuple (int, int), forwarded to prepVid for every video

    Returns
    images: np.ndarray, per-video frame arrays concatenated along axis 0
    labels: np.ndarray, per-video label arrays concatenated along axis 0
    '''
    assert len(files) == len(file_labels)
    images = []
    labels = []
    for name, label in zip(files, file_labels):
        path = os.path.join(dir, name)
        # Forward targetsize: it used to be dropped here, so prepVid always
        # fell back to its own default size regardless of the argument.
        imgs, lbs = prepVid(path, label, targetsize)
        images.append(imgs)
        labels.append(lbs)
    images = np.concatenate(images)
    labels = np.concatenate(labels)
    return images, labels

In [6]:
# Build the whole dataset: frames of every listed video plus one label per frame.
images, labels = prepAll(dir, files, file_labels, targetsize=(320, 240))

In [7]:
# Sanity check on the stacked frames; the first axis is the total frame count.
images.shape

(1458, 320, 240, 3)

In [8]:
# Sanity check on the labels; the length should equal images.shape[0].
labels.shape

(1458,)

In [9]:
# Shuffle frames and labels together. A single permutation indexes both
# arrays, so every image keeps its own label by construction rather than by
# relying on two separately re-seeded in-place shuffles staying in lockstep.
assert len(images) == len(labels)
np.random.seed(42)
perm = np.random.permutation(len(labels))
# Fancy indexing returns shuffled copies; the names are rebound to them.
images = images[perm]
labels = labels[perm]

In [10]:
# Store both arrays in one archive: frames under key 'x', labels under key 'y'.
arrays = {'x': images, 'y': labels}
np.savez('data.npz', **arrays)