In [10]:
import matplotlib.pyplot as plt
from IPython.display import Image
import numpy as np
import pandas as pd
from cv2 import *
from PIL import Image
import time
import os
import pandas as pd

In [11]:
def load_video_data(movie_path, frame_indices, headless=False, background_subtract=False, randomize=False):
    data = []
    start_frame, end_frame = frame_indices
    cap = cv2.VideoCapture(movie_path)
    i = 1
    while(cap.isOpened()):
        ret, frame = cap.read()
        i += 1
        if start_frame < i < end_frame:
            if randomize:
                pass
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if background_subtract:
                frame = cv2.subtract(frame, background)
            
            if not headless:
                cv2.imshow('Frame', frame)
                time.sleep(0.05)
            
            data.append(frame)

        if i > end_frame or cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    return data, i

In [12]:
def show_data(data, verbose=False):    
    fig, axs = plt.subplots(2, 5, figsize=(15, 15))
    indices = iter(np.linspace(0, len(data)-1, 10))

    for i in range(2):
        for j in range(5):
            index = int(next(indices))
            frame = data[index]
            axs[i][j].xaxis.set_visible(False)
            axs[i][j].yaxis.set_visible(False)
            axs[i][j].imshow(frame)

In [13]:
def load_sample(subject, scene, gesture, headless=False):    
    # Get labels' path    
    subject_path_for_labels = 'subject0{}'.format(subject) if (subject < 10) else 'subject{}'.format(subject)
    scene_path_for_labels = 'Scene{}'.format(scene)
    labels_path = 'labels/{}/{}'.format(subject_path_for_labels, scene_path_for_labels)
    
    # Change directory to current directory.
    cwd = os.getcwd()
    os.chdir(labels_path)
    groups = os.listdir(os.getcwd())
    
    # Iterate through all gesture-frame pairs
    for group_csv, group_num in zip(groups, range(1, len(groups) + 1)):
        frames = pd.read_csv(group_csv, header=None)
        if gesture in frames[0].values:
            os.chdir(cwd)
            frame_indices = frames[frames[0] == gesture].values[0][1:]
            
            # Get video path
            subject_path_for_videos = 'Subject0{}'.format(subject) if subject < 10 else 'Subject{}'.format(subject)
            scene_path_for_videos = 'Scene{}'.format(scene)
            video_path = 'Videos/{}/{}/Color/rgb{}.avi'.format(subject_path_for_videos, scene_path_for_videos, group_num)
        
            # Load gesture data
            data = load_video_data(video_path, frame_indices, headless)
            return data
        
    os.chdir(cwd)    
    return None

In [14]:
data_43 = []
data_14 = []

for subject in range(1, 31):
    data_43.append(load_sample(subject, 1, 43, headless=True))
    data_14.append(load_sample(subject, 1, 14, headless=True))

data_43 = [(x[0], [1, 0]) for x in data_43]
data_14 = [(x[0], [0, 1]) for x in data_14]

In [17]:
def star_clip(clip, euclidean=False):
    
    N = len(clip)
    M = np.zeros(clip[0].shape)
    for k in range(2, N):
        w_s = k/N
        I_k_1 = clip[k-1]
        I_k = clip[k]
        if euclidean:
            frame_diff = (I_k_1 - I_k) 
        frame_diff = I_k_1 - I_k
        delta = np.abs(frame_diff) * w_s
        M += delta
    
    M_x = cv2.Sobel(M, cv2.CV_64F, 1, 0, ksize=5)
    M_y = cv2.Sobel(M, cv2.CV_64F, 0, 1, ksize=5)
    star = np.array([M, M_x, M_y])
    
    return star

In [18]:
star_43 = [(star_clip(sample[0]), sample[1]) for sample in data_43]
star_14 = [(star_clip(sample[0]), sample[1]) for sample in data_14]

star_samples = star_43 + star_14
np.random.shuffle(star_samples)

In [19]:
def prepare_batches(data, batch_size, shuffle=False):    
    batches, labels = [], []
    i = 0
    while i < len(star_samples):
        batches.append(torch.tensor([sample[0] for sample in star_samples[i:i+4]]).float())
        labels.append(torch.tensor([sample[1] for sample in star_samples[i:i+4]]).long())
        i += batch_size
    return batches, labels

In [20]:
data, labels = prepare_batches(star_samples, 4)

NameError: name 'torch' is not defined

In [288]:
# import torch
# import torchvision
# import torchvision.transforms as transforms
# import torch.nn as nn
# import torch.nn.functional as F

# class Encoder(nn.Module):
    
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(3, 6, 5)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(6, 16, 5)
#         self.fc1 = nn.Linear(16 * 117 * 157, 500)
        
#     def encode(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = x.view(-1, 16 * 117 * 157)
#         x = self.fc1(x)
#         return x

In [21]:
from keras.layers import Dense, Flatten, Reshape, Input, InputLayer
from keras.models import Sequential, Model

def build_autoencoder(img_shape, code_size):
    # The encoder
    encoder = Sequential()
    encoder.add(InputLayer(img_shape))
    encoder.add(Flatten())
    encoder.add(Dense(code_size))

    # The decoder
    decoder = Sequential()
    decoder.add(InputLayer((code_size,)))
    decoder.add(Dense(np.prod(img_shape))) # np.prod(img_shape) is the same as 32*32*3, it's more generic than saying 3072
    decoder.add(Reshape(img_shape))

    return encoder, decoder

In [40]:
encoder, decoder = build_autoencoder((3, 480, 640), 1000)

In [45]:
star_samples[0][0].shape

(3, 480, 640)