In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import torch
import os
import subprocess
import aedat
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.animation as animation
from tqdm.notebook import tqdm
from IPython.display import Video

<IPython.core.display.Javascript object>

## Load mapping + single recording using AEDAT Library
Recording is loaded into an array of datapoints in which each entry is a gesture consisting of a label and its corresponding events. [Dataset is taken from IBM](https://www.research.ibm.com/interactive/dvsgesture/)

In [3]:
# Load the table that maps each numeric gesture label to its action name,
# then display it as the cell output.
mapping_df = pd.read_csv("../data/gesture_mapping.csv")
mapping_df

Unnamed: 0,action,label
0,hand_clapping,1
1,right_hand_wave,2
2,left_hand_wave,3
3,right_hand_clockwise,4
4,right_hand_counter_clockwise,5
5,left_hand_clockwise,6
6,left_hand_counter_clockwise,7
7,forearm_roll_forward,8
8,forearm_roll_backward,8
9,drums,9


<IPython.core.display.Javascript object>

In [4]:
# Load a single recording together with its label file; the result exposes the
# gestures of the recording as `dvs.datapoints` (each with a label and events).
dvs = aedat.DVSGestureData(
    "../data/DvsGesture/user01_fluorescent.aedat",
    "../data/DvsGesture/user01_fluorescent_labels.csv",
)

first_datapoint = dvs.datapoints[0]

# Resolve the numeric label to its action name. Boolean filtering keeps the
# original row index, so the first match has to be taken by position
# (`.iloc[0]`): `.action[0]` looks up index *label* 0 and raises KeyError for
# every gesture whose mapping row is not row 0.
label = mapping_df.loc[mapping_df.label == first_datapoint.label, "action"].iloc[0]

print(
    f"First datapoint shows a {label} containing {len(first_datapoint.events)} events."
)

First datapoint shows a hand_clapping containing 213025 events.


<IPython.core.display.Javascript object>

## Load single gesture event data into dataframe
An event is defined as the change in polarity of a single pixel, meaning that a pixel is either turned on or off. The resolution of the camera is 128x128. The events are recorded asynchronously and therefore contain a timestamp that monotonically increases within the event array.

Each event contains information about:
- valid: validity of event
- x: position of pixels x coordinate
- y: position of pixels y coordinate
- polarity: polarity of event on/off -> 0/1
- timestamp: timestamp in microseconds

In [5]:
# Flatten the events of the first gesture into plain rows, one per event,
# reading the fields in the same order as the DataFrame columns below.
first_gesture_events = dvs.datapoints[0].events
data = [
    [
        ev.get_valid(),
        ev.get_x(),
        ev.get_y(),
        ev.get_polarity(),
        ev.get_timestamp(),
    ]
    for ev in first_gesture_events
]

event_columns = ["valid", "x", "y", "polarity", "timestamp"]
df = pd.DataFrame(data=data, columns=event_columns)

# Preview the first ten events.
df.head(10)

Unnamed: 0,valid,x,y,polarity,timestamp
0,1,82,49,1,28
1,1,92,55,0,38
2,1,92,55,0,39
3,1,92,55,0,39
4,1,99,90,1,65
5,1,92,77,0,218
6,1,96,56,1,228
7,1,92,56,1,234
8,1,92,55,1,278
9,1,93,78,0,286


<IPython.core.display.Javascript object>

In [6]:
# Number of rows in df, i.e. events recorded for the first gesture.
len(df)

213025

<IPython.core.display.Javascript object>

## Aggregate Pixels to generate frames

Since I am not sure how to generate frames out of the event data, I tried two approaches for now:
- aggregate pixels into equally sized intervals and for each interval create a (128x128) tensor that contains pixel values from aggregated pixels that are set to 1
- aggregate pixels into quantiles using pandas qcut function on the timestamp and generate tensors like above

In [None]:
def generate_video(images, path):
    """Render frames to PNG files and encode them into ``<path>/video.mp4``.

    Every image is drawn with a grey colormap and saved as
    ``<path>/img_<idx>.png``; ffmpeg is then run inside ``path`` to turn the
    numbered PNGs into ``video.mp4`` (input read at 8 frames per second,
    output written at 30 fps in yuv420p pixel format).

    Args:
        images: Iterable of 2D image-like objects accepted by ``plt.imshow``.
        path: Directory that receives the PNG frames and the video. It is
            created if it does not exist yet.

    Returns:
        None. A ``RuntimeWarning`` is issued when ffmpeg exits with a
        non-zero status, because its own output is discarded.
    """
    import warnings

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(path, exist_ok=True)

    plt.ioff()
    for idx, img in enumerate(tqdm(images)):
        plt.imshow(img, cmap=cm.Greys_r)
        plt.savefig(f"{path}/img_{idx}.png")
        # Close the figure after each frame so figures do not pile up in memory.
        plt.close()

    return_code = subprocess.call(
        [
            "ffmpeg",
            # Overwrite an existing video.mp4 instead of prompting/refusing.
            "-y",
            "-framerate",
            "8",
            "-i",
            "img_%d.png",
            "-r",
            "30",
            "-pix_fmt",
            "yuv420p",
            "video.mp4",
        ],
        cwd=path,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
    )

    # ffmpeg's output is silenced above, so surface a failure explicitly.
    if return_code != 0:
        warnings.warn(
            f"ffmpeg exited with status {return_code}; {path}/video.mp4 may be missing",
            RuntimeWarning,
        )

### Equally sized windows

In [None]:
def aggr_pixels_to_tensors(df, window_size=1000):
    """Aggregate consecutive events into binary 128x128 frames.

    The rows of ``df`` are split, in order, into non-overlapping windows of
    ``window_size`` events. Each window produces one ``(128, 128)`` tensor of
    zeros in which position ``[x][y]`` is set to 1 for every event in that
    window.

    Args:
        df: DataFrame with integer ``x`` and ``y`` columns, one row per event.
        window_size: Number of events aggregated into a single frame.

    Returns:
        A list of ``len(df) // window_size`` tensors. Trailing events that do
        not fill a complete window are not turned into a frame.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive number of events")

    images = []
    no_frames = len(df) // window_size

    for window in tqdm(range(no_frames)):
        # Window k covers rows [k * window_size, (k + 1) * window_size).
        start = window * window_size
        end = start + window_size
        chunk = df.iloc[start:end]

        # Copy the coordinates into index tensors and set all pixels of the
        # window in one assignment instead of iterating row by row.
        xs = torch.tensor(chunk["x"].to_numpy(), dtype=torch.long)
        ys = torch.tensor(chunk["y"].to_numpy(), dtype=torch.long)

        tensor = torch.zeros(128, 128)
        tensor[xs, ys] = 1

        images.append(tensor)

    return images

In [None]:
# Build the frames with the default window_size and show how many were produced.
images = aggr_pixels_to_tensors(df)
len(images)

In [None]:
# Keep the equally-sized-window frames in their own directory. Writing them to
# "../data/images_qcut" would share video.mp4 (and the img_*.png frames) with
# the quantile section, so whichever section ran first would make the other
# one skip generation and display the wrong video.
window_video_dir = "../data/images_window"
video_path = os.path.join(window_video_dir, "video.mp4")

# Only render when no video is cached on disk yet.
if not os.path.isfile(video_path):
    os.makedirs(window_video_dir, exist_ok=True)
    generate_video(images, window_video_dir)

Video(video_path, html_attributes="muted autoplay loop")

### Quantiles by timestamp

In [None]:
# Bin the events into 100 quantiles of their timestamp (pd.qcut) and group the
# rows of df by the bin they fall into.
groups = df.groupby(pd.qcut(df.timestamp, 100))

In [None]:
# Summary statistics of the number of events per timestamp-quantile group.
groups.size().describe()

In [None]:
# One binary 128x128 frame per timestamp-quantile group: position [x][y] is
# set to 1 for every event that falls into the group.
qcut_images = []
for _, group in tqdm(groups):
    tensor = torch.zeros(128, 128)

    # Set all pixels of the group with a single indexed assignment instead of
    # looping over the rows one at a time with iterrows().
    xs = torch.tensor(group["x"].to_numpy(), dtype=torch.long)
    ys = torch.tensor(group["y"].to_numpy(), dtype=torch.long)
    tensor[xs, ys] = 1

    qcut_images.append(tensor)

In [None]:
# Location of the video rendered from the quantile-based frames.
qcut_video_path = "../data/images_qcut/video.mp4"

# Rendering is skipped when the video file is already present on disk.
qcut_video_cached = os.path.isfile(qcut_video_path)
if not qcut_video_cached:
    generate_video(qcut_images, "../data/images_qcut")

Video(qcut_video_path, html_attributes="muted autoplay loop")