In [1]:
import pyrealsense2 as rs
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import cv2

# Configure depth and color streams
pipeline = rs.pipeline()
config = rs.config()

# Get device product line for setting a supporting resolution
pipeline_wrapper = rs.pipeline_wrapper(pipeline)
pipeline_profile = config.resolve(pipeline_wrapper)
device = pipeline_profile.get_device()
device_product_line = str(device.get_info(rs.camera_info.product_line))

found_rgb = False
for s in device.sensors:
    if s.get_info(rs.camera_info.name) == 'RGB Camera':
        found_rgb = True
        break
if not found_rgb:
    print("The demo requires Depth camera with Color sensor")
    exit(0)

config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)

if device_product_line == 'L500':
    config.enable_stream(rs.stream.color, 960, 540, rs.format.bgr8, 30)
else:
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)

model_rgb = keras.Sequential(
    [
        layers.Conv2D(128, kernel_size=(3, 4), input_shape=(120, 160, 3), strides=(1, 1), padding='valid',
                      activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(64, 3, padding="same", activation="relu"),
        layers.Conv2D(32, 3, padding="same", activation="relu"),
        layers.MaxPooling2D(),
        layers.BatchNormalization(),
        layers.Conv2D(32, 3, padding="same", activation="relu"),
        layers.MaxPooling2D(),
        layers.BatchNormalization(),
        layers.Flatten(),
        layers.Dense(200, activation='relu'),
        layers.Dense(100, activation='relu'),
        layers.Dropout(0.4),
        layers.Dense(10, activation='softmax'),
    ]
)

model_rgb.summary()
model_rgb.load_weights('rgb_only_new_weights.h5')

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 118, 157, 128)     4736      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 59, 78, 128)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 59, 78, 64)        73792     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 59, 78, 32)        18464     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 29, 39, 32)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 29, 39, 32)        128       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 29, 39, 32)        9

In [13]:
try:
    results = []
    pipeline.start(config)
    while True:
        for i in range(30):
            pipeline.wait_for_frames()
        frames = pipeline.wait_for_frames()
        depth_frame = frames.get_depth_frame()
        color_frame = frames.get_color_frame()
        # Convert images to numpy arrays
        depth_image = np.asanyarray(depth_frame.get_data())
        color_image = np.asanyarray(color_frame.get_data())

        tmp = cv2.resize(color_image, (160, 120), interpolation=cv2.INTER_AREA)
        res = model_rgb.predict(tmp.reshape((1, tmp.shape[0], tmp.shape[1], tmp.shape[2])))

        results.append(np.argmax(res[0]))

        if len(results) == 10:
            print(results)
            print(np.unique(results[0]))
            results = []

        # Apply colormap on depth image (image must be converted to 8-bit per pixel first)
        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
        depth_colormap_dim = depth_colormap.shape
        color_colormap_dim = color_image.shape

        # If depth and color resolutions are different, resize color image to match depth image for display
        if depth_colormap_dim != color_colormap_dim:
            resized_color_image = cv2.resize(color_image, dsize=(depth_colormap_dim[1], depth_colormap_dim[0]),
                                             interpolation=cv2.INTER_AREA)
            images = np.hstack((resized_color_image, depth_colormap))
        else:
            images = np.hstack((color_image, depth_colormap))

        # Show images
        cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('RealSense', images)
        cv2.waitKey(1)
finally:
    cv2.destroyAllWindows()
    pipeline.stop()

[4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
[4]
[4, 4, 4, 5, 5, 5, 0, 6, 5, 5]
[4]
[5, 5, 5, 5, 0, 5, 6, 6, 6, 5]
[5]
[5, 4, 4, 4, 0, 6, 8, 5, 0, 5]
[5]
[5, 5, 5, 1, 0, 0, 5, 0, 0, 0]
[5]
[0, 5, 1, 4, 5, 4, 2, 0, 5, 0]
[0]
[0, 5, 0, 0, 0, 7, 0, 0, 0, 5]
[0]
[2, 4, 4, 4, 4, 4, 4, 4, 4, 4]
[2]
[4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
[4]
[4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
[4]


KeyboardInterrupt: 

In [12]:
pipeline.stop()

RuntimeError: stop() cannot be called before start()

In [2]:
model_depth = keras.models.load_model('video_depth_weights.h5')

In [4]:
model_depth.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d (Conv3D)              (None, 38, 118, 157, 8)   872       
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 19, 59, 78, 8)     0         
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 19, 59, 78, 16)    3472      
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 9, 29, 39, 16)     0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 9, 29, 39, 16)     64        
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 9, 29, 39, 8)      3464      
_________________________________________________________________
max_pooling3d_2 (MaxPooling3 (None, 4, 14, 19, 8)     

In [22]:
pipeline = rs.pipeline()
config = rs.config()

# Get device product line for setting a supporting resolution
pipeline_wrapper = rs.pipeline_wrapper(pipeline)
pipeline_profile = config.resolve(pipeline_wrapper)
device = pipeline_profile.get_device()
device_product_line = str(device.get_info(rs.camera_info.product_line))

found_rgb = False
for s in device.sensors:
    if s.get_info(rs.camera_info.name) == 'RGB Camera':
        found_rgb = True
        break
if not found_rgb:
    print("The demo requires Depth camera with Color sensor")
    exit(0)

config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)

if device_product_line == 'L500':
    config.enable_stream(rs.stream.color, 960, 540, rs.format.bgr8, 30)
else:
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)

PERCENT = 25
sequence_length = 40

colorizer = rs.colorizer()
colorizer.set_option(rs.option.color_scheme, 0)


def resize_image(img):
    width = int(img.shape[1] * PERCENT / 100)
    height = int(img.shape[0] * PERCENT / 100)
    dim = (width, height)
    resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
    return resized

In [49]:
# Start streaming
pipeline.start(config)

sequence_rgb = []
predictions_rgb = []
sequence_depth = []
predictions_depth = []
threshold = 0.7

try:
    while True:

        # Wait for a coherent pair of frames: depth and color
        frames = pipeline.wait_for_frames()
        depth_frame = frames.get_depth_frame()
        color_frame = frames.get_color_frame()
        if not depth_frame or not color_frame:
            continue

        # Convert images to numpy arrays
        color_image = np.asanyarray(color_frame.get_data())

        color_image = resize_image(color_image)
        depth_image = resize_image(cv2.resize(np.asanyarray(colorizer.colorize(depth_frame).get_data()), (640, 480),
                                              interpolation=cv2.INTER_AREA))

        sequence_rgb.append(color_image)
        sequence_rgb = sequence_rgb[-sequence_length:]

        sequence_depth.append(depth_image)
        sequence_depth = sequence_depth[-sequence_length:]
        if len(sequence_depth) == 40:
            break
finally:
    # Stop streaming
    pipeline.stop()

RuntimeError: MFCreateDeviceSource(_device_attrs, &_source) returned: HResult 0x80070003: "Le chemin d’accès spécifié est introuvable."

In [28]:
tmp = np.array(sequence_depth)

In [29]:
tmp.shape

(40, 120, 160, 3)

In [50]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, classification_report

movements = np.array(['scroll_right', 'scroll_left', 'scroll_up', 'scroll_down', 'zoom_in', 'zoom_out'])
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical


In [35]:
root = 'video_dataset/depth'
label_map = {label: num for num, label in enumerate(movements)}
sequences, labels = [], []
for movement in movements:
    for dirpath, dirnames, files in os.walk(os.path.join(root, movement)):
        sequence = []
        for file_name in files:
            img = cv2.imread(os.path.join(dirpath, file_name))
            sequence.append(img)
        if len(sequence) > 0:
            sequences.append(sequence)
            labels.append(label_map[movement])
print('Image loading done! Starting train set creation...')
X = np.array(sequences)
y = to_categorical(labels).astype(int)
X_train_depth, X_val_depth, y_train_depth, y_val_depth = train_test_split(X, y, test_size=0.3)

Image loading done! Starting train set creation...


In [36]:
root = 'video_dataset/rgb'
label_map = {label: num for num, label in enumerate(movements)}
sequences, labels = [], []
for movement in movements:
    for dirpath, dirnames, files in os.walk(os.path.join(root, movement)):
        sequence = []
        for file_name in files:
            img = cv2.imread(os.path.join(dirpath, file_name))
            sequence.append(img)
        if len(sequence) > 0:
            sequences.append(sequence)
            labels.append(label_map[movement])
print('Image loading done! Starting train set creation...')
X = np.array(sequences)
y = to_categorical(labels).astype(int)
X_train_rgb, X_val_rgb, y_train_rgb, y_val_rgb = train_test_split(X, y, test_size=0.3)

Image loading done! Starting train set creation...


In [51]:
model_rgb = keras.models.load_model('video_rgb_weights.h5')
model_depth = keras.models.load_model('video_depth_weights.h5')

yhat_rgb = model_rgb.predict(X_val_rgb)
ytrue_rgb = np.argmax(y_val_rgb, axis=1).tolist()
yhat_rgb = np.argmax(yhat_rgb, axis=1).tolist()
multilabel_confusion_matrix(ytrue_rgb, yhat_rgb, labels=movement)

array([[[386,   0],
        [  0,  83]],

       [[377,   1],
        [  0,  91]],

       [[404,   0],
        [  0,  65]],

       [[389,   1],
        [  0,  79]],

       [[390,   0],
        [  1,  78]],

       [[397,   0],
        [  1,  71]]], dtype=int64)

In [53]:
print(classification_report(ytrue_rgb, yhat_rgb))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        83
           1       0.99      1.00      0.99        91
           2       1.00      1.00      1.00        65
           3       0.99      1.00      0.99        79
           4       1.00      0.99      0.99        79
           5       1.00      0.99      0.99        72

    accuracy                           1.00       469
   macro avg       1.00      1.00      1.00       469
weighted avg       1.00      1.00      1.00       469



In [61]:
yhat_depth = model_depth.predict(X_train_depth)
ytrue_depth = np.argmax(y_train_depth, axis=1).tolist()
yhat_depth = np.argmax(yhat_depth, axis=1).tolist()
multilabel_confusion_matrix(ytrue_depth, yhat_depth)

  mask &= (ar1 != a)


TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [63]:
print(classification_report(ytrue_depth, yhat_depth))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       176
           1       1.00      1.00      1.00       186
           2       0.99      1.00      1.00       177
           3       1.00      0.98      0.99       188
           4       1.00      1.00      1.00       177
           5       0.99      1.00      0.99       188

    accuracy                           1.00      1092
   macro avg       1.00      1.00      1.00      1092
weighted avg       1.00      1.00      1.00      1092

