In [1]:
import pyrealsense2 as rs
import numpy as np
import cv2
import dlib
from PIL import Image

In [2]:
import torch
from models import ResNet50, mobilenet
from torchvision import transforms
from torch.utils.data import DataLoader
from dataset.RGBD_transforms import Resize

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

pretrained_model_path = "3dface_models/logs_mobileNet_v2_with_th_12-18.14-41/3dface-model.pkl"

input_channels = 4
num_of_classes = 83

model = mobilenet(input_channels, num_of_classes, pretrained=False)
model.load_state_dict(torch.load(pretrained_model_path, map_location=device))
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()

model.eval()

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(4, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=Tr

In [4]:
train_transform = transforms.Compose([
                    Resize(224),
                    transforms.ToTensor(),
                ])

In [5]:
def predict(image):
    tensor_RGBD = train_transform(image)
    tensor_RGBD= tensor_RGBD.to(device)  
    predictions = model(tensor_RGBD[None, ...])
    return predictions

In [6]:
# load face detection model
detector = dlib.get_frontal_face_detector()

# Create a pipeline
pipeline = rs.pipeline()

# Create a config and configure the pipeline to stream
# different resolutions of color and depth streams
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)

# Start streaming
profile = pipeline.start(config)

# Create an align object
# rs.align allows us to perform alignment of depth frames to others frames
# The "align_to" is the stream type to which we plan to align depth frames.
align_to = rs.stream.color
align = rs.align(align_to)

In [7]:
try:
    while True:
        # Wait for a coherent pair of frames: depth and color
        frames = pipeline.wait_for_frames()
        
        # Align the depth frame to color frame
        aligned_frames = align.process(frames)
        
        # Get aligned frames
        aligned_depth_frame = aligned_frames.get_depth_frame() # aligned_depth_frame is a 640x480 depth image
        color_frame = aligned_frames.get_color_frame()

        # Validate that both frames are valid
        
        if not aligned_depth_frame or not color_frame:
            continue

        # Convert images to numpy arrays
        depth_image = np.asanyarray(aligned_depth_frame.get_data())
        color_image = np.asanyarray(color_frame.get_data())
        # dep_img = depth_image
        # rgb_img = color_image
        
        dets = detector(color_image, 1)
        for d in dets:
            x, y, w, h = d.left()-5, d.top()-15, d.right()+5, d.bottom()+10
            xy = x, y
            wh = w, h
            
            rgb_img = color_image[y+2:h-2, x+2:w-2]
            rgb_img = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2RGB)
            
            dep_img = depth_image[y+2:h-2, x+2:w-2]
            dep_img = np.expand_dims(dep_img, axis=-1)
            img = np.concatenate((rgb_img, dep_img), axis=-1)
            # print(rgb_img)
            # print(dep_img)
            # print(img)
                
            outputs = predict(img)
            acc = max(outputs[0]).item()
            if(acc<6):
                print(outputs)
                name = "unknown"
            else:
                _, preds = torch.max(outputs, 1)
                name = preds.item()
                
            cv2.rectangle(color_image, xy, wh, (255,0,0), 2)
            cv2.putText(color_image, name, (x, y-5), cv2.FONT_HERSHEY_COMPLEX, 0.7, (255, 255, 255), 2)
        
        # Show images
        cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('RealSense', color_image)
        key = cv2.waitKey(10)
        
        # Press esc or 'q' to close the image window
        if key & 0xFF == ord('q') or key == 27:
            cv2.destroyAllWindows()
            break

finally:
    # Stop streaming
    pipeline.stop()

tensor([[ 0.6106,  0.2597,  0.6266,  0.3193, -0.0812, -0.3014,  0.3155,  0.4120,
         -0.9094,  0.0177, -0.3698, -0.5289,  0.3527,  0.0596,  0.0972,  0.2254,
          0.1201,  1.1184, -0.3290, -0.4683, -0.3068,  0.4588,  0.4580,  0.0061,
         -0.0091, -0.7880,  1.2145,  1.1341, -0.0350, -0.2591,  0.1212,  0.3745,
         -0.9482,  0.1920, -0.9734, -0.7316, -0.7305, -0.3589,  0.1571,  0.2888,
         -0.3024, -0.1259,  0.8028,  0.6865,  0.5375, -0.6655,  0.2710,  0.4437,
          0.3632, -0.5059,  0.1340, -0.6138, -0.2562, -0.6788,  0.6435,  0.7338,
          0.2046, -0.2975,  0.4982, -0.0299,  0.3762,  0.3762,  0.2715,  0.9393,
         -0.4356, -0.1124,  0.3251, -0.5159, -0.2599,  0.0431,  0.1212, -0.2077,
          0.2677, -0.7414, -0.6905,  0.6630, -0.0563,  0.6082,  0.2832, -0.5069,
          0.0398, -0.5376, -0.8704]], device='cuda:0', grad_fn=<AddmmBackward>)
tensor([[ 0.6480,  0.1927,  0.6514,  0.2132, -0.1906, -0.2695,  0.2055,  0.5127,
         -0.9062,  0.0461, -0

tensor([[ 0.6098,  0.2605,  0.6273,  0.3188, -0.0819, -0.3022,  0.3158,  0.4113,
         -0.9100,  0.0183, -0.3684, -0.5291,  0.3530,  0.0599,  0.0977,  0.2245,
          0.1215,  1.1188, -0.3283, -0.4683, -0.3058,  0.4571,  0.4576,  0.0067,
         -0.0073, -0.7887,  1.2126,  1.1321, -0.0332, -0.2601,  0.1190,  0.3746,
         -0.9486,  0.1920, -0.9732, -0.7321, -0.7300, -0.3583,  0.1569,  0.2910,
         -0.3018, -0.1268,  0.8030,  0.6883,  0.5392, -0.6635,  0.2725,  0.4430,
          0.3629, -0.5052,  0.1332, -0.6137, -0.2586, -0.6811,  0.6427,  0.7326,
          0.2046, -0.2987,  0.4982, -0.0321,  0.3780,  0.3752,  0.2715,  0.9393,
         -0.4369, -0.1100,  0.3248, -0.5167, -0.2596,  0.0425,  0.1208, -0.2081,
          0.2652, -0.7419, -0.6871,  0.6644, -0.0577,  0.6094,  0.2845, -0.5077,
          0.0409, -0.5374, -0.8689]], device='cuda:0', grad_fn=<AddmmBackward>)
tensor([[ 0.6092,  0.2605,  0.6271,  0.3190, -0.0818, -0.3021,  0.3161,  0.4113,
         -0.9097,  0.0182, -0