In [1]:
import pyrealsense2 as rs
import numpy as np
import cv2
import dlib
from PIL import Image

In [2]:
import torch
from models import ResNet50, mobilenet
from torchvision import transforms
from torch.utils.data import DataLoader
from dataset.RGBD_transforms import Resize

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# pretrained_model_path = "3dface_models/predict/logs_mobileNet_v2_predict_02-14.16-26/3dface-model.pkl"
pretrained_model_path = "3dface_models/predict/logs_mobileNet_v2_predict_02-22.16-30/3dface-model.pkl"

input_channels = 4
num_of_classes = 20

model = mobilenet(input_channels, num_of_classes, pretrained=False)
model.load_state_dict(torch.load(pretrained_model_path, map_location=device))
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()

model.eval()

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(4, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=Tr

In [4]:
train_transform = transforms.Compose([
                    Resize(224),
                    transforms.ToTensor(),
                ])

In [5]:
def predict(image):
    tensor_RGBD = train_transform(image)
    tensor_RGBD= tensor_RGBD.to(device)  
    predictions = model(tensor_RGBD[None, ...])
    return predictions

In [6]:
# load face detection model
detector = dlib.get_frontal_face_detector()

# Create a pipeline
pipeline = rs.pipeline()

# Create a config and configure the pipeline to stream
# different resolutions of color and depth streams
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)

# Start streaming
profile = pipeline.start(config)

# Create an align object
# rs.align allows us to perform alignment of depth frames to others frames
# The "align_to" is the stream type to which we plan to align depth frames.
align_to = rs.stream.color
align = rs.align(align_to)

In [7]:
list_name = [0,1,2,'Chitsanupong',4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]

In [8]:
try:
    while True:
        # Wait for a coherent pair of frames: depth and color
        frames = pipeline.wait_for_frames()
        
        # Align the depth frame to color frame
        aligned_frames = align.process(frames)
        
        # Get aligned frames
        aligned_depth_frame = aligned_frames.get_depth_frame() # aligned_depth_frame is a 640x480 depth image
        color_frame = aligned_frames.get_color_frame()

        # Validate that both frames are valid
        
        if not aligned_depth_frame or not color_frame:
            continue

        # Convert images to numpy arrays
        depth_image = np.asanyarray(aligned_depth_frame.get_data())
        color_image = np.asanyarray(color_frame.get_data())
        # dep_img = depth_image
        # rgb_img = color_image
        
        dets = detector(color_image, 1)
        for d in dets:
            x, y, w, h = d.left()-5, d.top()-15, d.right()+5, d.bottom()+10
            xy = x, y
            wh = w, h
            
            rgb_img = color_image[y+2:h-2, x+2:w-2]
            rgb_img = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2RGB)
            
            dep_img = depth_image[y+2:h-2, x+2:w-2]
            dep_img = np.expand_dims(dep_img, axis=-1)
            img = np.concatenate((rgb_img, dep_img), axis=-1)
            # print(rgb_img)
            # print(dep_img)
            # print(img)
                
            outputs = predict(img)
            acc = max(outputs[0]).item()
            print(outputs)
            print(acc)
            if(acc<0.6):
                # print(outputs)
                name = "unknown"
            else:
                _, preds = torch.max(outputs, 1)
                n_name = preds.item()
                name = str(list_name[n_name])

            cv2.rectangle(color_image, xy, wh, (255,0,0), 2)
            cv2.putText(color_image, name, (x, y-5), cv2.FONT_HERSHEY_COMPLEX, 0.7, (255, 255, 255), 2)
        
        # Show images
        cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('RealSense', color_image)
        key = cv2.waitKey(10)
        
        # Press esc or 'q' to close the image window
        if key & 0xFF == ord('q') or key == 27:
            cv2.destroyAllWindows()
            break

finally:
    # Stop streaming
    pipeline.stop()

tensor([[-0.1441,  0.5667, -0.4469,  0.8187, -0.1911,  0.3876, -0.2509, -0.4423,
          0.2317, -0.4720, -0.6880,  0.4425,  0.5383,  0.1220, -0.2682, -0.2361,
          0.4239, -0.2891,  0.1184,  0.0073]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.818688154220581
tensor([[-0.1398,  0.5689, -0.4431,  0.8165, -0.1917,  0.3971, -0.2494, -0.4454,
          0.2297, -0.4651, -0.6884,  0.4504,  0.5441,  0.1159, -0.2708, -0.2407,
          0.4303, -0.2908,  0.1105,  0.0074]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8164887428283691
tensor([[-0.1598,  0.5746, -0.4733,  0.8564, -0.1891,  0.3820, -0.2532, -0.4581,
          0.2395, -0.4847, -0.7128,  0.4495,  0.5552,  0.1238, -0.2698, -0.2261,
          0.4189, -0.2902,  0.1226, -0.0086]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8563883900642395
tensor([[-0.1451,  0.5678, -0.4460,  0.8201, -0.1910,  0.3892, -0.2496, -0.4427,
          0.2307, -0.4732, -0.6888,  0.4428,  0.5390,  0.1220, -0.2690, -0.2371,
        

tensor([[-0.2293,  0.6412, -0.4975,  0.8935, -0.1997,  0.4171, -0.2599, -0.4637,
          0.2084, -0.4921, -0.7030,  0.3852,  0.6153,  0.1784, -0.2844, -0.2033,
          0.4539, -0.2969,  0.1180, -0.0175]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8934888243675232
tensor([[-0.2296,  0.6402, -0.4976,  0.8929, -0.1999,  0.4171, -0.2600, -0.4638,
          0.2084, -0.4918, -0.7030,  0.3856,  0.6155,  0.1781, -0.2844, -0.2029,
          0.4538, -0.2967,  0.1181, -0.0173]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8928905129432678
tensor([[-0.2298,  0.6409, -0.4974,  0.8938, -0.1993,  0.4167, -0.2600, -0.4637,
          0.2082, -0.4924, -0.7032,  0.3847,  0.6152,  0.1782, -0.2847, -0.2028,
          0.4538, -0.2973,  0.1179, -0.0176]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8937602043151855
tensor([[-0.2294,  0.6416, -0.4974,  0.8936, -0.1996,  0.4171, -0.2598, -0.4638,
          0.2083, -0.4923, -0.7032,  0.3850,  0.6152,  0.1784, -0.2845, -0.2031,
       

tensor([[-0.2255,  0.6428, -0.4964,  0.8910, -0.1983,  0.4218, -0.2631, -0.4660,
          0.2126, -0.4889, -0.7026,  0.3853,  0.6152,  0.1774, -0.2845, -0.2064,
          0.4543, -0.2935,  0.1152, -0.0161]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8909963965415955
tensor([[-0.2274,  0.6422, -0.4946,  0.8909, -0.1952,  0.4201, -0.2605, -0.4655,
          0.2103, -0.4899, -0.7022,  0.3817,  0.6161,  0.1765, -0.2867, -0.2056,
          0.4558, -0.2963,  0.1157, -0.0172]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8908737897872925
tensor([[-0.2260,  0.6430, -0.4956,  0.8921, -0.1981,  0.4208, -0.2633, -0.4663,
          0.2126, -0.4895, -0.7033,  0.3849,  0.6156,  0.1769, -0.2847, -0.2061,
          0.4547, -0.2948,  0.1156, -0.0163]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8921366930007935
tensor([[-0.2258,  0.6430, -0.4956,  0.8921, -0.1982,  0.4211, -0.2634, -0.4662,
          0.2124, -0.4896, -0.7031,  0.3847,  0.6156,  0.1767, -0.2846, -0.2063,
       

tensor([[-0.1475,  0.5451, -0.4413,  0.8157, -0.1991,  0.4192, -0.2669, -0.4427,
          0.2262, -0.4445, -0.6930,  0.4710,  0.5841,  0.1014, -0.2591, -0.2324,
          0.4643, -0.2865,  0.1012, -0.0039]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8157116174697876
tensor([[-0.1470,  0.5446, -0.4425,  0.8158, -0.1984,  0.4174, -0.2689, -0.4437,
          0.2274, -0.4451, -0.6929,  0.4690,  0.5814,  0.1019, -0.2573, -0.2320,
          0.4638, -0.2859,  0.1011, -0.0043]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8158321380615234
tensor([[-0.1468,  0.5447, -0.4419,  0.8150, -0.1986,  0.4176, -0.2685, -0.4434,
          0.2275, -0.4447, -0.6928,  0.4689,  0.5814,  0.1015, -0.2575, -0.2318,
          0.4636, -0.2859,  0.1014, -0.0045]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8149985074996948
tensor([[-0.1481,  0.5432, -0.4415,  0.8160, -0.1983,  0.4174, -0.2683, -0.4436,
          0.2275, -0.4458, -0.6928,  0.4683,  0.5821,  0.1012, -0.2577, -0.2316,
       

tensor([[-0.1880,  0.6313, -0.4811,  0.8650, -0.1958,  0.3661, -0.2280, -0.4589,
          0.2172, -0.5145, -0.7175,  0.3791,  0.5832,  0.1654, -0.2651, -0.2080,
          0.4239, -0.3107,  0.1082, -0.0226]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8649734854698181
tensor([[-0.1878,  0.6313, -0.4809,  0.8646, -0.1960,  0.3663, -0.2279, -0.4589,
          0.2172, -0.5143, -0.7171,  0.3791,  0.5831,  0.1654, -0.2652, -0.2081,
          0.4239, -0.3107,  0.1081, -0.0226]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8645766973495483
tensor([[-0.1882,  0.6315, -0.4811,  0.8652, -0.1960,  0.3666, -0.2277, -0.4591,
          0.2171, -0.5145, -0.7176,  0.3798,  0.5836,  0.1655, -0.2654, -0.2082,
          0.4242, -0.3109,  0.1084, -0.0227]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8652068376541138
tensor([[-0.1877,  0.6319, -0.4806,  0.8645, -0.1965,  0.3660, -0.2282, -0.4590,
          0.2178, -0.5146, -0.7176,  0.3782,  0.5823,  0.1652, -0.2649, -0.2078,
       

tensor([[-0.1399,  0.5699, -0.4431,  0.8166, -0.1917,  0.3974, -0.2482, -0.4435,
          0.2286, -0.4668, -0.6869,  0.4483,  0.5431,  0.1188, -0.2702, -0.2414,
          0.4286, -0.2885,  0.1118,  0.0073]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8165937662124634
tensor([[-0.1402,  0.5697, -0.4429,  0.8170, -0.1914,  0.3973, -0.2480, -0.4436,
          0.2283, -0.4670, -0.6869,  0.4481,  0.5433,  0.1187, -0.2705, -0.2413,
          0.4289, -0.2889,  0.1120,  0.0072]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8170425295829773
tensor([[-0.1401,  0.5696, -0.4428,  0.8167, -0.1916,  0.3972, -0.2481, -0.4435,
          0.2283, -0.4669, -0.6868,  0.4479,  0.5432,  0.1185, -0.2702, -0.2413,
          0.4288, -0.2888,  0.1119,  0.0072]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8166769742965698
tensor([[-0.1398,  0.5697, -0.4427,  0.8165, -0.1916,  0.3972, -0.2481, -0.4435,
          0.2285, -0.4668, -0.6868,  0.4481,  0.5432,  0.1186, -0.2701, -0.2415,
       

tensor([[-0.1419,  0.5677, -0.4441,  0.8183, -0.1925,  0.3954, -0.2484, -0.4452,
          0.2290, -0.4682, -0.6892,  0.4494,  0.5444,  0.1174, -0.2708, -0.2400,
          0.4298, -0.2913,  0.1111,  0.0064]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8182744979858398
tensor([[-0.1444,  0.5691, -0.4446,  0.8200, -0.1922,  0.3979, -0.2493, -0.4452,
          0.2297, -0.4704, -0.6908,  0.4499,  0.5453,  0.1190, -0.2730, -0.2398,
          0.4289, -0.2927,  0.1134,  0.0066]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8200122117996216
tensor([[-0.1506,  0.5420, -0.4390,  0.8171, -0.1974,  0.4114, -0.2648, -0.4411,
          0.2253, -0.4428, -0.6947,  0.4699,  0.5815,  0.0982, -0.2591, -0.2281,
          0.4637, -0.2904,  0.1045, -0.0060]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8171366453170776
tensor([[-0.1528,  0.5394, -0.4411,  0.8156, -0.1932,  0.4128, -0.2657, -0.4406,
          0.2259, -0.4431, -0.6954,  0.4688,  0.5817,  0.0997, -0.2610, -0.2271,
       

tensor([[-0.1890,  0.6308, -0.4810,  0.8666, -0.1973,  0.3722, -0.2295, -0.4602,
          0.2173, -0.5130, -0.7183,  0.3833,  0.5871,  0.1630, -0.2685, -0.2087,
          0.4264, -0.3116,  0.1072, -0.0227]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8666020035743713
tensor([[-0.2273,  0.6408, -0.4960,  0.8908, -0.1971,  0.4120, -0.2628, -0.4624,
          0.2088, -0.4895, -0.7013,  0.3787,  0.6105,  0.1756, -0.2835, -0.2024,
          0.4504, -0.2954,  0.1161, -0.0193]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8908319473266602
tensor([[-0.1878,  0.6309, -0.4801,  0.8652, -0.1973,  0.3720, -0.2288, -0.4599,
          0.2172, -0.5121, -0.7173,  0.3831,  0.5875,  0.1625, -0.2682, -0.2092,
          0.4267, -0.3108,  0.1078, -0.0227]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8651866912841797
tensor([[-0.2273,  0.6399, -0.4960,  0.8895, -0.1969,  0.4132, -0.2623, -0.4624,
          0.2080, -0.4886, -0.7009,  0.3796,  0.6113,  0.1751, -0.2835, -0.2027,
       

tensor([[-0.1881,  0.6257, -0.4815,  0.8599, -0.1949,  0.3727, -0.2316, -0.4579,
          0.2183, -0.5073, -0.7164,  0.3851,  0.5900,  0.1588, -0.2688, -0.2072,
          0.4264, -0.3052,  0.1089, -0.0216]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8599334359169006
tensor([[-0.1516,  0.5415, -0.4387,  0.8181, -0.1972,  0.4095, -0.2656, -0.4416,
          0.2256, -0.4466, -0.6952,  0.4652,  0.5804,  0.0983, -0.2588, -0.2271,
          0.4631, -0.2924,  0.1056, -0.0067]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8180801868438721
tensor([[-0.1546,  0.5401, -0.4396,  0.8216, -0.1967,  0.4077, -0.2657, -0.4415,
          0.2249, -0.4503, -0.6959,  0.4632,  0.5789,  0.0996, -0.2595, -0.2267,
          0.4630, -0.2945,  0.1077, -0.0067]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8216026425361633
tensor([[-0.1528,  0.5403, -0.4408,  0.8184, -0.1963,  0.4093, -0.2660, -0.4408,
          0.2253, -0.4484, -0.6944,  0.4648,  0.5784,  0.1008, -0.2586, -0.2277,
       

tensor([[-0.1505,  0.5402, -0.4382,  0.8163, -0.1968,  0.4111, -0.2671, -0.4413,
          0.2251, -0.4442, -0.6924,  0.4678,  0.5813,  0.0999, -0.2581, -0.2295,
          0.4643, -0.2893,  0.1023, -0.0045]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8163226246833801
tensor([[-0.1511,  0.5418, -0.4417,  0.8166, -0.1964,  0.4094, -0.2678, -0.4405,
          0.2261, -0.4466, -0.6945,  0.4647,  0.5792,  0.1011, -0.2579, -0.2291,
          0.4615, -0.2896,  0.1040, -0.0046]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.8165998458862305
tensor([[-0.1501,  0.5404, -0.4384,  0.8162, -0.1971,  0.4111, -0.2667, -0.4413,
          0.2247, -0.4438, -0.6922,  0.4680,  0.5813,  0.0998, -0.2581, -0.2295,
          0.4644, -0.2891,  0.1023, -0.0048]], device='cuda:0',
       grad_fn=<AddmmBackward>)
0.816173255443573
tensor([[-0.1510,  0.5417, -0.4417,  0.8163, -0.1963,  0.4093, -0.2676, -0.4403,
          0.2260, -0.4463, -0.6944,  0.4647,  0.5791,  0.1011, -0.2579, -0.2290,
        