Sample: Face Detection #3

Merged
47 changes: 45 additions & 2 deletions base/CMakeLists.txt
@@ -626,6 +626,13 @@ SET(UT_FILES
SET(SAMPLE_UT_FILES
test/utmain.cpp
test/test_utils.cpp
test/test_utils.h
../samples/face_detection_cpu/test_face_detection_cpu.cpp
)

SET(SAMPLE_CORE_FILES
../samples/face_detection_cpu/face_detection_cpu.h
../samples/face_detection_cpu/face_detection_cpu.cpp
test/test_utils.h
../samples/create_thumbnail_from_mp4_video/test_generate_thumbnail_from_mp4_video.cpp
../samples/play_mp4_from_beginning/test_play_mp4_video_from_beginning.cpp
@@ -645,6 +652,7 @@ SET(SAMPLE_SOURCE
${SAMPLE_CORE_FILES}
${SAMPLE_UT_FILES}
)

IF(ENABLE_LINUX)
list(APPEND UT_FILES
test/gtkglrenderer_tests.cpp
@@ -660,6 +668,17 @@ ENDIF(ENABLE_LINUX)
add_executable(aprapipesut ${UT_FILES})
add_executable(aprapipessampleut ${SAMPLE_SOURCE})

target_include_directories ( aprapipessampleut PRIVATE
${JETSON_MULTIMEDIA_LIB_INCLUDE}
${FFMPEG_INCLUDE_DIRS}
${OpenCV_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS}
${LIBMP4_INC_DIR}
${BARESIP_INC_DIR}
${LIBRE_INC_DIR}
${NVCODEC_INCLUDE_DIR}
test
)

IF(ENABLE_ARM64)
target_include_directories ( aprapipesut PRIVATE ${JETSON_MULTIMEDIA_LIB_INCLUDE} ${FFMPEG_ROOT} ${JPEG_INCLUDE_DIR})
ENDIF(ENABLE_ARM64)
@@ -695,13 +714,36 @@ ENDIF (ENABLE_CUDA)
find_library(OPENH264_LIB NAMES openh264.lib libopenh264.a REQUIRED)
find_library(LIBMP4_LIB NAMES mp4lib.lib libmp4lib.a REQUIRED)


add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../samples ${CMAKE_CURRENT_BINARY_DIR}/samples)

target_link_libraries(aprapipessampleut
aprapipes
${JPEG_LIBRARIES}

target_link_libraries(aprapipessampleut
aprapipes
${LIBMP4_LIB}
${OPENH264_LIB}
${Boost_LIBRARIES}
${FFMPEG_LIBRARIES}
${OpenCV_LIBRARIES}
${JETSON_LIBS}
${NVCUDAToolkit_LIBS}
${NVCODEC_LIB}
${NVJPEGLIB_L4T}
${CURSES_LIBRARIES}
ZXing::Core
ZXing::ZXing
BZip2::BZip2
ZLIB::ZLIB
liblzma::liblzma
bigint::bigint
sfml-audio
whisper::whisper
)

target_link_libraries(aprapipesut
${NVCUDAToolkit_LIBS}
${NVCODEC_LIB}
)
@@ -718,7 +760,7 @@ IF(ENABLE_LINUX)
)
ENDIF(ENABLE_LINUX)

target_link_libraries(aprapipesut
aprapipes
${GLEW_LIBRARIES}
${JPEG_LIBRARIES}
@@ -748,6 +790,7 @@ IF(ENABLE_WINDOWS)
IF(GHA)
file(COPY ${RUNTIME_DLLS} DESTINATION RelWithDebInfo/)
ENDIF(GHA)

ENDIF(ENABLE_WINDOWS)

add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../samples ${CMAKE_CURRENT_BINARY_DIR}/samples)
@@ -785,4 +828,4 @@ install(
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/aprapipes)

install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/include
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/aprapipes)
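Note: the new sample sources and their test are folded into the existing aprapipessampleut target (via SAMPLE_CORE_FILES and SAMPLE_UT_FILES above) rather than getting a separate executable, so — assuming the repo's usual CMake workflow — they should build and run through something like cmake --build &lt;build-dir&gt; --target aprapipessampleut.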
5 changes: 4 additions & 1 deletion base/include/FaceDetectorXform.h
@@ -6,11 +6,14 @@
class FaceDetectorXformProps : public ModuleProps
{
public:
FaceDetectorXformProps(double _scaleFactor = 1.0, float _confidenceThreshold = 0.5) : scaleFactor(_scaleFactor), confidenceThreshold(_confidenceThreshold)
FaceDetectorXformProps(double _scaleFactor = 1.0, float _confidenceThreshold = 0.5, std::string _Face_Detection_Configuration= "./data/assets/deploy.prototxt", std::string _Face_Detection_Weights= "./data/assets/res10_300x300_ssd_iter_140000_fp16.caffemodel")
: scaleFactor(_scaleFactor), confidenceThreshold(_confidenceThreshold), FACE_DETECTION_CONFIGURATION(_Face_Detection_Configuration), FACE_DETECTION_WEIGHTS(_Face_Detection_Weights)
{
}
double scaleFactor;
float confidenceThreshold;
std::string FACE_DETECTION_CONFIGURATION;
std::string FACE_DETECTION_WEIGHTS;

size_t getSerializeSize()
{
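With the model paths promoted into FaceDetectorXformProps, callers can point the detector at alternative Caffe files instead of the previously hard-coded ./data/assets locations. A minimal sketch of constructing the props, assuming the usual module-construction pattern used elsewhere in the repo (the paths below are placeholders):

    // Illustrative sketch, not part of this PR; paths are placeholders.
    #include <boost/shared_ptr.hpp>
    #include "FaceDetectorXform.h"

    void buildFaceDetector()
    {
        FaceDetectorXformProps props(
            1.0,   // scaleFactor forwarded to cv::dnn::blobFromImage
            0.7f,  // confidenceThreshold below which detections are dropped
            "./my_models/deploy.prototxt",                                 // FACE_DETECTION_CONFIGURATION
            "./my_models/res10_300x300_ssd_iter_140000_fp16.caffemodel"); // FACE_DETECTION_WEIGHTS
        auto faceDetector = boost::shared_ptr<FaceDetectorXform>(new FaceDetectorXform(props));
        // ... wire faceDetector into a pipeline as in the other samples ...
    }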
1 change: 1 addition & 0 deletions base/include/FacialLandmarksCV.h
@@ -81,4 +81,5 @@ class FacialLandmarkCV : public Module
FacialLandmarkCVProps mProp;
bool handlePropsChange(frame_sp& frame);
std::string mOutputPinId1;
std::string rawFramePinId;
};
46 changes: 36 additions & 10 deletions base/src/FaceDetectorXform.cpp
@@ -11,6 +11,7 @@
#include "AIPExceptions.h"
#include "ApraFaceInfo.h"
#include "FaceDetectsInfo.h"
#include "Overlay.h"

class FaceDetectorXform::Detail
{
@@ -34,13 +35,12 @@ class FaceDetectorXform::Detail
framemetadata_sp mOutputMetadata;
FaceDetectorXformProps mProps;
std::string mOutputPinId;
std::string rawFramePinId;
cv::Mat mInputImg;
int mFrameType;
cv::dnn::Net network;
cv::Mat inputBlob;
cv::Mat detection;
const std::string FACE_DETECTION_CONFIGURATION = "./data/assets/deploy.prototxt";
const std::string FACE_DETECTION_WEIGHTS = "./data/assets/res10_300x300_ssd_iter_140000_fp16.caffemodel";
// scalar with mean values which are subtracted from channels.
// Values are intended to be in (mean-R, mean-G, mean-B) order if image has BGR ordering and swapRB is true.
const cv::Scalar meanValuesRGB = cv::Scalar({104., 177.0, 123.0});
@@ -75,7 +75,7 @@ bool FaceDetectorXform::validateInputPins()

bool FaceDetectorXform::validateOutputPins()
{
if (getNumberOfOutputPins() != 1)
if (getNumberOfOutputPins() > 2)
{
LOG_ERROR << "<" << getId() << ">::validateOutputPins size is expected to be 1. Actual<" << getNumberOfOutputPins() << ">";
return false;
@@ -88,11 +88,12 @@ void FaceDetectorXform::addInputPin(framemetadata_sp &metadata, string &pinId)
Module::addInputPin(metadata, pinId);
mDetail->mOutputMetadata = framemetadata_sp(new FrameMetadata(FrameMetadata::FACEDETECTS_INFO));
mDetail->mOutputPinId = addOutputPin(mDetail->mOutputMetadata);
mDetail->rawFramePinId = addOutputPin(metadata);
}

bool FaceDetectorXform::init()
{
mDetail->network = cv::dnn::readNetFromCaffe(mDetail->FACE_DETECTION_CONFIGURATION, mDetail->FACE_DETECTION_WEIGHTS);
mDetail->network = cv::dnn::readNetFromCaffe(mDetail->mProps.FACE_DETECTION_CONFIGURATION, mDetail->mProps.FACE_DETECTION_WEIGHTS);
if (mDetail->network.empty())
{
LOG_ERROR << "Failed to load network with the given settings. Please check the loaded parameters.";
@@ -121,11 +122,14 @@ bool FaceDetectorXform::process(frame_container &frames)
mDetail->inputBlob = cv::dnn::blobFromImage(mDetail->mInputImg, mDetail->mProps.scaleFactor, cv::Size(mDetail->mInputImg.cols, mDetail->mInputImg.rows),
mDetail->meanValuesRGB, false, false);
mDetail->network.setInput(mDetail->inputBlob, "data");

mDetail->detection = mDetail->network.forward("detection_out");

cv::Mat detectionMatrix(mDetail->detection.size[2], mDetail->detection.size[3], CV_32F, mDetail->detection.ptr<float>());

std::vector<RectangleOverlay> rectangleOverlays;
CompositeOverlay compositeOverlay;

for (int i = 0; i < detectionMatrix.rows; i++)
{
float confidence = detectionMatrix.at<float>(i, 2);
@@ -136,20 +140,42 @@
}

mDetail->faceInfo.x1 = detectionMatrix.at<float>(i, 3) * mDetail->mInputImg.cols;
mDetail->faceInfo.y2 = detectionMatrix.at<float>(i, 4) * mDetail->mInputImg.rows;
mDetail->faceInfo.y1 = detectionMatrix.at<float>(i, 4) * mDetail->mInputImg.rows;
mDetail->faceInfo.x2 = detectionMatrix.at<float>(i, 5) * mDetail->mInputImg.cols;
mDetail->faceInfo.y1 = detectionMatrix.at<float>(i, 6) * mDetail->mInputImg.rows;
mDetail->faceInfo.y2 = detectionMatrix.at<float>(i, 6) * mDetail->mInputImg.rows;
mDetail->faceInfo.score = confidence;

mDetail->faces.emplace_back(mDetail->faceInfo);

RectangleOverlay rectangleOverlay;
rectangleOverlay.x1 = mDetail->faceInfo.x1;
rectangleOverlay.y1 = mDetail->faceInfo.y1;
rectangleOverlay.x2 = mDetail->faceInfo.x2;
rectangleOverlay.y2 = mDetail->faceInfo.y2;
rectangleOverlays.push_back(rectangleOverlay);
}

for (auto &rectangleOverlay : rectangleOverlays) {
compositeOverlay.add(&rectangleOverlay);
}

auto rawFrame = frames.cbegin()->second;
frames.insert(make_pair(mDetail->rawFramePinId, rawFrame));

mDetail->faceDetectsInfo.faces = mDetail->faces;
auto outFrame = makeFrame(mDetail->faceDetectsInfo.getSerializeSize());
mDetail->faceDetectsInfo.serialize(outFrame->data(), mDetail->faceDetectsInfo.getSerializeSize());
frames.insert(make_pair(mDetail->mOutputPinId, outFrame));

if (rectangleOverlays.size() > 0) {
DrawingOverlay drawingOverlay;
drawingOverlay.add(&compositeOverlay);
auto mvSize = drawingOverlay.mGetSerializeSize();
auto outFrame = makeFrame(mvSize, mDetail->mOutputPinId);
drawingOverlay.serialize(outFrame);
frames.insert(make_pair(mDetail->mOutputPinId, outFrame));
}

mDetail->faces.clear();
mDetail->faceDetectsInfo.faces.clear();

send(frames);
return true;
}
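Alongside the raw frame passed through on rawFramePinId, process() now serializes the detected rectangles as a DrawingOverlay (RectangleOverlays wrapped in a CompositeOverlay). A sketch of how a consumer might rebuild that overlay follows; it assumes DrawingOverlay exposes a deserialize counterpart to the serialize call used above, which this diff does not show:

    // Illustrative sketch, not part of this PR. Assumes DrawingOverlay::deserialize(frame_sp)
    // exists in Overlay.h as the counterpart of the serialize() call used by the module.
    #include "Overlay.h"

    void onFaceOverlayFrame(frame_sp overlayFrame)
    {
        DrawingOverlay drawingOverlay;
        drawingOverlay.deserialize(overlayFrame); // rebuilds the composite of RectangleOverlays
        // The rectangles can then be drawn onto the matching raw frame, which
        // arrives in the same frame_container on rawFramePinId.
    }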
78 changes: 63 additions & 15 deletions base/src/FacialLandmarksCV.cpp
@@ -12,6 +12,7 @@
#include "Logger.h"
#include "Utils.h"
#include "AIPExceptions.h"
#include "Overlay.h"

class Detail
{
@@ -87,6 +88,7 @@ class Detail
FacialLandmarkCVProps props;
cv::Mat iImg;
vector<vector<cv::Point2f>>landmarks;
vector<cv::Rect> faces;

protected:
framemetadata_sp mInputMetadata;
@@ -113,6 +115,7 @@ class DetailSSD : public Detail
{
//input must be 3 channel image(RGB)
// Create a 4-dimensional blob from the image. Optionally resizes and crops image from center, subtract mean values, scales values by scalefactor, swap Blue and Red channels.
iImg.data = static_cast<uint8_t *>(buffer->data());
cv::Mat inputBlob = cv::dnn::blobFromImage(iImg, 1.0, cv::Size(300, 300), cv::Scalar(104, 177, 123), false, false);

// Set the input blob as input to the face detector network
@@ -123,8 +126,6 @@

cv::Mat detectionMatrix(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

vector<cv::Rect> faces;

for (int i = 0; i < detectionMatrix.rows; i++)
{
float confidence = detectionMatrix.at<float>(i, 2);
@@ -139,13 +140,16 @@
cv::Rect faceRect(x1, y1, x2 - x1, y2 - y1);

faces.push_back(faceRect);
cv::rectangle(iImg, faceRect, cv::Scalar(0, 255, 0), 2);
}
}

if (faces.size() == 0) {
return false;
}

bool success = facemark->fit(iImg, faces, landmarks);

return true;
return success;
}

private:
@@ -164,17 +168,15 @@

bool compute(frame_sp buffer)
{
vector<cv::Rect> faces;
faceDetector.detectMultiScale(iImg, faces);

for (int i = 0; i < faces.size(); i++)
{
rectangle(iImg, faces[i], cv::Scalar(0, 255, 0), 2);
if (faces.size() == 0) {
return false;
}

bool success = facemark->fit(iImg, faces, landmarks);

return true;
bool success = facemark->fit(iImg, faces, landmarks);
return success;
}

private:
@@ -243,6 +245,7 @@ void FacialLandmarkCV::addInputPin(framemetadata_sp &metadata, string &pinId)
Module::addInputPin(metadata, pinId);
auto landmarksOutputMetadata = framemetadata_sp(new FrameMetadata(FrameMetadata::FACE_LANDMARKS_INFO));
mOutputPinId1 = addOutputPin(landmarksOutputMetadata);
rawFramePinId = addOutputPin(metadata);
}

bool FacialLandmarkCV::init()
@@ -278,15 +281,40 @@ bool FacialLandmarkCV::term()
bool FacialLandmarkCV::process(frame_container& frames)
{
auto frame = frames.cbegin()->second;
bool computeValue = mDetail->compute(frame);

if (computeValue == false) {
send(frames);
return true;
}

std::vector<RectangleOverlay> rectangleOverlays;

for (const auto& face :mDetail->faces) {
RectangleOverlay rectangleOverlay;
rectangleOverlay.x1 = face.x;
rectangleOverlay.y1 = face.y;
rectangleOverlay.x2 = face.x + face.width;
rectangleOverlay.y2 = face.y + face.height;

mDetail->compute(frame);
rectangleOverlays.push_back(rectangleOverlay);
}

std::vector<CircleOverlay> circleOverlays;

// Convert the landmarks from cv::Point2f to ApraPoint2f
vector<vector<ApraPoint2f>> apralandmarks;
for (const auto& landmark : mDetail->landmarks) {
vector<ApraPoint2f> apralandmark;
for (const auto& point : landmark) {
apralandmark.emplace_back(ApraPoint2f(point));

CircleOverlay circleOverlay;
circleOverlay.x1 = point.x;
circleOverlay.y1 = point.y;
circleOverlay.radius = 1;

circleOverlays.push_back(circleOverlay);
}
apralandmarks.emplace_back(std::move(apralandmark));
}
@@ -297,11 +325,31 @@ bool FacialLandmarkCV::process(frame_container& frames)
bufferSize += sizeof(apralandmarks[i]) + (sizeof(ApraPoint2f) + 2 * sizeof(int)) * apralandmarks[i].size();
}

auto landmarksFrame = makeFrame(bufferSize);
CompositeOverlay compositeOverlay;

Utils::serialize<std::vector<std::vector<ApraPoint2f>>>(apralandmarks, landmarksFrame->data(), bufferSize);
for (auto &rectangleOverlay : rectangleOverlays) {
compositeOverlay.add(&rectangleOverlay);
}

for (auto &circleOverlay : circleOverlays) {
compositeOverlay.add(&circleOverlay);
}

auto rawFrame = frames.cbegin()->second;

frames.insert(make_pair(rawFramePinId, rawFrame));

if (rectangleOverlays.size() > 0 || circleOverlays.size() > 0) {
DrawingOverlay drawingOverlay;
drawingOverlay.add(&compositeOverlay);
auto mvSize = drawingOverlay.mGetSerializeSize();
auto landmarksFrame = makeFrame(mvSize, mOutputPinId1);
drawingOverlay.serialize(landmarksFrame);
frames.insert(make_pair(mOutputPinId1, landmarksFrame));
}

frames.insert(make_pair(mOutputPinId1, landmarksFrame));
mDetail->faces.clear();
mDetail->landmarks.clear();

send(frames);
