Merge pull request #3 from nsabale7/ns/samples_timelapse
Sample: Face Detection
AdityaKBhadragond14 committed Jul 9, 2024
2 parents 7f79097 + 197b75d commit 1031260
Showing 13 changed files with 329 additions and 31 deletions.
47 changes: 45 additions & 2 deletions base/CMakeLists.txt
@@ -626,6 +626,13 @@ SET(UT_FILES
SET(SAMPLE_UT_FILES
test/utmain.cpp
test/test_utils.cpp
test/test_utils.h
../samples/face_detection_cpu/test_face_detection_cpu.cpp
)

SET(SAMPLE_CORE_FILES
../samples/face_detection_cpu/face_detection_cpu.h
../samples/face_detection_cpu/face_detection_cpu.cpp
test/test_utils.h
../samples/create_thumbnail_from_mp4_video/test_generate_thumbnail_from_mp4_video.cpp
../samples/play_mp4_from_beginning/test_play_mp4_video_from_beginning.cpp
@@ -645,6 +652,7 @@ SET(SAMPLE_SOURCE
${SAMPLE_CORE_FILES}
${SAMPLE_UT_FILES}
)

IF(ENABLE_LINUX)
list(APPEND UT_FILES
test/gtkglrenderer_tests.cpp
@@ -660,6 +668,17 @@ ENDIF(ENABLE_LINUX)
add_executable(aprapipesut ${UT_FILES})
add_executable(aprapipessampleut ${SAMPLE_SOURCE})

target_include_directories ( aprapipessampleut PRIVATE
${JETSON_MULTIMEDIA_LIB_INCLUDE}
${FFMPEG_INCLUDE_DIRS}
${OpenCV_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS}
${LIBMP4_INC_DIR}
${BARESIP_INC_DIR}
${LIBRE_INC_DIR}
${NVCODEC_INCLUDE_DIR}
test
)

IF(ENABLE_ARM64)
target_include_directories ( aprapipesut PRIVATE ${JETSON_MULTIMEDIA_LIB_INCLUDE} ${FFMPEG_ROOT} ${JPEG_INCLUDE_DIR})
ENDIF(ENABLE_ARM64)
@@ -695,13 +714,36 @@ ENDIF (ENABLE_CUDA)
find_library(OPENH264_LIB NAMES openh264.lib libopenh264.a REQUIRED)
find_library(LIBMP4_LIB NAMES mp4lib.lib libmp4lib.a REQUIRED)


add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../samples ${CMAKE_CURRENT_BINARY_DIR}/samples)

target_link_libraries(aprapipessampleut
aprapipes
${JPEG_LIBRARIES}

target_link_libraries(aprapipessampleut
aprapipes
${LIBMP4_LIB}
${OPENH264_LIB}
${Boost_LIBRARIES}
${FFMPEG_LIBRARIES}
${OpenCV_LIBRARIES}
${JETSON_LIBS}
${NVCUDAToolkit_LIBS}
${NVCODEC_LIB}
${NVJPEGLIB_L4T}
${CURSES_LIBRARIES}
ZXing::Core
ZXing::ZXing
BZip2::BZip2
ZLIB::ZLIB
liblzma::liblzma
bigint::bigint
sfml-audio
whisper::whisper
)

target_link_libraries(aprapipesut
${NVCUDAToolkit_LIBS}
${NVCODEC_LIB}
)
@@ -718,7 +760,7 @@ IF(ENABLE_LINUX)
)
ENDIF(ENABLE_LINUX)

target_link_libraries(aprapipesut
target_link_libraries(aprapipesut
aprapipes
${GLEW_LIBRARIES}
${JPEG_LIBRARIES}
@@ -748,6 +790,7 @@ IF(ENABLE_WINDOWS)
IF(GHA)
file(COPY ${RUNTIME_DLLS} DESTINATION RelWithDebInfo/)
ENDIF(GHA)

ENDIF(ENABLE_WINDOWS)

add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../samples ${CMAKE_CURRENT_BINARY_DIR}/samples)
@@ -785,4 +828,4 @@ install(
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/aprapipes)

install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/include
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/aprapipes)
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/aprapipes)
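
The hunks above add the face_detection_cpu sample sources to SAMPLE_CORE_FILES, register its test in SAMPLE_UT_FILES, and give the new aprapipessampleut executable its own include directories and link libraries. As a rough sketch of how such a test plugs into that target — assuming the Boost.Test driver already used by test/utmain.cpp, and with FaceDetectionCPU and setUpPipeLine() as purely illustrative names (the real contents of test_face_detection_cpu.cpp are not shown in this diff):

#include <boost/test/unit_test.hpp>
#include "face_detection_cpu.h"              // compiled via SAMPLE_CORE_FILES above

BOOST_AUTO_TEST_SUITE(face_detection_cpu_tests)

// Hypothetical smoke test: only checks that the sample can assemble its pipeline.
BOOST_AUTO_TEST_CASE(pipeline_setup)
{
    FaceDetectionCPU sample;                 // illustrative class name
    BOOST_TEST(sample.setUpPipeLine());      // illustrative method name
}

BOOST_AUTO_TEST_SUITE_END()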
5 changes: 4 additions & 1 deletion base/include/FaceDetectorXform.h
@@ -6,11 +6,14 @@
class FaceDetectorXformProps : public ModuleProps
{
public:
FaceDetectorXformProps(double _scaleFactor = 1.0, float _confidenceThreshold = 0.5) : scaleFactor(_scaleFactor), confidenceThreshold(_confidenceThreshold)
FaceDetectorXformProps(double _scaleFactor = 1.0, float _confidenceThreshold = 0.5, std::string _Face_Detection_Configuration= "./data/assets/deploy.prototxt", std::string _Face_Detection_Weights= "./data/assets/res10_300x300_ssd_iter_140000_fp16.caffemodel")
: scaleFactor(_scaleFactor), confidenceThreshold(_confidenceThreshold), FACE_DETECTION_CONFIGURATION(_Face_Detection_Configuration), FACE_DETECTION_WEIGHTS(_Face_Detection_Weights)
{
}
double scaleFactor;
float confidenceThreshold;
std::string FACE_DETECTION_CONFIGURATION;
std::string FACE_DETECTION_WEIGHTS;

size_t getSerializeSize()
{
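
With this change the detector's Caffe prototxt and weights are no longer hard-coded inside FaceDetectorXform.cpp; they arrive through the props and default to the previous ./data/assets paths. A minimal construction sketch, assuming FaceDetectorXform takes its props in the constructor as aprapipes modules usually do (the constructor itself is not part of this diff):

#include "FaceDetectorXform.h"

FaceDetectorXformProps props(
    1.0,                                     // scaleFactor
    0.7f,                                    // confidenceThreshold
    "./models/deploy.prototxt",              // FACE_DETECTION_CONFIGURATION
    "./models/res10_300x300_ssd_iter_140000_fp16.caffemodel");  // FACE_DETECTION_WEIGHTS

auto faceDetector =
    boost::shared_ptr<FaceDetectorXform>(new FaceDetectorXform(props));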
1 change: 1 addition & 0 deletions base/include/FacialLandmarksCV.h
@@ -81,4 +81,5 @@ class FacialLandmarkCV : public Module
FacialLandmarkCVProps mProp;
bool handlePropsChange(frame_sp& frame);
std::string mOutputPinId1;
std::string rawFramePinId;
};
46 changes: 36 additions & 10 deletions base/src/FaceDetectorXform.cpp
@@ -11,6 +11,7 @@
#include "AIPExceptions.h"
#include "ApraFaceInfo.h"
#include "FaceDetectsInfo.h"
#include "Overlay.h"

class FaceDetectorXform::Detail
{
@@ -34,13 +35,12 @@ class FaceDetectorXform::Detail
framemetadata_sp mOutputMetadata;
FaceDetectorXformProps mProps;
std::string mOutputPinId;
std::string rawFramePinId;
cv::Mat mInputImg;
int mFrameType;
cv::dnn::Net network;
cv::Mat inputBlob;
cv::Mat detection;
const std::string FACE_DETECTION_CONFIGURATION = "./data/assets/deploy.prototxt";
const std::string FACE_DETECTION_WEIGHTS = "./data/assets/res10_300x300_ssd_iter_140000_fp16.caffemodel";
// scalar with mean values which are subtracted from channels.
// Values are intended to be in (mean-R, mean-G, mean-B) order if image has BGR ordering and swapRB is true.
const cv::Scalar meanValuesRGB = cv::Scalar({104., 177.0, 123.0});
@@ -75,7 +75,7 @@ bool FaceDetectorXform::validateInputPins()

bool FaceDetectorXform::validateOutputPins()
{
if (getNumberOfOutputPins() != 1)
if (getNumberOfOutputPins() > 2)
{
LOG_ERROR << "<" << getId() << ">::validateOutputPins size is expected to be 1. Actual<" << getNumberOfOutputPins() << ">";
return false;
@@ -88,11 +88,12 @@ void FaceDetectorXform::addInputPin(framemetadata_sp &metadata, string &pinId)
Module::addInputPin(metadata, pinId);
mDetail->mOutputMetadata = framemetadata_sp(new FrameMetadata(FrameMetadata::FACEDETECTS_INFO));
mDetail->mOutputPinId = addOutputPin(mDetail->mOutputMetadata);
mDetail->rawFramePinId = addOutputPin(metadata);
}

bool FaceDetectorXform::init()
{
mDetail->network = cv::dnn::readNetFromCaffe(mDetail->FACE_DETECTION_CONFIGURATION, mDetail->FACE_DETECTION_WEIGHTS);
mDetail->network = cv::dnn::readNetFromCaffe(mDetail->mProps.FACE_DETECTION_CONFIGURATION, mDetail->mProps.FACE_DETECTION_WEIGHTS);
if (mDetail->network.empty())
{
LOG_ERROR << "Failed to load network with the given settings. Please check the loaded parameters.";
@@ -121,11 +122,14 @@ bool FaceDetectorXform::process(frame_container &frames)
mDetail->inputBlob = cv::dnn::blobFromImage(mDetail->mInputImg, mDetail->mProps.scaleFactor, cv::Size(mDetail->mInputImg.cols, mDetail->mInputImg.rows),
mDetail->meanValuesRGB, false, false);
mDetail->network.setInput(mDetail->inputBlob, "data");

mDetail->detection = mDetail->network.forward("detection_out");

cv::Mat detectionMatrix(mDetail->detection.size[2], mDetail->detection.size[3], CV_32F, mDetail->detection.ptr<float>());

std::vector<RectangleOverlay> rectangleOverlays;
CompositeOverlay compositeOverlay;

for (int i = 0; i < detectionMatrix.rows; i++)
{
float confidence = detectionMatrix.at<float>(i, 2);
@@ -136,20 +140,42 @@
}

mDetail->faceInfo.x1 = detectionMatrix.at<float>(i, 3) * mDetail->mInputImg.cols;
mDetail->faceInfo.y2 = detectionMatrix.at<float>(i, 4) * mDetail->mInputImg.rows;
mDetail->faceInfo.y1 = detectionMatrix.at<float>(i, 4) * mDetail->mInputImg.rows;
mDetail->faceInfo.x2 = detectionMatrix.at<float>(i, 5) * mDetail->mInputImg.cols;
mDetail->faceInfo.y1 = detectionMatrix.at<float>(i, 6) * mDetail->mInputImg.rows;
mDetail->faceInfo.y2 = detectionMatrix.at<float>(i, 6) * mDetail->mInputImg.rows;
mDetail->faceInfo.score = confidence;

mDetail->faces.emplace_back(mDetail->faceInfo);

RectangleOverlay rectangleOverlay;
rectangleOverlay.x1 = mDetail->faceInfo.x1;
rectangleOverlay.y1 = mDetail->faceInfo.y1;
rectangleOverlay.x2 = mDetail->faceInfo.x2;
rectangleOverlay.y2 = mDetail->faceInfo.y2;
rectangleOverlays.push_back(rectangleOverlay);
}

for (auto &rectangleOverlay : rectangleOverlays) {
compositeOverlay.add(&rectangleOverlay);
}

auto rawFrame = frames.cbegin()->second;
frames.insert(make_pair(mDetail->rawFramePinId, rawFrame));

mDetail->faceDetectsInfo.faces = mDetail->faces;
auto outFrame = makeFrame(mDetail->faceDetectsInfo.getSerializeSize());
mDetail->faceDetectsInfo.serialize(outFrame->data(), mDetail->faceDetectsInfo.getSerializeSize());
frames.insert(make_pair(mDetail->mOutputPinId, outFrame));

if (rectangleOverlays.size() > 0) {
DrawingOverlay drawingOverlay;
drawingOverlay.add(&compositeOverlay);
auto mvSize = drawingOverlay.mGetSerializeSize();
auto outFrame = makeFrame(mvSize, mDetail->mOutputPinId);
drawingOverlay.serialize(outFrame);
frames.insert(make_pair(mDetail->mOutputPinId, outFrame));
}

mDetail->faces.clear();
mDetail->faceDetectsInfo.faces.clear();

send(frames);
return true;
}
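
FaceDetectorXform::process now emits two outputs per input: the untouched raw frame is forwarded on rawFramePinId (added in addInputPin), and the detections are wrapped as RectangleOverlays, grouped under a CompositeOverlay, and serialized through a DrawingOverlay — drawing is deferred to whatever module consumes the overlay pin instead of the detector mutating pixels itself. Condensed from the hunk above, with detectedFaces and overlayPinId as stand-in names:

// Sketch of the overlay path using the Overlay.h types as they appear above.
std::vector<RectangleOverlay> rectangleOverlays;
for (auto const &face : detectedFaces)       // detectedFaces: the ApraFaceInfo list built per frame
{
    RectangleOverlay r;
    r.x1 = face.x1;  r.y1 = face.y1;
    r.x2 = face.x2;  r.y2 = face.y2;
    rectangleOverlays.push_back(r);
}

CompositeOverlay compositeOverlay;
for (auto &r : rectangleOverlays)            // the vector is stable from here on, so the raw pointers stay valid
    compositeOverlay.add(&r);

DrawingOverlay drawingOverlay;
drawingOverlay.add(&compositeOverlay);

auto overlayFrame = makeFrame(drawingOverlay.mGetSerializeSize(), overlayPinId);  // Module::makeFrame, as in process()
drawingOverlay.serialize(overlayFrame);
frames.insert(std::make_pair(overlayPinId, overlayFrame));
frames.insert(std::make_pair(rawFramePinId, frames.cbegin()->second));            // forward the input frame untouched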
78 changes: 63 additions & 15 deletions base/src/FacialLandmarksCV.cpp
@@ -12,6 +12,7 @@
#include "Logger.h"
#include "Utils.h"
#include "AIPExceptions.h"
#include "Overlay.h"

class Detail
{
@@ -87,6 +88,7 @@ class Detail
FacialLandmarkCVProps props;
cv::Mat iImg;
vector<vector<cv::Point2f>>landmarks;
vector<cv::Rect> faces;

protected:
framemetadata_sp mInputMetadata;
@@ -113,6 +115,7 @@ class DetailSSD : public Detail
{
//input must be 3 channel image(RGB)
// Create a 4-dimensional blob from the image. Optionally resizes and crops image from center, subtract mean values, scales values by scalefactor, swap Blue and Red channels.
iImg.data = static_cast<uint8_t *>(buffer->data());
cv::Mat inputBlob = cv::dnn::blobFromImage(iImg, 1.0, cv::Size(300, 300), cv::Scalar(104, 177, 123), false, false);

// Set the input blob as input to the face detector network
@@ -123,8 +126,6 @@

cv::Mat detectionMatrix(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

vector<cv::Rect> faces;

for (int i = 0; i < detectionMatrix.rows; i++)
{
float confidence = detectionMatrix.at<float>(i, 2);
@@ -139,13 +140,16 @@
cv::Rect faceRect(x1, y1, x2 - x1, y2 - y1);

faces.push_back(faceRect);
cv::rectangle(iImg, faceRect, cv::Scalar(0, 255, 0), 2);
}
}

if (faces.size() == 0) {
return false;
}

bool success = facemark->fit(iImg, faces, landmarks);

return true;
return success;
}

private:
@@ -164,17 +168,15 @@ class DetailHCASCADE : public Detail

bool compute(frame_sp buffer)
{
vector<cv::Rect> faces;
faceDetector.detectMultiScale(iImg, faces);

for (int i = 0; i < faces.size(); i++)
{
rectangle(iImg, faces[i], cv::Scalar(0, 255, 0), 2);
if (faces.size() == 0) {
return false;
}

bool success = facemark->fit(iImg, faces, landmarks);

return true;
bool success = facemark->fit(iImg, faces, landmarks);
return success;
}

private:
@@ -243,6 +245,7 @@ void FacialLandmarkCV::addInputPin(framemetadata_sp &metadata, string &pinId)
Module::addInputPin(metadata, pinId);
auto landmarksOutputMetadata = framemetadata_sp(new FrameMetadata(FrameMetadata::FACE_LANDMARKS_INFO));
mOutputPinId1 = addOutputPin(landmarksOutputMetadata);
rawFramePinId = addOutputPin(metadata);
}

bool FacialLandmarkCV::init()
@@ -278,15 +281,40 @@ bool FacialLandmarkCV::term()
bool FacialLandmarkCV::process(frame_container& frames)
{
auto frame = frames.cbegin()->second;
bool computeValue = mDetail->compute(frame);

if (computeValue == false) {
send(frames);
return true;
}

std::vector<RectangleOverlay> rectangleOverlays;

for (const auto& face :mDetail->faces) {
RectangleOverlay rectangleOverlay;
rectangleOverlay.x1 = face.x;
rectangleOverlay.y1 = face.y;
rectangleOverlay.x2 = face.x + face.width;
rectangleOverlay.y2 = face.y + face.height;

mDetail->compute(frame);
rectangleOverlays.push_back(rectangleOverlay);
}

std::vector<CircleOverlay> circleOverlays;

// Convert the landmarks from cv::Point2f to ApraPoint2f
vector<vector<ApraPoint2f>> apralandmarks;
for (const auto& landmark : mDetail->landmarks) {
vector<ApraPoint2f> apralandmark;
for (const auto& point : landmark) {
apralandmark.emplace_back(ApraPoint2f(point));

CircleOverlay circleOverlay;
circleOverlay.x1 = point.x;
circleOverlay.y1 = point.y;
circleOverlay.radius = 1;

circleOverlays.push_back(circleOverlay);
}
apralandmarks.emplace_back(std::move(apralandmark));
}
@@ -297,11 +325,31 @@ bool FacialLandmarkCV::process(frame_container& frames)
bufferSize += sizeof(apralandmarks[i]) + (sizeof(ApraPoint2f) + 2 * sizeof(int)) * apralandmarks[i].size();
}

auto landmarksFrame = makeFrame(bufferSize);
CompositeOverlay compositeOverlay;

Utils::serialize<std::vector<std::vector<ApraPoint2f>>>(apralandmarks, landmarksFrame->data(), bufferSize);
for (auto &rectangleOverlay : rectangleOverlays) {
compositeOverlay.add(&rectangleOverlay);
}

for (auto &circleOverlay : circleOverlays) {
compositeOverlay.add(&circleOverlay);
}

auto rawFrame = frames.cbegin()->second;

frames.insert(make_pair(rawFramePinId, rawFrame));

if (rectangleOverlays.size() > 0 || circleOverlays.size() > 0) {
DrawingOverlay drawingOverlay;
drawingOverlay.add(&compositeOverlay);
auto mvSize = drawingOverlay.mGetSerializeSize();
auto landmarksFrame = makeFrame(mvSize, mOutputPinId1);
drawingOverlay.serialize(landmarksFrame);
frames.insert(make_pair(mOutputPinId1, landmarksFrame));
}

frames.insert(make_pair(mOutputPinId1, landmarksFrame));
mDetail->faces.clear();
mDetail->landmarks.clear();

send(frames);

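
FacialLandmarkCV::process follows the same pattern as the face detector: it bails out early when compute() finds no faces, forwards the raw frame on rawFramePinId, and publishes a serialized DrawingOverlay (one RectangleOverlay per face plus a CircleOverlay per landmark point) on mOutputPinId1. A hypothetical downstream hookup — PipeLine and setNext are the usual aprapipes wiring calls, while the source, overlay-drawing, and viewer modules named here are assumptions, not part of this commit:

// Illustrative wiring only; props arguments are elided or assumed.
auto source = boost::shared_ptr<Module>(new WebCamSource(WebCamSourceProps(-1)));                 // assumed source module
auto landmarks = boost::shared_ptr<FacialLandmarkCV>(
    new FacialLandmarkCV(FacialLandmarkCVProps(/* detection type + model paths */)));
auto overlayRenderer = boost::shared_ptr<Module>(new OverlayModule(OverlayModuleProps()));        // assumed renderer
auto viewer = boost::shared_ptr<Module>(new ImageViewerModule(ImageViewerModuleProps("faces")));  // assumed sink

source->setNext(landmarks);              // RGB raw frames in
landmarks->setNext(overlayRenderer);     // both pins flow downstream: overlay + untouched raw frame
overlayRenderer->setNext(viewer);

PipeLine p("face_landmarks_sample");
p.appendModule(source);
p.init();
p.run_all_threaded();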
Diffs for the remaining 8 of the 13 changed files are not shown here.
