Skip to content

Commit

Permalink
Added support to attach clip to detected object (ObjectDetection) effect
Browse files Browse the repository at this point in the history
This feature lets the user attach a clip to an object detected by the Object Detection effect, in the same way it is done with the Tracker effect.
  • Loading branch information
BrennoCaldato committed Jan 22, 2021
1 parent 1746331 commit 32a217e
Show file tree
Hide file tree
Showing 11 changed files with 148 additions and 27 deletions.
21 changes: 16 additions & 5 deletions src/CVObjectDetection.cpp
Expand Up @@ -101,7 +101,6 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start,
// Update progress
processingController->SetProgress(uint(100*(frame_number-start)/(end-start)));

// std::cout<<"Frame: "<<frame_number<<"\n";
}
}

Expand Down Expand Up @@ -134,6 +133,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
std::vector<int> objectIds;

for (size_t i = 0; i < outs.size(); ++i)
{
Expand Down Expand Up @@ -176,13 +176,14 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);

// Clear data vectors
boxes.clear(); confidences.clear(); classIds.clear();
boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
// Get SORT predicted boxes
for(auto TBox : sort.frameTrackingResult){
if(TBox.frame == frameId){
boxes.push_back(TBox.box);
confidences.push_back(TBox.confidence);
classIds.push_back(TBox.classId);
objectIds.push_back(TBox.id);
}
}

Expand All @@ -198,12 +199,14 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
boxes.erase(boxes.begin() + j);
classIds.erase(classIds.begin() + j);
confidences.erase(confidences.begin() + j);
objectIds.erase(objectIds.begin() + j);
break;
}
else{
boxes.erase(boxes.begin() + i);
classIds.erase(classIds.begin() + i);
confidences.erase(confidences.begin() + i);
objectIds.erase(objectIds.begin() + i);
i = 0;
break;
}
Expand All @@ -222,12 +225,14 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
boxes.erase(boxes.begin() + j);
classIds.erase(classIds.begin() + j);
confidences.erase(confidences.begin() + j);
objectIds.erase(objectIds.begin() + j);
break;
}
else{
boxes.erase(boxes.begin() + i);
classIds.erase(classIds.begin() + i);
confidences.erase(confidences.begin() + i);
objectIds.erase(objectIds.begin() + i);
i = 0;
break;
}
Expand All @@ -247,7 +252,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
normalized_boxes.push_back(normalized_box);
}

detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId);
detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds);
}

// Compute IOU between 2 boxes
Expand Down Expand Up @@ -355,6 +360,7 @@ void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CV
box->set_h(dData.boxes.at(i).height);
box->set_classid(dData.classIds.at(i));
box->set_confidence(dData.confidences.at(i));
box->set_objectid(dData.objectIds.at(i));

}
}
Expand Down Expand Up @@ -457,7 +463,10 @@ bool CVObjectDetection::_LoadObjDetectdData(){
const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();

// Construct data vectors related to detections in the current frame
std::vector<int> classIds; std::vector<float> confidences; std::vector<cv::Rect_<float>> boxes;
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<cv::Rect_<float>> boxes;
std::vector<int> objectIds;

for(int i = 0; i < pbFrameData.bounding_box_size(); i++){
// Get bounding box coordinates
Expand All @@ -468,13 +477,15 @@ bool CVObjectDetection::_LoadObjDetectdData(){

// Get class Id (which will be assigned to a class name) and prediction confidence
int classId = pBox.Get(i).classid(); float confidence = pBox.Get(i).confidence();
// Get object Id
int objectId = pBox.Get(i).objectid();

// Push back data into vectors
boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence);
}

// Assign data to object detector map
detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id);
detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds);
}

// Show the time stamp from the last update in object detector data file
Expand Down
10 changes: 9 additions & 1 deletion src/CVObjectDetection.h
Expand Up @@ -49,16 +49,24 @@ namespace openshot
// Stores the detected object bounding boxes and its properties.
struct CVDetectionData{
CVDetectionData(){}
CVDetectionData(std::vector<int> _classIds, std::vector<float> _confidences, std::vector<cv::Rect_<float>> _boxes, size_t _frameId){
CVDetectionData(
std::vector<int> _classIds,
std::vector<float> _confidences,
std::vector<cv::Rect_<float>> _boxes,
size_t _frameId,
std::vector<int> _objectIds)
{
classIds = _classIds;
confidences = _confidences;
boxes = _boxes;
frameId = _frameId;
objectIds = _objectIds;
}
size_t frameId;
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<cv::Rect_<float>> boxes;
std::vector<int> objectIds;
};

/**
Expand Down
38 changes: 38 additions & 0 deletions src/Timeline.cpp
Expand Up @@ -290,6 +290,44 @@ std::list<std::string> Timeline::GetTrackedObjectsIds() const{
return trackedObjects_ids;
}

// Return the first tracked object's box corners as a JSON string.
// Unknown ids (and tracked objects that have no boxes yet) yield all-zero
// coordinates rather than crashing.
std::string Timeline::GetTrackedObjectValues(std::string id) const {

    // Initialize the JSON object with all-zero defaults; these are returned
    // unchanged when the id is not found or the object has no boxes
    Json::Value trackedObjectJson;
    trackedObjectJson["x1"] = 0;
    trackedObjectJson["y1"] = 0;
    trackedObjectJson["x2"] = 0;
    trackedObjectJson["y2"] = 0;

    // Search for the tracked object on the map
    auto iterator = tracked_objects.find(id);

    if (iterator != tracked_objects.end())
    {
        // Id found, get the object pointer and cast it as a TrackedObjectBBox
        std::shared_ptr<TrackedObjectBBox> trackedObject = std::static_pointer_cast<TrackedObjectBBox>(iterator->second);

        // Borrow the box map by const reference (taking it by value copied
        // the entire container on every call)
        const auto& boxes = trackedObject->BoxVec;

        // Guard against an object with no recorded boxes: dereferencing
        // begin() of an empty map is undefined behavior
        if (!boxes.empty())
        {
            // Convert the first box from center+size to corner coordinates
            const auto& firstBox = boxes.begin()->second;
            float x1 = firstBox.cx - (firstBox.width / 2);
            float y1 = firstBox.cy - (firstBox.height / 2);
            float x2 = firstBox.cx + (firstBox.width / 2);
            float y2 = firstBox.cy + (firstBox.height / 2);

            trackedObjectJson["x1"] = x1;
            trackedObjectJson["y1"] = y1;
            trackedObjectJson["x2"] = x2;
            trackedObjectJson["y2"] = y2;
        }
    }

    return trackedObjectJson.toStyledString();
}

// Add an openshot::Clip to the timeline
void Timeline::AddClip(Clip* clip)
{
Expand Down
2 changes: 2 additions & 0 deletions src/Timeline.h
Expand Up @@ -250,6 +250,8 @@ namespace openshot {
std::shared_ptr<openshot::TrackedObjectBase> GetTrackedObject(std::string id) const;
/// Return the ID's of the tracked objects as a list of strings
std::list<std::string> GetTrackedObjectsIds() const;
/// Return the first trackedObject's properties as a JSON string
std::string GetTrackedObjectValues(std::string id) const;

/// @brief Add an openshot::Clip to the timeline
/// @param clip Add an openshot::Clip to the timeline. A clip can contain any type of Reader.
Expand Down
2 changes: 1 addition & 1 deletion src/TrackedObjectBBox.cpp
Expand Up @@ -230,7 +230,7 @@ bool TrackedObjectBBox::LoadBoxData(std::string inputFilePath)
// Read the existing tracker message.
fstream input(inputFilePath, ios::in | ios::binary);

//Check if it was able to read the protobuf data
// Check if it was able to read the protobuf data
if (!bboxMessage.ParseFromIstream(&input))
{
cerr << "Failed to parse protobuf message." << endl;
Expand Down
2 changes: 1 addition & 1 deletion src/TrackedObjectBBox.h
Expand Up @@ -174,7 +174,7 @@ namespace openshot
TrackedObjectBBox();

/// Add a BBox to the BoxVec map
void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle);
void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override;

/// Update object's BaseFps
void SetBaseFPS(Fraction fps);
Expand Down
5 changes: 4 additions & 1 deletion src/TrackedObjectBase.h
Expand Up @@ -83,7 +83,10 @@ namespace openshot {
virtual std::map<std::string, float> GetBoxValues(int64_t frame_number) const { std::map<std::string, float> ret; return ret; };
/// Return the main properties of the tracked object's parent clip
virtual std::map<std::string, float> GetParentClipProperties(int64_t frame_number) const { std::map<std::string, float> ret; return ret; }

/// Add a bounding box to the tracked object's BoxVec map
virtual void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) { return; };


/// Get and Set JSON methods
virtual std::string Json() const = 0; ///< Generate JSON string of this object
virtual Json::Value JsonValue() const = 0; ///< Generate Json::Value for this object
Expand Down
80 changes: 65 additions & 15 deletions src/effects/ObjectDetection.cpp
Expand Up @@ -28,12 +28,15 @@
* along with OpenShot Library. If not, see <http://www.gnu.org/licenses/>.
*/

#include <string>

#include "effects/ObjectDetection.h"
#include "effects/Tracker.h"

using namespace std;
using namespace openshot;


/// Blank constructor, useful when using Json to load the effect properties
ObjectDetection::ObjectDetection(std::string clipObDetectDataPath)
{
Expand Down Expand Up @@ -64,7 +67,7 @@ void ObjectDetection::init_effect_details()
info.description = "Detect objects through the video.";
info.has_audio = false;
info.has_video = true;
info.has_tracked_object = false;
info.has_tracked_object = true;
}

// This method is required for all derived classes of EffectBase, and returns a
Expand Down Expand Up @@ -92,7 +95,7 @@ std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, i
(int)(bb_nrml.width*fw),
(int)(bb_nrml.height*fh));
drawPred(detections.classIds.at(i), detections.confidences.at(i),
box, cv_image);
box, cv_image, detections.objectIds.at(i));
}
}

Expand All @@ -103,7 +106,7 @@ std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, i
return frame;
}

void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame)
void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame, int objectNumber)
{

//Draw a rectangle displaying the bounding box
Expand Down Expand Up @@ -133,29 +136,32 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
// Create tracker message
pb_objdetect::ObjDetect objMessage;

{
// Read the existing tracker message.
fstream input(inputFilePath, ios::in | ios::binary);
if (!objMessage.ParseFromIstream(&input)) {
cerr << "Failed to parse protobuf message." << endl;
return false;
}

// Read the existing tracker message.
fstream input(inputFilePath, ios::in | ios::binary);
if (!objMessage.ParseFromIstream(&input)) {
cerr << "Failed to parse protobuf message." << endl;
return false;
}


// Make sure classNames and detectionsData are empty
// Make sure classNames, detectionsData and trackedObjects are empty
classNames.clear();
detectionsData.clear();
trackedObjects.clear();

// Seed to generate same random numbers
std::srand(1);
// Get all classes names and assign a color to them
for(int i = 0; i < objMessage.classnames_size(); i++){
for(int i = 0; i < objMessage.classnames_size(); i++)
{
classNames.push_back(objMessage.classnames(i));
classesColor.push_back(cv::Scalar(std::rand()%205 + 50, std::rand()%205 + 50, std::rand()%205 + 50));
}

// Iterate over all frames of the saved message
for (size_t i = 0; i < objMessage.frame_size(); i++) {
for (size_t i = 0; i < objMessage.frame_size(); i++)
{
// Create protobuf message reader
const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);

Expand All @@ -169,8 +175,11 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<cv::Rect_<float>> boxes;
std::vector<int> objectIds;

for(int i = 0; i < pbFrameData.bounding_box_size(); i++){
// Iterate through the detected objects
for(int i = 0; i < pbFrameData.bounding_box_size(); i++)
{
// Get bounding box coordinates
float x = pBox.Get(i).x();
float y = pBox.Get(i).y();
Expand All @@ -180,6 +189,26 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
int classId = pBox.Get(i).classid();
// Get prediction confidence
float confidence = pBox.Get(i).confidence();

// Get the object Id
int objectId = pBox.Get(i).objectid();

// Search for the object id on trackedObjects map
auto trackedObject = trackedObjects.find(objectId);
// Check if object already exists on the map
if (trackedObject != trackedObjects.end())
{
// Add a new BBox to it
trackedObject->second->AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
}
else
{
// There is no tracked object with that id, so insert a new one
TrackedObjectBBox trackedObj;
trackedObj.AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
std::shared_ptr<TrackedObjectBBox> trackedObjPtr = std::make_shared<TrackedObjectBBox>(trackedObj);
trackedObjects.insert({objectId, trackedObjPtr});
}

// Create OpenCV rectangle with the bounding box info
cv::Rect_<float> box(x, y, w, h);
Expand All @@ -188,10 +217,11 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
boxes.push_back(box);
classIds.push_back(classId);
confidences.push_back(confidence);
objectIds.push_back(objectId);
}

// Assign data to object detector map
detectionsData[id] = DetectionData(classIds, confidences, boxes, id);
detectionsData[id] = DetectionData(classIds, confidences, boxes, id, objectIds);
}

// Delete all global objects allocated by libprotobuf.
Expand Down Expand Up @@ -226,6 +256,12 @@ Json::Value ObjectDetection::JsonValue() const {
Json::Value root = EffectBase::JsonValue(); // get parent properties
root["type"] = info.class_name;
root["protobuf_data_path"] = protobuf_data_path;

// Add trackedObjects IDs to JSON
for (auto const& trackedObject : trackedObjects){
// Save the trackedObject Id on root
root["box_id"+to_string(trackedObject.first)] = trackedObject.second->Id();
}

// return JsonValue
return root;
Expand Down Expand Up @@ -262,13 +298,27 @@ void ObjectDetection::SetJsonValue(const Json::Value root) {
protobuf_data_path = "";
}
}

for (auto const& trackedObject : trackedObjects){
Json::Value trackedObjectJSON;
trackedObjectJSON["box_id"] = root["box_id"+to_string(trackedObject.first)];
trackedObject.second->SetJsonValue(trackedObjectJSON);
}
}

// Get all properties for a specific frame
std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const {

// Generate JSON properties list
Json::Value root;

// Add trackedObjects IDs to JSON
for (auto const& trackedObject : trackedObjects){
// Save the trackedObject Id on root
Json::Value trackedObjectJSON = trackedObject.second->PropertiesJSON(requested_frame);
root["box_id"+to_string(trackedObject.first)] = trackedObjectJSON["box_id"];
}

root["id"] = add_property_json("ID", 0.0, "string", Id(), NULL, -1, -1, true, requested_frame);
root["position"] = add_property_json("Position", Position(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
root["layer"] = add_property_json("Track", Layer(), "int", "", NULL, 0, 20, false, requested_frame);
Expand Down

0 comments on commit 32a217e

Please sign in to comment.