Skip to content

Commit

Permalink
Added support to attach clip to detected object (ObjectDetection) effect
Browse files Browse the repository at this point in the history
This feature lets the user attach a clip to an object detected by the Object Detection effect, in the same way it is done with the Tracker effect.
  • Loading branch information
BrennoCaldato committed Jan 22, 2021
1 parent 1746331 commit 32a217e
Show file tree
Hide file tree
Showing 11 changed files with 148 additions and 27 deletions.
21 changes: 16 additions & 5 deletions src/CVObjectDetection.cpp
Expand Up @@ -101,7 +101,6 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start,
// Update progress
processingController->SetProgress(uint(100*(frame_number-start)/(end-start)));

// std::cout<<"Frame: "<<frame_number<<"\n";
}
}

Expand Down Expand Up @@ -134,6 +133,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
std::vector<int> objectIds;

for (size_t i = 0; i < outs.size(); ++i)
{
Expand Down Expand Up @@ -176,13 +176,14 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);

// Clear data vectors
boxes.clear(); confidences.clear(); classIds.clear();
boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
// Get SORT predicted boxes
for(auto TBox : sort.frameTrackingResult){
if(TBox.frame == frameId){
boxes.push_back(TBox.box);
confidences.push_back(TBox.confidence);
classIds.push_back(TBox.classId);
objectIds.push_back(TBox.id);
}
}

Expand All @@ -198,12 +199,14 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
boxes.erase(boxes.begin() + j);
classIds.erase(classIds.begin() + j);
confidences.erase(confidences.begin() + j);
objectIds.erase(objectIds.begin() + j);
break;
}
else{
boxes.erase(boxes.begin() + i);
classIds.erase(classIds.begin() + i);
confidences.erase(confidences.begin() + i);
objectIds.erase(objectIds.begin() + i);
i = 0;
break;
}
Expand All @@ -222,12 +225,14 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
boxes.erase(boxes.begin() + j);
classIds.erase(classIds.begin() + j);
confidences.erase(confidences.begin() + j);
objectIds.erase(objectIds.begin() + j);
break;
}
else{
boxes.erase(boxes.begin() + i);
classIds.erase(classIds.begin() + i);
confidences.erase(confidences.begin() + i);
objectIds.erase(objectIds.begin() + i);
i = 0;
break;
}
Expand All @@ -247,7 +252,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
normalized_boxes.push_back(normalized_box);
}

detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId);
detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds);
}

// Compute IOU between 2 boxes
Expand Down Expand Up @@ -355,6 +360,7 @@ void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CV
box->set_h(dData.boxes.at(i).height);
box->set_classid(dData.classIds.at(i));
box->set_confidence(dData.confidences.at(i));
box->set_objectid(dData.objectIds.at(i));

}
}
Expand Down Expand Up @@ -457,7 +463,10 @@ bool CVObjectDetection::_LoadObjDetectdData(){
const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();

// Construct data vectors related to detections in the current frame
std::vector<int> classIds; std::vector<float> confidences; std::vector<cv::Rect_<float>> boxes;
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<cv::Rect_<float>> boxes;
std::vector<int> objectIds;

for(int i = 0; i < pbFrameData.bounding_box_size(); i++){
// Get bounding box coordinates
Expand All @@ -468,13 +477,15 @@ bool CVObjectDetection::_LoadObjDetectdData(){

// Get class Id (which will be assigned to a class name) and prediction confidence
int classId = pBox.Get(i).classid(); float confidence = pBox.Get(i).confidence();
// Get object Id
int objectId = pBox.Get(i).objectid();

// Push back data into vectors
boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence);
}

// Assign data to object detector map
detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id);
detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds);
}

// Show the time stamp from the last update in object detector data file
Expand Down
10 changes: 9 additions & 1 deletion src/CVObjectDetection.h
Expand Up @@ -49,16 +49,24 @@ namespace openshot
// Stores the detected object bounding boxes and its properties.
struct CVDetectionData{
CVDetectionData(){}
CVDetectionData(std::vector<int> _classIds, std::vector<float> _confidences, std::vector<cv::Rect_<float>> _boxes, size_t _frameId){
CVDetectionData(
std::vector<int> _classIds,
std::vector<float> _confidences,
std::vector<cv::Rect_<float>> _boxes,
size_t _frameId,
std::vector<int> _objectIds)
{
classIds = _classIds;
confidences = _confidences;
boxes = _boxes;
frameId = _frameId;
objectIds = _objectIds;
}
size_t frameId;
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<cv::Rect_<float>> boxes;
std::vector<int> objectIds;
};

/**
Expand Down
38 changes: 38 additions & 0 deletions src/Timeline.cpp
Expand Up @@ -290,6 +290,44 @@ std::list<std::string> Timeline::GetTrackedObjectsIds() const{
return trackedObjects_ids;
}

// Return the first tracked object's box corners as a JSON string.
// Unknown ids (and tracked objects that have no boxes yet) yield all-zero
// coordinates rather than crashing.
std::string Timeline::GetTrackedObjectValues(std::string id) const {

    // Initialize the JSON object with all-zero defaults; these are returned
    // unchanged when the id is not found or the object has no boxes
    Json::Value trackedObjectJson;
    trackedObjectJson["x1"] = 0;
    trackedObjectJson["y1"] = 0;
    trackedObjectJson["x2"] = 0;
    trackedObjectJson["y2"] = 0;

    // Search for the tracked object on the map
    auto iterator = tracked_objects.find(id);

    if (iterator != tracked_objects.end())
    {
        // Id found, get the object pointer and cast it as a TrackedObjectBBox
        std::shared_ptr<TrackedObjectBBox> trackedObject = std::static_pointer_cast<TrackedObjectBBox>(iterator->second);

        // Borrow the box map by const reference (taking it by value copied
        // the entire container on every call)
        const auto& boxes = trackedObject->BoxVec;

        // Guard against an object with no recorded boxes: dereferencing
        // begin() of an empty map is undefined behavior
        if (!boxes.empty())
        {
            // Convert the first box from center+size to corner coordinates
            const auto& firstBox = boxes.begin()->second;
            float x1 = firstBox.cx - (firstBox.width / 2);
            float y1 = firstBox.cy - (firstBox.height / 2);
            float x2 = firstBox.cx + (firstBox.width / 2);
            float y2 = firstBox.cy + (firstBox.height / 2);

            trackedObjectJson["x1"] = x1;
            trackedObjectJson["y1"] = y1;
            trackedObjectJson["x2"] = x2;
            trackedObjectJson["y2"] = y2;
        }
    }

    return trackedObjectJson.toStyledString();
}

// Add an openshot::Clip to the timeline
void Timeline::AddClip(Clip* clip)
{
Expand Down
2 changes: 2 additions & 0 deletions src/Timeline.h
Expand Up @@ -250,6 +250,8 @@ namespace openshot {
std::shared_ptr<openshot::TrackedObjectBase> GetTrackedObject(std::string id) const;
/// Return the ID's of the tracked objects as a list of strings
std::list<std::string> GetTrackedObjectsIds() const;
/// Return the first trackedObject's properties as a JSON string
std::string GetTrackedObjectValues(std::string id) const;

/// @brief Add an openshot::Clip to the timeline
/// @param clip Add an openshot::Clip to the timeline. A clip can contain any type of Reader.
Expand Down
2 changes: 1 addition & 1 deletion src/TrackedObjectBBox.cpp
Expand Up @@ -230,7 +230,7 @@ bool TrackedObjectBBox::LoadBoxData(std::string inputFilePath)
// Read the existing tracker message.
fstream input(inputFilePath, ios::in | ios::binary);

//Check if it was able to read the protobuf data
// Check if it was able to read the protobuf data
if (!bboxMessage.ParseFromIstream(&input))
{
cerr << "Failed to parse protobuf message." << endl;
Expand Down
2 changes: 1 addition & 1 deletion src/TrackedObjectBBox.h
Expand Up @@ -174,7 +174,7 @@ namespace openshot
TrackedObjectBBox();

/// Add a BBox to the BoxVec map
void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle);
void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override;

/// Update object's BaseFps
void SetBaseFPS(Fraction fps);
Expand Down
5 changes: 4 additions & 1 deletion src/TrackedObjectBase.h
Expand Up @@ -83,7 +83,10 @@ namespace openshot {
virtual std::map<std::string, float> GetBoxValues(int64_t frame_number) const { std::map<std::string, float> ret; return ret; };
/// Return the main properties of the tracked object's parent clip
virtual std::map<std::string, float> GetParentClipProperties(int64_t frame_number) const { std::map<std::string, float> ret; return ret; }

/// Add a bounding box to the tracked object's BoxVec map
virtual void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) { return; };


/// Get and Set JSON methods
virtual std::string Json() const = 0; ///< Generate JSON string of this object
virtual Json::Value JsonValue() const = 0; ///< Generate Json::Value for this object
Expand Down
80 changes: 65 additions & 15 deletions src/effects/ObjectDetection.cpp
Expand Up @@ -28,12 +28,15 @@
* along with OpenShot Library. If not, see <http://www.gnu.org/licenses/>.
*/

#include <string>

#include "effects/ObjectDetection.h"
#include "effects/Tracker.h"

using namespace std;
using namespace openshot;


/// Blank constructor, useful when using Json to load the effect properties
ObjectDetection::ObjectDetection(std::string clipObDetectDataPath)
{
Expand Down Expand Up @@ -64,7 +67,7 @@ void ObjectDetection::init_effect_details()
info.description = "Detect objects through the video.";
info.has_audio = false;
info.has_video = true;
info.has_tracked_object = false;
info.has_tracked_object = true;
}

// This method is required for all derived classes of EffectBase, and returns a
Expand Down Expand Up @@ -92,7 +95,7 @@ std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, i
(int)(bb_nrml.width*fw),
(int)(bb_nrml.height*fh));
drawPred(detections.classIds.at(i), detections.confidences.at(i),
box, cv_image);
box, cv_image, detections.objectIds.at(i));
}
}

Expand All @@ -103,7 +106,7 @@ std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, i
return frame;
}

void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame)
void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame, int objectNumber)
{

//Draw a rectangle displaying the bounding box
Expand Down Expand Up @@ -133,29 +136,32 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
// Create tracker message
pb_objdetect::ObjDetect objMessage;

{
// Read the existing tracker message.
fstream input(inputFilePath, ios::in | ios::binary);
if (!objMessage.ParseFromIstream(&input)) {
cerr << "Failed to parse protobuf message." << endl;
return false;
}

// Read the existing tracker message.
fstream input(inputFilePath, ios::in | ios::binary);
if (!objMessage.ParseFromIstream(&input)) {
cerr << "Failed to parse protobuf message." << endl;
return false;
}


// Make sure classNames and detectionsData are empty
// Make sure classNames, detectionsData and trackedObjects are empty
classNames.clear();
detectionsData.clear();
trackedObjects.clear();

// Seed to generate same random numbers
std::srand(1);
// Get all classes names and assign a color to them
for(int i = 0; i < objMessage.classnames_size(); i++){
for(int i = 0; i < objMessage.classnames_size(); i++)
{
classNames.push_back(objMessage.classnames(i));
classesColor.push_back(cv::Scalar(std::rand()%205 + 50, std::rand()%205 + 50, std::rand()%205 + 50));
}

// Iterate over all frames of the saved message
for (size_t i = 0; i < objMessage.frame_size(); i++) {
for (size_t i = 0; i < objMessage.frame_size(); i++)
{
// Create protobuf message reader
const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);

Expand All @@ -169,8 +175,11 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<cv::Rect_<float>> boxes;
std::vector<int> objectIds;

for(int i = 0; i < pbFrameData.bounding_box_size(); i++){
// Iterate through the detected objects
for(int i = 0; i < pbFrameData.bounding_box_size(); i++)
{
// Get bounding box coordinates
float x = pBox.Get(i).x();
float y = pBox.Get(i).y();
Expand All @@ -180,6 +189,26 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
int classId = pBox.Get(i).classid();
// Get prediction confidence
float confidence = pBox.Get(i).confidence();

// Get the object Id
int objectId = pBox.Get(i).objectid();

// Search for the object id on trackedObjects map
auto trackedObject = trackedObjects.find(objectId);
// Check if object already exists on the map
if (trackedObject != trackedObjects.end())
{
// Add a new BBox to it
trackedObject->second->AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
}
else
{
// There is no tracked object with that id, so insert a new one
TrackedObjectBBox trackedObj;
trackedObj.AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
std::shared_ptr<TrackedObjectBBox> trackedObjPtr = std::make_shared<TrackedObjectBBox>(trackedObj);
trackedObjects.insert({objectId, trackedObjPtr});
}

// Create OpenCV rectangle with the bounding box info
cv::Rect_<float> box(x, y, w, h);
Expand All @@ -188,10 +217,11 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
boxes.push_back(box);
classIds.push_back(classId);
confidences.push_back(confidence);
objectIds.push_back(objectId);
}

// Assign data to object detector map
detectionsData[id] = DetectionData(classIds, confidences, boxes, id);
detectionsData[id] = DetectionData(classIds, confidences, boxes, id, objectIds);
}

// Delete all global objects allocated by libprotobuf.
Expand Down Expand Up @@ -226,6 +256,12 @@ Json::Value ObjectDetection::JsonValue() const {
Json::Value root = EffectBase::JsonValue(); // get parent properties
root["type"] = info.class_name;
root["protobuf_data_path"] = protobuf_data_path;

// Add trackedObjects IDs to JSON
for (auto const& trackedObject : trackedObjects){
// Save the trackedObject Id on root
root["box_id"+to_string(trackedObject.first)] = trackedObject.second->Id();
}

// return JsonValue
return root;
Expand Down Expand Up @@ -262,13 +298,27 @@ void ObjectDetection::SetJsonValue(const Json::Value root) {
protobuf_data_path = "";
}
}

for (auto const& trackedObject : trackedObjects){
Json::Value trackedObjectJSON;
trackedObjectJSON["box_id"] = root["box_id"+to_string(trackedObject.first)];
trackedObject.second->SetJsonValue(trackedObjectJSON);
}
}

// Get all properties for a specific frame
std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const {

// Generate JSON properties list
Json::Value root;

// Add trackedObjects IDs to JSON
for (auto const& trackedObject : trackedObjects){
// Save the trackedObject Id on root
Json::Value trackedObjectJSON = trackedObject.second->PropertiesJSON(requested_frame);
root["box_id"+to_string(trackedObject.first)] = trackedObjectJSON["box_id"];
}

root["id"] = add_property_json("ID", 0.0, "string", Id(), NULL, -1, -1, true, requested_frame);
root["position"] = add_property_json("Position", Position(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
root["layer"] = add_property_json("Track", Layer(), "int", "", NULL, 0, 20, false, requested_frame);
Expand Down

0 comments on commit 32a217e

Please sign in to comment.