Commit 483f288

Added normalized bboxes for tracker and object detector
Also added tests for ObjectDetection and several bug fixes
BrennoCaldato committed Jul 29, 2020
1 parent 88a0c37 commit 483f288
Showing 19 changed files with 377 additions and 124 deletions.
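
The central change: bounding boxes are now stored as normalized floats instead of pixel-space integers, so tracked and detected boxes survive resolution changes. Note the packing convention used throughout this commit: the normalized top-left corner goes in the x/y fields of a cv::Rect_<float>, and the normalized bottom-right corner goes in the width/height fields, i.e. a stored box is really (x1, y1, x2, y2) in [0, 1]. A minimal sketch of that round trip (helper names are illustrative, not part of the commit):

#include <opencv2/core.hpp>

// Pack a pixel-space box as normalized corners (x1, y1, x2, y2),
// mirroring the convention in CVObjectDetection::postprocess below.
cv::Rect_<float> normalizeBox(const cv::Rect &box, const cv::Size &frameDims) {
    cv::Rect_<float> n;
    n.x = box.x / (float)frameDims.width;                       // normalized x1
    n.y = box.y / (float)frameDims.height;                      // normalized y1
    n.width = (box.x + box.width) / (float)frameDims.width;     // normalized x2
    n.height = (box.y + box.height) / (float)frameDims.height;  // normalized y2
    return n;
}

// Recover a pixel-space box for any target frame size.
cv::Rect denormalizeBox(const cv::Rect_<float> &n, const cv::Size &frameDims) {
    int x1 = (int)(n.x * frameDims.width);
    int y1 = (int)(n.y * frameDims.height);
    int x2 = (int)(n.width * frameDims.width);    // width field holds x2
    int y2 = (int)(n.height * frameDims.height);  // height field holds y2
    return cv::Rect(x1, y1, x2 - x1, y2 - y1);
}
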
12 changes: 10 additions & 2 deletions include/CVObjectDetection.h
@@ -44,11 +44,13 @@
#include "Clip.h"
#include "objdetectdata.pb.h"

+#include "../src/sort_filter/sort.hpp"

using google::protobuf::util::TimeUtil;

struct CVDetectionData{
CVDetectionData(){}
-    CVDetectionData(std::vector<int> _classIds, std::vector<float> _confidences, std::vector<cv::Rect> _boxes, size_t _frameId){
+    CVDetectionData(std::vector<int> _classIds, std::vector<float> _confidences, std::vector<cv::Rect_<float>> _boxes, size_t _frameId){
classIds = _classIds;
confidences = _confidences;
boxes = _boxes;
@@ -57,7 +59,7 @@ struct CVDetectionData{
size_t frameId;
std::vector<int> classIds;
std::vector<float> confidences;
-    std::vector<cv::Rect> boxes;
+    std::vector<cv::Rect_<float>> boxes;
};

class CVObjectDetection{
@@ -74,6 +76,8 @@ class CVObjectDetection{
std::string processingDevice;
std::string protobuf_data_path;

+    SortTracker sort;

uint progress;

size_t start;
@@ -86,6 +90,8 @@

void DetectObjects(const cv::Mat &frame, size_t frame_number);

+    bool iou(cv::Rect pred_box, cv::Rect sort_box);

// Remove the bounding boxes with low confidence using non-maxima suppression
void postprocess(const cv::Size &frameDims, const std::vector<cv::Mat>& out, size_t frame_number);

@@ -100,6 +106,8 @@

void detectObjectsClip(openshot::Clip &video, size_t start=0, size_t end=0, bool process_interval=false);

+    CVDetectionData GetDetectionData(size_t frameId);

/// Protobuf Save and Load methods
// Save protobuf file
bool SaveTrackedData();
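
The new public GetDetectionData accessor pairs with detectObjectsClip: process a clip once, then query results per frame. A hypothetical caller sketch (the clip, JSON config, and controller setup are assumptions, not shown in this diff):

#include <cstdio>
#include <string>
#include "CVObjectDetection.h"

// Run detection over a whole clip, then print one frame's boxes.
void printDetections(openshot::Clip &clip, const std::string &processInfoJson,
                     ProcessingController &controller, size_t frameId) {
    CVObjectDetection detector(processInfoJson, controller);
    detector.detectObjectsClip(clip);  // start=0, end=0 processes the full clip

    CVDetectionData d = detector.GetDetectionData(frameId);
    for (size_t i = 0; i < d.boxes.size(); i++)
        std::printf("class %d conf %.2f box (%.3f, %.3f)-(%.3f, %.3f)\n",
                    d.classIds[i], d.confidences[i], d.boxes[i].x, d.boxes[i].y,
                    d.boxes[i].width, d.boxes[i].height);  // normalized corners
}
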
3 changes: 1 addition & 2 deletions include/CVStabilization.h
@@ -86,8 +86,7 @@ class CVStabilization {
size_t end;

cv::Mat last_T;
-    cv::Mat cur, cur_grey;
-    cv::Mat prev, prev_grey;
+    cv::Mat prev_grey;
std::vector <TransformParam> prev_to_cur_transform; // Previous to current
std::string protobuf_data_path;

24 changes: 7 additions & 17 deletions include/CVTracker.h
@@ -28,10 +28,10 @@ using google::protobuf::util::TimeUtil;
struct FrameData{
size_t frame_id = -1;
float rotation = 0;
-    int x1 = -1;
-    int y1 = -1;
-    int x2 = -1;
-    int y2 = -1;
+    float x1 = -1;
+    float y1 = -1;
+    float x2 = -1;
+    float y2 = -1;

// Constructors
FrameData()
@@ -40,7 +40,7 @@ struct FrameData{
FrameData( size_t _frame_id)
{frame_id = _frame_id;}

-    FrameData( size_t _frame_id , float _rotation, int _x1, int _y1, int _x2, int _y2)
+    FrameData( size_t _frame_id , float _rotation, float _x1, float _y1, float _x2, float _y2)
{
frame_id = _frame_id;
rotation = _rotation;
Expand All @@ -51,24 +51,14 @@ struct FrameData{
}
};

-class RemoveJitter{
-private:
-    std::vector<cv::Rect2d> bboxTracker;
-    int boxesInterval;
-    int boxesInVector;
-
-public:
-    RemoveJitter(int boxesInterval);
-    void update(cv::Rect2d bbox, cv::Rect2d &out_bbox);
-};

class CVTracker {
private:
std::map<size_t, FrameData> trackedDataById; // Save tracked data
std::string trackerType; // Name of the chosen tracker
cv::Ptr<cv::Tracker> tracker; // Pointer of the selected tracker

-    cv::Rect2d bbox; // Bounding box coords
+    SortTracker sort;

std::string protobuf_data_path; // Path to protobuf data file

@@ -86,7 +76,7 @@ class CVTracker {
bool initTracker(cv::Mat &frame, size_t frameId);

// Update the object tracker according to frame
-    bool trackFrame(cv::Mat &frame, size_t frameId, SortTracker &sort, RemoveJitter &removeJitter);
+    bool trackFrame(cv::Mat &frame, size_t frameId);

public:

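
The hand-rolled RemoveJitter smoother is gone; CVTracker now owns a SortTracker, and smoothing comes from SORT's Kalman filters instead. Judging from the call sites in this commit (see the commented-out block in CVObjectDetection.cpp below), the SortTracker flow looks roughly like this sketch; signatures are inferred from usage, not taken from sort.hpp:

#include <cmath>
#include <vector>
#include <opencv2/core.hpp>
#include "../src/sort_filter/sort.hpp"

// Inferred SortTracker flow: feed raw boxes in, read smoothed boxes back.
std::vector<cv::Rect> smoothBoxes(SortTracker &sort,
                                  std::vector<cv::Rect> detections,
                                  size_t frameId, cv::Size frameDims) {
    double frameDiag = std::sqrt(std::pow(frameDims.width, 2) +
                                 std::pow(frameDims.height, 2));
    sort.update(detections, frameId, frameDiag);  // advance the Kalman filters

    std::vector<cv::Rect> smoothed;
    for (auto TBox : sort.frameTrackingResult)    // results tagged per frame
        if (TBox.frame == frameId)
            smoothed.push_back(TBox.box);
    return smoothed;
}
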
4 changes: 2 additions & 2 deletions include/effects/ObjectDetection.h
@@ -43,7 +43,7 @@

struct DetectionData{
DetectionData(){}
-    DetectionData(std::vector<int> _classIds, std::vector<float> _confidences, std::vector<cv::Rect> _boxes, size_t _frameId){
+    DetectionData(std::vector<int> _classIds, std::vector<float> _confidences, std::vector<cv::Rect_<float>> _boxes, size_t _frameId){
classIds = _classIds;
confidences = _confidences;
boxes = _boxes;
@@ -52,7 +52,7 @@ struct DetectionData{
size_t frameId;
std::vector<int> classIds;
std::vector<float> confidences;
-    std::vector<cv::Rect> boxes;
+    std::vector<cv::Rect_<float>> boxes;
};

namespace openshot
10 changes: 5 additions & 5 deletions include/effects/Tracker.h
@@ -52,10 +52,10 @@ using google::protobuf::util::TimeUtil;
struct EffectFrameData{
size_t frame_id = -1;
float rotation = 0;
-    int x1 = -1;
-    int y1 = -1;
-    int x2 = -1;
-    int y2 = -1;
+    float x1 = -1;
+    float y1 = -1;
+    float x2 = -1;
+    float y2 = -1;

// Constructors
EffectFrameData()
@@ -64,7 +64,7 @@ struct EffectFrameData{
EffectFrameData( int _frame_id)
{frame_id = _frame_id;}

-    EffectFrameData( int _frame_id , float _rotation, int _x1, int _y1, int _x2, int _y2)
+    EffectFrameData( int _frame_id , float _rotation, float _x1, float _y1, float _x2, float _y2)
{
frame_id = _frame_id;
rotation = _rotation;
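
With FrameData and EffectFrameData switching from int to float, tracker keyframes can carry the normalized corner coordinates directly, keeping saved data independent of rendering resolution. A sketch of the hand-off in both directions (helper names are illustrative, not part of the commit):

#include <opencv2/core.hpp>
#include "effects/Tracker.h"

// Build an EffectFrameData record from a packed normalized box
// (the Rect_'s width/height fields hold x2/y2, as noted above).
EffectFrameData toFrameData(int frameId, float rotation,
                            const cv::Rect_<float> &n) {
    return EffectFrameData(frameId, rotation, n.x, n.y, n.width, n.height);
}

// At render time, scale the normalized corners to the current frame size.
cv::Rect toPixels(const EffectFrameData &fd, int frameWidth, int frameHeight) {
    int x1 = (int)(fd.x1 * frameWidth), y1 = (int)(fd.y1 * frameHeight);
    int x2 = (int)(fd.x2 * frameWidth), y2 = (int)(fd.y2 * frameHeight);
    return cv::Rect(x1, y1, x2 - x1, y2 - y1);
}
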
110 changes: 108 additions & 2 deletions src/CVObjectDetection.cpp
@@ -40,7 +40,6 @@
CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController)
: processingController(&processingController), processingDevice("CPU"){
SetJson(processInfoJson);
-    setProcessingDevice();
}

void CVObjectDetection::setProcessingDevice(){
@@ -70,7 +69,10 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start,
nmsThreshold = 0.1;

// Load the network
+    if(classesFile == "" || modelConfiguration == "" || modelWeights == "")
+        return;
net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights);
+    setProcessingDevice();

size_t frame_number;
if(!process_interval || end == 0 || end-start <= 0){
@@ -164,7 +166,100 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
std::vector<int> indices;
cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);

-    detectionsData[frameId] = CVDetectionData(classIds, confidences, boxes, frameId);
+    // std::vector<cv::Rect> sortBoxes;
+    // for(auto box : boxes)
+    //     sortBoxes.push_back(box);
+    // sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)));
+
+    // sortBoxes.clear();
+    // for(auto TBox : sort.frameTrackingResult)
+    //     if(TBox.frame == frameId){
+    //         sortBoxes.push_back(TBox.box);
+    //     }
+
+    // for(int i = 0; i<boxes.size(); i++){
+    //     bool found = false;
+    //     for(int j = 0; j<sortBoxes.size(); j++){
+    //         if( iou(boxes[i], sortBoxes[j]) ){
+    //             boxes[i] = sortBoxes[j];
+    //             sortBoxes.erase(sortBoxes.begin() + j);
+    //             found = true;
+    //             break;
+    //         }
+    //     }
+    //     if(!found){
+    //         boxes.erase(boxes.begin() + i);
+    //         confidences.erase(confidences.begin() + i);
+    //         classIds.erase(classIds.begin() + i);
+    //     }
+    // }
+
+    // std::map<int, std::vector<cv::Rect> > rectAndClasses;
+    // for(int i=0; i<boxes.size(); i++){
+    //     if(rectAndClasses.find(classIds[i]) == rectAndClasses.end()){
+    //         std::vector<cv::Rect> bboxes;
+    //         rectAndClasses[classIds[i]] = bboxes;
+    //     }
+
+    //     rectAndClasses[classIds[i]].push_back(boxes[i]);
+    // }
+
+    // for(std::map<int, std::vector<cv::Rect> >::iterator it = rectAndClasses.begin(); it != rectAndClasses.end(); it++){
+    //     if(sort.find(it->first) == sort.end()){
+    //         SortTracker classTracker;
+    //         sort[it->first] = classTracker;
+    //     }
+    //     sort[it->first].update(it->second, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)));
+    // }
+
+    // classIds.clear(); boxes.clear(); confidences.clear();
+
+    // for(std::map<int, SortTracker>::iterator it = sort.begin(); it != sort.end(); it++){
+    //     for(auto TBox : it->second.frameTrackingResult){
+    //         boxes.push_back(TBox.box);
+    //         classIds.push_back(it->first);
+    //         confidences.push_back(1);
+    //     }
+    // }
+
+    std::vector<cv::Rect_<float>> normalized_boxes;
+    for(auto box : boxes){
+        cv::Rect_<float> normalized_box;
+        normalized_box.x = (box.x)/(float)frameDims.width;
+        normalized_box.y = (box.y)/(float)frameDims.height;
+        normalized_box.width = (box.x+box.width)/(float)frameDims.width;
+        normalized_box.height = (box.y+box.height)/(float)frameDims.height;
+        normalized_boxes.push_back(normalized_box);
+    }
+
+    detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId);
}

+bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
+    // determine the (x, y)-coordinates of the intersection rectangle
+    int xA = std::max(pred_box.x, sort_box.x);
+    int yA = std::max(pred_box.y, sort_box.y);
+    int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
+    int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);
+
+    // compute the area of the intersection rectangle
+    int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);
+    // compute the area of both the prediction and tracker rectangles
+    int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
+    int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);
+    // compute the intersection over union: intersection area divided by
+    // the union (sum of both areas minus the intersection area)
+    float iou = interArea / (float)(boxAArea + boxBArea - interArea);
+
+    if(iou > 0.75)
+        return true;
+    return false;
+}

// Get the names of the output layers
@@ -185,6 +280,17 @@ std::vector<cv::String> CVObjectDetection::getOutputsNames(const cv::dnn::Net& n
return names;
}

+CVDetectionData CVObjectDetection::GetDetectionData(size_t frameId){
+    // Check if the detection data for the requested frame exists
+    if ( detectionsData.find(frameId) == detectionsData.end() ) {
+        return CVDetectionData();
+    } else {
+        return detectionsData[frameId];
+    }
+}

bool CVObjectDetection::SaveTrackedData(){
// Create tracker message
libopenshotobjdetect::ObjDetect objMessage;
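
The new iou() helper gates the (currently commented-out) matching of detector boxes against SORT boxes above: two boxes count as the same object only when their intersection over union exceeds 0.75. It operates on pixel-space cv::Rect, i.e. before normalization, and the +1 terms treat boxes as inclusive pixel grids. A standalone worked example of that rule (a copy of the logic for illustration, not the class method itself):

#include <algorithm>
#include <cassert>
#include <opencv2/core.hpp>

// Same IoU rule as CVObjectDetection::iou, returning the raw ratio.
static float iouValue(cv::Rect a, cv::Rect b) {
    int xA = std::max(a.x, b.x), yA = std::max(a.y, b.y);
    int xB = std::min(a.x + a.width, b.x + b.width);
    int yB = std::min(a.y + a.height, b.y + b.height);
    int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);
    int areaA = (a.width + 1) * (a.height + 1);
    int areaB = (b.width + 1) * (b.height + 1);
    return interArea / (float)(areaA + areaB - interArea);
}

int main() {
    // Identical boxes: IoU = 1.0, passes the 0.75 gate.
    assert(iouValue(cv::Rect(0, 0, 10, 10), cv::Rect(0, 0, 10, 10)) == 1.0f);
    // Boxes offset by half their width: 66 / (121 + 121 - 66) ≈ 0.375, rejected.
    assert(iouValue(cv::Rect(0, 0, 10, 10), cv::Rect(5, 0, 10, 10)) < 0.75f);
    return 0;
}
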
41 changes: 30 additions & 11 deletions src/CVStabilization.cpp
@@ -82,6 +82,12 @@ void CVStabilization::stabilizeClip(openshot::Clip& video, size_t _start, size_t

// Track current frame features and find the relative transformation
void CVStabilization::TrackFrameFeatures(cv::Mat frame, size_t frameNum){
+    std::cout<<"frame "<<frameNum<<"\n";
+    if(cv::countNonZero(frame) < 1){
+        last_T = cv::Mat();
+        prev_grey = cv::Mat();
+        return;
+    }

if(prev_grey.empty()){
prev_grey = frame;
@@ -93,7 +99,6 @@ void CVStabilization::TrackFrameFeatures(cv::Mat frame, size_t frameNum){
std::vector <cv::Point2f> prev_corner2, cur_corner2;
std::vector <uchar> status;
std::vector <float> err;
-
// Extract new image features
cv::goodFeaturesToTrack(prev_grey, prev_corner, 200, 0.01, 30);
// Track features
@@ -105,23 +110,37 @@
cur_corner2.push_back(cur_corner[i]);
}
}
+    // In case no feature was detected
+    if(prev_corner2.empty() || cur_corner2.empty()){
+        last_T = cv::Mat();
+        prev_grey = cv::Mat();
+        return;
+    }

// Translation + rotation only
-    cv::Mat T = estimateRigidTransform(prev_corner2, cur_corner2, false); // false = rigid transform, no scaling/shearing
+    cv::Mat T = estimateAffinePartial2D(prev_corner2, cur_corner2); // partial affine: translation, rotation, uniform scale

-    // If no transformation is found, just use the last known good transform.
-    if(T.data == NULL) {
-        last_T.copyTo(T);
-    }
+    double da, dx, dy;
+    if(T.size().width == 0 || T.size().height == 0){
+        dx = 0;
+        dy = 0;
+        da = 0;
+    }
+    else{
+        // If no transformation is found, just use the last known good transform.
+        if(T.data == NULL && !last_T.empty())
+            last_T.copyTo(T);
+        // Decompose T
+        dx = T.at<double>(0,2);
+        dy = T.at<double>(1,2);
+        da = atan2(T.at<double>(1,0), T.at<double>(0,0));
+    }

T.copyTo(last_T);
-    // Decompose T
-    double dx = T.at<double>(0,2);
-    double dy = T.at<double>(1,2);
-    double da = atan2(T.at<double>(1,0), T.at<double>(0,0));

prev_to_cur_transform.push_back(TransformParam(dx, dy, da));

-    cur.copyTo(prev);
+    std::cout<<"10\n";
frame.copyTo(prev_grey);

// Show processing info
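
The switch from estimateRigidTransform to estimateAffinePartial2D tracks OpenCV's API: the former was deprecated in OpenCV 4 and has since been removed, while the calib3d replacement fits a 2x3 partial affine (translation, rotation, uniform scale) with RANSAC and returns an empty matrix when no consensus is found, hence the new size check above. A minimal sketch of the fit-and-decompose step in isolation:

#include <cmath>
#include <vector>
#include <opencv2/calib3d.hpp>

struct RigidMotion { double dx, dy, da; };

// Fit a partial affine between matched points and decompose it into
// translation (dx, dy) and rotation (da), as TrackFrameFeatures does.
RigidMotion frameMotion(const std::vector<cv::Point2f> &prev,
                        const std::vector<cv::Point2f> &cur) {
    cv::Mat T = cv::estimateAffinePartial2D(prev, cur);  // RANSAC by default
    if (T.empty())                 // no consensus between the point sets
        return {0.0, 0.0, 0.0};
    return {T.at<double>(0, 2),    // dx: translation in x
            T.at<double>(1, 2),    // dy: translation in y
            std::atan2(T.at<double>(1, 0), T.at<double>(0, 0))};  // da: rotation
}
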
