From 7e5810cc711983cb8447f37ee95e8b1062f73f16 Mon Sep 17 00:00:00 2001 From: gineshidalgo99 Date: Sun, 15 Jan 2017 23:49:08 -0500 Subject: [PATCH] Merged data_transformer/proto with train repository --- include/caffe/data_transformer.hpp | 85 +- src/caffe/data_transformer.cpp | 2024 +++++++++++++++++++++++++++- src/caffe/proto/caffe.proto | 29 + 3 files changed, 2126 insertions(+), 12 deletions(-) diff --git a/include/caffe/data_transformer.hpp b/include/caffe/data_transformer.hpp index 420a4bc2..05500725 100644 --- a/include/caffe/data_transformer.hpp +++ b/include/caffe/data_transformer.hpp @@ -2,16 +2,14 @@ #define CAFFE_DATA_TRANSFORMER_HPP #include -#include -#include -using namespace cv; - -#include #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" +// CPM extra code: extra includes +#include + namespace caffe { /** @@ -58,10 +56,10 @@ class DataTransformer { #ifdef USE_OPENCV /** * @brief Applies the transformation defined in the data layer's - * transform_param block to a vector of Mat. + * transform_param block to a vector of cv::Mat. * * @param mat_vector - * A vector of Mat containing the data to be transformed. + * A vector of cv::Mat containing the data to be transformed. * @param transformed_blob * This is destination blob. It can be part of top blob's data if * set_cpu_data() is used. See memory_layer.cpp for an example. @@ -118,7 +116,7 @@ class DataTransformer { * It uses the first element to infer the shape of the blob. * * @param mat_vector - * A vector of Mat containing the data to be transformed. + * A vector of cv::Mat containing the data to be transformed. */ #ifdef USE_OPENCV vector InferBlobShape(const vector & mat_vector); @@ -147,10 +145,81 @@ class DataTransformer { // Tranformation parameters TransformationParameter param_; + shared_ptr rng_; Phase phase_; Blob data_mean_; vector mean_values_; + +// CPM extra code: public and protected methods/structs/etc below +public: + void Transform_nv(const Datum& datum, Blob* transformed_blob, Blob* transformed_label_blob, int cnt); //image and label + + struct AugmentSelection { + bool flip; + float degree; + cv::Size crop; + float scale; + }; + + struct Joints { + vector joints; + vector isVisible; + }; + + struct MetaData { + string dataset; + cv::Size img_size; + bool isValidation; + int numOtherPeople; + int people_index; + int annolist_index; + int write_number; + int total_write_number; + int epoch; + cv::Point2f objpos; //objpos_x(float), objpos_y (float) + float scale_self; + Joints joint_self; //(3*16) + + vector objpos_other; //length is numOtherPeople + vector scale_other; //length is numOtherPeople + vector joint_others; //length is numOtherPeople + }; + + void generateLabelMap(Dtype*, cv::Mat&, MetaData meta); + void visualize(cv::Mat& img, MetaData meta, AugmentSelection as); + + bool augmentation_flip(cv::Mat& img, cv::Mat& img_aug, MetaData& meta); + float augmentation_rotate(cv::Mat& img_src, cv::Mat& img_aug, MetaData& meta); + float augmentation_scale(cv::Mat& img, cv::Mat& img_temp, MetaData& meta); + cv::Size augmentation_croppad(cv::Mat& img_temp, cv::Mat& img_aug, MetaData& meta); + + bool augmentation_flip(cv::Mat& img, cv::Mat& img_aug, cv::Mat& mask_miss, cv::Mat& mask_all, MetaData& meta, int mode); + float augmentation_rotate(cv::Mat& img_src, cv::Mat& img_aug, cv::Mat& mask_miss, cv::Mat& mask_all, MetaData& meta, int mode); + float augmentation_scale(cv::Mat& img, cv::Mat& img_temp, cv::Mat& mask_miss, cv::Mat& mask_all, MetaData& meta, int mode); + cv::Size augmentation_croppad(cv::Mat& img_temp, cv::Mat& img_aug, cv::Mat& mask_miss, cv::Mat& mask_miss_aug, cv::Mat& mask_all, cv::Mat& mask_all_aug, MetaData& meta, int mode); + + void RotatePoint(cv::Point2f& p, cv::Mat R); + bool onPlane(cv::Point p, cv::Size img_size); + void swapLeftRight(Joints& j); + void SetAugTable(int numData); + + int np_in_lmdb; + int np; + bool is_table_set; + vector > aug_degs; + vector > aug_flips; + +protected: + void Transform_nv(const Datum& datum, Dtype* transformed_data, Dtype* transformed_label, int cnt); + void ReadMetaData(MetaData& meta, const string& data, size_t offset3, size_t offset1); + void TransformMetaJoints(MetaData& meta); + void TransformJoints(Joints& joints); + void clahe(cv::Mat& img, int, int); + void putGaussianMaps(Dtype* entry, cv::Point2f center, int stride, int grid_x, int grid_y, float sigma); + void putVecMaps(Dtype* entryX, Dtype* entryY, cv::Mat& count, cv::Point2f centerA, cv::Point2f centerB, int stride, int grid_x, int grid_y, float sigma, int thre); + void putVecPeaks(Dtype* entryX, Dtype* entryY, cv::Mat& count, cv::Point2f centerA, cv::Point2f centerB, int stride, int grid_x, int grid_y, float sigma, int thre); + void dumpEverything(Dtype* transformed_data, Dtype* transformed_label, MetaData); }; } // namespace caffe diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp index 981c683b..2720f98f 100644 --- a/src/caffe/data_transformer.cpp +++ b/src/caffe/data_transformer.cpp @@ -10,10 +10,19 @@ #include "caffe/util/math_functions.hpp" #include "caffe/util/rng.hpp" +// CPM extra code: extra includes +#include +#include +#include +#include +// CPM end extra code + namespace caffe { template -DataTransformer::DataTransformer(const TransformationParameter& param, Phase phase) : param_(param), phase_(phase) { +DataTransformer::DataTransformer(const TransformationParameter& param, + Phase phase) + : param_(param), phase_(phase) { // check if we want to use mean_file if (param_.has_mean_file()) { CHECK_EQ(param_.mean_value_size(), 0) << @@ -34,10 +43,19 @@ DataTransformer::DataTransformer(const TransformationParameter& param, Ph mean_values_.push_back(param_.mean_value(c)); } } + + // CPM extra code: + LOG(INFO) << "DataTransformer constructor done."; + np_in_lmdb = param_.np_in_lmdb(); + LOG(INFO) << "np_in_lmdb" << np_in_lmdb; + np = param_.num_parts(); + is_table_set = false; + // CPM end extra code } template -void DataTransformer::Transform(const Datum& datum, Dtype* transformed_data) { +void DataTransformer::Transform(const Datum& datum, + Dtype* transformed_data) { const string& data = datum.data(); const int datum_channels = datum.channels(); const int datum_height = datum.height(); @@ -123,8 +141,10 @@ void DataTransformer::Transform(const Datum& datum, Dtype* transformed_da } } + template -void DataTransformer::Transform(const Datum& datum, Blob* transformed_blob) { +void DataTransformer::Transform(const Datum& datum, + Blob* transformed_blob) { // If datum is encoded, decoded and transform the cv::image. if (datum.encoded()) { #ifdef USE_OPENCV @@ -535,9 +555,2005 @@ int DataTransformer::Rand(int n) { return ((*rng)() % n); } +INSTANTIATE_CLASS(DataTransformer); + + + + + +// CPM extra code: public and protected methods/structs/etc below +template +void DecodeFloats(const string& data, size_t idx, Dtype* pf, size_t len) { + memcpy(pf, const_cast(&data[idx]), len * sizeof(Dtype)); +} + +string DecodeString(const string& data, size_t idx) { + string result = ""; + int i = 0; + while(data[idx+i] != 0){ + result.push_back(char(data[idx+i])); + i++; + } + return result; +} + +template +void DataTransformer::ReadMetaData(MetaData& meta, const string& data, size_t offset3, size_t offset1) { //very specific to genLMDB.py + // ------------------- Dataset name ---------------------- + meta.dataset = DecodeString(data, offset3); + // ------------------- Image Dimension ------------------- + float height, width; + DecodeFloats(data, offset3+offset1, &height, 1); + DecodeFloats(data, offset3+offset1+4, &width, 1); + meta.img_size = cv::Size(width, height); + // ----------- Validation, nop, counters ----------------- + meta.isValidation = (data[offset3+2*offset1]==0 ? false : true); + meta.numOtherPeople = (int)data[offset3+2*offset1+1]; + meta.people_index = (int)data[offset3+2*offset1+2]; + float annolist_index; + DecodeFloats(data, offset3+2*offset1+3, &annolist_index, 1); + meta.annolist_index = (int)annolist_index; + float write_number; + DecodeFloats(data, offset3+2*offset1+7, &write_number, 1); + meta.write_number = (int)write_number; + float total_write_number; + DecodeFloats(data, offset3+2*offset1+11, &total_write_number, 1); + meta.total_write_number = (int)total_write_number; + + // count epochs according to counters + static int cur_epoch = -1; + if(meta.write_number == 0){ + cur_epoch++; + } + meta.epoch = cur_epoch; + if(meta.write_number % 1000 == 0){ + LOG(INFO) << "dataset: " << meta.dataset <<"; img_size: " << meta.img_size + << "; meta.annolist_index: " << meta.annolist_index << "; meta.write_number: " << meta.write_number + << "; meta.total_write_number: " << meta.total_write_number << "; meta.epoch: " << meta.epoch; + } + //LOG(INFO) << "np_in_lmdb" << np_in_lmdb; + if(param_.aug_way() == "table" && !is_table_set){ + SetAugTable(meta.total_write_number); + is_table_set = true; + } + + // ------------------- objpos ----------------------- + DecodeFloats(data, offset3+3*offset1, &meta.objpos.x, 1); + DecodeFloats(data, offset3+3*offset1+4, &meta.objpos.y, 1); + meta.objpos -= cv::Point2f(1,1); + // ------------ scale_self, joint_self -------------- + DecodeFloats(data, offset3+4*offset1, &meta.scale_self, 1); + meta.joint_self.joints.resize(np_in_lmdb); + meta.joint_self.isVisible.resize(np_in_lmdb); + for(int i=0; i= meta.img_size.width || meta.joint_self.joints[i].y >= meta.img_size.height){ + meta.joint_self.isVisible[i] = 2; // 2 means cropped, 0 means occluded by still on image + } + } + //LOG(INFO) << meta.joint_self.joints[i].x << " " << meta.joint_self.joints[i].y << " " << meta.joint_self.isVisible[i]; + } + + //others (7 lines loaded) + meta.objpos_other.resize(meta.numOtherPeople); + meta.scale_other.resize(meta.numOtherPeople); + meta.joint_others.resize(meta.numOtherPeople); + for(int p=0; p= meta.img_size.width || meta.joint_others[p].joints[i].y >= meta.img_size.height){ + meta.joint_others[p].isVisible[i] = 2; // 2 means cropped, 1 means occluded by still on image + } + //LOG(INFO) << meta.joint_others[p].joints[i].x << " " << meta.joint_others[p].joints[i].y << " " << meta.joint_others[p].isVisible[i]; + } + } +} + +template +void DataTransformer::SetAugTable(int numData){ + aug_degs.resize(numData); + aug_flips.resize(numData); + for(int i = 0; i < numData; i++){ + aug_degs[i].resize(param_.num_total_augs()); + aug_flips[i].resize(param_.num_total_augs()); + } + //load table files + char filename[100]; + sprintf(filename, "../../rotate_%d_%d.txt", param_.num_total_augs(), numData); + std::ifstream rot_file(filename); + char filename2[100]; + sprintf(filename2, "../../flip_%d_%d.txt", param_.num_total_augs(), numData); + std::ifstream flip_file(filename2); + + for(int i = 0; i < numData; i++){ + for(int j = 0; j < param_.num_total_augs(); j++){ + rot_file >> aug_degs[i][j]; + flip_file >> aug_flips[i][j]; + } + } + //debug + // for(int i = 0; i < numData; i++){ + // for(int j = 0; j < param_.num_total_augs(); j++){ + // printf("%d ", (int)aug_degs[i][j]); + // } + // printf("\n"); + // } +} + +template +void DataTransformer::TransformMetaJoints(MetaData& meta) { + //transform joints in meta from np_in_lmdb (specified in prototxt) to np (specified in prototxt) + TransformJoints(meta.joint_self); + for(int i=0;i +void DataTransformer::TransformJoints(Joints& j) { + //transform joints in meta from np_in_lmdb (specified in prototxt) to np (specified in prototxt) + //MPII R leg: 0(ankle), 1(knee), 2(hip) + // L leg: 5(ankle), 4(knee), 3(hip) + // R arms: 10(wrist), 11(elbow), 12(shoulder) + // L arms: 15(wrist), 14(elbow), 13(shoulder) + // 6 - pelvis, 7 - thorax, 8 - upper neck, 9 - head top + //LOG(INFO) << "TransformJoints: here np == " << np << " np_lmdb = " << np_in_lmdb << " joints.size() = " << j.joints.size(); + //assert(joints.size() == np_in_lmdb); + //assert(np == 14 || np == 28); + Joints jo = j; + if(np == 14){ + int MPI_to_ours[14] = {9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5}; + jo.joints.resize(np); + jo.isVisible.resize(np); + for(int i=0;i +void DataTransformer::Transform_nv(const Datum& datum, Blob* transformed_data, Blob* transformed_label, int cnt) { + //std::cout << "Function 2 is used"; std::cout.flush(); + // int offset = datum.height()*datum.width(); + // int offset3 = 3 * offset; + // int offset1 = datum.width(); + // MetaData meta; + // ReadMetaData(meta, datum.data(), offset3, offset1); + // LOG(INFO) << "dataset: " << meta.dataset <<"; img_size: " << meta.img_size + // << "; meta.annolist_index: " << meta.annolist_index; + + const int datum_channels = datum.channels(); + //LOG(INFO) << datum.channels(); + //const int datum_height = datum.height(); + //const int datum_width = datum.width(); + + const int im_channels = transformed_data->channels(); + //LOG(INFO) << im_channels; + //const int im_height = transformed_data->height(); + //const int im_width = transformed_data->width(); + const int im_num = transformed_data->num(); + + //const int lb_channels = transformed_label->channels(); + //const int lb_height = transformed_label->height(); + //const int lb_width = transformed_label->width(); + const int lb_num = transformed_label->num(); + + //LOG(INFO) << "image shape: " << transformed_data->num() << " " << transformed_data->channels() << " " + // << transformed_data->height() << " " << transformed_data->width(); + //LOG(INFO) << "label shape: " << transformed_label->num() << " " << transformed_label->channels() << " " + // << transformed_label->height() << " " << transformed_label->width(); + + CHECK_EQ(datum_channels, 6); + CHECK_EQ(im_channels, 6); + //CHECK_EQ(im_channels, 4); + //CHECK_EQ(datum_channels, 4); + //CHECK_EQ(im_channels, 5); + //CHECK_EQ(datum_channels, 5); + + CHECK_EQ(im_num, lb_num); + //CHECK_LE(im_height, datum_height); + //CHECK_LE(im_width, datum_width); + CHECK_GE(im_num, 1); + + //const int crop_size = param_.crop_size(); + + // if (crop_size) { + // CHECK_EQ(crop_size, im_height); + // CHECK_EQ(crop_size, im_width); + // } else { + // CHECK_EQ(datum_height, im_height); + // CHECK_EQ(datum_width, im_width); + // } + + Dtype* transformed_data_pointer = transformed_data->mutable_cpu_data(); + Dtype* transformed_label_pointer = transformed_label->mutable_cpu_data(); + + Transform_nv(datum, transformed_data_pointer, transformed_label_pointer, cnt); //call function 1 +} + +template +void DataTransformer::Transform_nv(const Datum& datum, Dtype* transformed_data, Dtype* transformed_label, int cnt) { + + //TODO: some parameter should be set in prototxt + int clahe_tileSize = param_.clahe_tile_size(); + int clahe_clipLimit = param_.clahe_clip_limit(); + //float targetDist = 41.0/35.0; + AugmentSelection as = { + false, + 0.0, + cv::Size(), + 0, + }; + MetaData meta; + + const string& data = datum.data(); + const int datum_channels = datum.channels(); + //LOG(INFO) << datum.channels(); + const int datum_height = datum.height(); + const int datum_width = datum.width(); + // To do: make this a parameter in caffe.proto + const int mode = 5; //related to datum.channels(); + + //const int crop_size = param_.crop_size(); + //const Dtype scale = param_.scale(); + //const bool do_mirror = param_.mirror() && Rand(2); + //const bool has_mean_file = param_.has_mean_file(); + const bool has_uint8 = data.size() > 0; + //const bool has_mean_values = mean_values_.size() > 0; + int crop_x = param_.crop_size_x(); + int crop_y = param_.crop_size_y(); + + CHECK_GT(datum_channels, 0); + //CHECK_GE(datum_height, crop_size); + //CHECK_GE(datum_width, crop_size); + + //before any transformation, get the image from datum + cv::Mat img = cv::Mat::zeros(datum_height, datum_width, CV_8UC3); + cv::Mat mask_all, mask_miss; + if(mode >= 5){ + mask_miss = cv::Mat::ones(datum_height, datum_width, CV_8UC1); + } + if(mode == 6){ + mask_all = cv::Mat::zeros(datum_height, datum_width, CV_8UC1); + } + + int offset = img.rows * img.cols; + int dindex; + Dtype d_element; + for (int i = 0; i < img.rows; ++i) { + for (int j = 0; j < img.cols; ++j) { + cv::Vec3b& rgb = img.at(i, j); + for(int c = 0; c < 3; c++){ + dindex = c*offset + i*img.cols + j; + if (has_uint8) + d_element = static_cast(static_cast(data[dindex])); + else + d_element = datum.float_data(dindex); + rgb[c] = d_element; + } + + if(mode >= 5){ + dindex = 4*offset + i*img.cols + j; + if (has_uint8) + d_element = static_cast(static_cast(data[dindex])); + else + d_element = datum.float_data(dindex); + if (round(d_element/255)!=1 && round(d_element/255)!=0){ + std::cout << d_element << " " << round(d_element/255) << std::endl; + } + mask_miss.at(i, j) = d_element; //round(d_element/255); + } + + if(mode == 6){ + dindex = 5*offset + i*img.cols + j; + if (has_uint8) + d_element = static_cast(static_cast(data[dindex])); + else + d_element = datum.float_data(dindex); + mask_all.at(i, j) = d_element; + } + } + } + + //testing image + //imshow("mask_miss",mask_miss); + //imshow("mask_all",mask_all); + // if(mode >= 5){ + // cv::Mat erosion_dst; + // int erosion_size = 1; + // mask_miss = 1.0/255 *mask_miss; + // cv::Mat element = getStructuringElement( MORPH_ELLIPSE, + // cv::Size( 2*erosion_size + 1, 2*erosion_size+1 ), + // cv::Point( erosion_size, erosion_size ) ); + // erode( mask_miss, erosion_dst, element ); + // erosion_dst = 255 *erosion_dst; + // imshow( "Erosion Demo", erosion_dst ); + // } + + + //color, contract + if(param_.do_clahe()) + clahe(img, clahe_tileSize, clahe_clipLimit); + if(param_.gray() == 1){ + cv::cvtColor(img, img, CV_BGR2GRAY); + cv::cvtColor(img, img, CV_GRAY2BGR); + } + + int offset3 = 3 * offset; + int offset1 = datum_width; + int stride = param_.stride(); + ReadMetaData(meta, data, offset3, offset1); + if(param_.transform_body_joint()) // we expect to transform body joints, and not to transform hand joints + TransformMetaJoints(meta); + + //visualize original + if(0 && param_.visualize()) + visualize(img, meta, as); + + //Start transforming + cv::Mat img_aug = cv::Mat::zeros(crop_y, crop_x, CV_8UC3); + cv::Mat mask_miss_aug, mask_all_aug ; + //cv::Mat mask_miss_aug = cv::Mat::zeros(crop_y, crop_x, CV_8UC1); + //cv::Mat mask_all_aug = cv::Mat::zeros(crop_y, crop_x, CV_8UC1); + cv::Mat img_temp, img_temp2, img_temp3; //size determined by scale + // We only do random transform as augmentation when training. + if (phase_ == TRAIN) { + as.scale = augmentation_scale(img, img_temp, mask_miss, mask_all, meta, mode); + //LOG(INFO) << meta.joint_self.joints.size(); + //LOG(INFO) << meta.joint_self.joints[0]; + as.degree = augmentation_rotate(img_temp, img_temp2, mask_miss, mask_all, meta, mode); + //LOG(INFO) << meta.joint_self.joints.size(); + //LOG(INFO) << meta.joint_self.joints[0]; + if(0 && param_.visualize()) + visualize(img_temp2, meta, as); + as.crop = augmentation_croppad(img_temp2, img_temp3, mask_miss, mask_miss_aug, mask_all, mask_all_aug, meta, mode); + //LOG(INFO) << meta.joint_self.joints.size(); + //LOG(INFO) << meta.joint_self.joints[0]; + if(0 && param_.visualize()) + visualize(img_temp3, meta, as); + as.flip = augmentation_flip(img_temp3, img_aug, mask_miss_aug, mask_all_aug, meta, mode); + //LOG(INFO) << meta.joint_self.joints.size(); + //LOG(INFO) << meta.joint_self.joints[0]; + if(param_.visualize()) + visualize(img_aug, meta, as); + + // imshow("img_aug", img_aug); + // cv::Mat label_map = mask_miss_aug; + // applyColorMap(label_map, label_map, COLORMAP_JET); + // addWeighted(label_map, 0.5, img_aug, 0.5, 0.0, label_map); + // imshow("mask_miss_aug", label_map); + + if (mode > 4){ + resize(mask_miss_aug, mask_miss_aug, cv::Size(), 1.0/stride, 1.0/stride, cv::INTER_CUBIC); + resize(mask_all_aug, mask_all_aug, cv::Size(), 1.0/stride, 1.0/stride, cv::INTER_CUBIC); + } + } + else { + img_aug = img.clone(); + as.scale = 1; + as.crop = cv::Size(); + as.flip = 0; + as.degree = 0; + } + //LOG(INFO) << "scale: " << as.scale << "; crop:(" << as.crop.width << "," << as.crop.height + // << "); flip:" << as.flip << "; degree: " << as.degree; + + //copy transformed img (img_aug) into transformed_data, do the mean-subtraction here + offset = img_aug.rows * img_aug.cols; + int rezX = img_aug.cols; + int rezY = img_aug.rows; + int grid_x = rezX / stride; + int grid_y = rezY / stride; + int channelOffset = grid_y * grid_x; + + for (int i = 0; i < img_aug.rows; ++i) { + for (int j = 0; j < img_aug.cols; ++j) { + cv::Vec3b& rgb = img_aug.at(i, j); + transformed_data[0*offset + i*img_aug.cols + j] = (rgb[0] - 128)/256.0; + transformed_data[1*offset + i*img_aug.cols + j] = (rgb[1] - 128)/256.0; + transformed_data[2*offset + i*img_aug.cols + j] = (rgb[2] - 128)/256.0; + } + } + + // label size is image size/ stride + for (int g_y = 0; g_y < grid_y; g_y++){ + for (int g_x = 0; g_x < grid_x; g_x++){ + for (int i = 0; i < np; i++){ + // To do + // if (mode = 4){ + // transformed_label[i*channelOffset + g_y*grid_x + g_x] = 1; + // } + if(mode > 4){ + float weight = float(mask_miss_aug.at(g_y, g_x)) /255; //mask_miss_aug.at(i, j); + if (meta.joint_self.isVisible[i] != 3){ + transformed_label[i*channelOffset + g_y*grid_x + g_x] = weight; + } + else{ + transformed_label[i*channelOffset + g_y*grid_x + g_x] = 0; + } + } + } + // background channel + //To do: if (mode = 4){ + if(mode == 5){ + transformed_label[np*channelOffset + g_y*grid_x + g_x] = float(mask_miss_aug.at(g_y, g_x)) /255; + } + if(mode > 5){ + transformed_label[np*channelOffset + g_y*grid_x + g_x] = 1; + transformed_label[(2*np+1)*channelOffset + g_y*grid_x + g_x] = float(mask_all_aug.at(g_y, g_x)) /255; + } + } + } + + //putGaussianMaps(transformed_data + 3*offset, meta.objpos, 1, img_aug.cols, img_aug.rows, param_.sigma_center()); + //LOG(INFO) << "image transformation done!"; + generateLabelMap(transformed_label, img_aug, meta); + + //starts to visualize everything (transformed_data in 4 ch, label) fed into conv1 + //if(param_.visualize()){ + //dumpEverything(transformed_data, transformed_label, meta); + //} +} + +// include mask_miss +template +float DataTransformer::augmentation_scale(cv::Mat& img_src, cv::Mat& img_temp, cv::Mat& mask_miss, cv::Mat& mask_all, MetaData& meta, int mode) { + float dice = static_cast (rand()) / static_cast (RAND_MAX); //[0,1] + float scale_multiplier; + //float scale = (param_.scale_max() - param_.scale_min()) * dice + param_.scale_min(); //linear shear into [scale_min, scale_max] + if(dice > param_.scale_prob()) { + img_temp = img_src.clone(); + scale_multiplier = 1; + } + else { + float dice2 = static_cast (rand()) / static_cast (RAND_MAX); //[0,1] + scale_multiplier = (param_.scale_max() - param_.scale_min()) * dice2 + param_.scale_min(); //linear shear into [scale_min, scale_max] + } + float scale_abs = param_.target_dist()/meta.scale_self; + //LOG(INFO) << "scale_abs: " << scale_abs; + if (scale_abs > 3.0){ + //scale_abs = std::min(scale_abs/2, float(3.0)); + //std::cout << "scale_abs: " << scale_abs << std::endl; + } + + float scale = scale_abs * scale_multiplier; + resize(img_src, img_temp, cv::Size(), scale, scale, cv::INTER_CUBIC); + if(mode>4){ + resize(mask_miss, mask_miss, cv::Size(), scale, scale, cv::INTER_CUBIC); + } + if(mode>5){ + resize(mask_all, mask_all, cv::Size(), scale, scale, cv::INTER_CUBIC); + } + + //modify meta data + meta.objpos *= scale; + for(int i=0; i +cv::Size DataTransformer::augmentation_croppad(cv::Mat& img_src, cv::Mat& img_dst, cv::Mat& mask_miss, cv::Mat& mask_miss_aug, cv::Mat& mask_all, cv::Mat& mask_all_aug, MetaData& meta, int mode) { + float dice_x = static_cast (rand()) / static_cast (RAND_MAX); //[0,1] + float dice_y = static_cast (rand()) / static_cast (RAND_MAX); //[0,1] + int crop_x = param_.crop_size_x(); + int crop_y = param_.crop_size_y(); + + float x_offset = int((dice_x - 0.5) * 2 * param_.center_perterb_max()); + float y_offset = int((dice_y - 0.5) * 2 * param_.center_perterb_max()); + + //LOG(INFO) << "cv::Size of img_temp is " << img_temp.cols << " " << img_temp.rows; + //LOG(INFO) << "ROI is " << x_offset << " " << y_offset << " " << min(800, img_temp.cols) << " " << min(256, img_temp.rows); + cv::Point2i center = meta.objpos + cv::Point2f(x_offset, y_offset); + int offset_left = -(center.x - (crop_x/2)); + int offset_up = -(center.y - (crop_y/2)); + // int to_pad_right = std::max(center.x + (crop_x - crop_x/2) - img_src.cols, 0); + // int to_pad_down = std::max(center.y + (crop_y - crop_y/2) - img_src.rows, 0); + + img_dst = cv::Mat::zeros(crop_y, crop_x, CV_8UC3) + cv::Scalar(128,128,128); + mask_miss_aug = cv::Mat::zeros(crop_y, crop_x, CV_8UC1) + cv::Scalar(255); //cv::Scalar(1); + mask_all_aug = cv::Mat::zeros(crop_y, crop_x, CV_8UC1); + for(int i=0;i(i,j) = img_src.at(coord_y_on_img, coord_x_on_img); + if(mode>4){ + mask_miss_aug.at(i,j) = mask_miss.at(coord_y_on_img, coord_x_on_img); + } + if(mode>5){ + mask_all_aug.at(i,j) = mask_all.at(coord_y_on_img, coord_x_on_img); + } + } + } + } + + //modify meta data + cv::Point2f offset(offset_left, offset_up); + meta.objpos += offset; + for(int i=0; i +bool DataTransformer::augmentation_flip(cv::Mat& img_src, cv::Mat& img_aug, cv::Mat& mask_miss, cv::Mat& mask_all, MetaData& meta, int mode) { + bool doflip; + if(param_.aug_way() == "rand"){ + float dice = static_cast (rand()) / static_cast (RAND_MAX); + doflip = (dice <= param_.flip_prob()); + } + else if(param_.aug_way() == "table"){ + doflip = (aug_flips[meta.write_number][meta.epoch % param_.num_total_augs()] == 1); + } + else { + doflip = 0; + LOG(INFO) << "Unhandled exception!!!!!!"; + } + + if(doflip){ + flip(img_src, img_aug, 1); + int w = img_src.cols; + if(mode>4){ + flip(mask_miss, mask_miss, 1); + } + if(mode>5){ + flip(mask_all, mask_all, 1); + } + meta.objpos.x = w - 1 - meta.objpos.x; + for(int i=0; i +float DataTransformer::augmentation_rotate(cv::Mat& img_src, cv::Mat& img_dst, cv::Mat& mask_miss, cv::Mat& mask_all, MetaData& meta, int mode) { + + float degree; + if(param_.aug_way() == "rand"){ + float dice = static_cast (rand()) / static_cast (RAND_MAX); + degree = (dice - 0.5) * 2 * param_.max_rotate_degree(); + } + else if(param_.aug_way() == "table"){ + degree = aug_degs[meta.write_number][meta.epoch % param_.num_total_augs()]; + } + else { + degree = 0; + LOG(INFO) << "Unhandled exception!!!!!!"; + } + + cv::Point2f center(img_src.cols/2.0, img_src.rows/2.0); + cv::Mat R = cv::getRotationMatrix2D(center, degree, 1.0); + cv::Rect bbox = cv::RotatedRect(center, img_src.size(), degree).boundingRect(); + // adjust transformation matrix + R.at(0,2) += bbox.width/2.0 - center.x; + R.at(1,2) += bbox.height/2.0 - center.y; + //LOG(INFO) << "R=[" << R.at(0,0) << " " << R.at(0,1) << " " << R.at(0,2) << ";" + // << R.at(1,0) << " " << R.at(1,1) << " " << R.at(1,2) << "]"; + cv::warpAffine(img_src, img_dst, R, bbox.size(), cv::INTER_CUBIC, cv::BORDER_CONSTANT, cv::Scalar(128,128,128)); + if(mode >4){ + cv::warpAffine(mask_miss, mask_miss, R, bbox.size(), cv::INTER_CUBIC, cv::BORDER_CONSTANT, cv::Scalar(255)); //cv::Scalar(1)); + } + if(mode >5){ + cv::warpAffine(mask_all, mask_all, R, bbox.size(), cv::INTER_CUBIC, cv::BORDER_CONSTANT, cv::Scalar(0)); + } + + //adjust meta data + RotatePoint(meta.objpos, R); + for(int i=0; i +float DataTransformer::augmentation_scale(cv::Mat& img_src, cv::Mat& img_temp, MetaData& meta) { + float dice = static_cast (rand()) / static_cast (RAND_MAX); //[0,1] + float scale_multiplier; + //float scale = (param_.scale_max() - param_.scale_min()) * dice + param_.scale_min(); //linear shear into [scale_min, scale_max] + if(dice > param_.scale_prob()) { + img_temp = img_src.clone(); + scale_multiplier = 1; + } + else { + float dice2 = static_cast (rand()) / static_cast (RAND_MAX); //[0,1] + scale_multiplier = (param_.scale_max() - param_.scale_min()) * dice2 + param_.scale_min(); //linear shear into [scale_min, scale_max] + } + float scale_abs = param_.target_dist()/meta.scale_self; + float scale = scale_abs * scale_multiplier; + resize(img_src, img_temp, cv::Size(), scale, scale, cv::INTER_CUBIC); + //modify meta data + meta.objpos *= scale; + for(int i=0; i +bool DataTransformer::onPlane(cv::Point p, cv::Size img_size) { + if(p.x < 0 || p.y < 0) return false; + if(p.x >= img_size.width || p.y >= img_size.height) return false; + return true; +} + +template +cv::Size DataTransformer::augmentation_croppad(cv::Mat& img_src, cv::Mat& img_dst, MetaData& meta) { + float dice_x = static_cast (rand()) / static_cast (RAND_MAX); //[0,1] + float dice_y = static_cast (rand()) / static_cast (RAND_MAX); //[0,1] + int crop_x = param_.crop_size_x(); + int crop_y = param_.crop_size_y(); + + float x_offset = int((dice_x - 0.5) * 2 * param_.center_perterb_max()); + float y_offset = int((dice_y - 0.5) * 2 * param_.center_perterb_max()); + + //LOG(INFO) << "cv::Size of img_temp is " << img_temp.cols << " " << img_temp.rows; + //LOG(INFO) << "ROI is " << x_offset << " " << y_offset << " " << min(800, img_temp.cols) << " " << min(256, img_temp.rows); + cv::Point2i center = meta.objpos + cv::Point2f(x_offset, y_offset); + int offset_left = -(center.x - (crop_x/2)); + int offset_up = -(center.y - (crop_y/2)); + // int to_pad_right = std::max(center.x + (crop_x - crop_x/2) - img_src.cols, 0); + // int to_pad_down = std::max(center.y + (crop_y - crop_y/2) - img_src.rows, 0); + + img_dst = cv::Mat::zeros(crop_y, crop_x, CV_8UC3) + cv::Scalar(128,128,128); + for(int i=0;i(i,j) = img_src.at(coord_y_on_img, coord_x_on_img); + } + } + } + + //modify meta data + cv::Point2f offset(offset_left, offset_up); + meta.objpos += offset; + for(int i=0; i +void DataTransformer::swapLeftRight(Joints& j) { + //assert(j.joints.size() == 9 && j.joints.size() == 14 && j.isVisible.size() == 27 && j.isVisible.size() == 28 && j.isVisible.size() == 29 && j.isVisible.size() == 33 && j.isVisible.size() == 34 && j.isVisible.size() == 43); + //MPII R leg: 0(ankle), 1(knee), 2(hip) + // L leg: 5(ankle), 4(knee), 3(hip) + // R arms: 10(wrist), 11(elbow), 12(shoulder) + // L arms: 15(wrist), 14(elbow), 13(shoulder) + if(np == 9){ + int right[4] = {1,2,3,7}; + int left[4] = {4,5,6,8}; + for(int i=0; i<4; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 14){ + int right[6] = {3,4,5,9,10,11}; //1-index + int left[6] = {6,7,8,12,13,14}; //1-index + for(int i=0; i<6; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 27){ + int right[12] = {3,4,5,9,10,11,15,18,19,20,24,25}; //1-index + int left[12] = {6,7,8,12,13,14,16,21,22,23,26,27}; //1-index + for(int i=0; i<12; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 28){ + int right[11] = {3,4,5,9,10,11,18,19,20,24,25}; //1-index + int left[11] = {6,7,8,12,13,14,21,22,23,26,27}; //1-index + for(int i=0; i<11; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 29){ + int right[12] = {3,4,5,9,10,11,15,18,19,20,24,25}; + int left[12] = {6,7,8,12,13,14,16,21,22,23,26,27}; + for(int i=0; i<12; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 33){ + int right[15] = {3,4,5, 9,10,11,15,17,20,21,22,26,27,30,32}; + int left[15] = {6,7,8,12,13,14,16,18,23,24,25,28,29,31,33}; + for(int i=0; i<15; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 34){ + int right[14] = {3,4,5, 9,10,11,15,17,20,21,22,26,27,32}; + int left[14] = {6,7,8,12,13,14,16,18,23,24,25,28,29,33}; + for(int i=0; i<14; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 36){ + int right[16] = {1,3,5,7,10,11,12,13,14,15,23,24,25,26,32,33}; + int left[16] = {2,4,6,8,17,16,19,18,21,20,28,27,30,29,35,34}; + for(int i=0; i<16; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 37){ + int right[8] = {3,4,5, 9,10,11,15,17}; + int left[8] = {6,7,8,12,13,14,16,18}; + for(int i=0; i<8; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 43){ + int right[18] = {3,4,5,9,10,11,18,19,20,21,22,23,32,33,34,35,36,37}; + int left[18] = {6,7,8,12,13,14,24,25,26,27,28,29,38,39,40,41,42,43}; + for(int i=0; i<18; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 52){ + int right[24] = {3,4,5, 9,10,11,15,17,19,20,21,22,23,24,31,32,33,34,35,36,37,38,49,51}; + int left[24] = {6,7,8,12,13,14,16,18,25,26,27,28,29,30,39,40,41,42,43,44,45,46,50,52}; + for(int i=0; i<24; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 56){ + //int right[26] = {3,4,5, 9,10,11,15,17,19,20,21,22,23,24,31,32,33,34,35,36,37,38,49,50,53,55}; + //int left[26] = {6,7,8,12,13,14,16,18,25,26,27,28,29,30,39,40,41,42,43,44,45,46,51,52,54,56}; + //for(int i=0; i<26; i++){ + int right[8] = {3,4,5, 9,10,11,15,17}; + int left[8] = {6,7,8,12,13,14,16,18}; + for(int i=0; i<8; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 75){ + int right[35] = {3,4,5, 9,10,11,15,17,19,20,21,22,23,24,25,26,27,37,38,39,40,41,42,43,44,45,46,47,48,64,65,66,70,71,72}; + int left[35] = {6,7,8,12,13,14,16,18,28,29,30,31,32,33,34,35,36,49,50,51,52,53,54,55,56,57,58,59,60,67,68,69,73,74,75}; + for(int i=0; i<35; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } + else if(np == 78){ + int right[36] = {1,3,5,7, 9,11,14,15,16,17,18,19,20,21,22,23,35,36,37,38,39,40,41,42,52,53,54,55,56,57,65,66,67,68,74,75}; + int left[36] = {2,4,6,8,10,12,25,24,27,26,29,28,31,30,33,32,44,43,46,45,48,47,50,49,59,58,61,60,63,62,70,69,72,71,77,76}; + for(int i=0; i<36; i++){ + int ri = right[i] - 1; + int li = left[i] - 1; + cv::Point2f temp = j.joints[ri]; + j.joints[ri] = j.joints[li]; + j.joints[li] = temp; + int temp_v = j.isVisible[ri]; + j.isVisible[ri] = j.isVisible[li]; + j.isVisible[li] = temp_v; + } + } +} + +template +bool DataTransformer::augmentation_flip(cv::Mat& img_src, cv::Mat& img_aug, MetaData& meta) { + bool doflip; + if(param_.aug_way() == "rand"){ + float dice = static_cast (rand()) / static_cast (RAND_MAX); + doflip = (dice <= param_.flip_prob()); + } + else if(param_.aug_way() == "table"){ + doflip = (aug_flips[meta.write_number][meta.epoch % param_.num_total_augs()] == 1); + } + else { + doflip = 0; + LOG(INFO) << "Unhandled exception!!!!!!"; + } + + if(doflip){ + flip(img_src, img_aug, 1); + int w = img_src.cols; + + meta.objpos.x = w - 1 - meta.objpos.x; + for(int i=0; i +void DataTransformer::RotatePoint(cv::Point2f& p, cv::Mat R){ + cv::Mat point(3,1,CV_64FC1); + point.at(0,0) = p.x; + point.at(1,0) = p.y; + point.at(2,0) = 1; + cv::Mat new_point = R * point; + p.x = new_point.at(0,0); + p.y = new_point.at(1,0); +} + +template +float DataTransformer::augmentation_rotate(cv::Mat& img_src, cv::Mat& img_dst, MetaData& meta) { + + float degree; + if(param_.aug_way() == "rand"){ + float dice = static_cast (rand()) / static_cast (RAND_MAX); + degree = (dice - 0.5) * 2 * param_.max_rotate_degree(); + } + else if(param_.aug_way() == "table"){ + degree = aug_degs[meta.write_number][meta.epoch % param_.num_total_augs()]; + } + else { + degree = 0; + LOG(INFO) << "Unhandled exception!!!!!!"; + } + + cv::Point2f center(img_src.cols/2.0, img_src.rows/2.0); + cv::Mat R = cv::getRotationMatrix2D(center, degree, 1.0); + cv::Rect bbox = cv::RotatedRect(center, img_src.size(), degree).boundingRect(); + // adjust transformation matrix + R.at(0,2) += bbox.width/2.0 - center.x; + R.at(1,2) += bbox.height/2.0 - center.y; + //LOG(INFO) << "R=[" << R.at(0,0) << " " << R.at(0,1) << " " << R.at(0,2) << ";" + // << R.at(1,0) << " " << R.at(1,1) << " " << R.at(1,2) << "]"; + warpAffine(img_src, img_dst, R, bbox.size(), cv::INTER_CUBIC, cv::BORDER_CONSTANT, cv::Scalar(128,128,128)); + + //adjust meta data + RotatePoint(meta.objpos, R); + for(int i=0; i +void DataTransformer::putGaussianMaps(Dtype* entry, cv::Point2f center, int stride, int grid_x, int grid_y, float sigma){ + //LOG(INFO) << "putGaussianMaps here we start for " << center.x << " " << center.y; + float start = stride/2.0 - 0.5; //0 if stride = 1, 0.5 if stride = 2, 1.5 if stride = 4, ... + for (int g_y = 0; g_y < grid_y; g_y++){ + for (int g_x = 0; g_x < grid_x; g_x++){ + float x = start + g_x * stride; + float y = start + g_y * stride; + float d2 = (x-center.x)*(x-center.x) + (y-center.y)*(y-center.y); + float exponent = d2 / 2.0 / sigma / sigma; + if(exponent > 4.6052){ //ln(100) = -ln(1%) + continue; + } + entry[g_y*grid_x + g_x] += exp(-exponent); + if(entry[g_y*grid_x + g_x] > 1) + entry[g_y*grid_x + g_x] = 1; + } + } +} + +template +void DataTransformer::putVecPeaks(Dtype* entryX, Dtype* entryY, cv::Mat& count, cv::Point2f centerA, cv::Point2f centerB, int stride, int grid_x, int grid_y, float sigma, int thre){ + //int thre = 4; + centerB = centerB*0.125; + centerA = centerA*0.125; + cv::Point2f bc = centerB - centerA; + float norm_bc = sqrt(bc.x*bc.x + bc.y*bc.y); + bc.x = bc.x /norm_bc; + bc.y = bc.y /norm_bc; + + for(int j=0;j<3;j++){ + //cv::Point2f center = centerB*0.5 + centerA*0.5; + cv::Point2f center = centerB*0.5*j + centerA*0.5*(2-j); + + int min_x = std::max( int(floor(center.x-thre)), 0); + int max_x = std::min( int(ceil(center.x+thre)), grid_x); + + int min_y = std::max( int(floor(center.y-thre)), 0); + int max_y = std::min( int(ceil(center.y+thre)), grid_y); + + for (int g_y = min_y; g_y < max_y; g_y++){ + for (int g_x = min_x; g_x < max_x; g_x++){ + float dist = (g_x-center.x)*(g_x-center.x) + (g_y-center.y)*(g_y-center.y); + if(dist <= thre){ + int cnt = count.at(g_y, g_x); + //LOG(INFO) << "putVecMaps here we start for " << g_x << " " << g_y; + if (cnt == 0){ + entryX[g_y*grid_x + g_x] = bc.x; + entryY[g_y*grid_x + g_x] = bc.y; + } + else{ + entryX[g_y*grid_x + g_x] = (entryX[g_y*grid_x + g_x]*cnt + bc.x) / (cnt + 1); + entryY[g_y*grid_x + g_x] = (entryY[g_y*grid_x + g_x]*cnt + bc.y) / (cnt + 1); + count.at(g_y, g_x) = cnt + 1; + } + } + } + } + } +} + +template +void DataTransformer::putVecMaps(Dtype* entryX, Dtype* entryY, cv::Mat& count, cv::Point2f centerA, cv::Point2f centerB, int stride, int grid_x, int grid_y, float sigma, int thre){ + //int thre = 4; + centerB = centerB*0.125; + centerA = centerA*0.125; + cv::Point2f bc = centerB - centerA; + int min_x = std::max( int(round(std::min(centerA.x, centerB.x)-thre)), 0); + int max_x = std::min( int(round(std::max(centerA.x, centerB.x)+thre)), grid_x); + + int min_y = std::max( int(round(std::min(centerA.y, centerB.y)-thre)), 0); + int max_y = std::min( int(round(std::max(centerA.y, centerB.y)+thre)), grid_y); + + float norm_bc = sqrt(bc.x*bc.x + bc.y*bc.y); + bc.x = bc.x /norm_bc; + bc.y = bc.y /norm_bc; + + // float x_p = (centerA.x + centerB.x) / 2; + // float y_p = (centerA.y + centerB.y) / 2; + // float angle = atan2f(centerB.y - centerA.y, centerB.x - centerA.x); + // float sine = sinf(angle); + // float cosine = cosf(angle); + // float a_sqrt = (centerA.x - x_p) * (centerA.x - x_p) + (centerA.y - y_p) * (centerA.y - y_p); + // float b_sqrt = 10; //fixed + + for (int g_y = min_y; g_y < max_y; g_y++){ + for (int g_x = min_x; g_x < max_x; g_x++){ + cv::Point2f ba; + ba.x = g_x - centerA.x; + ba.y = g_y - centerA.y; + float dist = std::abs(ba.x*bc.y -ba.y*bc.x); + + // float A = cosine * (g_x - x_p) + sine * (g_y - y_p); + // float B = sine * (g_x - x_p) - cosine * (g_y - y_p); + // float judge = A * A / a_sqrt + B * B / b_sqrt; + + if(dist <= thre){ + //if(judge <= 1){ + int cnt = count.at(g_y, g_x); + //LOG(INFO) << "putVecMaps here we start for " << g_x << " " << g_y; + if (cnt == 0){ + entryX[g_y*grid_x + g_x] = bc.x; + entryY[g_y*grid_x + g_x] = bc.y; + } + else{ + entryX[g_y*grid_x + g_x] = (entryX[g_y*grid_x + g_x]*cnt + bc.x) / (cnt + 1); + entryY[g_y*grid_x + g_x] = (entryY[g_y*grid_x + g_x]*cnt + bc.y) / (cnt + 1); + count.at(g_y, g_x) = cnt + 1; + } + } + + } + } +} + +template +void DataTransformer::generateLabelMap(Dtype* transformed_label, cv::Mat& img_aug, MetaData meta) { + int rezX = img_aug.cols; + int rezY = img_aug.rows; + int stride = param_.stride(); + int grid_x = rezX / stride; + int grid_y = rezY / stride; + int channelOffset = grid_y * grid_x; + int mode = 6; // TO DO: make this as a parameter + + // TO DO: in transform_nv, generate the weight Map for MPI images + // clear out transformed_label, it may remain things for last batch + // for (int g_y = 0; g_y < grid_y; g_y++){ + // for (int g_x = 0; g_x < grid_x; g_x++){ + // for (int i = 0; i < np; i++){ + // if (meta.joint_self.isVisible[i] == 3){ + // transformed_label[i*channelOffset + g_y*grid_x + g_x] = 0; + // } + // else{ + // transformed_label[i*channelOffset + g_y*grid_x + g_x] = 1; + // } + // } + // //background channel weight map + // if (meta.joint_self.isVisible[0] == 3){ + // transformed_label[np*channelOffset + g_y*grid_x + g_x] = 0; + // } + // else{ + // transformed_label[np*channelOffset + g_y*grid_x + g_x] = 1; + // } + // } + // } + + for (int g_y = 0; g_y < grid_y; g_y++){ + for (int g_x = 0; g_x < grid_x; g_x++){ + for (int i = np+1; i < 2*(np+1); i++){ + if (mode == 6 && i == (2*np + 1)) + continue; + transformed_label[i*channelOffset + g_y*grid_x + g_x] = 0; + } + } + } + + //LOG(INFO) << "label cleaned"; + + if (np == 37){ + for (int i = 0; i < 18; i++){ + cv::Point2f center = meta.joint_self.joints[i]; + if(meta.joint_self.isVisible[i] <= 1){ + putGaussianMaps(transformed_label + (i+np+1)*channelOffset, center, param_.stride(), + grid_x, grid_y, param_.sigma()); //self + } + for(int j = 0; j < meta.numOtherPeople; j++){ //for every other person + cv::Point2f center = meta.joint_others[j].joints[i]; + if(meta.joint_others[j].isVisible[i] <= 1){ + putGaussianMaps(transformed_label + (i+np+1)*channelOffset, center, param_.stride(), + grid_x, grid_y, param_.sigma()); + } + } + } + + int mid_1[19] = {2, 9, 10, 2, 12, 13, 2, 3, 4, 3, 2, 6, 7, 6, 2, 1, 1, 15, 16}; + int mid_2[19] = {9, 10, 11, 12, 13, 14, 3, 4, 5, 17, 6, 7, 8, 18, 1, 15, 16, 17, 18}; + + for(int i=0;i<19;i++){ + for (int j=1;j<=3;j++){ + Joints jo = meta.joint_self; + if(jo.isVisible[mid_1[i]-1]<=1 && jo.isVisible[mid_2[i]-1]<=1){ + cv::Point2f center = jo.joints[mid_1[i]-1]*(1-j*0.25) + jo.joints[mid_2[i]-1]*j*0.25; + putGaussianMaps(transformed_label + (np+19+i)*channelOffset, center, param_.stride(), + grid_x, grid_y, param_.sigma()); //self + } + + for(int j = 0; j < meta.numOtherPeople; j++){ //for every other person + Joints jo2 = meta.joint_others[j]; + if(jo2.isVisible[mid_1[i]-1]<=1 && jo2.isVisible[mid_2[i]-1]<=1){ + cv::Point2f center = jo2.joints[mid_1[i]-1]*(1-j*0.25) + jo2.joints[mid_2[i]-1]*j*0.25; + putGaussianMaps(transformed_label + (np+19+i)*channelOffset, center, param_.stride(), + grid_x, grid_y, param_.sigma()); + } + } + } + } + + //put background channel + for (int g_y = 0; g_y < grid_y; g_y++){ + for (int g_x = 0; g_x < grid_x; g_x++){ + float maximum = 0; + //second background channel + for (int i = np+1; i < 2*np+1; i++){ + maximum = (maximum > transformed_label[i*channelOffset + g_y*grid_x + g_x]) ? maximum : transformed_label[i*channelOffset + g_y*grid_x + g_x]; + } + transformed_label[(2*np+1)*channelOffset + g_y*grid_x + g_x] = std::max(1.0-maximum, 0.0); + } + } + //LOG(INFO) << "background put"; + } + else if (np == 56){ + for (int i = 0; i < 18; i++){ + cv::Point2f center = meta.joint_self.joints[i]; + if(meta.joint_self.isVisible[i] <= 1){ + putGaussianMaps(transformed_label + (i+np+39)*channelOffset, center, param_.stride(), + grid_x, grid_y, param_.sigma()); //self + } + for(int j = 0; j < meta.numOtherPeople; j++){ //for every other person + cv::Point2f center = meta.joint_others[j].joints[i]; + if(meta.joint_others[j].isVisible[i] <= 1){ + putGaussianMaps(transformed_label + (i+np+39)*channelOffset, center, param_.stride(), + grid_x, grid_y, param_.sigma()); + } + } + } + + int mid_1[19] = {2, 9, 10, 2, 12, 13, 2, 3, 4, 3, 2, 6, 7, 6, 2, 1, 1, 15, 16}; + int mid_2[19] = {9, 10, 11, 12, 13, 14, 3, 4, 5, 17, 6, 7, 8, 18, 1, 15, 16, 17, 18}; + int thre = 1; + + for(int i=0;i<19;i++){ + // if (i>14){ + // thre = 1; + // } + cv::Mat count = cv::Mat::zeros(grid_y, grid_x, CV_8UC1); + Joints jo = meta.joint_self; + if(jo.isVisible[mid_1[i]-1]<=1 && jo.isVisible[mid_2[i]-1]<=1){ + //putVecPeaks + putVecMaps(transformed_label + (np+ 1+ 2*i)*channelOffset, transformed_label + (np+ 2+ 2*i)*channelOffset, + count, jo.joints[mid_1[i]-1], jo.joints[mid_2[i]-1], param_.stride(), grid_x, grid_y, param_.sigma(), thre); //self + } + + for(int j = 0; j < meta.numOtherPeople; j++){ //for every other person + Joints jo2 = meta.joint_others[j]; + if(jo2.isVisible[mid_1[i]-1]<=1 && jo2.isVisible[mid_2[i]-1]<=1){ + //putVecPeaks + putVecMaps(transformed_label + (np+ 1+ 2*i)*channelOffset, transformed_label + (np+ 2+ 2*i)*channelOffset, + count, jo2.joints[mid_1[i]-1], jo2.joints[mid_2[i]-1], param_.stride(), grid_x, grid_y, param_.sigma(), thre); //self + } + } + } + + //put background channel + for (int g_y = 0; g_y < grid_y; g_y++){ + for (int g_x = 0; g_x < grid_x; g_x++){ + float maximum = 0; + //second background channel + for (int i = np+39; i < np+57; i++){ + maximum = (maximum > transformed_label[i*channelOffset + g_y*grid_x + g_x]) ? maximum : transformed_label[i*channelOffset + g_y*grid_x + g_x]; + } + transformed_label[(2*np+1)*channelOffset + g_y*grid_x + g_x] = std::max(1.0-maximum, 0.0); + } + } + //LOG(INFO) << "background put"; + } + else{ + for (int i = 0; i < np; i++){ + //LOG(INFO) << i << meta.numOtherPeople; + cv::Point2f center = meta.joint_self.joints[i]; + if(meta.joint_self.isVisible[i] <= 1){ + putGaussianMaps(transformed_label + (i+np+1)*channelOffset, center, param_.stride(), + grid_x, grid_y, param_.sigma()); //self + } + //LOG(INFO) << "label put for" << i; + //plot others + for(int j = 0; j < meta.numOtherPeople; j++){ //for every other person + cv::Point2f center = meta.joint_others[j].joints[i]; + if(meta.joint_others[j].isVisible[i] <= 1){ + putGaussianMaps(transformed_label + (i+np+1)*channelOffset, center, param_.stride(), + grid_x, grid_y, param_.sigma()); + } + } + } + + //put background channel + if (mode != 6){ // mode = 6, use the mask_all as the background + for (int g_y = 0; g_y < grid_y; g_y++){ + for (int g_x = 0; g_x < grid_x; g_x++){ + if (meta.joint_self.isVisible[0] == 3){ + transformed_label[(2*np+1)*channelOffset + g_y*grid_x + g_x] = 0; + } + else{ + float maximum = 0; + //second background channel + for (int i = np+1; i < 2*np+1; i++){ + maximum = (maximum > transformed_label[i*channelOffset + g_y*grid_x + g_x]) ? maximum : transformed_label[i*channelOffset + g_y*grid_x + g_x]; + } + transformed_label[(2*np+1)*channelOffset + g_y*grid_x + g_x] = maximum; //std::max(1.0-maximum, 0.0); + } + } + } + } + //LOG(INFO) << "background put"; + } + + //visualize + if(1 && param_.visualize()){ + cv::Mat label_map; + for(int i = 0; i < 2*(np+1); i++){ + label_map = cv::Mat::zeros(grid_y, grid_x, CV_8UC1); + //int MPI_index = MPI_to_ours[i]; + //cv::Point2f center = meta.joint_self.joints[MPI_index]; + for (int g_y = 0; g_y < grid_y; g_y++){ + //printf("\n"); + for (int g_x = 0; g_x < grid_x; g_x++){ + label_map.at(g_y,g_x) = (int)(transformed_label[i*channelOffset + g_y*grid_x + g_x]*255); + //printf("%f ", transformed_label_entry[g_y*grid_x + g_x]*255); + } + } + resize(label_map, label_map, cv::Size(), stride, stride, cv::INTER_LINEAR); + applyColorMap(label_map, label_map, cv::COLORMAP_JET); + addWeighted(label_map, 0.5, img_aug, 0.5, 0.0, label_map); + + //center = center * (1.0/(float)param_.stride()); + //circle(label_map, center, 3, CV_RGB(255,0,255), -1); + char imagename [100]; + sprintf(imagename, "augment_%04d_label_part_%02d.jpg", meta.write_number, i); + //LOG(INFO) << "filename is " << imagename; + imwrite(imagename, label_map); + } + + // label_map = cv::Mat::zeros(grid_y, grid_x, CV_8UC1); + // for (int g_y = 0; g_y < grid_y; g_y++){ + // //printf("\n"); + // for (int g_x = 0; g_x < grid_x; g_x++){ + // label_map.at(g_y,g_x) = (int)(transformed_label[np*channelOffset + g_y*grid_x + g_x]*255); + // //printf("%f ", transformed_label_entry[g_y*grid_x + g_x]*255); + // } + // } + // resize(label_map, label_map, cv::Size(), stride, stride, cv::INTER_CUBIC); + // applyColorMap(label_map, label_map, cv::COLORMAP_JET); + // addWeighted(label_map, 0.5, img_aug, 0.5, 0.0, label_map); + + // for(int i=0;i +void DataTransformer::visualize(cv::Mat& img, MetaData meta, AugmentSelection as) { + //cv::Mat img_vis = cv::Mat::zeros(img.rows*2, img.cols, CV_8UC3); + //copy image content + // for (int i = 0; i < img.rows; ++i) { + // for (int j = 0; j < img.cols; ++j) { + // cv::Vec3b& rgb = img.at(i, j); + // cv::Vec3b& rgb_vis_upper = img_vis.at(i, j); + // rgb_vis_upper = rgb; + // } + // } + // for (int i = 0; i < img_aug.rows; ++i) { + // for (int j = 0; j < img_aug.cols; ++j) { + // cv::Vec3b& rgb_aug = img_aug.at(i, j); + // cv::Vec3b& rgb_vis_lower = img_vis.at(i + img.rows, j); + // rgb_vis_lower = rgb_aug; + // } + // } + cv::Mat img_vis = img.clone(); + static int counter = 0; + + rectangle(img_vis, meta.objpos-cv::Point2f(3,3), meta.objpos+cv::Point2f(3,3), CV_RGB(255,255,0), CV_FILLED); + for(int i=0;i +void DataTransformer::clahe(cv::Mat& bgr_image, int tileSize, int clipLimit) { + cv::Mat lab_image; + cvtColor(bgr_image, lab_image, CV_BGR2Lab); + + // Extract the L channel + std::vector lab_planes(3); + split(lab_image, lab_planes); // now we have the L image in lab_planes[0] + + // apply the CLAHE algorithm to the L channel + cv::Ptr clahe = cv::createCLAHE(clipLimit, cv::Size(tileSize, tileSize)); + //clahe->setClipLimit(4); + cv::Mat dst; + clahe->apply(lab_planes[0], dst); + + // Merge the the color planes back into an Lab image + dst.copyTo(lab_planes[0]); + merge(lab_planes, lab_image); + + // convert back to RGB + cv::Mat image_clahe; + cvtColor(lab_image, image_clahe, CV_Lab2BGR); + bgr_image = image_clahe.clone(); +} + +template +void DataTransformer::dumpEverything(Dtype* transformed_data, Dtype* transformed_label, MetaData meta){ + + char filename[100]; + sprintf(filename, "transformed_data_%04d_%02d", meta.annolist_index, meta.people_index); + std::ofstream myfile; + myfile.open(filename); + int data_length = param_.crop_size_y() * param_.crop_size_x() * 4; + + //LOG(INFO) << "before copy data: " << filename << " " << data_length; + for(int i = 0; i