// src/task_align.cc

#include "task_align.hh"
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/video.hpp>
#include <opencv2/core/utility.hpp>
#include <opencv2/core/ocl.hpp>
#include <cmath>
#include <cstdio>

using namespace focusstack;

// Square of a single-precision value.
static inline float sq(float x)
{
  return x * x;
}

// Set up a task that aligns `srccolor` (with grayscale companion `srcgray`)
// against `refcolor` / `refgray`.
//
//  - initial_guess:     optional task whose transformation seeds this one;
//                       registered as a dependency when given.
//  - stacked_transform: optional task whose result is combined with this one
//                       in task(). It is stored but deliberately NOT added to
//                       m_depends_on; task() waits on it explicitly.
//  - flags:             FocusStack::align_flags_t bit mask controlling the steps.
//
// All correction models start out as identity.
Task_Align::Task_Align(std::shared_ptr<ImgTask> refgray, std::shared_ptr<ImgTask> refcolor,
                       std::shared_ptr<ImgTask> srcgray, std::shared_ptr<ImgTask> srccolor,
                       std::shared_ptr<Task_Align> initial_guess,
                       std::shared_ptr<Task_Align> stacked_transform,
                       FocusStack::align_flags_t flags)
{
  const std::string srcname = srccolor->basename();
  m_filename = "aligned_" + srcname;
  m_name = "Align " + srcname + " to " + refcolor->basename();
  m_index = srccolor->index();

  // Inputs must be finished before this task may run.
  for (const auto &input : {refgray, refcolor, srcgray, srccolor})
  {
    m_depends_on.push_back(input);
  }
  if (initial_guess)
  {
    m_depends_on.push_back(initial_guess);
  }

  m_refgray = refgray;
  m_refcolor = refcolor;
  m_srcgray = srcgray;
  m_srccolor = srccolor;
  m_initial_guess = initial_guess;
  m_stacked_transform = stacked_transform;
  m_flags = flags;

  // Geometric transformation: 2x3 affine matrix, initially [1 0 0; 0 1 0].
  m_transformation = cv::Mat::eye(2, 3, CV_32F);

  // Contrast: column vector of [constant, x, x^2, y, y^2] factors,
  // initially a constant gain of 1.
  m_contrast = cv::Mat::zeros(5, 1, CV_32F);
  m_contrast.at<float>(0, 0) = 1.0f;

  // White balance: column vector of [bb, bc, gb, gc, rb, rc], i.e. a
  // brightness and a contrast term per channel. Contrast terms (odd rows)
  // start at 1, brightness terms (even rows) at 0.
  m_whitebalance = cv::Mat::zeros(6, 1, CV_32F);
  for (int row = 1; row < 6; row += 2)
  {
    m_whitebalance.at<float>(row, 0) = 1.0f;
  }
}

void Task_Align::task()
{
  if (m_refcolor == m_srccolor)
  {
    m_result = m_srccolor->img();
  }
  else
  {
    if (m_initial_guess)
    {
      m_initial_guess->m_transformation.copyTo(m_transformation);
    }

    // Mask off the reflected borders generated by Task_LoadImg.
    m_roi = m_srcgray->valid_area();

    // Perform low resolution initial geometric alignment
    match_transform(256, true);

    // Perform grayscale brightness alignment
    if (!(m_flags & FocusStack::ALIGN_NO_CONTRAST))
    {
      match_contrast();
    }

    // Perform color/whit balance alignment
    if (!(m_flags & FocusStack::ALIGN_NO_WHITEBALANCE) && m_srccolor->img().channels() == 3)
    {
      match_whitebalance();
    }

    // Finally, do the high resolution geometric alignment step
    if (m_flags & FocusStack::ALIGN_FULL_RESOLUTION)
    {
      int res = std::max(m_srccolor->img().cols, m_srccolor->img().rows);
      match_transform(res, false);
    }
    else
    {
      // By default limit image resolution used in alignment to 2k.
      // Because this uses subpixel positioning, higher resolution provides little benefit.
      match_transform(2048, false);
    }

    // The image is now aligned against the neighbour image.
    // Now we can compute the alignment against the global reference image.
    if (m_stacked_transform)
    {
      // At this point we need to know the stacked transform to apply it to the final image.
      // Not putting this in m_depends_on gives better parallelism in the alignment phase.
      m_stacked_transform->wait();
      cv::Mat tmp = m_stacked_transform->m_transformation.clone();
      tmp.resize(3, 0.0f);
      tmp.at<float>(2, 2) = 1.0f;
      m_transformation(cv::Rect(0, 0, 3, 2)) *= tmp;

      // For contrast the stacking is not exact as x^3 and y^3 terms are not modelled,
      // but close enough.
      cv::Mat c = m_contrast.clone();
      m_contrast *= m_stacked_transform->m_contrast.at<float>(0);
      m_contrast.at<float>(1) += m_stacked_transform->m_contrast.at<float>(1) * c.at<float>(0);
      m_contrast.at<float>(2) += m_stacked_transform->m_contrast.at<float>(2) * c.at<float>(0);
      m_contrast.at<float>(2) += m_stacked_transform->m_contrast.at<float>(1) * c.at<float>(1);
      m_contrast.at<float>(3) += m_stacked_transform->m_contrast.at<float>(3) * c.at<float>(0);
      m_contrast.at<float>(4) += m_stacked_transform->m_contrast.at<float>(4) * c.at<float>(0);
      m_contrast.at<float>(4) += m_stacked_transform->m_contrast.at<float>(3) * c.at<float>(3);

      // For white balance, scale the brightness terms and multiply the contrast terms.
      m_whitebalance.at<float>(0) += m_stacked_transform->m_whitebalance.at<float>(0) * m_whitebalance.at<float>(1);
      m_whitebalance.at<float>(1) *= m_stacked_transform->m_whitebalance.at<float>(1);
      m_whitebalance.at<float>(2) += m_stacked_transform->m_whitebalance.at<float>(2) * m_whitebalance.at<float>(3);
      m_whitebalance.at<float>(3) *= m_stacked_transform->m_whitebalance.at<float>(3);
      m_whitebalance.at<float>(4) += m_stacked_transform->m_whitebalance.at<float>(4) * m_whitebalance.at<float>(5);
      m_whitebalance.at<float>(5) *= m_stacked_transform->m_whitebalance.at<float>(5);
    }

    if (m_logger->get_level() <= Logger::LOG_VERBOSE)
    {
      std::string name = basename();
      m_logger->verbose("%s transform: [%0.3f %0.3f %0.3f; %0.3f %0.3f %0.3f]\n",
                  name.c_str(),
                  m_transformation.at<float>(0, 0), m_transformation.at<float>(0, 1), m_transformation.at<float>(0, 2),
                  m_transformation.at<float>(1, 0), m_transformation.at<float>(1, 1), m_transformation.at<float>(1, 2));
    }

    apply_transform(m_srccolor->img(), m_result, false);

    if (!(m_flags & FocusStack::ALIGN_NO_CONTRAST) || !(m_flags & FocusStack::ALIGN_NO_WHITEBALANCE))
    {
      if (m_logger->get_level() <= Logger::LOG_VERBOSE)
      {
        std::string name = basename();
        m_logger->verbose("%s contrast map: C:%0.3f, X:%0.3f, X2:%0.3f, Y:%0.3f, Y2:%0.3f\n",
                    name.c_str(),
                    m_contrast.at<float>(0), m_contrast.at<float>(1), m_contrast.at<float>(2),
                    m_contrast.at<float>(3), m_contrast.at<float>(4));

        m_logger->verbose("%s whitebalance: R:x%0.3f%+0.1f, G:x%0.3f%+0.1f, B:x%0.3f%+0.1f\n",
                    name.c_str(),
                    m_whitebalance.at<float>(5), m_whitebalance.at<float>(4),
                    m_whitebalance.at<float>(3), m_whitebalance.at<float>(2),
                    m_whitebalance.at<float>(1), m_whitebalance.at<float>(0));
      }

      apply_contrast_whitebalance(m_result);
    }
  }

  compute_valid_area();

  m_refgray.reset();
  m_refcolor.reset();
  m_srcgray.reset();
  m_srccolor.reset();
  m_initial_guess.reset();
  m_stacked_transform.reset();
}

// Collect samples and use them to predict contrast between images
// based on 5 factors: constant difference, x, x^2, y and y^2 dependencies.
// These factors can model most lighting differences caused by e.g.
// rolling shutter and lens vignetting.
void Task_Align::match_contrast()
{
  cv::Mat ref, src;

  int xsamples = 64;
  int ysamples = 64;
  int total = xsamples * ysamples;

  cv::Mat tmp;
  apply_transform(m_srcgray->img(), tmp, false);

  cv::resize(m_refgray->img()(m_roi), ref, cv::Size(xsamples, ysamples), 0, 0, cv::INTER_AREA);
  cv::resize(tmp(m_roi), src, cv::Size(xsamples, ysamples), 0, 0, cv::INTER_AREA);

  cv::Mat contrast(total, 1, CV_32F);
  cv::Mat positions(total, 5, CV_32F);

  for (int y = 0; y < ysamples; y++)
  {
    for (int x = 0; x < xsamples; x++)
    {
      int idx = y * xsamples + x;

      float yd = (y - ref.rows/2.0f) / (float)ref.rows;
      float xd = (x - ref.cols/2.0f) / (float)ref.cols;

      float refpix = (float)ref.at<uint8_t>(y, x);
      float srcpix = (float)src.at<uint8_t>(y, x);

      float c = 1.0;
      if (refpix > 4 && srcpix > 4)
      {
        // Contrast result is only meaningful for bright enough pixels
        c = refpix / srcpix;
      }

      contrast.at<float>(idx) = c;
      positions.at<float>(idx, 0) = 1.0f;
      positions.at<float>(idx, 1) = xd;
      positions.at<float>(idx, 2) = sq(xd);
      positions.at<float>(idx, 3) = yd;
      positions.at<float>(idx, 4) = sq(yd);
    }
  }

  cv::solve(positions, contrast, m_contrast, cv::DECOMP_SVD);

  if (!cv::checkRange(m_contrast, true, NULL, -2.0f, 2.0f))
  {
    throw std::runtime_error("Contrast match result out of range, try --no-contrast");
  }
}

void Task_Align::match_transform(int max_resolution, bool rough)
{
  cv::Mat ref, src, mask;

  int resolution = std::max(m_refgray->img().cols, m_refgray->img().rows);
  float scale_ratio = 1.0f;

  if (resolution <= max_resolution)
  {
    ref = m_refgray->img();
    m_srcgray->img().copyTo(src);
  }
  else
  {
    scale_ratio = max_resolution / (float)resolution;
    cv::resize(m_refgray->img(), ref, cv::Size(), scale_ratio, scale_ratio, cv::INTER_AREA);
    cv::resize(m_srcgray->img(), src, cv::Size(), scale_ratio, scale_ratio, cv::INTER_AREA);
  }

  mask.create(ref.rows, ref.cols, CV_8U);
  mask = 0;
  mask(cv::Rect((int)(m_roi.x * scale_ratio), (int)(m_roi.y * scale_ratio),
                (int)(m_roi.width * scale_ratio), (int)(m_roi.height * scale_ratio))) = 255;

  apply_contrast_whitebalance(src);

  m_transformation.at<float>(0, 2) *= scale_ratio;
  m_transformation.at<float>(1, 2) *= scale_ratio;

  if (rough)
  {
    cv::findTransformECC(src, ref, m_transformation, cv::MOTION_AFFINE,
                        cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 25, 0.01),
                        mask, 1);
  }
  else
  {
    cv::findTransformECC(src, ref, m_transformation, cv::MOTION_AFFINE,
                        cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 50, 0.001),
                        mask, 3);
  }


  m_transformation.at<float>(0, 2) /= scale_ratio;
  m_transformation.at<float>(1, 2) /= scale_ratio;
}

void Task_Align::match_whitebalance()
{
  cv::Mat ref, src;

  int xsamples = 64;
  int ysamples = 64;
  int total = xsamples * ysamples;

  cv::Mat tmp;
  apply_transform(m_srccolor->img(), tmp, false);
  apply_contrast_whitebalance(tmp);

  cv::resize(m_refcolor->img()(m_roi), ref, cv::Size(xsamples, ysamples), 0, 0, cv::INTER_AREA);
  cv::resize(tmp(m_roi), src, cv::Size(xsamples, ysamples), 0, 0, cv::INTER_AREA);

  cv::Mat targets(total * 3, 1, CV_32F);
  cv::Mat factors(total * 3, 6, CV_32F);
  factors = 0.0f;

  for (int y = 0; y < ysamples; y++)
  {
    for (int x = 0; x < xsamples; x++)
    {
      int idx = y * xsamples + x;

      cv::Vec3b srcpixel = src.at<cv::Vec3b>(y, x);
      cv::Vec3b refpixel = ref.at<cv::Vec3b>(y, x);

      targets.at<float>(idx * 3 + 0, 0) = refpixel[0];
      targets.at<float>(idx * 3 + 1, 0) = refpixel[1];
      targets.at<float>(idx * 3 + 2, 0) = refpixel[2];

      factors.at<float>(idx * 3 + 0, 0) = 1.0f;
      factors.at<float>(idx * 3 + 0, 1) = srcpixel[0];
      factors.at<float>(idx * 3 + 1, 2) = 1.0f;
      factors.at<float>(idx * 3 + 1, 3) = srcpixel[1];
      factors.at<float>(idx * 3 + 2, 4) = 1.0f;
      factors.at<float>(idx * 3 + 2, 5) = srcpixel[2];
    }
  }

  cv::solve(factors, targets, m_whitebalance, cv::DECOMP_SVD);

  if (!cv::checkRange(m_whitebalance, true, NULL, -128, 128))
  {
    throw std::runtime_error("Whitebalance match result out of range, try --no-whitebalance");
  }
}

// Quantize `value` to an integer with simple error-diffusion dithering.
// The sum value + delta is truncated (float-to-int conversion), and the
// difference between `value` and that integer is accumulated into `delta`
// so that it carries over to the next call.
// The returned value is clamped to the 0..255 range; `delta` is updated from
// the unclamped integer.
static inline int round_and_dither(float value, float &delta)
{
  const int quantized = static_cast<int>(value + delta);
  delta += value - quantized;

  if (quantized < 0)
  {
    return 0;
  }
  if (quantized > 255)
  {
    return 255;
  }
  return quantized;
}

// Apply the current contrast model (m_contrast) and, for multi-channel
// images, the white balance model (m_whitebalance) to `img` in place.
//
// The contrast factor of a pixel is
//   c0 + xd * (c1 + c2 * xd) + yd * (c3 + c4 * yd)
// where xd / yd are the pixel coordinates normalised by the image size and
// measured from the image centre.
//
// Single-channel images get the contrast factor only. Any other channel
// count is processed as 3 interleaved channels using
//   out = in * c * contrast_term + brightness_term   (per channel).
// Pixels are accessed as 8-bit values.
// NOTE(review): nothing here verifies the depth / channel count of `img`;
// callers presumably only pass 8-bit 1- or 3-channel images — confirm.
//
// Results are quantized with round_and_dither(); the dithering error is
// carried along each row and restarts at zero on every row.
void Task_Align::apply_contrast_whitebalance(cv::Mat& img)
{
  // The model coefficients do not change while the image is processed, so
  // read them once instead of once per pixel.
  const float c0 = m_contrast.at<float>(0);
  const float cx = m_contrast.at<float>(1);
  const float cx2 = m_contrast.at<float>(2);
  const float cy = m_contrast.at<float>(3);
  const float cy2 = m_contrast.at<float>(4);

  const int rows = img.rows;
  const int cols = img.cols;

  if (img.channels() == 1)
  {
    // For grayscale images, apply contrast only
    for (int y = 0; y < rows; y++)
    {
      // The vertical coordinate is constant along a row.
      const float yd = (y - rows/2.0f) / (float)rows;
      uint8_t *row = img.ptr<uint8_t>(y);
      float delta = 0.0f;

      for (int x = 0; x < cols; x++)
      {
        const float xd = (x - cols/2.0f) / (float)cols;
        const float c = c0
                      + xd * (cx + cx2 * xd)
                      + yd * (cy + cy2 * yd);

        // Simple dithering reduces banding in result image
        const float f = row[x] * c;
        row[x] = static_cast<uint8_t>(round_and_dither(f, delta));
      }
    }
  }
  else
  {
    // For RGB images, apply contrast and white balance
    const float b_brightness = m_whitebalance.at<float>(0);
    const float b_contrast = m_whitebalance.at<float>(1);
    const float g_brightness = m_whitebalance.at<float>(2);
    const float g_contrast = m_whitebalance.at<float>(3);
    const float r_brightness = m_whitebalance.at<float>(4);
    const float r_contrast = m_whitebalance.at<float>(5);

    for (int y = 0; y < rows; y++)
    {
      const float yd = (y - rows/2.0f) / (float)rows;
      cv::Vec3b *row = img.ptr<cv::Vec3b>(y);
      float delta[3] = {0.0f, 0.0f, 0.0f};

      for (int x = 0; x < cols; x++)
      {
        const float xd = (x - cols/2.0f) / (float)cols;
        const float c = c0
                      + xd * (cx + cx2 * xd)
                      + yd * (cy + cy2 * yd);

        cv::Vec3b v = row[x];
        const float b = v[0] * c * b_contrast + b_brightness;
        const float g = v[1] * c * g_contrast + g_brightness;
        const float r = v[2] * c * r_contrast + r_brightness;
        v[0] = round_and_dither(b, delta[0]);
        v[1] = round_and_dither(g, delta[1]);
        v[2] = round_and_dither(r, delta[2]);
        row[x] = v;
      }
    }
  }
}

// Warp `src` with m_transformation into `dst`, which gets the same size and
// type as `src`. Uses bicubic interpolation and reflected borders.
// When `inverse` is true, cv::WARP_INVERSE_MAP is added to the warp flags.
void Task_Align::apply_transform(const cv::Mat &src, cv::Mat &dst, bool inverse)
{
  int warp_flags = cv::INTER_CUBIC;
  if (inverse)
  {
    warp_flags |= cv::WARP_INVERSE_MAP;
  }

  const cv::Size out_size(src.cols, src.rows);
  dst.create(out_size, src.type());
  cv::warpAffine(src, dst, m_transformation, out_size, warp_flags, cv::BORDER_REFLECT);
}

// Map a point through the 2x3 affine matrix m_transformation:
//   x' = m00 * x + m01 * y + m02
//   y' = m10 * x + m11 * y + m12
cv::Point2f Task_Align::transform_point(cv::Point2f point)
{
  const float *row_x = m_transformation.ptr<float>(0);
  const float *row_y = m_transformation.ptr<float>(1);

  const float x = row_x[0] * point.x + row_x[1] * point.y + row_x[2];
  const float y = row_y[0] * point.x + row_y[1] * point.y + row_y[2];
  return cv::Point2f(x, y);
}

void Task_Align::compute_valid_area()
{
  // Transform all corners and get enclosed axis-aligned rectangle
  cv::Rect a = m_srccolor->valid_area();
  cv::Point2f tl = transform_point(cv::Point2f(a.x, a.y));
  cv::Point2f tr = transform_point(cv::Point2f(a.x + a.width, a.y));
  cv::Point2f bl = transform_point(cv::Point2f(a.x, a.y + a.height));
  cv::Point2f br = transform_point(cv::Point2f(a.x + a.width, a.y + a.height));
  int top = std::ceil(std::max(tl.y, tr.y));
  int left = std::ceil(std::max(tl.x, bl.x));
  int bottom = std::floor(std::min(bl.y, br.y));
  int right = std::floor(std::min(br.x, tr.x));

  m_logger->verbose("%s transformed corners TL (%0.1f,%0.1f), TR (%0.1f,%0.1f), BL (%0.1f,%0.1f), BR (%0.1f,%0.1f)\n",
    m_filename.c_str(), tl.x, tl.y, tr.x, tr.y, bl.x, bl.y, br.x, br.y);

  m_valid_area = a;

  if (!(m_flags & FocusStack::ALIGN_KEEP_SIZE))
  {
    limit_valid_area(cv::Rect(left, top, right - left, bottom - top));

    m_logger->verbose("%s valid area X %d, Y %d, W %d, H %d\n",
      m_filename.c_str(), m_valid_area.x, m_valid_area.y, m_valid_area.width, m_valid_area.height);
  }
}