# Sequential Mosaic Testing

This notebook provides a cross-section of the Less-Referenced Mosaic creation process.


# Setup


## Imports


In [None]:
import copy
import glob
import inspect
import os
import shutil
import sys
import unittest

In [None]:
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.utils import check_random_state
import yaml

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
sns.set_style('white')

In [None]:
from night_horizons import utils, raster
from night_horizons.image_processing import mosaicking
from night_horizons.mapmake import SequentialMosaicMaker

## Settings


In [None]:
# Options passed to SequentialMosaicMaker alongside the config file.
local_options = {
    # General settings
    'random_state': 1682142,
    'show_images': False,

    # Evaluation settings
    'use_raw_images': False,
    # Number of training images per camera number (keys 0-2); the
    # preprocessing checks compare these against X_train['camera_num'].
    'train_size': {
        0: 0,
        1: 1,
        2: 0,
    },
    # Number of rows of X passed to predict() in the full-process run.
    'n_loops': 6,
    # The fraction of non-nan georeferencings required to claim success.
    # We only require a bare minimum here. Performance beyond approximate
    # functionality should be evaluated elsewhere
    'acceptance_fraction': 0.8,

    ###########################################################################
    # Below are arguments for specific services defined in the container
    'io_manager': {
        'output_dir': 'mosaics/sequential',
        'file_exists': 'overwrite',
        # Also used to build the expected checkpoint filename in check_fps.
        'checkpoint_freq': 4,
    },

    'geotiff_preprocessor': {
        'padding_fraction': 0.3,
    },

    'preprocessor': {
        'steps': [
            'metadata_preprocessor',
            'altitude_filter',
            'steady_filter',
            # 'metadata_image_registrar',
            'geotiff_preprocessor',
            'order',
        ],
    },

    'processor': {
        'save_return_codes': ['success'],
    },

    'mosaicker': {
        'memory_snapshot_freq': 1,
    },

    'image_scorer': {
        'acceptance_threshold': 0.95,
    },
}

In [None]:
# Because this is a test notebook, we log way more than usual.
comprehensive_log_keys = [
    'i',
    'ind',
    'x_off',
    'y_off',
    'x_size',
    'y_size',
    'dst_img',
    'dst_pts',
    'src_pts',
    'snapshot',
    'abs_det_M',
    'mask',
]
# Attach the same key list to each of these entries in local_options,
# creating the entry if it is missing. Note that all three entries reference
# the same list object.
for obj_key in ['image_operator', 'processor', 'mosaicker']:
    local_options.setdefault(obj_key, {})['log_keys'] = comprehensive_log_keys

In [None]:
# This is a pre-built factory to make the objects needed for mosaicking,
# updated to use the config.
mosaic_maker = SequentialMosaicMaker(
    config_filepath='./config.yml',
    local_options=local_options,
)

In [None]:
# For ease of access
settings = mosaic_maker.container.config

In [None]:
palette = sns.color_palette(settings['color_palette'])


## Parse Settings


In [None]:
io_manager = mosaic_maker.container.get_service('io_manager')

In [None]:
# Output files we expect a mosaicking run to produce, given relative to the
# output directory. Nothing is deleted here; this only builds the paths.
test_filenames = {
    'mosaic': 'mosaic.tiff',
    'y_pred': 'y_pred.csv',
    'settings': 'settings.yaml', 
    'log': 'log.csv',
    # The checkpoint filename embeds the checkpoint frequency, zero-padded
    # to six digits.
    'checkpoint': os.path.join(
        'checkpoints',
        f"mosaic_i{io_manager.checkpoint_freq:06d}.tiff"
    ),
    'initial_checkpoint': 'mosaic_fit.tiff',
    'reffed_settings': 'settings_train.yaml',
    'progress_img': 'progress_images/000000_src.tiff',
}
# The same files, as paths inside the io_manager's output directory
check_fps = {
    key: os.path.join(io_manager.output_dir, value)
    for key, value in test_filenames.items()
}
def clear_files():
    """Delete the io_manager's output directory and everything in it.

    Does nothing if the directory does not exist.
    """
    output_dir = io_manager.output_dir
    if not os.path.exists(output_dir):
        return
    shutil.rmtree(output_dir)

# Prepare Data

The first part is to prepare the data (AKA extract/transform/load).


## Train-Test Split

We split the data into training data (data that is georeferenced) and test data (data that is not georeferenced, or for which we don't use the georeferencing information when we're building the models).

We set the train size to some small number, because ideally the user only needs to georeference a couple of images manually.


In [None]:
# Split the input filepaths using the configured per-camera training sizes,
# random seed, and raw/referenced image choice. Three objects come back:
# the training filepaths, the test filepaths, and `fps`, which is what the
# preprocessing pipeline is later applied to in order to produce X.
fps_train, fps_test, fps = io_manager.train_test_production_split(
    train_size=settings['train_size'],
    random_state=settings['random_state'],
    use_raw_images=settings['use_raw_images'],
)

In [None]:
# The number of distinct training files should equal the total requested
# across all cameras
n_train_requested = np.sum(list(settings['train_size'].values()))
n_train_unique = len(pd.unique(fps_train.index))
assert n_train_unique == n_train_requested
# The test set index should contain no duplicates
n_test_unique = len(pd.unique(fps_test.index))
assert n_test_unique == len(fps_test.index)

## Preprocessing


### y values

We get the y-values first because we use a model fitted to them to get the X values.


In [None]:
preprocessor_y = mosaic_maker.container.get_service('preprocessor_y')

In [None]:
# Get the geo-transforms used for training
y_train = preprocessor_y.fit_transform(fps_train)
# NOTE(review): fit_transform (rather than transform) is also called on the
# test filepaths, which presumably re-fits preprocessor_y on the test set.
# Confirm this is intended.
y_test = preprocessor_y.fit_transform(fps_test)

### X values

We use the sensor (high-altitude balloon) positions to provide approximate georeferencing, which will be useful for saving computational time when building the unreferenced mosaic.


In [None]:
preprocessor = mosaic_maker.container.get_service('preprocessor')

In [None]:
# Fit the pipeline on the training filepaths and their geo-transforms.
# The `metadata_preprocessor__*` keyword arguments use the `step__param`
# naming, so they are presumably routed to the 'metadata_preprocessor' step;
# they supply the image, IMU, and GPS log filepaths from the io_manager.
preprocessor = preprocessor.fit(
    X=fps_train,
    y=y_train,
    metadata_preprocessor__img_log_fp=io_manager.input_filepaths['img_log'],
    metadata_preprocessor__imu_log_fp=io_manager.input_filepaths['imu_log'],
    metadata_preprocessor__gps_log_fp=io_manager.input_filepaths['gps_log'],
)

In [None]:
# TODO: This isn't showing a progress bar right now
# Get out the X values we'll use for the mosaic
X_train = preprocessor.transform(fps_train)
X = preprocessor.transform(fps)

In [None]:
# Each camera should contribute exactly the requested number of training rows
for camera_num in range(3):
    is_this_camera = X_train['camera_num'] == camera_num
    n_camera = is_this_camera.sum()
    n_expected = settings['train_size'][camera_num]
    assert n_camera == n_expected, 'Camera numbers are not as expected'

In [None]:
# The 'order' column should simply count up from zero
np.testing.assert_allclose(X['order'], np.arange(len(X)))

# All camera 1 images should be ordered before any camera 0 image
is_camera_0 = X['camera_num'] == 0
is_camera_1 = X['camera_num'] == 1
last_camera_1 = X.loc[is_camera_1, 'order'].max()
first_camera_0 = X.loc[is_camera_0, 'order'].min()
assert last_camera_1 < first_camera_0, \
    'Some camera 1 images come before camera 0 images.'

# Within camera 1, the distance to center should never decrease
d_to_center_steps = np.diff(X.loc[is_camera_1, 'd_to_center'])
assert (d_to_center_steps < 0).sum() == 0, \
    'Some smaller distances appear out of order.'

### Check consistency


In [None]:
# We don't want to drop more than a few files when we're working
# with the referenced dataset
if not settings['use_raw_images']:
    assert y_test.index.size - X.index.size < 3, 'Too many files dropped.'

In [None]:
# Drop the y values and filepaths that were filtered out
# Keep only the filepaths whose index survived preprocessing
fps = fps.loc[X.index]
# The test X values are the rows of X that also have a test y value...
X_test = X.loc[X.index.isin(y_test.index)]
# ...and y_test is re-selected on that index so the two have matching rows
y_test = y_test.loc[X_test.index]

In [None]:
# Check that our test Xs and ys align
n_bad = (y_test['filepath'] != X_test['filepath']).sum()
assert n_bad == 0, f'{n_bad} wrong filepaths'

### Look at Order


In [None]:
# Let's take a look.
if settings['show_images']:
    sp = sns.scatterplot(
        data=X,
        x='x_center',
        y='y_center',
        hue='camera_num',
    )
    sp.set_aspect('equal')

# Step-by-Step Mosaicking


### Initialization


#### Test

Check that initialization works, first with a mosaic that only uses the training data.


In [None]:
clear_files()

In [None]:
# Add enough padding to be visible
y_fitcheck = y_train.copy()
y_fitcheck['padding'] = 0.5 * (y_train['x_max'] - y_train['x_min'])

In [None]:
small_mosaicker = mosaic_maker.container.get_service('mosaicker')

In [None]:
small_mosaicker.fit(
    X=y_fitcheck,
    approx_y=y_fitcheck,
)

In [None]:
# The full mosaic image that's saved
dataset = small_mosaicker.io_manager.open_dataset()
# Move the first array axis to the end (presumably bands-first to
# bands-last — TODO confirm against the dataset layout)
mosaic_img = dataset.ReadAsArray().transpose(1, 2, 0)
# Wrap the first three channels together with the x and y bounds recorded
# on the fitted transformer
mosaic_image = raster.ReferencedImage(
    mosaic_img[:, :, :3],
    [small_mosaicker.transformer.x_min_, small_mosaicker.transformer.x_max_],
    [small_mosaicker.transformer.y_min_, small_mosaicker.transformer.y_max_]
)

In [None]:
# Check that there is empty space on the edges,
# as expected if padding is working right
# Sum over the first three channels so a pixel is zero only if all are zero
summed_mosaic_img = mosaic_img[:,:,:3].sum(axis=2)
# Padding converted from physical units to a whole number of pixel columns
expected_empty_px = int(np.abs(np.round(
    y_fitcheck['padding'].max()
    / small_mosaicker.transformer.pixel_width_
)))
# NOTE: these slices rely on the pixel count being nonzero. With a count
# of 0, `[:, :0]` is empty and `[:, -0:]` selects the whole array.
assert summed_mosaic_img[:, :expected_empty_px].sum() == 0
assert summed_mosaic_img[:, -expected_empty_px:].sum() == 0
# The same padding converted to a whole number of pixel rows
expected_empty_py = int(np.abs(np.round(
    y_fitcheck['padding'].max()
    / small_mosaicker.transformer.pixel_height_
)))
assert summed_mosaic_img[:expected_empty_py].sum() == 0
assert summed_mosaic_img[-expected_empty_py:].sum() == 0

In [None]:
# The actual image used to make the mosaic
original_image = raster.ReferencedImage.open(y_train.iloc[0]['filepath'])

In [None]:
if settings['show_images']:
    subplot_mosaic = [['original', 'mosaic']]
    fig = plt.figure(figsize=(20,10))
    ax_dict = fig.subplot_mosaic(subplot_mosaic)

    original_image.show(
        crs='cartesian',
        img='semitransparent_img',
        ax=ax_dict['original'],
    )

    mosaic_image.show(
        crs='cartesian',
        img='semitransparent_img',
        ax=ax_dict['mosaic'],
    )

    for ax_key, ax in ax_dict.items():
        ax.set_title(ax_key)

In [None]:
if settings['show_images']:
    fig = plt.figure(figsize=(10,10))
    ax = plt.gca()

    original_image.plot_bounds(ax, set_limits=True, label='original')
    mosaic_image.plot_bounds(
        ax, set_limits=True, edgecolor=palette[0], label='mosaic')
    
    ax.legend()

In [None]:
# Check the centers
# The mean over each (min, max) pair of bounds is the center along that axis
mosaic_center = np.array(mosaic_image.cart_bounds).mean(axis=1)
original_center = np.array(original_image.cart_bounds).mean(axis=1)
d_between_centers = np.linalg.norm(mosaic_center - original_center)
# assert_allclose has atol=0 by default, and the relative tolerance is scaled
# by the desired value (0 here), so without an explicit atol only a bit-exact
# zero would pass. Use an absolute tolerance that is tiny relative to the
# size of the original image so floating-point round-off is tolerated.
original_extent = np.linalg.norm(np.diff(original_image.cart_bounds, axis=1))
np.testing.assert_allclose(
    d_between_centers,
    0.,
    atol=1e-7 * original_extent,
)

In [None]:
# The mosaic should be the original image plus the padding on both sides
mosaic_extent = np.diff(mosaic_image.cart_bounds, axis=1).flatten()
original_extent = np.diff(original_image.cart_bounds, axis=1).flatten()
mosaic_width, mosaic_height = mosaic_extent
original_width, original_height = original_extent
np.testing.assert_allclose(
    mosaic_width,
    original_width + 2. * y_fitcheck['padding'],
)
np.testing.assert_allclose(
    mosaic_height,
    original_height + 2. * y_fitcheck['padding'],
)

In [None]:
# Delete the temporary initialization
dataset.FlushCache()
dataset = None

In [None]:
clear_files()

#### Actual full initialization and fit


In [None]:
mosaicker = mosaic_maker.container.get_service('mosaicker')

In [None]:
# This creates the dataset and adds the referenced mosaic.
mosaicker.fit(
    X=y_train,
    approx_y=X,
)

In [None]:
# The transformer of the less-referenced mosaicker and the transformer of
# its referenced (training) mosaicker should agree on bounds and size.
shared_attrs = ('x_min_', 'x_max_', 'y_min_', 'y_max_', 'x_size_', 'y_size_')
for key in shared_attrs:
    value_full = getattr(mosaicker.transformer, key)
    value_train = getattr(mosaicker.mosaicker_train.transformer, key)
    assert np.isclose(value_full, value_train)

In [None]:
# Check the mosaicker bounds are as expected: along each axis the fitted
# bounds should be the extreme image bounds in X, extended by the largest
# padding value.
padding = X['padding'].max()
assert padding > 0.
for axis in ['x', 'y']:
    np.testing.assert_allclose(
        X[f'{axis}_min'].min() - padding,
        getattr(mosaicker.transformer, f'{axis}_min_'),
    )
    np.testing.assert_allclose(
        X[f'{axis}_max'].max() + padding,
        getattr(mosaicker.transformer, f'{axis}_max_'),
    )

In [None]:
assert mosaicker.i_start_ == 0, \
    f'Not starting fresh: i_start = {mosaicker.i_start_}'

#### Validate settings saving


In [None]:
# Open the settings file the mosaicker's io_manager points to and parse it.
# yaml.BaseLoader constructs only basic Python objects and leaves scalars as
# strings, which is enough here because only the keys are checked.
settings_fp = mosaicker.io_manager.output_filepaths['settings']
with open(settings_fp, 'r', encoding='UTF-8') as file:
    saved_settings = yaml.load(file, Loader=yaml.BaseLoader)

In [None]:
# Every argument of SequentialMosaicker.__init__ (other than `self`) should
# appear as a key in the saved settings.
fullargspec = inspect.getfullargspec(mosaicking.SequentialMosaicker.__init__)
expected_args = fullargspec.args
for key in expected_args:
    if key != 'self':
        assert key in saved_settings, f'attr {key} not found in settings'

## In-Mosaicker Preprocessing

The coordinate conversions to pixel frame are necessarily part of the Mosaicker
preprocessing.


In [None]:
# Start by making a copy of X, which we'll work with throughout
X_t = X.copy()

### Check No-Pad Coordinate Conversion


In [None]:
tfer = mosaicker.transformer

In [None]:
X_t_nopad = tfer.transform_to_pixel(X_t)

In [None]:
# Ensure that the conversion back and forth is self-consistent
physical_recovered = np.array(tfer.pixel_to_physical(
    X_t_nopad['x_off'], X_t_nopad['y_off'],
    X_t_nopad['x_size'], X_t_nopad['y_size']
))
np.testing.assert_allclose(
    X_t[['x_min', 'x_max', 'y_min', 'y_max']].values.transpose(),
    physical_recovered,
    rtol=1e-6,
)

### Check Simplified Conversions


In [None]:
# Case where we should be getting the full dimensions plus some padding
padding_px = 100
# Physical distance covered by padding_px pixels along x.
# Note this overwrites the `padding` variable set during the bounds check.
padding = tfer.pixel_width_ * padding_px
# Convert the transformer's own bounds, grown by `padding` on every side
actual = tfer.physical_to_pixel(
    tfer.x_min_ - padding, tfer.x_max_ + padding,
    tfer.y_min_ - padding, tfer.y_max_ + padding,
)
# The same physical padding expressed as a whole number of pixels along y
padding_py = int(np.abs(np.round(padding_px * (
    tfer.pixel_width_ / tfer.pixel_height_
))))
# Expected (x_off, y_off, x_size, y_size): offsets moved out by the padding,
# sizes grown by the padding on both sides
expected = np.array([
    -padding_px, -padding_py,
    tfer.x_size_ + 2 * padding_px,
    tfer.y_size_ + 2 * padding_py,
])
np.testing.assert_allclose(expected, actual)

### Actual Conversion and Check


In [None]:
y_train = tfer.transform_to_pixel(y_train)

In [None]:
# Convert units
X_t = mosaicker.get_search_zone(X_t)
X_t = tfer.transform_to_pixel(X_t)

In [None]:
# Ensure that the conversion back and forth is self-consistent
physical_recovered = tfer.transform_to_physical(X_t).copy()
# Flip the sign of the padding and apply the search zone again, so that the
# padding added when X_t was built is removed
physical_recovered['padding'] *= -1
physical_recovered = mosaicker.get_search_zone(physical_recovered)
# The result should match the original, un-padded bounds in X
np.testing.assert_allclose(
    X[['x_min', 'x_max', 'y_min', 'y_max']].values,
    physical_recovered[['x_min', 'x_max', 'y_min', 'y_max']].values,
    rtol=1e-6,
)

In [None]:
# Trimming also happens. However, this shouldn't actually do anything
# if we made our mosaic large enough, so we check that.
trimmed = tfer.handle_out_of_bounds(
    X_t['x_off'], X_t['y_off'],
    X_t['x_size'], X_t['y_size']
)
np.testing.assert_allclose(
    X_t[['x_off', 'y_off', 'x_size', 'y_size']].values.transpose(),
    trimmed,
)

## First Image

We'll test the first loop in greater detail than the others.


In [None]:
i = 0
row = X_t.iloc[i]
dataset = mosaicker.io_manager.open_dataset()

In [None]:
mosaic_img = dataset.ReadAsArray().transpose(1, 2, 0)

In [None]:
image_operator = mosaicker.processor.image_operator

### Search Region in the Context of the Full Mosaic


In [None]:
# Expected bounds
x_off = row['x_off']
y_off = row['y_off']
x_size = row['x_size']
y_size = row['y_size']

In [None]:
row_nopad = X.copy().iloc[i]
(
    x_off_nopad, y_off_nopad,
    x_size_nopad, y_size_nopad,
) = tfer.physical_to_pixel(
    row_nopad['x_min'], row_nopad['x_max'],
    row_nopad['y_min'], row_nopad['y_max'],
)

In [None]:
# The padded search region and the un-padded image footprint must differ;
# if they were equal the padding would have had no effect.
bounds_with_padding = np.array([x_off, y_off, x_size, y_size])
bounds_without_padding = np.array([
    x_off_nopad, y_off_nopad,
    x_size_nopad, y_size_nopad,
])
assert not np.array_equal(bounds_with_padding, bounds_without_padding)

In [None]:
# Check that the conversion back and forth is self-consistent
physical_nopad = tfer.pixel_to_physical(
    x_off_nopad, y_off_nopad,
    x_size_nopad, y_size_nopad,
)
np.testing.assert_allclose(
    row_nopad[['x_min', 'x_max', 'y_min', 'y_max']].values.astype(float),
    np.array(physical_nopad),
    rtol=1e-6,
)

In [None]:
if settings['show_images']:
    fig = plt.figure(figsize=(20,10))
    ax = plt.gca()

    # Current mosaic
    ax.imshow(mosaic_img)

    # The first image location
    rect = patches.Rectangle(
        (x_off, y_off),
        x_size,
        y_size,
        linewidth = 3,
        facecolor = 'none',
        edgecolor = palette[0],
        label='with padding',
    )
    ax.add_patch(rect)

    # The non-padded first image location
    rect = patches.Rectangle(
        (x_off_nopad, y_off_nopad),
        x_size_nopad,
        y_size_nopad,
        linewidth = 3,
        facecolor = 'none',
        edgecolor = palette[1],
        label='no padding',
    )
    ax.add_patch(rect)

    ax.set_aspect('equal')

    ax.legend()

### Search Region Image


In [None]:
# The existing mosaic at this location
dst_img = mosaicker.processor.get_image_from_dataset(
    dataset, x_off, y_off, x_size, y_size)

This is plotted below with matched features.


In [None]:
# At this time we expect all data added to the mosaic to be within the bounds
# of the search region, if we're using approximate georeferencing
use_approximate_georeferencing = (
    'metadata_image_registrar' in preprocessor.named_steps
)
if use_approximate_georeferencing:
    assert dst_img.sum() == mosaic_img.sum()

In [None]:
# Here's a zoomed in version, so we know what we're looking at
row_train = y_train.iloc[0]
training_img = mosaicker.processor.get_image_from_dataset(
    dataset,
    row_train['x_off'], row_train['y_off'],
    row_train['x_size'], row_train['y_size']
)
if settings['show_images']:
    plt.imshow(training_img)

In [None]:
if settings['show_images']:
    plt.imshow(dst_img)

In [None]:
assert training_img.sum() == dst_img.sum(), (
    'Search region (dst_img) should encompass the training image'
)

### Search Region KeyPoints

We get these for later.


In [None]:
# Get the features from the original mosaic
dst_kp, dst_des = image_operator.detect_and_compute(dst_img)

In [None]:
# Transform the dst keypoints to mosaic frame
dst_pts = cv2.KeyPoint_convert(dst_kp)
dsframe_dst_pts = dst_pts + np.array([x_off, y_off])
dsframe_dst_des = copy.copy(dst_des)

In [None]:
if settings['show_images']:
    # Look at the image and its keypoints
    raster.Image(dst_img).show()

    fig = plt.gcf()
    ax = plt.gca()

    ax.scatter(
        dst_pts[:,0],
        dst_pts[:,1],
        color='none',
        edgecolor='w',
        linewidth=3,
        s=150,
    )

### New Image


In [None]:
src_img = utils.load_image(
    row['filepath'],
    dtype=mosaicker.dtype,
)

In [None]:
src_kp, src_des = image_operator.detect_and_compute(src_img)
src_pts = cv2.KeyPoint_convert(src_kp)

In [None]:
if settings['show_images']:
    # Look at the image and its keypoints
    raster.Image(src_img).show()

    fig = plt.gcf()
    ax = plt.gca()

    ax.scatter(
        src_pts[:,0],
        src_pts[:,1],
        color='none',
        edgecolor='w',
        linewidth=3,
        s=150,
    )

### Feature Matching


In [None]:
# Get and validate the transform predicted from feature matching
M = image_operator.find_homography(src_kp, src_des, dst_kp, dst_des)

In [None]:
# Inspect relationship
mask = image_operator.log['mask'].astype(bool)
valid_src_pts = image_operator.log['src_pts'][mask].reshape((mask.sum(), 2))
valid_dst_pts = image_operator.log['dst_pts'][mask].reshape((mask.sum(), 2))

In [None]:
if settings['show_images']:
    # Side-by-side view of the search region (dst) and the new image (src),
    # with a line joining each pair of matched keypoints kept by the mask.
    subplot_mosaic = [['dst_img', 'src_img']]
    fig = plt.figure(figsize=(20,10))
    ax_dict = fig.subplot_mosaic(subplot_mosaic)

    ax = ax_dict['dst_img']
    ax.imshow(dst_img)

    ax = ax_dict['src_img']
    ax.imshow(src_img)

    # Iterate over the point pairs directly instead of using an index named
    # `i`: at notebook top level that would overwrite the image counter `i`
    # whenever images are shown, making later state depend on a display flag.
    for valid_dst_pt, valid_src_pt in zip(valid_dst_pts, valid_src_pts):

        con = patches.ConnectionPatch(
            xyA=valid_dst_pt,
            xyB=valid_src_pt,
            coordsA='data',
            coordsB='data',
            axesA=ax_dict['dst_img'],
            axesB=ax_dict['src_img'],
            color=palette[1],
            linewidth=3,
        )
        ax.add_artist(con)

In [None]:
# raster.Image(src_img).save('../test_data/feature_matching/src_0.tiff')
# raster.Image(dst_img).save('../test_data/feature_matching/dst_0.tiff')

In [None]:
image_operator.validate_homography(M)

### Warp the Source Image


In [None]:
# Warp the image being fit
warped_img = image_operator.warp(src_img, dst_img, M)

In [None]:
if settings['show_images']:
    raster.Image(warped_img[:, :, :3]).show(img='semitransparent_img')

In [None]:
# The warped image should have the same dimensions as the dst img
assert warped_img.shape[:2] == dst_img.shape[:2]

### Blend the images


In [None]:
blended_img = image_operator.blend(
    src_img=warped_img,
    dst_img=dst_img,
)

In [None]:
# Show
if settings['show_images']:
    raster.Image(blended_img[:, :, :3]).show(img='semitransparent_img')

### Save and look at the mosaic


In [None]:
mosaicker.processor.save_image_to_dataset(
    dataset, blended_img, x_off, y_off)

In [None]:
# Get the region of just the first image for comparison from before
training_img_region_after = mosaicker.processor.get_image_from_dataset(
    dataset,
    row_train['x_off'], row_train['y_off'],
    row_train['x_size'], row_train['y_size'],
)

In [None]:
# More content should have been added
assert training_img_region_after.sum() > training_img.sum()

In [None]:
# View
if settings['show_images']:
    subplot_mosaic = [['before', 'after']]
    fig = plt.figure(figsize=(20,10))
    ax_dict = fig.subplot_mosaic(subplot_mosaic)

    ax = ax_dict['before']
    raster.Image(training_img[:, :, :3]).show(img='semitransparent_img', ax=ax)

    ax = ax_dict['after']
    raster.Image(training_img_region_after[:, :, :3]).show(img='semitransparent_img', ax=ax)

### Warp the Keypoints


In [None]:
# Transform to local frame and then the full mosaic frame
src_pts = cv2.KeyPoint_convert(src_kp)
global_src_pts = cv2.perspectiveTransform(src_pts.reshape(-1, 1, 2), M).reshape(-1, 2)
global_src_pts += np.array([x_off, y_off])

In [None]:
# Store the transformed points for the next loop
dsframe_dst_pts = np.append(dsframe_dst_pts, global_src_pts, axis=0)
dsframe_dst_des = np.append(dsframe_dst_des, src_des, axis=0)

In [None]:
if settings['show_images']:
    fig = plt.figure()
    ax = plt.gca()

    sns.scatterplot(
        x=dsframe_dst_pts[:,0],
        y=dsframe_dst_pts[:,1],
        ax = ax,
    )

    rect = patches.Rectangle(
        (x_off, y_off),
        x_size,
        y_size,
        linewidth = 3,
        facecolor = 'none',
        edgecolor = palette[0],
    )
    ax.add_patch(rect)

    ax.set_xlim(0, dataset.RasterXSize)
    ax.set_ylim(dataset.RasterYSize, 0)
    ax.set_aspect('equal')

In [None]:
# Every stored keypoint should lie inside the search region (edges included)
pts_x = dsframe_dst_pts[:,0]
pts_y = dsframe_dst_pts[:,1]
inside_x = (x_off <= pts_x) & (pts_x <= x_off + x_size)
inside_y = (y_off <= pts_y) & (pts_y <= y_off + y_size)
not_in_bounds = ~(inside_x & inside_y)
assert not_in_bounds.sum() == 0

### Check the georeferencing


In [None]:
# Call the fn
warped_x_off, warped_y_off, warped_x_size, warped_y_size = \
    image_operator.warp_bounds(src_img, M)
warped_x_off += x_off
warped_y_off += y_off

In [None]:
# Convert to physical
warped_x_min, warped_x_max, warped_y_min, warped_y_max = tfer.pixel_to_physical(
    warped_x_off, warped_y_off, warped_x_size, warped_y_size)

In [None]:
warped_center = np.array([
    0.5 * (warped_x_min + warped_x_max),
    0.5 * (warped_y_min + warped_y_max),
])

In [None]:
# Compare to recorded
if not settings['use_raw_images']:

    # Get the recorded bounds
    recorded_x_min, recorded_x_max, recorded_y_min, recorded_y_max = \
        y_test.loc[row.name, ['x_min', 'x_max', 'y_min', 'y_max']]
    
    # Get the center
    recorded_center = np.array([
        0.5 * (recorded_x_min + recorded_x_max),
        0.5 * (recorded_y_min + recorded_y_max),
    ])
    
    # Check the centers
    assert np.linalg.norm(warped_center - recorded_center) < 600.

In [None]:
if settings['show_images']:
    fig = plt.figure(figsize=(20,10))
    ax = plt.gca()

    # The warped image location
    width = warped_x_max - warped_x_min
    height = warped_y_max - warped_y_min
    rect = patches.Rectangle(
        (warped_x_min, warped_y_min),
        width,
        height,
        linewidth = 3,
        facecolor = 'none',
        edgecolor = palette[0],
    )
    ax.add_patch(rect)
    ax.scatter(
        *warped_center,
        s=100,
        color=palette[0],
    )

    # The actual image location
    if not settings['use_raw_images']:
        rect = patches.Rectangle(
            (recorded_x_min, recorded_y_min),
            recorded_x_max - recorded_x_min,
            recorded_y_max - recorded_y_min,
            linewidth = 3,
            facecolor = 'none',
            edgecolor = palette[1],
        )
        ax.add_patch(rect)
        ax.scatter(
            *recorded_center,
            s=100,
            color=palette[1],
        )

    padding_for_this_plot = 0.1 * width
    ax.set_xlim(warped_x_min - padding_for_this_plot, warped_x_max + padding_for_this_plot)
    ax.set_ylim(warped_y_min - padding_for_this_plot, warped_y_max + padding_for_this_plot)

    ax.set_aspect('equal')

### Log Values


In [None]:
# Store for later comparison
log = {}
# Pull values from the image operator's own log, then from this notebook's
# namespace (at the top level of a notebook, locals() is the global
# namespace). Presumably only the configured log keys are copied — confirm
# against update_log.
mosaicker.update_log(image_operator.log, target=log)
mosaicker.update_log(locals(), target=log)
# Set the bookkeeping entries explicitly rather than relying on whatever
# the notebook variables currently hold
log['i'] = 0
log['ind'] = row.name
log['return_code'] = 'success'

In [None]:
# Snapshot the float-valued attributes of the mosaicker whose names end in an
# underscore, so a second mosaicker can be compared against them later.
fit_values = {
    attr_name: getattr(mosaicker, attr_name)
    for attr_name in mosaicker.__dir__()
    if attr_name.endswith('_')
    and isinstance(getattr(mosaicker, attr_name), float)
}

## Next Image


In [None]:
i = 1
row1 = X_t.iloc[i]

### Preview keypoint selection


In [None]:
x_off1 = row1['x_off']
y_off1 = row1['y_off']
x_size1 = row1['x_size']
y_size1 = row1['y_size']

In [None]:
in_bounds1 = (
    (dsframe_dst_pts[:,0] >= x_off1)
    & (dsframe_dst_pts[:,1] >= y_off1)
    & (dsframe_dst_pts[:,0] <= x_off1 + x_size1)
    & (dsframe_dst_pts[:,1] <= y_off1 + y_size1)
)

In [None]:
# There must be at least one stored keypoint inside the second image's
# search zone. The message reports the second image's index (row1), which is
# the row being checked here.
assert in_bounds1.sum() > 0, \
    f'No image data in the search zone for index {row1.name}'

In [None]:
dst_pts1 = dsframe_dst_pts[in_bounds1]
dst_des1 = dsframe_dst_des[in_bounds1]

In [None]:
# At this point in the loops, *all* the points should be in bounds, if we're doing approximate georeferencing
if use_approximate_georeferencing:
    assert (~in_bounds1).sum() == 0

### Call the typical function


In [None]:
# Process the second image through the processor's usual entry point,
# handing it a copy of the row plus the open dataset and the transformer
row1_out = mosaicker.processor.process_row(
    1,
    row1.copy(),
    {
        'dataset': dataset,
        'transformer': tfer,
    },
)
# log1 is the processor's log object itself, not a copy, so the two
# assignments below also modify mosaicker.processor.log
log1 = mosaicker.processor.log
log1['i'] = 1
log1['ind'] = row1.name

In [None]:
# Freeze logging here so we can compare
mosaicker.processor.stop_logging()

In [None]:
assert row1_out['return_code'] == 'success', \
    'Image was not successfully combined.'

In [None]:
training_img_region_after2 = mosaicker.processor.get_image_from_dataset(
    dataset,
    row_train['x_off'], row_train['y_off'],
    row_train['x_size'], row_train['y_size'],
)

In [None]:
if settings['show_images']:
    subplot_mosaic = [['before', 'after']]
    fig = plt.figure(figsize=(20,10))
    ax_dict = fig.subplot_mosaic(subplot_mosaic)

    ax = ax_dict['before']
    raster.Image(training_img_region_after[:, :, :3]).show(img='semitransparent_img', ax=ax)

    ax = ax_dict['after']
    raster.Image(training_img_region_after2[:, :, :3]).show(img='semitransparent_img', ax=ax)

In [None]:
# Close dataset out
dataset.FlushCache()
dataset = None

In [None]:
# Clear out any existing files
clear_files()

# Full Process

Now we'll check if it runs for a subset.


## Run


In [None]:
i = 0
n_loops = settings['n_loops']

In [None]:
# The actual calls
mosaicker2: mosaicking.SequentialMosaicker = \
    mosaic_maker.container.get_service('mosaicker')

In [None]:
mosaicker2.fit(
    X=y_train,
    approx_y=X,
)

In [None]:
assert mosaicker2.io_manager.output_filepaths['mosaic'] == check_fps['mosaic'],\
    'Mosaic filepath not correct'

In [None]:
y_pred = mosaicker2.predict(
    X.iloc[:n_loops],
)

## Evaluate


### File Existence


In [None]:
# Check for output files
for key, fp in check_fps.items():
    print(f'Checking for {key} ({fp})...')
    assert os.path.isfile(fp), f'File {key} ({fp}) not found.'

### Consistency with Above


In [None]:
# Check for consistency for fit values
for key, value in fit_values.items():
    print(f'Checking for {key}...')
    np.testing.assert_allclose(value, getattr(mosaicker2, key))

In [None]:
# Check for consistency for calculated values: the first two log entries of
# the full run should match the values logged in the step-by-step section.
# We skip these because they're referred to in multiple spots.
skip_check = ['dst_pts', 'src_pts']
for log_key in mosaicker2.log_keys:

    if log_key in skip_check:
        continue
    
    print(f'Checking {log_key}...')

    # For snapshots we only check that the entry exists
    if log_key == 'snapshot':
        assert 'snapshot' in mosaicker2.logs[0]
        continue

    actual = [mosaicker2.logs[0][log_key], mosaicker2.logs[1][log_key]]
    expected = [log[log_key], log1[log_key]]
    
    try:
        # First attempt: compare both entries at once
        np.testing.assert_allclose(actual, expected)
    except ValueError:
        # Fall back to comparing entry by entry (presumably needed when the
        # two entries cannot be combined into one array, e.g. differing
        # shapes)
        for j, actual_j in enumerate(actual):
            try:
                np.testing.assert_allclose(actual_j, expected[j])
            except TypeError:
                # Convert with cv2.KeyPoint_convert and compare the results
                actual_j = cv2.KeyPoint_convert(actual_j)
                expected_j = cv2.KeyPoint_convert(expected[j])
                np.testing.assert_allclose(actual_j, expected_j)   
    except np.exceptions.DTypePromotionError:
        # Values with no common numeric dtype are compared with plain equality
        for j, actual_j in enumerate(actual):
            assert actual_j == expected[j]

### Reported Success


In [None]:
# Check how many were successful
return_codes = y_pred['return_code']
n_good = (return_codes == 'success').sum()
# Failures are counted relative to n_loops, not len(y_pred)
n_bad = n_loops - n_good
# The comparison is strict: the success rate must exceed acceptance_fraction,
# not merely equal it
assert n_good / n_loops > settings['acceptance_fraction'], (
    f"{n_bad} failures, success rate of {settings['acceptance_fraction']} "
    f'not met. Return codes are...\n{return_codes}'
)

### Referenced Positions


In [None]:
# Only evaluate if at least one predicted image also has a test y value
if y_pred.index.isin(y_test.index).sum() > 0:

    # Merge into a comparison dataframe
    # reindex fills rows for test images that have no prediction with NaN
    y_pred_for_eval = y_pred.reindex(y_test.index)

    # Estimate the consistency with the manual geotransforms
    comp_cols = ['x_center', 'y_center']
    y_err = y_test[comp_cols] - y_pred_for_eval[comp_cols]
    # Euclidean distance between the recorded and predicted centers
    err = np.sqrt((y_err['x_center']**2. + y_err['y_center']**2.).astype(float))

    if settings['show_images']:
        # Visualize the errors
        fig = plt.figure()
        ax = plt.gca()

        sns.scatterplot(
            x=np.arange(y_err.index.size),
            y=err,
            hue=np.arange(len(y_err)),
            ax=ax,
        )

        ax.set_ylim(0, ax.get_ylim()[1])

    # Check how bad the errors are
    # NaN errors (from missing or NaN predictions) compare False against the
    # threshold, so they are not counted as egregious here
    n_egregious = (err > 300.).sum()
    assert n_egregious == 0, f'Found {n_egregious} egregious errors.'

### Log


In [None]:
# Collect the in-memory per-image logs into a dataframe indexed by 'ind'.
# DataFrame.set_index returns a new dataframe rather than modifying in place,
# so the result has to be assigned for the index to take effect.
log_df = pd.DataFrame(mosaicker2.logs)
log_df = log_df.set_index('ind')
# There should be exactly one log entry per processed image
assert len(log_df) == n_loops, (
    'Different number of log entries and loops. '
    f'{n_loops} loops, {len(log_df)} log entries'
)

In [None]:
# Load the log written to disk and check that it is complete, with no
# repeated values in the columns that should be unique
log_df = pd.read_csv(check_fps['log'], index_col=0)
n_log_rows = len(log_df)
for col in ['ind', ]:
    n_unique = pd.unique(log_df[col]).size
    assert n_log_rows == n_unique, \
        f'{col} values are non-unique: {log_df[col]}'
assert n_log_rows == n_loops, 'Log was not completely saved.'

### Referenced Images


In [None]:
# Check that the referenced image files exist and agree well-enough with
# the existing images
image_scorer = mosaic_maker.container.get_service('image_scorer')
for ind in y_pred.index:
    # The output path is a template that is filled in with the image index
    output_fp = io_manager.output_filepaths['referenced_images'].format(ind)
    input_fp = X.loc[ind, 'filepath']

    assert os.path.isfile(output_fp), f'File {output_fp} not found.'
    # Open both images with the same CRS so their bounds are comparable
    actual_image = raster.ReferencedImage.open(
        output_fp,
        cart_crs_code=settings['global']['crs'],
    )

    expected_image = raster.ReferencedImage.open(
        input_fp,
        cart_crs_code=settings['global']['crs'],
    )

    # Compare image shape
    np.testing.assert_allclose(
        actual_image.img_shape,
        expected_image.img_shape,
    )

    # Compare image bounds
    np.testing.assert_allclose(
        actual_image.cart_bounds,
        expected_image.cart_bounds,
    )

    # Compare image contents
    score_results = image_scorer.operate(
        actual_image.img_int, expected_image.img_int)
    score = score_results['score']
    # NOTE(review): local_options sets 'acceptance_threshold' under the
    # 'image_scorer' key, but it is read from the top level of settings here.
    # Confirm the config exposes it at the top level.
    assert score > settings['acceptance_threshold'], f'Image has a score of {score}'

# Restarting From Checkpoint


In [None]:
# Remove final file
os.remove(check_fps['mosaic'])

In [None]:
mosaicker2 = mosaic_maker.container.get_service('mosaicker')

In [None]:
# Drop the log keys whose values do not have a useful string representation
bad_rep_keys = ['dst_img', 'src_img', 'dst_kp', 'src_kp', 'M']
mosaicker2.log_keys = [
    log_key
    for log_key in mosaicker2.log_keys
    if log_key not in bad_rep_keys
]

In [None]:
# Fit again with the final mosaic file removed, so the mosaicker should
# resume from a checkpoint.
# NOTE(review): this passes X_t (search-zone-padded and converted to pixel
# coordinates earlier in the notebook) as approx_y, whereas the first full
# run passed X. Confirm which one is intended.
mosaicker2.fit(
    X=y_train,
    approx_y=X_t,
)

In [None]:
assert mosaicker2.i_start_ != 0, 'Not successfully starting from a later point.'

In [None]:
# Predict over the same number of rows as the first full run.
# NOTE(review): the rows come from X_t here but from X in the first full
# run, and the two results are later compared with assert_frame_equal.
# Confirm that X_t is intended.
y_pred_checkpoint = mosaicker2.predict(
    X_t.iloc[:n_loops],
)

## Check output


In [None]:
# Check for output files
for key, fp in check_fps.items():
    print(f'Checking for {key} ({fp})...')
    assert os.path.isfile(fp), f'File {key} ({fp}) not found.'

In [None]:
# Check for consistency for fit values
for key, value in fit_values.items():
    np.testing.assert_allclose(value, getattr(mosaicker2, key))

In [None]:
# Check for consistency for calculated values: after restarting from a
# checkpoint, the first two log entries should still match the values logged
# in the step-by-step section. 'mask' is skipped here in addition to the
# point arrays.
skip_check = ['dst_pts', 'src_pts', 'mask']
for log_key in mosaicker2.log_keys:

    if log_key in skip_check:
        continue
    
    print(f'Checking {log_key}...')

    # For snapshots we only check that the entry exists
    if log_key == 'snapshot':
        assert 'snapshot' in mosaicker2.logs[0]
        continue

    actual = [mosaicker2.logs[0][log_key], mosaicker2.logs[1][log_key]]
    expected = [log[log_key], log1[log_key]]
    
    try:
        # First attempt: compare both entries at once
        np.testing.assert_allclose(actual, expected)
    except ValueError:
        # Fall back to comparing entry by entry (presumably needed when the
        # two entries cannot be combined into one array, e.g. differing
        # shapes)
        for j, actual_j in enumerate(actual):
            try:
                np.testing.assert_allclose(actual_j, expected[j])
            except TypeError:
                # Convert with cv2.KeyPoint_convert and compare the results
                actual_j = cv2.KeyPoint_convert(actual_j)
                expected_j = cv2.KeyPoint_convert(expected[j])
                np.testing.assert_allclose(actual_j, expected_j)   
    except np.exceptions.DTypePromotionError:
        # Values with no common numeric dtype are compared with plain equality
        for j, actual_j in enumerate(actual):
            assert actual_j == expected[j]

In [None]:
pd.testing.assert_frame_equal(y_pred, y_pred_checkpoint)

In [None]:
return_codes = y_pred_checkpoint['return_code']
assert len(return_codes) == n_loops, (
    f'Different number of return codes and loops. {n_loops} loops, '
    f'{len(return_codes)} return codes'
)

# Cleanup


In [None]:
clear_files()