# Less-Referenced Mosaic

This notebook provides a cross-section of the Less-Referenced Mosaic creation process.

# Setup

## Imports

In [None]:
import copy
import os

In [None]:
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.utils import check_random_state
import yaml

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
sns.set_style('white')

In [None]:
from night_horizons import utils, preprocess, reference, mosaic, raster, pipelines

## Settings

In [None]:
# Load the shared notebook configuration from the YAML file in the working directory.
# NOTE(review): FullLoader can build more than plain data types. That is acceptable for a
# trusted local config, but prefer yaml.safe_load if this file could ever come from elsewhere.
with open('./config.yml', mode='r', encoding='UTF-8') as config_file:
    settings = yaml.load(config_file, Loader=yaml.FullLoader)

In [None]:
# Notebook-specific overrides; they replace same-named entries loaded from config.yml.
local_settings = dict(
    include_raw_images=True,
    # Relative to settings['data_dir']; joined onto it in the "Parse Settings" section.
    mosaic_filepath='mosaics/FH135.tiff',
    random_state=16849,
    # Passed to train_test_split as an integer, i.e. an absolute number of samples.
    train_size=1,
    # A padding factor of 1 assumes we have really good starting positions.
    # This is useful for debugging.
    padding_factor=1.,
)
settings.update(local_settings)

## Parse Settings

In [None]:
# Resolve the mosaic output path relative to the data directory.
# The relative path is read from `local_settings` (not from `settings`, which this
# statement overwrites) so that re-running the cell does not join the data directory
# onto an already-joined path.
settings['mosaic_filepath'] = os.path.join(
    settings['data_dir'],
    local_settings['mosaic_filepath'],
)

In [None]:
# Promote every entry of 'paths_relative_to_data_dir' to a top-level setting whose
# value is the corresponding path joined onto the data directory.
for setting_name, relative_path in settings['paths_relative_to_data_dir'].items():
    settings[setting_name] = os.path.join(settings['data_dir'], relative_path)

In [None]:
# Build a random-state object from the configured seed.
# Note that the train-test split below is given the raw seed from `settings`, not this object.
random_state = check_random_state(settings['random_state'])

In [None]:
# Color palette for plotting, selected by the 'color_palette' entry in the settings.
palette = sns.color_palette(settings['color_palette'])

In [None]:
# Coordinate reference system from the settings; handed to the preprocessers and
# the sensor georeferencer constructed later in the notebook.
crs = settings['crs']

# Prepare Data
The first step is to prepare the data (i.e., extract, transform, and load it).

## Get filepaths

In [None]:
# Filepaths of the manually georeferenced images (names like "Geo <digits>_<digit>.tif").
# The dot before the extension is escaped so it only matches a literal '.', not any character.
referenced_fps = utils.discover_data(
    settings['referenced_images_dir'],
    ['tif', 'tiff'],
    pattern=r'Geo\s\d+_\d\.tif',
)

In [None]:
# Filepaths of the raw images under 'images_dir' with extensions tif, tiff, or raw.
# The result carries an index, which is shifted later to avoid clashing with referenced_fps.
raw_fps = utils.discover_data(settings['images_dir'], ['tif', 'tiff', 'raw'])

## Train-Test Split

We split the data into training data (data that is georeferenced) and test data (data that is not georeferenced, or for which we don't use the georeferencing information when we're building the models).

We set the train size to some small number, because ideally the user only needs to georeference a couple of images manually.

In [None]:
# Shuffle the referenced filepaths reproducibly (seed taken from the settings) and
# split them into the training portion and the held-out test portion.
referenced_fps_train, referenced_fps_test = train_test_split(
    referenced_fps,
    shuffle=True,
    random_state=settings['random_state'],
    train_size=settings['train_size'],
)

## Combine Referenced and Raw

In [None]:
# Shift the raw-image index by the number of referenced filepaths
# so the two sets do not share index values once concatenated.
raw_fps.index = raw_fps.index + referenced_fps.size

In [None]:
# Actual combination: the training set is the referenced training split, and the set
# to be mosaicked is the referenced test split, optionally extended with the raw images.
fps_train, fps_test = referenced_fps_train, referenced_fps_test
fps = (
    pd.concat([fps_test, raw_fps])
    if settings['include_raw_images']
    else fps_test
)

## Initial, Approximate Georeferencing
We use the sensor (high-altitude balloon) positions to provide approximate georeferencing, which saves computational time when building the less-referenced mosaic.

In [None]:
# Pipeline for approximate georeferencing: the NITELite preprocesser produces the
# filepath, sensor position and camera number columns, and the sensor georeferencer
# consumes them while passing 'filepath' and 'camera_num' through.
sensor_georeference_pipeline = Pipeline([
    (
        'nitelite',
        preprocess.NITELitePreprocesser(
            crs=crs,
            output_columns=['filepath', 'sensor_x', 'sensor_y', 'camera_num'],
            unhandled_files='warn and drop',
        ),
    ),
    (
        'sensor_georeference',
        reference.SensorGeoreferencer(
            passthrough=['filepath', 'camera_num'],
            crs=crs,
        ),
    ),
])

# Separate preprocesser used to obtain the target values (y) from GeoTIFF filepaths.
sensor_georeference_pipeline_y = preprocess.GeoTIFFPreprocesser(crs=crs)

In [None]:
# Get the geo-transforms used for training
# (presumably read from the georeferenced files themselves -- confirm in GeoTIFFPreprocesser).
y_train = sensor_georeference_pipeline_y.fit_transform(fps_train)
# NOTE(review): the test set also goes through fit_transform, which re-fits the
# preprocesser on the test files. Harmless if GeoTIFFPreprocesser is stateless -- confirm;
# otherwise this should be transform() after fitting on the training files.
y_test = sensor_georeference_pipeline_y.fit_transform(fps_test)

In [None]:
# Train the pipeline on the referenced training images. The 'nitelite__' prefix routes
# the log filepaths to the fit method of the 'nitelite' step.
sensor_georeference_pipeline.fit(
    fps_train,
    y_train,
    nitelite__gps_log_fp=settings['gps_log_fp'],
    nitelite__imu_log_fp=settings['imu_log_fp'],
    nitelite__img_log_fp=settings['img_log_fp'],
)

In [None]:
# Get the approximate georeferences for the data we'll be testing.
# The preprocessing step is configured with unhandled_files='warn and drop', so X may
# contain fewer rows than fps; the next cell realigns fps and y_test to X.index.
X = sensor_georeference_pipeline.predict(fps)

In [None]:
# Keep only the entries that survived the approximate georeferencing:
# fps is re-selected by X's index, and y_test is masked to the labels present in X.
fps = fps.loc[X.index]
y_test = y_test[y_test.index.isin(X.index)]

In [None]:
# The pipeline's score on the training data is used as an estimate of the error of the
# approximate georeferencing; the padding is that error scaled by the configured factor.
first_pass_error = sensor_georeference_pipeline.score(fps_train, y_train)
padding = first_pass_error * settings['padding_factor']

In [None]:
# Sanity check: for every row of y_test, the row of X with the same index label
# must refer to the same file.
mismatched = y_test['filepath'] != X.loc[y_test.index, 'filepath']
n_bad = mismatched.sum()
assert n_bad == 0, f'{n_bad} wrong filepaths'

# The Mosaic

## Determine order of iteration

In [None]:
# Camera order of iteration -- camera 1 is the nadir camera, so it goes first,
# followed by camera 0 and then camera 2.
camera_priority = {0: 1, 1: 0, 2: 2}
X['camera_order'] = X['camera_num'].map(camera_priority)

In [None]:
# Proximity order of iteration: Euclidean distance of each image's approximate center
# from the mean center of the training images.
center_cols = ['x_center', 'y_center']
center_coords = y_train[center_cols].mean()
offset = X[center_cols] - center_coords
X['d_to_center'] = np.linalg.norm(offset, axis=1)

In [None]:
# Actual sort: camera priority first, then distance from the training center.
X_iter = X.sort_values(['camera_order', 'd_to_center'])
# Record each image's position in the iteration sequence.
X_iter['order'] = np.arange(len(X_iter))
# Index labels in iteration order. These come from the sorted frame;
# X.index would give the original, unsorted order.
iter_inds = X_iter.index

## Initialization

In [None]:
# Set up the mosaic at the configured output path, overwriting any existing file,
# with the padding derived from the first-pass georeferencing error.
less_reffed_mosaic = mosaic.LessReferencedMosaic(
    file_exists='overwrite',
    padding=padding,
    filepath=settings['mosaic_filepath'],
)

In [None]:
# This creates the dataset and adds the referenced mosaic.
# The approximate georeferences (filepath plus geotransform columns) are supplied
# in iteration order.
less_reffed_mosaic.fit(
    approx_y=X_iter[['filepath', *preprocess.GEOTRANSFORM_COLS]],
    X=y_train,
)

## Run

In [None]:
# Run the mosaic over the images in iteration order, passing each image's filepath
# and approximate geotransform.
y_pred = less_reffed_mosaic.predict(
    X_iter[['filepath', *preprocess.GEOTRANSFORM_COLS]],
)

In [None]:
# Close the mosaic now that prediction is done.
# NOTE(review): presumably this flushes and releases the file at settings['mosaic_filepath'] -- confirm.
less_reffed_mosaic.close()