# Validation
The goal is to understand which values of the image-matching metrics correspond to a good fit.

# Imports

In [None]:
# Native python
import copy
import os

In [None]:
# External
import cv2
import numpy as np
from osgeo import gdal
import pandas as pd
import scipy
import sklearn.model_selection
import tqdm
import tqdm.notebook
import pyproj

In [None]:
# Plotting
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.patheffects as patheffects
import seaborn as sns
sns.set_style('white')
import folium

In [None]:
# Custom scripts
from nitelite_mapmaker import mapmaker, georeference, observations, data

# Settings

In [None]:
# All user-tunable choices for this notebook live in this one dictionary.
settings = {
    # Data architecture (note: the absolute paths below are machine-specific)
    'flight_name': '220513-FH135',
    'data_dir': '/Users/Shared/data/nitelite',
    'google_drive_dir': '/Users/zhafensaavedra/Google Drive/Shared drives/NITELite/Data & Analysis',
    'flight_subdir': 'Old NITELite Flights/220513-FH135',
    'reffed_subdir': 'QGIS FH135/FH135 Main Project/Main Geo Files',
    'img_log_filename': 'image.log',
    'imu_log_filename': 'OBC/PresIMULog.csv',
    'gps_log_filename': 'OBC/GPSLog.csv',

    # Choices for what to process
    'camera_num': 1,
    'random_state': 165741,

    # Choices for validation parameters
    'pad_width': 10,

    # --- Options that are currently disabled ---
    # What to process
    # 'test_size': 0.2,
    # 'overwrite_coords_for_reffed': True,
    #
    # Data filter choices
    # 'gyro_mag_cut': 0.5,  # Corresponds to ~84th percentile
    # 'gyro_mag_cut': 0.02,  # Based on visual inspection
    # 'percent_for_landed': 95.,
    # 'percent_for_cruising': 85.,
    # 'mult_of_std_for_steady': 2.,
    # 'rolling_window_in_min': 1.,
    # 'homography_det_min': 0.5,  # Only let the image shrink or scale by up to a factor of 2
    #
    # Mosaicking choices
    # 'allotted_memory': 2.,  # In GB
    # 'n_tiles_guess': 16,
    # 'reset_mosaic': False,
    # 'ground_alt': 220.,
}

# Set Up

## Settings Parsing

In [None]:
# The camera's corresponding long number is its short number plus a fixed offset
camera_long_num_offset = 23085686
settings['camera_long_num'] = camera_long_num_offset + settings['camera_num']

In [None]:
# Derive the working directories from the base locations given in the settings
settings.update(
    image_dir=os.path.join(
        settings['data_dir'],
        'images',
        settings['flight_name'],
        str(settings['camera_long_num']),
    ),
    metadata_dir=os.path.join(
        settings['google_drive_dir'],
        settings['flight_subdir'],
        'data',
    ),
    referenced_dir=os.path.join(
        settings['google_drive_dir'],
        settings['reffed_subdir'],
    ),
    mosaic_dir=os.path.join(
        settings['data_dir'],
        'mosaics',
        settings['flight_name'],
    ),
)

## Object Creation and Preprocessing

In [None]:
def _metadata_fp(filename_key):
    """Return the path of the log file named by `settings[filename_key]` inside the metadata directory."""
    return os.path.join(settings['metadata_dir'], settings[filename_key])


# Create the main mapmaker object; all three logs live in the metadata directory
mm = mapmaker.Mapmaker(
    image_dir=settings['image_dir'],
    img_log_fp=_metadata_fp('img_log_filename'),
    imu_log_fp=_metadata_fp('imu_log_filename'),
    gps_log_fp=_metadata_fp('gps_log_filename'),
    referenced_dir=settings['referenced_dir'],
)

In [None]:
# General metadata loading.
# NOTE(review): presumably this populates `mm.flight.metadata`, which is read
# further below — confirm against the Mapmaker implementation.
mm.prep()

In [None]:
# NOTE(review): judging by the name, this adds Cartesian-coordinate image
# bounds to the flight metadata — confirm against the flight implementation.
mm.flight.update_metadata_with_cart_bounds()

In [None]:
# Keep only the rows that have a manually referenced file path recorded.
# The copy makes `referenced` independent of the flight's own metadata table.
metadata = mm.flight.metadata
is_manually_referenced = metadata['manually_referenced_fp'].notna()
referenced = metadata.loc[is_manually_referenced].copy()

In [None]:
# Color palette: seaborn's 'deep' palette, kept in a variable for use in plots
palette = sns.color_palette('deep')

In [None]:
# Random number generator seeded from the settings, so that any draws made
# from it are reproducible between runs
rng = np.random.default_rng(seed=settings['random_state'])

# Visual Relationship Between Metrics

## Relationship Between Offset and Metrics

In [None]:
# Template-matching metrics to evaluate: column name -> OpenCV method flag.
# `metrics` and `metric_flags` are parallel lists derived from this mapping.
metric_methods = {
    'ccoeff': cv2.TM_CCOEFF_NORMED,
    'sqdiff': cv2.TM_SQDIFF_NORMED,
}
metrics = list(metric_methods.keys())
metric_flags = list(metric_methods.values())

In [None]:
# For every manually referenced image, slide the image over a zero-padded copy
# of itself and record the template-matching score at each integer pixel shift
# of up to `pad_width` pixels in x and y, together with the size of that shift.

# The pixel-shift grid is the same for every image, so build it once.
pad_width = settings['pad_width']
pxs = np.arange(-pad_width, pad_width + 1)
pys = np.arange(-pad_width, pad_width + 1)
pxs_mesh, pys_mesh = np.meshgrid(pxs, pys)

# Pixel distance of each shift from zero shift
prs = np.sqrt(pxs_mesh**2. + pys_mesh**2.)

# One list of per-image flattened arrays for each column of the final table
offset_data = {key: [] for key in ['pr', 'r', *metrics]}
for ind in tqdm.notebook.tqdm(referenced.index):

    # Load the observation
    obs_i = mm.flight.get_referenced_observation(ind)

    # Don't include images with weird offsets.
    dx, dy = obs_i.get_pixel_widths()
    if dx > 10:
        continue

    # Pad the two leading (spatial) axes with zeros in preparation for
    # autocorrelation; the third axis is left unpadded.
    padded_img = np.pad(
        obs_i.img_int,
        ((pad_width, pad_width), (pad_width, pad_width), (0, 0)),
        constant_values=0,
    )

    # Physical distance of each shift (pixel shift scaled by the pixel widths)
    rs = np.sqrt((dx * pxs_mesh)**2. + (dy * pys_mesh)**2.)

    # Store
    offset_data['pr'].append(prs.flatten())
    offset_data['r'].append(rs.flatten())

    # Template matching calculation.
    # cv2.matchTemplate expects (image, templ) with the template no larger than
    # the image, so the padded image is the search image and the unpadded image
    # is the template. The result has one score per shift, laid out like the
    # shift grid above (entry [pad_width, pad_width] is zero shift).
    for metric, metric_flag in zip(metrics, metric_flags):
        metric_matrix = cv2.matchTemplate(padded_img, obs_i.img_int, metric_flag)
        offset_data[metric].append(metric_matrix.flatten())

In [None]:
# Format: join the per-image arrays of each column into one long array and
# assemble the table. `offset_data` itself is left untouched so that this cell
# can be re-run without re-running the loop above.
offset_df = pd.DataFrame({
    key: np.concatenate(item)
    for key, item in offset_data.items()
})

In [None]:
# Pairwise histograms of every column of the offset table:
# 2D histograms off the diagonal, 1D histograms on the diagonal.
g = sns.PairGrid(offset_df)
g.map_offdiag(sns.histplot)
g.map_diag(sns.histplot)

# Restrict the y-range to [0, 1] on the off-diagonal panels whose y-axis is
# labelled 'ccoeff'; diagonal panels are skipped.
for (i, j), ax in np.ndenumerate(g.axes):
    if i != j and ax.get_ylabel() == 'ccoeff':
        ax.set_ylim(0, 1)

In [None]:
# Median and 16th/84th percentiles of the correlation coefficient in bins of
# physical offset. The three statistics share the same inputs and binning, so
# they go through one helper rather than three copies of the same call.
xs = offset_df['r']


def bin_ccoeff(statistic, bins=32):
    """Bin `offset_df['ccoeff']` by `offset_df['r']` and reduce each bin.

    Parameters
    ----------
    statistic : str or callable
        Passed through to `scipy.stats.binned_statistic`.
    bins : int
        Number of equal-width bins spanning the range of `offset_df['r']`.

    Returns
    -------
    The `scipy.stats.binned_statistic` result (`statistic`, `bin_edges`,
    `binnumber`).
    """
    return scipy.stats.binned_statistic(
        offset_df['r'],
        offset_df['ccoeff'],
        statistic=statistic,
        bins=bins,
    )


binned_median = bin_ccoeff('median')
binned_low = bin_ccoeff(lambda x: np.percentile(x, 16))
binned_high = bin_ccoeff(lambda x: np.percentile(x, 84))

In [None]:
# Midpoint of each offset bin (average of its lower and upper edge)
bin_edges = binned_median.bin_edges
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2.

In [None]:
# Correlation coefficient as a function of offset:
# solid line = binned median, shaded band = 16th to 84th percentile.
fig, ax = plt.subplots()

median_style = dict(color='k', linewidth=3)
band_style = dict(alpha=0.5, color='k')

ax.plot(bin_centers, binned_median.statistic, **median_style)
ax.fill_between(bin_centers, binned_low.statistic, binned_high.statistic, **band_style)

# Show the full binned range in x and only non-negative coefficients in y
x_min, x_max = binned_median.bin_edges[0], binned_median.bin_edges[-1]
ax.set_xlim(x_min, x_max)
ax.set_ylim(0, 1)

ax.set_xlabel('offset [m]')
ax.set_ylabel('correlation coefficient (aka "r")')

In [None]:
# Median correlation coefficient at offsets of 10, 20, ..., 70 (same units as
# the 'r' column), linearly interpolated between bin centers. The interpolator
# accepts an array, so all offsets are evaluated in one call.
interp = scipy.interpolate.interp1d(bin_centers, binned_median.statistic)
interp(np.arange(10, 80, 10))

This gives a decent understanding for now.
Rule of thumb for interpreting the correlation coefficient $r$ (the `ccoeff` metric) in our dataset:
* $r > 0.8$: accurate to within 10 m
* $r > 0.7$: accurate to within 20 m
* $r \sim 0.5$: might as well be uncorrelated