# Sensor-based Georeferencing

# Setup

## Imports

In [None]:
import os

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.utils import check_random_state
import yaml

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's 'whitegrid' style for the figures in this notebook.
sns.set_style(style='whitegrid')

In [None]:
from night_horizons import utils, preprocess, reference

## Settings

In [None]:
# Load the notebook settings from the YAML config file in the working directory.
# NOTE(review): the full loader resolves more YAML tags than the safe loader;
# if config.yml only holds plain scalars, lists and mappings, consider
# switching to yaml.safe_load.
with open('./config.yml', mode='r', encoding='UTF-8') as config_file:
    settings = yaml.full_load(config_file)

In [None]:
# Notebook-specific options, merged on top of the settings read from config.yml
# (existing keys with the same name are overwritten).
local_options = dict(
    random_state=16849,
    train_size=0.8,
)
settings.update(local_options)

## Parse Settings

In [None]:
# Join every path listed relative to the data directory onto data_dir and
# store the result as a top-level setting under the same key.
relative_paths = settings['paths_relative_to_data_dir']
for setting_name in relative_paths:
    settings[setting_name] = os.path.join(
        settings['data_dir'],
        relative_paths[setting_name],
    )

In [None]:
# Turn the configured seed into a random state object via scikit-learn.
random_state = check_random_state(seed=settings['random_state'])

In [None]:
# Discover the referenced image files under the configured directory.
# NOTE(review): the '.' before 'tif' in the pattern is unescaped, so it
# matches any character -- confirm whether a literal dot was intended.
referenced_fps = utils.discover_data(
    settings['referenced_images_dir'],
    ['tif', 'tiff'],
    pattern=r'Geo\s\d+_\d.tif',
)

In [None]:
# Color palette for the plots, selected by name in the settings.
palette = sns.color_palette(palette=settings['color_palette'])

In [None]:
# Pull the 'crs' setting into its own variable.
# NOTE(review): presumably the coordinate reference system identifier -- confirm.
crs = settings['crs']

# Extract/Transform/Load

## X values

In [None]:
# Preprocessor that produces the X values.
# NOTE(review): 'warn and drop' presumably means files it cannot handle are
# dropped with a warning -- confirm against NITELitePreprocessor.
metadata_preprocessing = preprocess.NITELitePreprocessor(unhandled_files='warn and drop')


In [None]:
# Build X from the referenced image filepaths plus the image, IMU and GPS log
# filepaths taken from the settings.
log_fps = {
    log_key: settings[log_key]
    for log_key in ('img_log_fp', 'imu_log_fp', 'gps_log_fp')
}
X = metadata_preprocessing.fit_transform(referenced_fps, **log_fps)

## Y values
We use X['filepath'] as input because not all of the referenced filepaths make it through the X preprocessing successfully.
We only want to look at the y values for which we have corresponding X values.

In [None]:
# Preprocessor that produces the y values from the GeoTIFF files.
y_pipeline = preprocess.GeoTIFFPreprocessor()

In [None]:
# Feed in the filepaths that came out of the X preprocessing, so that y is
# built from the same set of files as X.
y = y_pipeline.fit_transform(X['filepath'])

# Data Split

In [None]:
# Split the sample index into train and test subsets.
# The seeded random state is passed in so that a top-to-bottom run of the
# notebook always yields the same split; without it, train_test_split draws
# from NumPy's global, unseeded RNG. Re-running this cell on its own advances
# the generator, so restart from the random_state cell to get the same split.
train_inds, test_inds = train_test_split(
    X.index,
    train_size=settings['train_size'],
    random_state=random_state,
)

# Model

## Sensor Georeferencing

In [None]:
# Sensor-based georeferencing model with its default options.
model = reference.SensorGeoreferencer()

In [None]:
# Fit on the training rows only; the targets are restricted to the
# geotransform columns.
X_train = X.loc[train_inds]
y_train = y.loc[train_inds, preprocess.GEOTRANSFORM_COLS]
model.fit(X=X_train, y=y_train)

In [None]:
# Predict for the held-out test rows.
X_test = X.loc[test_inds]
y_pred = model.predict(X_test)

## Sensor Georeferencing, No Direct Estimate

In [None]:
# Same model class, with the direct estimate switched off.
model_nod = reference.SensorGeoreferencer(use_direct_estimate=False)

In [None]:
# Fit the no-direct-estimate model on the same training rows and
# geotransform target columns.
X_train = X.loc[train_inds]
y_train = y.loc[train_inds, preprocess.GEOTRANSFORM_COLS]
model_nod.fit(X=X_train, y=y_train)

In [None]:
# Predict for the held-out test rows with the no-direct-estimate model.
X_test = X.loc[test_inds]
y_pred_nod = model_nod.predict(X_test)

# Evaluate

## Predictions

In [None]:
# Overlay the predicted and the actual (x_min, y_max) positions of the test
# rows on a single set of axes.
fig = plt.figure()
ax = fig.gca()

for frame, tag in ((y_pred, 'predicted'), (y.loc[test_inds], 'actual')):
    sns.scatterplot(frame, x='x_min', y='y_max', label=tag, ax=ax)

# Use the same scale on both axes.
ax.set_aspect('equal')

## Error

In [None]:
# Euclidean distance between the predicted and the actual (x_min, y_max)
# position of each test row, stored as a new 'offset' column.
position_cols = ['x_min', 'y_max']
diffs = y_pred[position_cols] - y.loc[test_inds, position_cols]
y_pred['offset'] = np.sqrt(diffs.pow(2.).sum(axis=1))

In [None]:
# Distribution of the prediction offsets.
sns.histplot(y_pred, x='offset')

## Error Estimate

In [None]:
# Absolute difference between the 'spatial_error' column of the predictions
# and the measured offset.
y_pred['offset_error'] = (y_pred['spatial_error'] - y_pred['offset']).abs()

In [None]:
# Offset error per camera.
# seaborn deprecates passing `palette` without `hue`; assigning the x variable
# to `hue` and disabling the legend gives the same per-category coloring.
sns.swarmplot(
    y_pred,
    x='camera_num',
    y='offset_error',
    hue='camera_num',
    palette=palette,
    legend=False,
)

## Error Estimate, No Direct Estimate

In [None]:
# Same error measures for the no-direct-estimate predictions: the Euclidean
# offset from the actual (x_min, y_max) position, then the absolute difference
# between the 'spatial_error' column and that offset.
position_cols = ['x_min', 'y_max']
diffs = y_pred_nod[position_cols] - y.loc[test_inds, position_cols]
y_pred_nod['offset'] = np.sqrt(diffs.pow(2.).sum(axis=1))
y_pred_nod['offset_error'] = (
    y_pred_nod['spatial_error'] - y_pred_nod['offset']
).abs()

In [None]:
# Offset error per camera for the no-direct-estimate model.
# seaborn deprecates passing `palette` without `hue`; assigning the x variable
# to `hue` and disabling the legend gives the same per-category coloring.
sns.swarmplot(
    y_pred_nod,
    x='camera_num',
    y='offset_error',
    hue='camera_num',
    palette=palette,
    legend=False,
)