# Sensor-based Georeferencing

# Setup

## Imports

In [None]:
import os

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's 'whitegrid' style to the plots created later in the notebook.
sns.set_style('whitegrid')

In [None]:
from night_horizons import preprocess, reference

## Settings

In [None]:
# Root of the data tree and the flight being processed. Every path in
# `settings` is derived from these two values, so pointing the notebook at a
# different machine or flight is a one-line change instead of five.
DATA_ROOT = '/Users/Shared/data/nitelite'
FLIGHT_ID = '220513-FH135'

settings = {
    # Directories that are searched for image files.
    'image_dir': f'{DATA_ROOT}/images/{FLIGHT_ID}/raw',
    'referenced_image_dir': f'{DATA_ROOT}/images/{FLIGHT_ID}/referenced',
    # Log files handed to the metadata preprocessor.
    'img_log_fp': f'{DATA_ROOT}/metadata/{FLIGHT_ID}/image.log',
    'imu_log_fp': f'{DATA_ROOT}/metadata/{FLIGHT_ID}/PresIMULog.csv',
    'gps_log_fp': f'{DATA_ROOT}/metadata/{FLIGHT_ID}/GPSLog.csv',
}

## Parse Settings

In [None]:
# Image files found under the raw image directory.
# NOTE(review): the list presumably names the accepted file extensions --
# confirm against preprocess.discover_data.
fps = preprocess.discover_data(settings['image_dir'], ['raw', 'tif', 'tiff'])

# Image files found under the referenced directory, restricted to names such
# as "Geo 123456_1.tif". The dot before "tif" is escaped so that it matches
# only a literal "."; unescaped, it would match any character.
referenced_fps = preprocess.discover_data(
    settings['referenced_image_dir'],
    ['tif', 'tiff'],
    pattern=r'Geo\s\d+_\d\.tif',
)

# Extract/Transform/Load

## X values

In [None]:
# Build the metadata preprocessor. Later cells read 'filepath', 'sensor_x' and
# 'sensor_y' from its output (X).
# NOTE(review): presumably the list names the columns the preprocessor should
# return -- confirm against NITELitePreprocesser.
metadata_preprocessing = preprocess.NITELitePreprocesser(['filepath', 'sensor_x', 'sensor_y'])

In [None]:
# Run the metadata preprocessor over the referenced image paths, passing the
# three log-file paths from `settings` as keyword arguments of the same name.
log_fps = {
    key: settings[key]
    for key in ('img_log_fp', 'imu_log_fp', 'gps_log_fp')
}
X = metadata_preprocessing.fit_transform(referenced_fps, **log_fps)

## Y values
We use X['filepath'] as input because not all of the referenced filepaths make it through the X preprocessing successfully.
We only want to look at the ys for which we have xs.

In [None]:
# Preprocessor that turns image filepaths into the y values used as the fit
# targets and evaluation reference further down.
y_pipeline = preprocess.GeoTIFFPreprocesser()

In [None]:
# Feed in the filepaths from X rather than the full referenced list, so y is
# only built for images that also have X values.
y = y_pipeline.fit_transform(X['filepath'])

# Model

In [None]:
# Hold out 20% of the rows for evaluation. The split is made over X's index
# labels so the same labels can select matching rows from both X and y.
# A fixed seed keeps the split -- and therefore the error figures computed
# below -- reproducible across notebook runs; change it to draw another split.
train_inds, test_inds = train_test_split(
    X.index,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Georeferencing model; it is fitted on the sensor_x / sensor_y columns below.
model = reference.SensorGeoreferencer()

In [None]:
# Training features: the two sensor position columns for the training rows.
X_train = X.loc[train_inds, ['sensor_x', 'sensor_y']]
# Training targets: the geotransform columns for the same rows.
y_train = y.loc[train_inds, preprocess.GEOTRANSFORM_COLS]

model.fit(X=X_train, y=y_train)

In [None]:
# Predict for the held-out rows, using the same two feature columns the model
# was fitted on.
y_pred = model.predict(X.loc[test_inds, ['sensor_x', 'sensor_y']])

# Evaluate

In [None]:
# Per-image error between the predicted and reference (x_min, y_max) values.
# NOTE(review): assumes y_pred carries the same index labels as test_inds; if
# these are pandas objects the subtraction aligns on index -- confirm.
corner_cols = ['x_min', 'y_max']
diffs = y_pred[corner_cols] - y.loc[test_inds, corner_cols]

# Euclidean length of each row's (dx, dy) difference.
squared_offsets = (diffs**2.).sum(axis=1)
offsets = np.sqrt(squared_offsets)

In [None]:
# Histogram of the per-image offsets between prediction and reference.
sns.histplot(offsets)

In [None]:
# Overlay the predicted and the reference (x_min, y_max) values for the
# held-out rows on a single set of axes.
fig, ax = plt.subplots()

# The frame is passed as `data=` by keyword rather than positionally: older
# seaborn releases do not accept it as the first positional argument of
# scatterplot, whereas the keyword form works across versions.
for label, corners in (
    ('predicted', y_pred),
    ('actual', y.loc[test_inds]),
):
    sns.scatterplot(
        data=corners,
        x='x_min',
        y='y_max',
        label=label,
        ax=ax,
    )

# Equal scaling on both axes so distances in the plot are not distorted.
ax.set_aspect('equal')