# Referenced Mosaic


# Setup


## Imports


In [None]:
import os
import shutil

In [None]:
import numpy as np
import pandas as pd
from sklearn.utils import check_random_state
import yaml

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's 'white' style for the plots drawn in this notebook.
sns.set_style('white')

In [None]:
from night_horizons import mosaickers, preprocessers, utils

## Settings


In [None]:
# Read the shared configuration file into the `settings` dictionary.
# NOTE(review): the full (not the safe) YAML loader is used, so config.yml
# must be a trusted file.
with open('./config.yml', "r", encoding='UTF-8') as config_file:
    settings = yaml.full_load(config_file)

In [None]:
# Notebook-specific values; these are merged into (and take precedence over)
# whatever is already stored in `settings`.
local_settings = dict(
    out_dir='mosaics/referenced',
    overwrite=True,
    random_state=16849,
    camera_nums=[0, 1, 2],
)
settings.update(local_settings)

## Parse Settings


In [None]:
# Turn the output directory into a path underneath the data directory.
settings['out_dir'] = os.path.join(
    settings['data_dir'],
    settings['out_dir'],
)

In [None]:
# Every entry of 'paths_relative_to_data_dir' becomes a top-level setting
# holding the corresponding path joined onto the data directory.
for setting_name, relative_path in settings['paths_relative_to_data_dir'].items():
    settings[setting_name] = os.path.join(settings['data_dir'], relative_path)

In [None]:
# Convert the configured seed into a random-state object via scikit-learn's
# check_random_state; it is used below to pick an image for inspection.
random_state = check_random_state(settings['random_state'])

In [None]:
# Discover the referenced images of each camera listed in
# settings['camera_nums'].
#
# File names are matched against "Geo", one whitespace character, a run of
# digits, an underscore, the camera number and ".tif". The dot is escaped so
# that it only matches a literal "." rather than any character.
# NOTE(review): this assumes utils.discover_data treats `pattern` as a
# regular expression — confirm against its implementation.
referenced_fps = {
    i: utils.discover_data(
        settings['referenced_images_dir'],
        ['tif', 'tiff'],
        pattern=rf'Geo\s\d+_{i}\.tif',
    )
    for i in settings['camera_nums']
}
# Combine the per-camera results into one pandas object; pd.concat on a dict
# uses the dict keys (the camera numbers) as the outer index level.
referenced_fps = pd.concat(referenced_fps)

In [None]:
# Stop early, with an explanatory message, if no referenced images were found;
# everything below needs at least one image.
assert len(referenced_fps) > 0, (
    f"No referenced images found in {settings['referenced_images_dir']}"
)

In [None]:
# Files (relative to the output directory) that are checked for existence
# after the mosaic has been built.
test_filenames = dict(
    mosaic='mosaic.tiff',
    settings='settings.yaml',
    checkpoint=os.path.join('checkpoints', f"mosaic_i{100:06d}.tiff"),
)

# The same files as full paths inside the output directory.
test_fps = {
    name: os.path.join(settings['out_dir'], filename)
    for name, filename in test_filenames.items()
}

# Extract/Transform/Load


In [None]:
# Preprocessing step that is applied to the referenced image file paths below.
preprocessing = preprocessers.GeoTIFFPreprocesser()

In [None]:
# Run the preprocessing step on the discovered file paths. The result, X, is
# what the mosaicker is fitted on and scored against below.
X = preprocessing.fit_transform(referenced_fps)

# Build Mosaic


In [None]:
# When overwriting is enabled, delete an existing output directory so the
# mosaic is built from scratch.
if os.path.isdir(settings['out_dir']):
    if settings['overwrite']:
        shutil.rmtree(settings['out_dir'])

In [None]:
# Create the mosaicker, pointing it at the output directory.
mosaicker = mosaickers.Mosaicker(settings['out_dir'])

In [None]:
# Build the mosaic from the preprocessed images.
mosaicker.fit_transform(X)

# Evaluate


## Check Output Exists


In [None]:
# Verify that each expected output file was written. The checkpoint file is
# only required when there are at least 100 referenced images.
too_few_for_checkpoint = len(referenced_fps) < 100
for key, fp in test_fps.items():
    if not (too_few_for_checkpoint and key == 'checkpoint'):
        assert os.path.isfile(fp), f'Did not find file {key}: {fp}'

## Individual Inspection


In [None]:
# Pick one entry of X at random (reproducibly, via the seeded random state)
# for a side-by-side visual check.
ind = random_state.choice(X.index)
row = X.loc[ind]

In [None]:
# Load the source image of the selected row.
original_img = utils.load_image(row['filepath'])

# Open the mosaic and read out the region given by the row's bounds, so the
# two images can be compared.
dataset = mosaicker.open_dataset()
mosaic_img = mosaicker.get_image_with_bounds(
    dataset,
    row['x_min'],
    row['x_max'],
    row['y_min'],
    row['y_max'],
)

In [None]:
# Panels to draw (left to right) and the image shown in each of them.
panel_images = {'original': original_img, 'mosaic': mosaic_img}
subplot_mosaic = [list(panel_images)]
n_rows, n_cols = len(subplot_mosaic), len(subplot_mosaic[0])

# Reserve a 10x10 inch area per panel.
fig = plt.figure(figsize=(10 * n_cols, 10 * n_rows))
ax_dict = fig.subplot_mosaic(subplot_mosaic)

# Show the original image next to the matching region of the mosaic.
for panel_name, panel_img in panel_images.items():
    ax = ax_dict[panel_name]
    ax.imshow(panel_img)

## Overall Scores


In [None]:
# Score the mosaic against the input images. The per-image scores plotted
# below are read from mosaicker.scores_.
# NOTE(review): the name suggests the return value is a median score — confirm.
r_median = mosaicker.score(X)

In [None]:
# Distribution of the scores.
sns.histplot(mosaicker.scores_)

In [None]:
# Scores plotted against their position in the scores array.
scores = mosaicker.scores_
sns.scatterplot(x=np.arange(len(scores)), y=scores)

## Calculate the area


In [None]:
# Read the whole mosaic: start at offset (0, 0) and span the full x/y size.
full_mosaic_img = mosaicker.get_image(
    dataset,
    0,
    0,
    mosaicker.x_size_,
    mosaicker.y_size_,
)

In [None]:
# Mark every array entry that is not (numerically) zero as filled.
# NOTE(review): if full_mosaic_img has a band/channel axis, this mask has one
# entry per band, so summing it below counts each pixel once per non-zero
# band — confirm the array shape and whether that is intended.
is_not_empty = ~np.isclose(full_mosaic_img, 0.)

In [None]:
# Covered area = number of filled entries times the area of a single pixel.
# NOTE(review): pixel_height_ is negated, presumably because it is stored as a
# negative value — confirm.
n_not_empty = is_not_empty.sum()
full_area = n_not_empty * -mosaicker.pixel_height_ * mosaicker.pixel_width_

# Convert to km^2; this assumes the pixel sizes are in metres — TODO confirm.
full_area_km = full_area / 1.0e6
print(f'The area covered by the referenced mosaic is {full_area_km:.2g} km^2')

In [None]:
# Reference area in km^2, looked up on Google
# (presumably the area of Indianapolis — confirm).
actual_area_km = 953.0

In [None]:
# The imagery also covers the surroundings of Indianapolis, so the mosaic area
# is only required to exceed the reference value, not to match it.
assert full_area_km > actual_area_km

## Cleanup


In [None]:
# Flush the dataset's cache and drop our reference to it.
# NOTE(review): presumably a GDAL dataset, for which dropping the last
# reference closes the file — confirm.
dataset.FlushCache()
dataset = None