# Referenced Mosaic

# Setup

## Imports

In [None]:
import os

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.utils import check_random_state
import yaml

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's plain white theme for every figure in this notebook.
sns.set_style(style='white')

In [None]:
from night_horizons import utils, preprocess, mosaic, raster

## Settings

In [None]:
# Read the shared settings from the YAML config in the working directory.
# NOTE(review): yaml.FullLoader can construct more than plain data types. If
# config.yml only holds plain scalars/lists/dicts, yaml.safe_load would be the
# safer choice — confirm before switching.
with open('./config.yml', mode='r', encoding='UTF-8') as file:
    settings = yaml.load(stream=file, Loader=yaml.FullLoader)

In [None]:
# Notebook-specific values; they take precedence over same-named entries
# loaded from the config file.
local_settings = dict(
    mosaic_filepath='mosaics/referenced.tiff',
    overwrite=True,
    random_state=16849,
)
settings.update(**local_settings)

## Parse Settings

In [None]:
# Anchor the mosaic path, given relative to the data directory, under data_dir.
settings['mosaic_filepath'] = os.path.join(
    settings['data_dir'],
    settings['mosaic_filepath'],
)

In [None]:
# Promote every data-dir-relative path to a top-level setting holding the
# path joined onto data_dir.
relative_paths = settings['paths_relative_to_data_dir']
for key in relative_paths:
    relpath = relative_paths[key]
    settings[key] = os.path.join(settings['data_dir'], relpath)

In [None]:
# Turn the configured seed into a random-state object so that the random
# choices made later in the notebook are reproducible.
random_state = check_random_state(seed=settings['random_state'])

In [None]:
# Discover the referenced GeoTIFFs, one lookup per numeric filename suffix
# (0, 1, 2), keyed by that suffix.
referenced_fps = {
    i: utils.discover_data(
        settings['referenced_images_dir'],
        ['tif', 'tiff'],
        # The dot is escaped so that it only matches a literal '.' before the
        # extension; unescaped, it would match any character.
        pattern=rf'Geo\s\d+_{i}\.tif',
    )
    for i in range(3)
}
# Concatenating a dict makes its keys the outer level of the resulting index.
referenced_fps = pd.concat(referenced_fps)

In [None]:
# Stop here if no referenced images were discovered.
assert len(referenced_fps) >= 1

# Extract/Transform/Load

In [None]:
# Preprocessor from night_horizons.preprocess; it is fit on and applied to
# the discovered GeoTIFF filepaths in the next cell.
preprocessing = preprocess.GeoTIFFPreprocesser()

In [None]:
# Fit the preprocessor on the discovered filepaths and transform them.
# Later cells read the 'filepath', 'x_min', 'x_max', 'y_min' and 'y_max'
# entries of the rows of X.
X = preprocessing.fit_transform(referenced_fps)

# Build Mosaic

In [None]:
# Start from a clean slate: delete a previously built mosaic file, but only
# when overwriting is enabled in the settings.
if os.path.isfile(settings['mosaic_filepath']):
    if settings['overwrite']:
        os.remove(settings['mosaic_filepath'])

In [None]:
# Create the referenced-mosaic model for the resolved mosaic file path.
# NOTE(review): presumably this is the file the mosaic is written to (an
# existing file at this path is removed above when overwriting) — confirm.
model = mosaic.ReferencedMosaic(settings['mosaic_filepath'])

In [None]:
# Fit the mosaic model on the preprocessed images. Left as a bare expression
# so that the notebook displays the returned value.
model.fit_transform(X)

# Evaluate

## Individual Inspection

In [None]:
# Draw one index label at random from X, using the seeded random state so the
# pick is reproducible, and fetch the matching row for a visual spot check.
ind = random_state.choice(X.index)
row = X.loc[ind]

In [None]:
# Load the selected source image, then read the same region back out of the
# mosaic using the bounds stored on the selected row.
original_img = utils.load_image(row['filepath'])
bounds_columns = ('x_min', 'x_max', 'y_min', 'y_max')
mosaic_img = model.get_image_with_bounds(
    *(row[column] for column in bounds_columns)
)

In [None]:
# Side-by-side comparison: the source image next to the mosaic cut-out.
subplot_mosaic = [['original', 'mosaic']]
n_rows, n_cols = len(subplot_mosaic), len(subplot_mosaic[0])

# 10 x 10 inches per panel.
figure_size = (10*n_cols, 10*n_rows)
fig = plt.figure(figsize=figure_size)
ax_dict = fig.subplot_mosaic(subplot_mosaic)

# Left panel: the image as loaded from disk.
ax = ax_dict['original']
ax.imshow(original_img)

# Right panel: the same bounds as read from the mosaic.
ax = ax_dict['mosaic']
ax.imshow(mosaic_img)

## Overall Scores

In [None]:
# Score the mosaic against the input images.
# NOTE(review): the variable name suggests the return value is a median of
# per-image scores; the per-image values are read from model.scores_ in the
# following cells — confirm against ReferencedMosaic.score.
r_median = model.score(X)

In [None]:
# Distribution of the individual scores.
sns.histplot(data=model.scores_)

In [None]:
# Individual scores plotted against their position in model.scores_.
sns.scatterplot(x=np.arange(len(model.scores_)), y=model.scores_)

TODO: For a proper comparison between the less-referenced and the referenced mosaics, we must aim to replicate the order at least partially.

## Calculate the area

In [None]:
# Read the entire mosaic into memory.
# NOTE(review): the arguments look like (x_offset, y_offset, x_size, y_size),
# i.e. a window that starts at the origin and spans the full extent — confirm
# against ReferencedMosaic.get_image.
full_mosaic_img = model.get_image(0, 0, model.x_size_, model.y_size_)

In [None]:
# Flag every mosaic value that holds data, i.e. is not (close to) zero.
is_not_empty = ~np.isclose(full_mosaic_img, 0.)
# A multi-band image has one value per band for each pixel. Collapse the band
# axis so that each pixel is counted once when the mask is summed for the
# area; otherwise the area is inflated by up to the number of bands.
# Assumes a channel-last (rows, cols, bands) layout, matching what imshow
# accepts for the images shown earlier — TODO confirm for get_image.
if is_not_empty.ndim == 3:
    is_not_empty = is_not_empty.any(axis=-1)

In [None]:
# Area of one pixel. abs() keeps the area positive whatever the sign
# convention of the pixel height; the previous hard-coded negation gave a
# negative area if the height was stored as a positive number.
pixel_area = abs(model.pixel_height_ * model.pixel_width_)
# Covered area = number of non-empty entries times the area of one pixel.
full_area = is_not_empty.sum() * pixel_area
# Convert to km^2 — assumes the pixel dimensions are in metres, TODO confirm.
full_area_km = full_area / (1000.)**2.
print(f'The area covered by the referenced mosaic is {full_area_km:.2g} km^2')

In [None]:
# Reference area in km^2, looked up on Google.
actual_area_km = 953.0

In [None]:
# Only a lower bound is checked: the imaged region also includes the
# surroundings of Indianapolis, so the mosaic should cover more than the
# reference area.
assert actual_area_km < full_area_km

# Close

In [None]:
# Close the mosaic model now that the evaluation is done.
# NOTE(review): presumably this releases the open mosaic file — confirm.
model.close()