# Notes

If the approximate georeferencing is perfect (exactgeo),
everything works perfectly,
even for large paddings (up to 2).


With the following settings we can get some pretty good results.

- Only referenced images.
- Exact georeferencing with small padding.

The following variations in the number of training samples give these results:

- Near perfect: 10 training samples per camera.
- Very good: 10 training samples for the nadir camera. 1 training sample per other camera.
- Good: 10 training samples for the nadir camera only.
- Okay: 5 training samples per camera.


With the following settings we can get some pretty good results.

- Only referenced images.
- Approximate georeferencing with 0.5 padding.
- 10 training samples per camera

The following variations on these settings:

- 1.0 padding


# Setup


## Imports


In [None]:
import os
import warnings

In [None]:
import numpy as np
import pandas as pd
import yaml

In [None]:
import matplotlib.pyplot as plt
import matplotlib.transforms as plt_transforms
import seaborn as sns

In [None]:
from night_horizons import utils

## Settings


In [None]:
# Read the notebook-wide settings from the YAML config file in the
# working directory.
with open('./config.yml', mode='r', encoding='UTF-8') as config_file:
    settings = yaml.full_load(config_file)

In [None]:
# Notebook-specific values, merged into (and overriding) the loaded settings.
local_settings = dict(expected_count=15000)
settings.update(local_settings)

In [None]:
# Silence FutureWarning noise until seaborn gets reinstalled.
warnings.simplefilter('ignore', FutureWarning)

# Extract, Transform, Load


## Load Data


In [None]:
# Locate the y_pred CSV files under the data directory, then read each one
# with its first column as the index.
y_pred_fps = utils.discover_data(settings['data_dir'], ['csv'], 'y_pred')
data = {'y_pred': [pd.read_csv(fp, index_col=0) for fp in y_pred_fps]}

In [None]:
# Get the other data: the tables whose paths are obtained by swapping
# 'y_pred' for the table name. data['y_pred'] has already been read from
# these same paths, so it is not read a second time here.
# regex=False makes the substitution a plain literal replacement on every
# pandas version.
# NOTE(review): every occurrence of 'y_pred' in the path is replaced,
# directory names included -- confirm the data directory never contains it.
for var_name in ['y_train', 'y_test', 'X_train', 'X_test']:
    var_fps = y_pred_fps.str.replace('y_pred', var_name, regex=False)
    data[var_name] = [pd.read_csv(fp, index_col=0) for fp in var_fps]

In [None]:
# Now get settings: derive the paths of the two YAML files that sit
# alongside each y_pred CSV (mosaic settings and notebook settings).
# regex=False keeps the '.' in 'y_pred.csv' a literal dot; under the regex
# default of older pandas versions it would match any character.
mosaic_settings_fps = y_pred_fps.str.replace(
    'y_pred.csv', 'settings.yaml', regex=False)
nb_settings_fps = y_pred_fps.str.replace(
    'y_pred.csv', 'nbsettings.yaml', regex=False)

mosaic_settings = []
nb_settings = []
# Walk both path lists in lockstep so the two result lists stay aligned.
for mosaic_fp, nb_fp in zip(mosaic_settings_fps, nb_settings_fps):
    with open(mosaic_fp, "r", encoding='UTF-8') as file:
        mosaic_settings.append(yaml.load(file, Loader=yaml.FullLoader))

    with open(nb_fp, "r", encoding='UTF-8') as file:
        nb_settings.append(yaml.load(file, Loader=yaml.FullLoader))
    

In [None]:
# Finally, the logs: one CSV per y_pred file, found by swapping the
# 'y_pred.csv' suffix for 'log.csv'.
# regex=False keeps the '.' in the pattern a literal dot on every pandas
# version (older versions default to regex matching).
log_fps = y_pred_fps.str.replace('y_pred.csv', 'log.csv', regex=False)
logs = [pd.read_csv(fp, index_col=0) for fp in log_fps]

## Extract Quantities


In [None]:
# Summary table with one row per y_pred file path.
df = pd.DataFrame(dict(y_pred_fp=y_pred_fps))

### Settings


In [None]:
# Save identifier: the base name of each y_pred file with its
# '_y_pred.csv' suffix swapped for '.tiff'.
# regex=False keeps the '.' in the pattern a literal dot on every pandas
# version (older versions default to regex matching).
df['filename'] = (
    df['y_pred_fp']
    .apply(os.path.basename)
    .str.replace('_y_pred.csv', '.tiff', regex=False)
)

In [None]:
# Relevant settings: copy selected notebook settings of every run into
# that run's row of the summary table.
n_cams = 3
ts_cols = [f'cam{cam}_train_size' for cam in range(n_cams)]
for row, run_settings in enumerate(nb_settings):
    # One train_size entry per camera column.
    for cam, ts_col in enumerate(ts_cols):
        df.loc[row, ts_col] = run_settings['train_size'][cam]

    df.loc[row, 'padding_fraction'] = run_settings['padding_fraction']
    df.loc[row, 'use_approximate_georeferencing'] = (
        run_settings['use_approximate_georeferencing']
    )

# Total train size, summed over the per-camera columns.
df['train_size'] = df[ts_cols].sum(axis=1)

### Evaluation Metrics


In [None]:
# Return codes: for every run, count how often each return code occurs in
# its log and store the result in a '<code>_count' column, alongside the
# total number of log rows in 'count'.
count_cols = []
for i, log in enumerate(logs):
    ret_counts = log['return_code'].value_counts()
    # Assign one column at a time: scalar assignment through .loc creates
    # a missing column on demand.
    for code, n_code in ret_counts.items():
        count_col = f'{code}_count'
        df.loc[i, count_col] = n_code
        if count_col not in count_cols:
            count_cols.append(count_col)
    df.loc[i, 'count'] = len(log)

# A code that never occurred in a given run was never assigned for that
# row and would be left as NaN; record it as zero occurrences instead so
# the derived fractions are 0 rather than NaN.
if count_cols:
    df[count_cols] = df[count_cols].fillna(0)

In [None]:
# Fractions: divide every '*_count*' column by the run's total 'count' and
# store the result under the same name with 'count' replaced by 'frac'.
count_like_cols = [name for name in df.columns if '_count' in name]
for count_col in count_like_cols:
    frac_col = count_col.replace('count', 'frac')
    df[frac_col] = df[count_col] / df['count']

In [None]:
# Offset: median and 16th/84th percentiles (NaNs ignored) of each run's
# 'offset' column.
# NOTE(review): the tables iterated here come from data['y_test'] --
# confirm that this, rather than data['y_pred'], is the intended source.
offset_percentiles = {'offset_low': 16., 'offset_high': 84.}
for run, table in enumerate(data['y_test']):
    df.loc[run, 'offset_median'] = table['offset'].median()
    for stat_col, pct in offset_percentiles.items():
        df.loc[run, stat_col] = np.nanpercentile(table['offset'], pct)

In [None]:
# Durations, summarised per run from its log.
# Note that the 'avg_*' columns hold medians, not means.
for run, run_log in enumerate(logs):
    df.loc[run, 'avg_join_duration'] = run_log['duration'].median()
    iter_durations = run_log['iter_duration']
    df.loc[run, 'avg_iter_duration'] = iter_durations.median()
    df.loc[run, 'total_iter_duration'] = iter_durations.sum()

# Projected duration of settings['expected_count'] iterations at the median
# per-iteration duration, rounded to one decimal.
# NOTE(review): the division by 3600 presumably converts seconds to hours
# -- confirm the unit of 'iter_duration'.
expected_total = df['avg_iter_duration'] * settings['expected_count']
df['expected_iter_duration_hr'] = (expected_total / 3600.).round(1)

# Summarize


In [None]:
# Summary-table columns to compare: x_vars are the inputs, y_vars the
# evaluation metrics.
x_vars = ['train_size', 'padding_fraction']
y_vars = ['success_frac']
y_vars += ['offset_median', 'offset_low', 'offset_high']
y_vars += ['expected_iter_duration_hr']

In [None]:
# Axis scales: the three offset statistics use logarithmic axes.
offset_vars = ['offset_median', 'offset_low', 'offset_high']
scales = dict.fromkeys(offset_vars, 'log')

# Axis limits: all offset statistics share one range, from 1 up to the
# largest 'offset_high' value in the summary table.
offset_range = (1, df['offset_high'].max())
limits = {'success_frac': (0.9, 1.0)}
limits.update(dict.fromkeys(offset_vars, offset_range))

In [None]:
# Accepted (lower, upper) range for each variable.
acceptance_cuts = dict(
    success_frac=(0.97, 1.0),
    offset_median=(0., 100.),
    expected_iter_duration_hr=(0., 7.),
    train_size=(0, 40),
)

In [None]:
# Make the overall grid: one scatter panel for every (y_var, x_var) pair,
# with hue taken from the 'use_approximate_georeferencing' column.
g = sns.PairGrid(
    data=df,
    x_vars=x_vars,
    y_vars=y_vars,
    hue='use_approximate_georeferencing',
)
g.map(sns.scatterplot)
g.add_legend()

# Per-axis adjustments
for y_var, row_axes in zip(y_vars, g.axes):
    for x_var, ax in zip(x_vars, row_axes):

        # Shade the accepted y range as a horizontal band. The x values are
        # in axes coordinates (0 to 1), the y values in data coordinates.
        if y_var in acceptance_cuts:
            y_lo, y_hi = acceptance_cuts[y_var]
            ax.fill_between(
                x=[0, 1],
                y1=[y_lo, y_lo],
                y2=[y_hi, y_hi],
                transform=plt_transforms.blended_transform_factory(
                    ax.transAxes,
                    ax.transData,
                ),
                color='k',
                alpha=0.2,
            )

        # Shade the accepted x range as a vertical band. The x values are
        # in data coordinates, the y values in axes coordinates (0 to 1).
        if x_var in acceptance_cuts:
            x_lo, x_hi = acceptance_cuts[x_var]
            ax.fill_betweenx(
                x1=[x_lo, x_lo],
                x2=[x_hi, x_hi],
                y=[0, 1],
                transform=plt_transforms.blended_transform_factory(
                    ax.transData,
                    ax.transAxes,
                ),
                color='k',
                alpha=0.2,
            )

        # Apply any configured scale and limits, first for y and then for x.
        if y_var in scales:
            ax.set_yscale(scales[y_var])
        if y_var in limits:
            ax.set_ylim(limits[y_var])

        if x_var in scales:
            ax.set_xscale(scales[x_var])
        if x_var in limits:
            ax.set_xlim(limits[x_var])

In [None]:
# Identify parameter sets that fit the criteria: a row is kept only when
# every cut variable lies inside its accepted range (bounds included).
valid = np.ones(len(df.index), dtype=bool)
for var, (cut_lo, cut_hi) in acceptance_cuts.items():
    valid = valid & df[var].between(cut_lo, cut_hi)
valid_df = df.loc[valid]

In [None]:
# Display the rows that passed every acceptance cut.
valid_df