# Pipeline for estimating lake area

### 0. Import libraries and load data

In [1]:
%reload_ext autoreload
%matplotlib inline
%autoreload 2

from src.model_trainer import *
from src.data_loader import *
from src.lake_analyzer import *

import numpy as np
import pandas as pd
from tqdm import tqdm

# lakes passed to load_lakes() to build the training / cross-validation set
TRAIN_LAKES = [
    "abert",
    "mono",
    "nakuru",
]

# lake passed to load_lakes() to build the held-out test set
TEST_LAKES = [
    "walker",
]

# number of percentile grid points evaluated as candidate thresholds
THRESHOLDS_NUMBER = 10


In [2]:
# Load the training lakes. load_lakes() returns two values: the data itself and
# data_indices, which is later handed to compute_cross_validation_scores().
# NOTE(review): presumably data_indices groups sample indices per lake/fold — confirm in src.data_loader.
train_data, data_indices = load_lakes(TRAIN_LAKES)
# Load the test lake; the second return value is not needed and is discarded.
test_data, _ = load_lakes(TEST_LAKES)

### 1. Train and optimize lake detector with cross-validation

In [3]:
f1_scores = []
kappa_scores = []

# Candidate thresholds are percentiles of the training water-index values.
# The values are extracted once up front: get_water_index_values(train_data)
# does not depend on the percentile, so there is no reason to re-run it for
# every grid point.
# NOTE(review): element [2] of the helper's return value is used as-is — confirm
# against src that this is the array of index values, and that the helper is
# deterministic for a fixed train_data.
water_index_values = get_water_index_values(train_data)[2]

# THRESHOLDS_NUMBER evenly spaced percentiles from the 20th up to the 100th
# (the 100th percentile is the maximum observed value)
thresholds = [
    np.percentile(water_index_values, percentile)
    for percentile in np.linspace(20, 100, THRESHOLDS_NUMBER)
]

# score every candidate threshold
for threshold in tqdm(thresholds):
    # F1 and kappa for this threshold; presumably averaged over the folds
    # described by data_indices (see compute_cross_validation_scores)
    avg_f1, avg_kappa = compute_cross_validation_scores(train_data, data_indices, threshold)

    f1_scores.append(avg_f1)
    kappa_scores.append(avg_kappa)

# one row per threshold, highest F1 first; the trailing expression renders the table
threshold_scores = pd.DataFrame(
    {"threshold": thresholds, "f1_score": f1_scores, "kappa_score": kappa_scores}
).sort_values("f1_score", ascending=False)
threshold_scores



100%|██████████| 10/10 [01:02<00:00,  6.26s/it]


Unnamed: 0,threshold,f1_score,kappa_score
6,0.129791,0.923993,0.804776
5,-0.376037,0.898843,0.747846
7,0.460974,0.861658,0.647398
4,-0.578368,0.825355,0.598186
8,0.485696,0.79202,0.427661
3,-0.721561,0.741499,0.45475
2,-0.831801,0.648653,0.328945
9,0.913376,0.62831,0.0
1,-0.923122,0.545651,0.22422
0,-1.020905,0.427203,0.14007


In [4]:
# Pick the threshold with the highest F1 score. The row is looked up by value
# (idxmax) instead of by position, so the choice does not depend on
# threshold_scores still being sorted by f1_score when this cell runs.
best_threshold = threshold_scores.loc[threshold_scores["f1_score"].idxmax(), "threshold"]

# get predictions for the train dataset using that threshold
train_detections = predict(train_data, best_threshold)

### 2. Test lake detector

In [5]:
# get predictions for the test dataset with the threshold selected on the train data
test_detections = predict(test_data, best_threshold)

# Compute the test metrics with best_threshold by reusing the cross-validation
# scorer: it is given a single index group that lists every position in test_data.
# NOTE(review): avg_f1 / avg_kappa reuse the names assigned inside the threshold
# search loop, so after this cell they hold the test scores.
avg_f1, avg_kappa = compute_cross_validation_scores(
    test_data, [list(range(len(test_data)))], best_threshold
)
print(f"Test F1: {avg_f1}\nTest kappa: {avg_kappa}")


Test F1: 0.9956829623534279
Test kappa: 0.9911412070637154


### 3. Analyze lake evolution

In [6]:
# "abert" is one of TRAIN_LAKES, so the train data and the detections computed
# on it are what analyze_lake() receives.
analyze_lake("abert", train_data, train_detections)


100%|██████████| 3/3 [00:00<00:00,  7.95it/s]


interactive(children=(IntSlider(value=0, description='index', max=2), Output()), _dom_classes=('widget-interac…

interactive(children=(IntSlider(value=0, description='index', max=2), Output()), _dom_classes=('widget-interac…