# Pipeline for estimating lake area

### 0. Import libraries and load data

In [None]:
%reload_ext autoreload
%matplotlib inline
%autoreload 2

from src.model_trainer import *
from src.data_loader import *
from src.lake_analyzer import *

import warnings
import numpy as np
import pandas as pd
from tqdm import tqdm
# Suppress every warning raised from this point on (presumably to keep the
# cell outputs readable). NOTE(review): this also hides deprecation and
# numerical warnings from the libraries used below — re-enable when debugging.
warnings.filterwarnings("ignore")

# Lakes whose data is used to fit the detector, and the lake held out for
# testing it.
TRAIN_LAKES = [
    "george",
    "walker",
    "melincue",
]
TEST_LAKES = [
    "mono",
]

In [None]:
# Load the samples for the training lakes and for the held-out test lake.
# The second value returned for the training lakes is handed to the
# cross-validation step as its fold definition; for the test lake it is
# discarded.
train_data, data_indices = load_lakes(TRAIN_LAKES)
test_data, _ = load_lakes(TEST_LAKES)

# A single "fold" listing every sample position, used by the steps that work
# on a whole dataset at once (re-training on the full train set, test scoring).
train_data_indicies = [[*range(len(train_data))]]
test_data_indicies = [[*range(len(test_data))]]

### 1. Train and optimize lake detector with cross-validation

In [None]:
# Run the 3-fold cross-validation of the Random Forest model over the folds in
# data_indices and report the train/validation F1 and kappa scores. The fifth
# return value is not needed here and is dropped.
f1_train, kappa_train, f1_val, kappa_val, _ = cross_validation_rf(
    train_data,
    data_indices,
)
print(
    f"F1 train: {f1_train}\nKappa train: {kappa_train}\n\n"
    f"F1 val: {f1_val}\nKappa val: {kappa_val}"
)

In [None]:
# Re-train the model on the entire train set: the single all-samples fold in
# train_data_indicies is passed instead of the per-fold indices, the four
# metric values are discarded, and only the fifth return value (the classifier)
# is kept for the prediction, test and analysis cells that follow.
_, _, _, _, trained_rf_classifier = cross_validation_rf(train_data, train_data_indicies)


In [None]:
# Get predictions for the train dataset with the re-trained classifier.
# These are in-sample predictions (same data the classifier was fit on); they
# feed the per-lake analysis of the training lakes further down.
train_detections = predict_rf(train_data, trained_rf_classifier)


### 2. Test lake detector

In [None]:
# Get predictions for the test dataset (the lake in TEST_LAKES) using the
# classifier that was re-trained on the full train set; reused later for the
# per-lake analysis of the test lake.
test_detections = predict_rf(test_data, trained_rf_classifier)


In [None]:
# Score the already-trained classifier on the test set by passing it, together
# with the single all-samples test fold, to the cross-validation routine.
# NOTE(review): the original note says "with the current threshold", but no
# threshold is set in the visible cells — confirm where it comes from.
# NOTE(review): the two values kept here are the ones labelled "train" in the
# cross-validation cell; confirm they are the intended test scores when a
# pre-trained classifier is supplied.
f1_test, kappa_test, _, _, _ = cross_validation_rf(
    test_data,
    test_data_indicies,
    trained_rf_classifier,
)
print(
    f"Test F1: {f1_test}\n"
    f"Test kappa: {kappa_test}"
)


### 3. Analyze model

In [None]:
# Inspect the feature importances of the classifier re-trained on the full
# train set.
plot_feature_importance(trained_rf_classifier)

### 4. Analyze lake evolution

In [None]:
# Held-out test lake: analysed with the test data and the detections predicted
# for it, i.e. data the classifier was not trained on.
analyze_lake("mono", test_data, test_detections)


In [None]:
# Training lake: uses the train data and its in-sample detections, so results
# may look more favourable than for the held-out lake.
analyze_lake("george", train_data, train_detections)


In [None]:
# Training lake: uses the train data and its in-sample detections, so results
# may look more favourable than for the held-out lake.
analyze_lake("walker", train_data, train_detections)


In [None]:
# Training lake: uses the train data and its in-sample detections, so results
# may look more favourable than for the held-out lake.
analyze_lake("melincue", train_data, train_detections)
