In [1]:
from pathlib import Path
import json

import pandas as pd
import tensorflow as tf
from keras.models import load_model

from data import constants
from data.eluc_data import ELUCData, ELUCEncoder
from prescriptors.esp.unileaf_prescriptor import UnileafPrescriptor, reco_to_context_actions
from predictors.neural_network.neural_net_predictor import NeuralNetPredictor

### Generate Seeds

In [2]:
def create_template_model():
    """
    Builds the (uncompiled) keras template prescriptor given architecture from paper:
        One single-feature input layer per context variable
        One Dense layer of hidden size 16 per context variable
        The dense outputs are added together and passed through a tanh activation
        A final Dense layer outputs the reco_land_use vector (one unit per constants.RECO_COLS entry)
    :return: the assembled tf.keras.Model
    """
    context_cols = constants.CAO_MAPPING["context"]

    # Inputs are named "<col>_input"; each hidden Dense layer is named after its context column.
    input_layers = [tf.keras.Input(shape=(1,), name=f"{col}_input") for col in context_cols]
    hidden_branches = [
        tf.keras.layers.Dense(16, name=col)(input_layer)
        for col, input_layer in zip(context_cols, input_layers)
    ]

    # Merge the per-variable branches by element-wise addition before the non-linearity.
    summed = tf.keras.layers.Add()(hidden_branches)
    activated = tf.keras.layers.Activation("tanh", name="first_hidden_activation")(summed)

    # No activation on the output layer: the raw linear output is returned.
    reco_output = tf.keras.layers.Dense(len(constants.RECO_COLS), name="reco_land_use")(activated)
    return tf.keras.Model(inputs=input_layers, outputs=reco_output)

def seed_no_change(data_df: pd.DataFrame, encoded_data_df: pd.DataFrame, seed_dir: Path):
    """
    Trains a seed model that attempts to prescribe zero change, i.e. to output the
    existing constants.RECO_COLS values of data_df, and saves it as "1_1.h5" in seed_dir.
    Per the original author, this is feasible because the output is no longer softmaxed
    but linearly scaled instead.
    :param data_df: dataframe holding the constants.RECO_COLS columns used as training targets.
    :param encoded_data_df: dataframe holding the context columns used as model inputs.
    :param seed_dir: directory the trained model is written to (created if missing).
    """
    # One input array per context column, matching the template model's separate input layers.
    context_features = [encoded_data_df[col].values for col in constants.CAO_MAPPING["context"]]
    # Target is the current land use itself, so the network learns to recommend no change.
    current_land_use = data_df[constants.RECO_COLS].to_numpy()

    seed_model = create_template_model()
    seed_model.compile(
        optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.001),
        loss='mean_absolute_error',
        metrics=['mae'],
    )
    seed_model.fit(context_features, current_land_use, epochs=300, batch_size=128, verbose=1)

    seed_dir.mkdir(parents=True, exist_ok=True)
    seed_model.save(seed_dir / "1_1.h5")

def seed_max_change(data_df: pd.DataFrame, encoded_data_df: pd.DataFrame, seed_dir: Path, best_col="secdf"):
    """
    Creates seed model that attempts to prescribe maximum change and saves it as
    "1_2.h5" in seed_dir.
    The training target moves the row-wise total of constants.RECO_COLS entirely into
    best_col (secdf by default) and sets every other recommended column to zero.
    :param data_df: dataframe holding the constants.RECO_COLS columns the targets are built from.
    :param encoded_data_df: dataframe holding the context columns used as model inputs.
    :param seed_dir: directory the trained model is written to (created if missing).
    :param best_col: the column of constants.RECO_COLS that receives all the land use.
    :raises ValueError: if best_col is not one of constants.RECO_COLS.
    """
    # Fail fast: an unknown column would otherwise be appended as an extra target column
    # and only surface later as a shape mismatch inside fit().
    if best_col not in constants.RECO_COLS:
        raise ValueError(f"best_col must be one of {list(constants.RECO_COLS)}, got {best_col!r}")

    # Move all the recommendable land use of each row into best_col
    land_use = data_df[constants.RECO_COLS].sum(axis=1)
    max_change_preds = data_df[constants.RECO_COLS].copy()
    max_change_preds[constants.RECO_COLS] = 0
    max_change_preds[best_col] = land_use

    y_train = max_change_preds.to_numpy()
    # One input array per context column, matching the template model's separate input layers.
    X_train = [encoded_data_df[col].values for col in constants.CAO_MAPPING["context"]]

    max_change_model = create_template_model()
    opt = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
    max_change_model.compile(optimizer=opt, loss='mean_absolute_error', metrics=['mae'])
    max_change_model.fit(X_train, y_train, epochs=300, batch_size=128, verbose=1)

    seed_dir.mkdir(parents=True, exist_ok=True)
    max_change_model.save(seed_dir / "1_2.h5")

In [3]:
# Instantiate the ELUC dataset wrapper; later cells read its train_df, test_df and encoder attributes.
dataset = ELUCData()

In [4]:
# Take a small, fixed-seed subset (1000 rows) of the training data for fitting the seeds:
# we really don't need more, we just need the seed models to converge.
seed_sample = dataset.train_df.sample(1000, random_state=42)
# Encoded counterpart of the sample; the seed functions read their model inputs from it.
encoded_seed_sample = dataset.encoder.encode_as_df(seed_sample)
# Directory the seed models are saved to.
seed_dir = Path("prescriptors/esp/seeds/test")

In [5]:
# Train and save both seed models into seed_dir: the no-change seed is written as "1_1.h5"
# and the max-change seed (default best_col="secdf") as "1_2.h5".
# Each call runs 300 epochs with verbose=1, so this cell prints one progress line per epoch.
seed_no_change(seed_sample, encoded_seed_sample, seed_dir)
seed_max_change(seed_sample, encoded_seed_sample, seed_dir)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

  saving_api.save_model(


Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 7

  saving_api.save_model(


### Evaluate Seeds

In [6]:
def trained_prescribe_and_predict(encoded_context: pd.DataFrame,
                                  prescriptor_path: Path,
                                  encoder: ELUCEncoder,
                                  dummy_prescriptor: UnileafPrescriptor) -> pd.DataFrame:
    """
    Loads a saved prescriptor model, prescribes land use for the given context,
    predicts ELUC for the prescription and computes the fraction of land changed.
    :param encoded_context: encoded context columns to prescribe for.
    :param prescriptor_path: path of the saved keras prescriptor model.
    :param encoder: encoder passed on to reco_to_context_actions.
    :param dummy_prescriptor: prescriptor object whose prescribe / predict_eluc methods are used.
    :return: the context-actions dataframe with added "ELUC" and "change" columns.
    """
    # compile=False: the model is only used for inference here.
    trained_model = load_model(prescriptor_path, compile=False)
    prescription = dummy_prescriptor.prescribe(trained_model, encoded_context)
    reco_df = pd.DataFrame(prescription["reco_land_use"].tolist(), columns=constants.RECO_COLS)

    context_actions_df = reco_to_context_actions(reco_df, encoded_context, encoder)
    # Take the prescribed diffs before predict_eluc is called, as the original code does.
    land_use_diffs = context_actions_df[constants.DIFF_LAND_USE_COLS]
    eluc = dummy_prescriptor.predict_eluc(context_actions_df)

    # Change = sum of the positive diffs divided by the row's total land use.
    positive_diff_total = land_use_diffs.where(land_use_diffs > 0).sum(axis=1)
    total_land_use = context_actions_df[constants.LAND_USE_COLS].sum(axis=1)
    change = positive_diff_total / total_land_use

    # .values assigns positionally, ignoring any index differences.
    context_actions_df["ELUC"] = eluc.values
    context_actions_df["change"] = change.values

    return context_actions_df

In [7]:
# Load a previously trained NeuralNetPredictor from disk; it is passed to
# UnileafPrescriptor in a later cell.
nnp = NeuralNetPredictor()
nnp.load("predictors/neural_network/trained_models/no_overlap_nn")

In [8]:
# Load the prescriptor configuration. JSON is read explicitly as UTF-8 so the result
# does not depend on the platform's default locale encoding.
with open("prescriptors/esp/unileaf_configs/config-loctime-crop-nosoft.json", "r", encoding="utf-8") as f:
    presc_config = json.load(f)
# Build a prescriptor from the config, a single training row, the dataset encoder and the
# loaded predictor. Later cells use it only for its prescribe / predict_eluc methods.
dummy_prescriptor = UnileafPrescriptor(presc_config,
                                   dataset.train_df.iloc[:1],
                                   dataset.encoder,
                                   [nnp])

In [9]:
# Evaluate on a reproducible 1% sample of the test set (fixed random_state).
test_df = dataset.test_df.sample(frac=0.01, random_state=100)
encoded_test_df = dataset.encoder.encode_as_df(test_df)
# Keep only the context columns, in both raw and encoded form.
context_df = test_df[constants.CAO_MAPPING["context"]]
encoded_context_df = encoded_test_df[constants.CAO_MAPPING["context"]]

In [10]:
# Evaluate the no-change seed ("1_1.h5") on the sampled test contexts and report the
# mean predicted ELUC and the mean fraction of land changed.
result_df = trained_prescribe_and_predict(encoded_context_df, seed_dir / "1_1.h5", dataset.encoder, dummy_prescriptor)
print(f"Average ELUC of no change prescriptor on test set: {result_df['ELUC'].mean()}")
print(f"Average change of no change prescriptor on test set: {result_df['change'].mean()}")

Average ELUC of no change prescriptor on test set: 0.061359677324486044
Average change of no change prescriptor on test set: 0.011037839944058702


In [11]:
# Evaluate the max-change seed ("1_2.h5") on the same sampled test contexts.
# NOTE: result_df is reassigned here, replacing the no-change results from the previous cell.
result_df = trained_prescribe_and_predict(encoded_context_df, seed_dir / "1_2.h5", dataset.encoder, dummy_prescriptor)
print(f"Average ELUC of max change prescriptor on test set: {result_df['ELUC'].mean()}")
print(f"Average change of max change prescriptor on test set: {result_df['change'].mean()}")

Average ELUC of max change prescriptor on test set: -24.228178468619262
Average change of max change prescriptor on test set: 0.4239889862987677
