In [1]:
%run common.ipynb

['LC_Type1', 'LST_Day_1km', 'pr', 'aet', 'NDVI', 'EVI']
['LST_Day_1km', 'pr', 'aet', 'NDVI', 'EVI']
['LC_Type1']


In [2]:
%run tuning_common.ipynb

Using TensorFlow backend


In [3]:
import optuna
import gc

In [4]:
UPSTREAM_OPTUNA_DIR = f'{UPSTREAM_FINETUNE_OPTUNA_DIR}'
SAMPLER_FILE = 'sampler.pkl'
DATABASE_FILE = 'sqlite:///optuna_progress.db'
PARAM_FILE = 'best_param.json'
MODEL_SAVE_DIR = f'{UPSTREAM_OPTUNA_DIR}model'

In [5]:
upstream_train_df = pd.read_csv(f'{DATA_FOLDER}{UPSTREAM_TRAIN_FILENAME}')
upstream_val_df = pd.read_csv(f'{DATA_FOLDER}{UPSTREAM_VAL_FILENAME}')
print(len(upstream_train_df))
print(upstream_train_df.head())
print(len(upstream_val_df))
print(upstream_val_df.head())

141623
   LC_Type1  LST_Day_1km        pr       aet      NDVI       EVI      Gpp
0      12.0    -0.064955  0.092342  0.308087  0.224185  0.727549   9841.0
1      12.0    -0.192976  0.049550  0.340180  0.243270  0.558092  10251.0
2      14.0    -0.149169  0.150901  0.378263  0.483407  0.886860  11607.0
3       9.0    -0.216264  0.454955  0.118528  0.455776  0.549467  12098.0
4       5.0    -0.251888  0.112613  0.357724  0.441817  0.393201  13593.0
23624
   LC_Type1  LST_Day_1km        pr       aet      NDVI       EVI      Gpp
0      12.0    -0.159366  0.513513  0.552418  0.288563  0.770675  11278.0
1      12.0    -0.136329  0.612613  0.493795  0.221051  0.611872   9801.0
2      12.0    -0.034743  0.536036  0.597775  0.144709  0.299340   8328.0
3      14.0    -0.075529  0.774775  0.693196  0.297394  0.671233   9543.0
4       5.0    -0.434290  1.313063  0.653402  0.452072  0.118214  11667.0


In [6]:
upstream_train_df = embed_catgorical_features(upstream_train_df, CATEGORICAL_FEATURES)
upstream_val_df = embed_catgorical_features(upstream_val_df, CATEGORICAL_FEATURES)

In [7]:
upstream_train_tf = df_to_dataset(upstream_train_df[FEATURES + [TARGET_FEATURE]], TARGET_FEATURE, shuffle=True, batch_size=512)
upstream_val_tf = df_to_dataset(upstream_val_df[FEATURES + [TARGET_FEATURE]], TARGET_FEATURE, shuffle=False, batch_size=512)

  dataset[key] = value[:, tf.newaxis]
  dataset[key] = value[:, tf.newaxis]


In [9]:
import json
import os.path


class CustomCallback(tf.keras.callbacks.Callback):
    static_cur_best_result = np.Inf
    def __init__(
        self,
        filepath,
        log_dump_filepath,
        monitor: str = "val_loss",
        mode: str = "auto",
        **kwargs,
    ):
        self.checkpoint = tf.keras.callbacks.ModelCheckpoint(
                                monitor=monitor,
                                mode=mode,
                                save_best_only=True,
                                filepath=filepath)
        self.log_dump_filepath = log_dump_filepath
        if os.path.exists(self.log_dump_filepath):
            with open(self.log_dump_filepath) as f: 
                data = f.read() 
                di = json.loads(data) 
                CustomCallback.static_cur_best_result = di['best_result']
                print(f'read best result {CustomCallback.static_cur_best_result} from {self.log_dump_filepath}')

    def set_params(self, params):
        self.checkpoint.set_params(params)

    def set_model(self, model):
        self.checkpoint.set_model(model)
    
    def on_train_begin(self, logs=None):
        self.checkpoint.on_train_begin(logs)

    def on_train_end(self, logs=None):
        keys = list(logs.keys())

    def on_epoch_begin(self, epoch, logs=None):
        self.checkpoint.on_epoch_begin(epoch, logs)

    def on_epoch_end(self, epoch, logs=None):
        current = logs.get(self.checkpoint.monitor) 
        if self.checkpoint.monitor_op(current, CustomCallback.static_cur_best_result):
            print('+++++ Get a better result, save the model +++++')
            self.checkpoint.on_epoch_end(epoch, logs)
            CustomCallback.static_cur_best_result = current
            # Data to be written
            dictionary = {
                "best_result": current,
            }

            # Serializing json
            # json_object = json.dumps(dictionary, indent=4)
            with open(self.log_dump_filepath, "w") as outfile:
                json.dump(dictionary, outfile)
        else:
            print('+++++ Not as good as the best so far, no saving +++++')
            self.checkpoint.epochs_since_last_save += 1

            

    def on_train_batch_end(self, batch, logs=None):
        self.checkpoint.on_train_batch_end(batch, logs)


In [10]:
def get_model(trial):
    d_embedding = trial.suggest_int('d_embedding', 32, 512, step=8)
    n_layers = trial.suggest_int('n_layers', 1, 8)
    ffn_factor = trial.suggest_uniform('ffn_factor', 2/3, 8/3)
    attention_dropout = trial.suggest_uniform('attention_dropout', 0.0, 0.5)
    ffn_dropout = trial.suggest_uniform('ffn_dropout', 0.0, 0.5)
    residual_dropout = trial.suggest_uniform('residual_dropout', 0.0, 0.2)

    lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3)

    mlp_head_unit_1 = trial.suggest_int('mlp_head_unit_1', 16, 128, step=8)
    mlp_head_unit_2 = trial.suggest_int('mlp_head_unit_1', 16, 128, step=8)

    model = compile_model(
        upstream_train_df,
        d_embedding=d_embedding, 
        n_layers=n_layers, 
        ffn_factor=ffn_factor,
        attention_dropout=attention_dropout,
        ffn_dropout=ffn_dropout,
        residual_dropout=residual_dropout,
        weight_decay=weight_decay,
        lr=lr,
        mlp_head_unit_1=mlp_head_unit_1,
        mlp_head_unit_2=mlp_head_unit_2
    )
    
    return model

In [11]:
def objective(trial):
    model = get_model(trial)
    
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor=finetune_objective[0].name, min_delta=10, mode=finetune_objective[0].direction, patience=20, restore_best_weights=True),
        CustomCallback(
            monitor=finetune_objective[0].name,
            mode=finetune_objective[0].direction,
            filepath=MODEL_SAVE_DIR,
            log_dump_filepath=f'{UPSTREAM_FINETUNE_DIR}log_best_result.txt',
        )
    ]
    

    history = model.fit(
        upstream_train_tf, 
        epochs=100, 
        validation_data=upstream_val_tf,
        callbacks=callbacks
    ) 
    
    # test_df = df_to_dataset(upstream_val_df[FEATURES], shuffle=False, batch_size=1)
    # preds = model.predict(test_df)
    # rmse = mean_squared_error(upstream_val_df[TARGET_FEATURE], preds['output'].ravel(), squared=False)
   
    preds = model.predict(test_df)
    rmse = mean_squared_error(downstream_test_df[TARGET_FEATURE], preds['output'].ravel(), squared=False)
    gc.collect()
    
    return rmse

## Fine tune with RMSE

### Fine tune

In [12]:
import pickle
import logging
import sys

In [13]:
patience = 20

In [None]:
optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))

pruner=optuna.pruners.PatientPruner(None, patience=patience)

if os.path.exists(f'{UPSTREAM_OPTUNA_DIR}{SAMPLER_FILE}'):
    restored_sampler = pickle.load(open(f'{UPSTREAM_OPTUNA_DIR}{SAMPLER_FILE}', "rb"))
    study = optuna.create_study(study_name="optuna_optimizer", direction='minimize', storage=DATABASE_FILE, load_if_exists=True, sampler=restored_sampler, pruner=pruner)
else:
    study = optuna.create_study(study_name="optuna_optimizer", direction='minimize', storage=DATABASE_FILE, load_if_exists=True, pruner=pruner)

study.optimize(objective, n_trials=50)

with open(f'{UPSTREAM_OPTUNA_DIR}{SAMPLER_FILE}', "wb") as fout:
    pickle.dump(study.sampler, fout)
    
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

  pruner=optuna.pruners.PatientPruner(None, patience=patience)
[I 2023-11-27 12:27:45,706] Using an existing study with name 'optuna_optimizer' instead of creating a new one.


Using an existing study with name 'optuna_optimizer' instead of creating a new one.


  ffn_factor = trial.suggest_uniform('ffn_factor', 2/3, 8/3)
  attention_dropout = trial.suggest_uniform('attention_dropout', 0.0, 0.5)
  ffn_dropout = trial.suggest_uniform('ffn_dropout', 0.0, 0.5)
  residual_dropout = trial.suggest_uniform('residual_dropout', 0.0, 0.2)
  lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
  weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3)


read best result 1355.75244140625 from ./model/upstream-finetune_optuna/log_best_result.txt
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
 54/277 [====>.........................] - ETA: 3:34 - loss: 98833904.0000 - importances_loss: 96602984.0000 - output_loss: 2230921.0000 - importances_root_mean_squared_error: 9828.6816 - output_root_mean_squared_error: 1493.6268Epoch 50/100
Epoch 51/100


[I 2023-11-27 16:39:19,269] Trial 26 finished with value: 4948.494741698947 and parameters: {'d_embedding': 232, 'n_layers': 3, 'ffn_factor': 1.3090393122365798, 'attention_dropout': 0.2500039169935181, 'ffn_dropout': 0.2097711934063144, 'residual_dropout': 0.08857229040629458, 'lr': 3.393290629438069e-05, 'weight_decay': 1.1123425942780297e-05, 'mlp_head_unit_1': 104}. Best is trial 12 with value: 1367.1053634221596.


Trial 26 finished with value: 4948.494741698947 and parameters: {'d_embedding': 232, 'n_layers': 3, 'ffn_factor': 1.3090393122365798, 'attention_dropout': 0.2500039169935181, 'ffn_dropout': 0.2097711934063144, 'residual_dropout': 0.08857229040629458, 'lr': 3.393290629438069e-05, 'weight_decay': 1.1123425942780297e-05, 'mlp_head_unit_1': 104}. Best is trial 12 with value: 1367.1053634221596.


  ffn_factor = trial.suggest_uniform('ffn_factor', 2/3, 8/3)
  attention_dropout = trial.suggest_uniform('attention_dropout', 0.0, 0.5)
  ffn_dropout = trial.suggest_uniform('ffn_dropout', 0.0, 0.5)
  residual_dropout = trial.suggest_uniform('residual_dropout', 0.0, 0.2)
  lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
  weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3)


read best result 1355.75244140625 from ./model/upstream-finetune_optuna/log_best_result.txt
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100

In [None]:
import json

best_value = study.best_value
print(f'best value = {best_value}')
best_trial = study.best_trial

for key, value in study.best_params.items():
    print("{}: {}".format(key, value))

with open(os.path.join(f'{UPSTREAM_OPTUNA_DIR}{PARAM_FILE}'), "w") as fp:
    json.dump(study.best_params, fp, indent = 4)

### Load best results and predict

In [9]:
model_reload = tf.keras.models.load_model(MODEL_SAVE_DIR)
get_rmse_on_upstream_val(model_reload, upstream_val_df)

In [11]:
downstream_test_df = pd.read_csv(f'{DATA_FOLDER}{DOWNSTREAM_TEST_FILENAME}')
downstream_test_df = embed_catgorical_features(downstream_test_df, CATEGORICAL_FEATURES)
test_df = df_to_dataset(downstream_test_df[FEATURES], shuffle=False, batch_size=1)

In [12]:
predict_results = model_reload.predict(test_df)
y_pred = predict_results['output'].ravel()
modis_compare_data = get_modis_compare_data(y_pred)
get_all_metrics(y_pred, modis_compare_data[TARGET_FEATURE], downstream_test_df[TARGET_FEATURE])
plot_result_distribution(downstream_test_df[TARGET_FEATURE], y_pred, modis_compare_data[TARGET_FEATURE])

 18/160 [==>...........................] - ETA: 0s

  dataset[key] = value[:, tf.newaxis]


Linear Encoding RMSE: 4858.5769
MOD17 RMSE: 4594.0967
