In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import StringLookup


In [2]:
flpop = pd.read_csv(r'TBSM_TRDAR_FLPOP_QU.csv',  encoding = "euc-kr", sep="|")
stores = pd.read_csv(r'TBSM_TRDAR_STOR_QU.csv',  encoding = "euc-kr", sep="|")
selling = pd.read_csv(r'TBSM_TRDAR_SELNG_QU.csv',  encoding = "euc-kr", sep="|")

In [3]:
train_data_file = "train_data_file.csv"
test_data_file = "test_data_file.csv"

In [4]:
geo_code = pd.read_csv(r'geo_code.csv', sep = ",", encoding='euc-kr')
sales_data = pd.read_csv(r'sales_data.csv', sep="|")

In [5]:
#Drop SVC_INDUTY_CD column for join the table
stores = stores.drop(['SVC_INDUTY_CD'], axis=1)
selling = selling.drop(['SVC_INDUTY_CD'], axis=1)

In [6]:
#Join the table
stores_joined = stores.merge(selling, on=['STDR_YY_CD', 'STDR_QU_CD', 'TRDAR_SE_CD', 'TRDAR_CD'])
stores_joined = stores_joined.merge(flpop, on=['STDR_YY_CD', 'STDR_QU_CD', 'TRDAR_SE_CD', 'TRDAR_CD'])

In [7]:
# One hot encoding on categorical data
stores_dummy = pd.get_dummies(stores_joined, columns=['STDR_YY_CD', 'STDR_QU_CD', 'TRDAR_SE_CD', 'TRDAR_CD'])

In [8]:
stores_joined.head()

Unnamed: 0,STDR_YY_CD,STDR_QU_CD,TRDAR_SE_CD,TRDAR_CD,STOR_CO_x,SIMILR_INDUTY_STOR_CO,OPBIZ_RT,OPBIZ_STOR_CO,CLSBIZ_RT,CLSBIZ_STOR_CO,...,FAG_60_ABOVE_SATTM_4_FLPOP_CO,FAG_60_ABOVE_SATTM_5_FLPOP_CO,FAG_60_ABOVE_SATTM_6_FLPOP_CO,FAG_60_ABOVE_SUNTM_1_FLPOP_CO,FAG_60_ABOVE_SUNTM_2_FLPOP_CO,FAG_60_ABOVE_SUNTM_3_FLPOP_CO,FAG_60_ABOVE_SUNTM_4_FLPOP_CO,FAG_60_ABOVE_SUNTM_5_FLPOP_CO,FAG_60_ABOVE_SUNTM_6_FLPOP_CO,RELM_AR
0,2019,1,A,2110319,4,4,25.0,1,25.0,1,...,3739,5551,3635,6119,5039,3413,3688,5493,3542,95378.7
1,2019,1,A,2110106,2,3,0.0,0,0.0,0,...,1062,1526,1045,2024,1517,927,1026,1607,1112,20550.44
2,2019,1,A,2110174,6,7,0.0,0,28.6,2,...,3920,5800,4823,10597,8153,4093,4130,5623,4561,103407.87
3,2019,1,D,2120170,7,11,0.0,0,9.1,1,...,2207,2650,1121,1608,1671,1722,1879,2289,1102,103761.7
4,2019,1,A,2110770,2,2,0.0,0,50.0,1,...,2115,2685,2533,6090,4642,2234,1944,2575,2483,47040.28


# target data convert to categorical data

In [9]:
df = stores_joined.sample(frac = 1)
df['per_INDUTY'] = df['SIMILR_INDUTY_STOR_CO'] / df["RELM_AR"]
# df['SIMILR_INDUTY_STOR_CO'] = pd.cut(df.SIMILR_INDUTY_STOR_CO, bins=5,
#                                  labels=False, include_lowest=True)
df['SIMILR_INDUTY_STOR_CO'] = pd.qcut(df.SIMILR_INDUTY_STOR_CO, q=5, precision=1, labels=["very low", "low", "medium", "high", "very high"])
# column type to str
df = df.astype({'STDR_YY_CD': 'str', 'STDR_QU_CD': 'str', 'TRDAR_SE_CD': 'str', 'TRDAR_CD': 'str'})

In [10]:
df.per_INDUTY.value_counts()

0.000135    15
0.000100    15
0.000030    15
0.000156    15
0.000026    15
            ..
0.000159     1
0.000062     1
0.000238     1
0.000156     1
0.000329     1
Name: per_INDUTY, Length: 6095, dtype: int64

In [11]:
stores_joined.SIMILR_INDUTY_STOR_CO.value_counts()

3      1654
4      1611
2      1608
5      1442
6      1229
       ... 
174       1
155       1
193       1
177       1
270       1
Name: SIMILR_INDUTY_STOR_CO, Length: 212, dtype: int64

In [12]:
df.SIMILR_INDUTY_STOR_CO

12029          low
7616           low
4323      very low
12680       medium
11998          low
           ...    
12814       medium
18351    very high
5964      very low
7565        medium
12083         high
Name: SIMILR_INDUTY_STOR_CO, Length: 18448, dtype: category
Categories (5, object): ['very low' < 'low' < 'medium' < 'high' < 'very high']

## test train split

In [13]:
split_idx = math.floor(len(df)*0.8)
train_data = df.iloc[:split_idx,:]
test_data = df.iloc[split_idx:,:]
train_data.to_csv("train_data_file.csv", index = False, header = False)
test_data.to_csv("test_data_file.csv", index = False, header = False)

In [14]:
df['SIMILR_INDUTY_STOR_CO'].value_counts()

very low     4493
medium       4142
very high    3557
high         3203
low          3053
Name: SIMILR_INDUTY_STOR_CO, dtype: int64

# Classification with Neural Decision Forest

- https://keras.io/examples/structured_data/deep_neural_decision_forests/

In [15]:
df.per_INDUTY.value_counts()

0.000135    15
0.000100    15
0.000030    15
0.000156    15
0.000026    15
            ..
0.000159     1
0.000062     1
0.000238     1
0.000156     1
0.000329     1
Name: per_INDUTY, Length: 6095, dtype: int64

In [16]:
# A list of the numerical feature names.
names = [list(df.columns)[4]]
names.extend(list(df.columns)[6:])
NUMERIC_FEATURE_NAMES =  names

# A dictionary of the categorical features and their vocabulary.
CATEGORICAL_FEATURES_WITH_VOCABULARY = {
    "STDR_YY_CD": sorted(list(train_data["STDR_YY_CD"].unique())),
    "STDR_QU_CD": sorted(list(train_data["STDR_QU_CD"].unique())),
    "TRDAR_SE_CD": sorted(list(train_data["TRDAR_SE_CD"].unique())),
    "TRDAR_CD": sorted(list(train_data["TRDAR_CD"].unique())),
}
# A list of the columns to ignore from the dataset.
IGNORE_COLUMN_NAMES = ["per_INDUTY"]
# A list of the categorical feature names.
CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURES_WITH_VOCABULARY.keys())
# A list of all the input features.
FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES
# A list of column default values for each feature.
COLUMN_DEFAULTS = [
    [0.0] if feature_name in NUMERIC_FEATURE_NAMES + IGNORE_COLUMN_NAMES else ["NA"]
    for feature_name in list(train_data.columns)
]
# The name of the target feature.
TARGET_FEATURE_NAME = "SIMILR_INDUTY_STOR_CO"
# A list of the labels of the target features.
TARGET_LABELS = ["very low", "low", "medium", "high", "very high"]

In [17]:
len(list(train_data.columns))

611

In [18]:
len(pd.read_csv("train_data_file.csv", header = None).columns)

611

In [19]:

target_label_lookup = StringLookup(
    vocabulary=TARGET_LABELS, mask_token=None, num_oov_indices=0
)


def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):
    dataset = tf.data.experimental.make_csv_dataset(
        csv_file_path,
        batch_size=batch_size,
        column_names=list(df.columns),
        column_defaults=COLUMN_DEFAULTS,
        label_name=TARGET_FEATURE_NAME,
        num_epochs=1,
        header=False,
        na_value="?",
        shuffle=shuffle,
    ).map(lambda features, target: (features, target_label_lookup(target)))
    return dataset.cache()

  return bool(asarray(a1 == a2).all())


In [20]:
def create_model_inputs():
    inputs = {}
    for feature_name in FEATURE_NAMES:
        if feature_name in NUMERIC_FEATURE_NAMES:
            inputs[feature_name] = layers.Input(
                name=feature_name, shape=(), dtype=tf.float32
            )
        else:
            inputs[feature_name] = layers.Input(
                name=feature_name, shape=(), dtype=tf.string
            )
    return inputs

In [21]:
def encode_inputs(inputs):
    encoded_features = []
    for feature_name in inputs:
        if feature_name in CATEGORICAL_FEATURE_NAMES:
            vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]
            # Create a lookup to convert a string values to an integer indices.
            # Since we are not using a mask token, nor expecting any out of vocabulary
            # (oov) token, we set mask_token to None and num_oov_indices to 0.
            lookup = StringLookup(
                vocabulary=vocabulary, mask_token=None, num_oov_indices=0
            )
            # Convert the string input values into integer indices.
            value_index = lookup(inputs[feature_name])
            embedding_dims = int(math.sqrt(lookup.vocabulary_size()))
            # Create an embedding layer with the specified dimensions.
            embedding = layers.Embedding(
                input_dim=lookup.vocabulary_size(), output_dim=embedding_dims
            )
            # Convert the index values to embedding representations.
            encoded_feature = embedding(value_index)
        else:
            # Use the numerical features as-is.
            encoded_feature = inputs[feature_name]
            if inputs[feature_name].shape[-1] is None:
                encoded_feature = tf.expand_dims(encoded_feature, -1)

        encoded_features.append(encoded_feature)

    encoded_features = layers.concatenate(encoded_features)
    return encoded_features

In [22]:
class NeuralDecisionTree(keras.Model):
    def __init__(self, depth, num_features, used_features_rate, num_classes):
        super().__init__()
        self.depth = depth
        self.num_leaves = 2 ** depth
        self.num_classes = num_classes

        # Create a mask for the randomly selected features.
        num_used_features = int(num_features * used_features_rate)
        one_hot = np.eye(num_features)
        sampled_feature_indicies = np.random.choice(
            np.arange(num_features), num_used_features, replace=False
        )
        self.used_features_mask = one_hot[sampled_feature_indicies]

        # Initialize the weights of the classes in leaves.
        self.pi = tf.Variable(
            initial_value=tf.random_normal_initializer()(
                shape=[self.num_leaves, self.num_classes]
            ),
            dtype="float32",
            trainable=True,
        )

        # Initialize the stochastic routing layer.
        self.decision_fn = layers.Dense(
            units=self.num_leaves, activation="sigmoid", name="decision"
        )

    def call(self, features):
        batch_size = tf.shape(features)[0]

        # Apply the feature mask to the input features.
        features = tf.matmul(
            features, self.used_features_mask, transpose_b=True
        )  # [batch_size, num_used_features]
        # Compute the routing probabilities.
        decisions = tf.expand_dims(
            self.decision_fn(features), axis=2
        )  # [batch_size, num_leaves, 1]
        # Concatenate the routing probabilities with their complements.
        decisions = layers.concatenate(
            [decisions, 1 - decisions], axis=2
        )  # [batch_size, num_leaves, 2]

        mu = tf.ones([batch_size, 1, 1])

        begin_idx = 1
        end_idx = 2
        # Traverse the tree in breadth-first order.
        for level in range(self.depth):
            mu = tf.reshape(mu, [batch_size, -1, 1])  # [batch_size, 2 ** level, 1]
            mu = tf.tile(mu, (1, 1, 2))  # [batch_size, 2 ** level, 2]
            level_decisions = decisions[
                :, begin_idx:end_idx, :
            ]  # [batch_size, 2 ** level, 2]
            mu = mu * level_decisions  # [batch_size, 2**level, 2]
            begin_idx = end_idx
            end_idx = begin_idx + 2 ** (level + 1)

        mu = tf.reshape(mu, [batch_size, self.num_leaves])  # [batch_size, num_leaves]
        probabilities = keras.activations.softmax(self.pi)  # [num_leaves, num_classes]
        outputs = tf.matmul(mu, probabilities)  # [batch_size, num_classes]
        return outputs
class NeuralDecisionForest(keras.Model):
    def __init__(self, num_trees, depth, num_features, used_features_rate, num_classes):
        super().__init__()
        self.ensemble = []
        # Initialize the ensemble by adding NeuralDecisionTree instances.
        # Each tree will have its own randomly selected input features to use.
        for _ in range(num_trees):
            self.ensemble.append(
                NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)
            )

    def call(self, inputs):
        # Initialize the outputs: a [batch_size, num_classes] matrix of zeros.
        batch_size = tf.shape(inputs)[0]
        outputs = tf.zeros([batch_size, num_classes])

        # Aggregate the outputs of trees in the ensemble.
        for tree in self.ensemble:
            outputs += tree(inputs)
        # Divide the outputs by the ensemble size to get the average.
        outputs /= len(self.ensemble)
        return outputs


In [23]:
learning_rate = 0.01
batch_size = 265
num_epochs = 10
hidden_units = [64, 64]


def run_experiment(model):

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=[keras.metrics.SparseCategoricalAccuracy()],
    )

    print("Start training the model...")
    train_dataset = get_dataset_from_csv(
        train_data_file, shuffle=False, batch_size=batch_size
    )


    model.fit(train_dataset, epochs=num_epochs)
    print("Model training finished")

    print("Evaluating the model on the test data...")
    test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)

    _, accuracy = model.evaluate(test_dataset)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

In [24]:
num_trees = 10
depth = 10
used_features_rate = 1.0
num_classes = len(TARGET_LABELS)


def create_tree_model():
    inputs = create_model_inputs()
    features = encode_inputs(inputs)
    features = layers.BatchNormalization()(features)
    num_features = features.shape[1]

    tree = NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)

    outputs = tree(features)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model


tree_model = create_tree_model()
run_experiment(tree_model)

Start training the model...
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


2023-02-08 00:21:37.407743: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model training finished
Evaluating the model on the test data...
      4/Unknown - 2s 72ms/step - loss: 0.3415 - sparse_categorical_accuracy: 0.8906

InvalidArgumentError: Graph execution error:

Detected at node 'model/string_lookup_4/Assert/Assert' defined at (most recent call last):
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 728, in start
      self.io_loop.start()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
      self._run_once()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/asyncio/base_events.py", line 1906, in _run_once
      handle._run()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
      await result
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 423, in do_execute
      res = shell.run_cell(
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2945, in run_cell
      result = self._run_cell(
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3000, in _run_cell
      return runner(coro)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3203, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3382, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3442, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/dl/lcgqt2hd0vd6rfg593dgmnkh0000gn/T/ipykernel_39094/2483583521.py", line 21, in <module>
      run_experiment(tree_model)
    File "/var/folders/dl/lcgqt2hd0vd6rfg593dgmnkh0000gn/T/ipykernel_39094/3423854438.py", line 27, in run_experiment
      _, accuracy = model.evaluate(test_dataset)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 2040, in evaluate
      tmp_logs = self.test_function(iterator)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 1820, in test_function
      return step_function(self, iterator)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 1804, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 1792, in run_step
      outputs = model.test_step(data)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 1756, in test_step
      y_pred = self(x, training=False)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 561, in __call__
      return super().__call__(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1132, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/functional.py", line 511, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/functional.py", line 668, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1132, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/layers/preprocessing/index_lookup.py", line 747, in call
      lookups = self._lookup_dense(inputs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/layers/preprocessing/index_lookup.py", line 803, in _lookup_dense
      assertion = tf.Assert(tf.equal(tf.size(oov_indices), 0), [msg])
Node: 'model/string_lookup_4/Assert/Assert'
assertion failed: [When `num_oov_indices=0` all inputs should be in vocabulary, found OOV values [\"2110265\"], consider setting `num_oov_indices=1`.]
	 [[{{node model/string_lookup_4/Assert/Assert}}]] [Op:__inference_test_function_23387]

In [25]:
num_trees = 25
depth = 5
used_features_rate = 0.5


def create_forest_model():
    inputs = create_model_inputs()
    features = encode_inputs(inputs)
    features = layers.BatchNormalization()(features)
    num_features = features.shape[1]

    forest_model = NeuralDecisionForest(
        num_trees, depth, num_features, used_features_rate, num_classes
    )

    outputs = forest_model(features)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model


forest_model = create_forest_model()

run_experiment(forest_model)

  return bool(asarray(a1 == a2).all())


Start training the model...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model training finished
Evaluating the model on the test data...
      4/Unknown - 3s 66ms/step - loss: 0.2689 - sparse_categorical_accuracy: 0.9038

InvalidArgumentError: Graph execution error:

Detected at node 'model_1/string_lookup_8/Assert/Assert' defined at (most recent call last):
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 728, in start
      self.io_loop.start()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
      self._run_once()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/asyncio/base_events.py", line 1906, in _run_once
      handle._run()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
      await result
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 423, in do_execute
      res = shell.run_cell(
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2945, in run_cell
      result = self._run_cell(
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3000, in _run_cell
      return runner(coro)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3203, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3382, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3442, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/dl/lcgqt2hd0vd6rfg593dgmnkh0000gn/T/ipykernel_39094/1727455595.py", line 23, in <module>
      run_experiment(forest_model)
    File "/var/folders/dl/lcgqt2hd0vd6rfg593dgmnkh0000gn/T/ipykernel_39094/3423854438.py", line 27, in run_experiment
      _, accuracy = model.evaluate(test_dataset)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 2040, in evaluate
      tmp_logs = self.test_function(iterator)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 1820, in test_function
      return step_function(self, iterator)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 1804, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 1792, in run_step
      outputs = model.test_step(data)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 1756, in test_step
      y_pred = self(x, training=False)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/training.py", line 561, in __call__
      return super().__call__(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1132, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/functional.py", line 511, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/functional.py", line 668, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1132, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/layers/preprocessing/index_lookup.py", line 747, in call
      lookups = self._lookup_dense(inputs)
    File "/Users/min/.conda/envs/python-camp-contest/lib/python3.10/site-packages/keras/layers/preprocessing/index_lookup.py", line 803, in _lookup_dense
      assertion = tf.Assert(tf.equal(tf.size(oov_indices), 0), [msg])
Node: 'model_1/string_lookup_8/Assert/Assert'
assertion failed: [When `num_oov_indices=0` all inputs should be in vocabulary, found OOV values [\"2110265\"], consider setting `num_oov_indices=1`.]
	 [[{{node model_1/string_lookup_8/Assert/Assert}}]] [Op:__inference_test_function_69427]