In [1]:
import pandas as pd
import numpy as np
from src import configuration as config
from src.pipeline.evaluation.evaluation_utils import custom_train_test_split
from src.models.listwise_neural_network import sample_listwise, RankingModel
import tensorflow as tf
import tensorflow_ranking as tfr
import tensorflow_recommenders as tfrs

In [2]:
# load the data
df = config.load_traindata_for_pointwise()
df = df.drop(columns=['cv_score'])
X_train, X_test, y_train, y_test = custom_train_test_split(df, factors=["dataset", "model", "tuning", "scoring"], target="rank")
df = pd.concat([X_train, y_train], axis=1)
df_test = pd.concat([X_test, y_test], axis=1)

In [3]:
# prepare the data
# train data
df['dataset'] = df['dataset'].astype(str)
df['features'] = df['dataset'].astype(str) + ' ' + df['model'] + ' ' + df['tuning'] + ' ' + df['scoring']
df = df.drop(columns=['dataset', 'model', 'tuning', 'scoring'])
print(df.dtypes)


# test data
df_test['dataset'] = df_test['dataset'].astype(str)
df_test['features'] = df_test['dataset'].astype(str) + ' ' + df_test['model'] + ' ' + df_test['tuning'] + ' ' + df_test['scoring']
df_test = df_test.drop(columns=['dataset', 'model', 'tuning', 'scoring'])

encoder      object
rank        float64
features     object
dtype: object


In [4]:
df_test.shape

(9065, 3)

In [5]:
df.head()

Unnamed: 0,encoder,rank,features
0,BE,21.0,1114 KNC no F1
1,BUCV10RGLMME,19.0,1114 KNC no F1
2,BUCV10TE,26.0,1114 KNC no F1
3,BUCV2RGLMME,12.0,1114 KNC no F1
4,BUCV2TE,28.0,1114 KNC no F1


In [6]:
df['encoder'].value_counts()

encoder
OE              869
DTEM10          867
DTEM5           867
CV5TE           866
TE              865
BUCV2TE         865
CBE             865
CV10TE          865
CV2TE           865
DTEM2           864
CE              863
DE              863
WOEE            861
BE              857
BUCV5TE         857
PBTE001         852
PBTE01          849
BUCV10TE        847
ME10E           844
ME01E           842
ME1E            840
CV2RGLMME       839
RGLMME          837
BUCV2RGLMME     836
CV5RGLMME       833
CV10RGLMME      827
BUCV5RGLMME     822
BUCV10RGLMME    819
PBTE0001        800
OHE             795
MHE             779
SE              769
Name: count, dtype: int64

In [7]:
df_tf = tf.data.Dataset.from_tensor_slices(dict(df))
df_tf_test = tf.data.Dataset.from_tensor_slices(dict(df_test))
print(type(df_tf))

<class 'tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset'>


In [8]:
df_listwise = sample_listwise(df_tf)
df_listwise_test = sample_listwise(df_tf_test)

In [9]:
df_listwise

<_TensorSliceDataset element_spec={'features': TensorSpec(shape=(), dtype=tf.string, name=None), 'encoder': TensorSpec(shape=(32,), dtype=tf.string, name=None), 'rank': TensorSpec(shape=(32,), dtype=tf.float64, name=None)}>

In [10]:
df_listwise_test

<_TensorSliceDataset element_spec={'features': TensorSpec(shape=(), dtype=tf.string, name=None), 'encoder': TensorSpec(shape=(32,), dtype=tf.string, name=None), 'rank': TensorSpec(shape=(32,), dtype=tf.float64, name=None)}>

In [11]:
import pprint
for example in df_listwise.take(1):
  pprint.pprint(example)

{'encoder': <tf.Tensor: shape=(32,), dtype=string, numpy=
array([b'OE', b'CV5TE', b'OHE', b'BUCV5TE', b'BUCV5RGLMME', b'TE',
       b'BUCV10TE', b'CV2RGLMME', b'PBTE01', b'BE', b'CV10TE', b'MHE',
       b'CV5RGLMME', b'ME10E', b'DTEM2', b'RGLMME', b'BUCV2RGLMME', b'CE',
       b'WOEE', b'DE', b'DTEM5', b'CV2TE', b'BUCV2TE', b'ME1E', b'CBE',
       b'SE', b'PBTE0001', b'DTEM10', b'BUCV10RGLMME', b'ME01E',
       b'CV10RGLMME', b'PBTE001'], dtype=object)>,
 'features': <tf.Tensor: shape=(), dtype=string, numpy=b'56 LGBMC no F1'>,
 'rank': <tf.Tensor: shape=(32,), dtype=float64, numpy=
array([2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 0., 0., 0.,
       0., 2., 3., 0., 0., 0., 2., 1., 2., 0., 0., 0., 2., 0., 0.])>}


In [12]:
cached_train = df_listwise.shuffle(100_000).batch(8192).cache()
cached_test = df_listwise_test.batch(4096).cache()

In [13]:
cached_train

<CacheDataset element_spec={'features': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'encoder': TensorSpec(shape=(None, 32), dtype=tf.string, name=None), 'rank': TensorSpec(shape=(None, 32), dtype=tf.float64, name=None)}>

In [14]:
# convert to a array containing all unique combinations of model, tuning, scoring as byte strings
# unique_factor_combinations = np.unique(df_listwise[['model', 'tuning', 'scoring']])
# unique_factor_combinations = unique_factor_combinations.astype('S')
# print(unique_factor_combinations)

# unique_model_combinations = np.unique(df_listwise['model'])
# unique_model_combinations = unique_factor_combinations.astype('S')

# unique_tuning_combinations = np.unique(df_listwise['tuning'])
# unique_tuning_combinations = unique_factor_combinations.astype('S')

unique_factor_combinations = np.unique(df[['features']])
unique_factor_combinations = unique_factor_combinations.astype('S')

unique_encoder_rankings = np.unique(df[['encoder']])
unique_encoder_rankings = unique_encoder_rankings.astype('S')

In [15]:
listwise_model = RankingModel(tfr.keras.losses.ListMLELoss(), unique_factor_combinations, unique_encoder_rankings)
listwise_model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

In [16]:
listwise_model.fit(cached_train, epochs=10, verbose=True)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1a7b51434d0>

In [20]:
# save the listwise model
listwise_model.save('listwise_model')

INFO:tensorflow:Assets written to: listwise_model\assets


INFO:tensorflow:Assets written to: listwise_model\assets


In [21]:
reloaded = tf.keras.models.load_model('listwise_model')

In [22]:
listwise_model_result = reloaded.evaluate(cached_test, return_dict=True)
print("NDCG of the MSE Model: {:.4f}".format(listwise_model_result["ndcg_metric"]))

ValueError: in user code:

    File "c:\Users\Marco\Workspace\phase-2\venv\Lib\site-packages\keras\src\engine\training.py", line 1972, in test_function  *
        return step_function(self, iterator)
    File "c:\Users\Marco\Workspace\phase-2\venv\Lib\site-packages\keras\src\engine\training.py", line 1956, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Marco\Workspace\phase-2\venv\Lib\site-packages\keras\src\engine\training.py", line 1944, in run_step  **
        outputs = model.test_step(data)
    File "c:\Users\Marco\Workspace\phase-2\venv\Lib\site-packages\keras\src\engine\training.py", line 1850, in test_step
        y_pred = self(x, training=False)
    File "c:\Users\Marco\Workspace\phase-2\venv\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None

    ValueError: Could not find matching concrete function to call loaded from the SavedModel. Got:
      Positional arguments (1 total):
        * {'encoder': <tf.Tensor 'features_1:0' shape=(None, 32) dtype=string>,
     'features': <tf.Tensor 'features:0' shape=(None,) dtype=string>,
     'rank': <tf.Tensor 'features_2:0' shape=(None, 32) dtype=float32>}
      Keyword arguments: {'training': False}
    
     Expected these arguments to match one of the following 2 option(s):
    
    Option 1:
      Positional arguments (1 total):
        * {'encoder': TensorSpec(shape=(None, 32), dtype=tf.string, name='encoder'),
     'features': TensorSpec(shape=(None,), dtype=tf.string, name='features_features')}
      Keyword arguments: {'training': True}
    
    Option 2:
      Positional arguments (1 total):
        * {'encoder': TensorSpec(shape=(None, 32), dtype=tf.string, name='encoder'),
     'features': TensorSpec(shape=(None,), dtype=tf.string, name='features_features')}
      Keyword arguments: {'training': False}


In [None]:
prediction = listwise_model.predict(cached_test)
prediction.shape



(7552, 32, 1)

In [None]:
prediction

array([[[ 0.69773453],
        [ 0.5882609 ],
        [ 0.56097853],
        ...,
        [ 0.82760644],
        [ 0.4474638 ],
        [ 1.0411758 ]],

       [[ 0.7542254 ],
        [ 1.0411758 ],
        [ 0.619869  ],
        ...,
        [ 0.841596  ],
        [ 0.663063  ],
        [ 0.69312024]],

       [[ 0.045226  ],
        [ 0.7616128 ],
        [ 0.06197588],
        ...,
        [ 0.9450989 ],
        [ 0.663063  ],
        [ 0.39793494]],

       ...,

       [[ 0.62416464],
        [ 0.619869  ],
        [ 0.4474638 ],
        ...,
        [-0.01573833],
        [ 2.6315248 ],
        [ 0.82760644]],

       [[ 1.0411758 ],
        [ 1.0975684 ],
        [ 0.15994318],
        ...,
        [ 0.4474638 ],
        [ 0.7127183 ],
        [ 0.62416464]],

       [[ 0.663063  ],
        [-0.01573833],
        [ 0.63231915],
        ...,
        [ 0.15994318],
        [ 0.69312024],
        [ 0.75579476]]], dtype=float32)