In [None]:
import warnings
import os
warnings.filterwarnings('ignore')
os.environ['PYTHONWARNINGS'] = 'ignore'

import pandas as pd
import numpy as np
from src import configuration as config
from src.pipeline.evaluation.evaluation_utils import custom_train_test_split
from src.models.listwise_neural_network import sample_listwise, RankingModel
import tensorflow as tf
import tensorflow_ranking as tfr
import tensorflow_recommenders as tfrs

In [24]:
# Load the pointwise training table and discard the cross-validation score column.
df = config.load_traindata_for_pointwise().drop(columns=['cv_score'])

# Split using the four experimental factor columns, with "rank" as the target.
X_train, X_test, y_train, y_test = custom_train_test_split(
    df,
    factors=["dataset", "model", "tuning", "scoring"],
    target="rank",
)

# Put predictors and target back side by side: `df` is the training frame,
# `df_test` the held-out frame.
df, df_test = (
    pd.concat(parts, axis=1)
    for parts in ([X_train, y_train], [X_test, y_test])
)

In [25]:
# prepare the data
def _add_features_column(frame):
    """Collapse the four factor columns into one space-separated ``features`` string.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding the columns 'dataset', 'model', 'tuning' and 'scoring'.

    Returns
    -------
    pandas.DataFrame
        A new frame in which the four factor columns are replaced by a single
        'features' column of the form "<dataset> <model> <tuning> <scoring>".
        A frame that was already converted (it has 'features' and none of the
        factor columns) is returned unchanged, so re-running this cell does not
        raise a KeyError.
    """
    factor_columns = ['dataset', 'model', 'tuning', 'scoring']

    already_converted = 'features' in frame.columns and not any(
        column in frame.columns for column in factor_columns
    )
    if already_converted:
        return frame

    # Only 'dataset' is cast to str before concatenation; the other three
    # factors are concatenated as they are, exactly as before.
    features = (
        frame['dataset'].astype(str)
        + ' ' + frame['model']
        + ' ' + frame['tuning']
        + ' ' + frame['scoring']
    )
    return frame.assign(features=features).drop(columns=factor_columns)


# train data
df = _add_features_column(df)
print(df.dtypes)

# test data
df_test = _add_features_column(df_test)

encoder      object
rank        float64
features     object
dtype: object


In [26]:
# Sanity check: (rows, columns) of the prepared test frame.
df_test.shape

(9065, 3)

In [27]:
# Preview the first rows of the prepared training frame (encoder, rank, features).
df.head()

Unnamed: 0,encoder,rank,features
0,BE,21.0,1114 KNC no F1
1,BUCV10RGLMME,19.0,1114 KNC no F1
2,BUCV10TE,26.0,1114 KNC no F1
3,BUCV2RGLMME,12.0,1114 KNC no F1
4,BUCV2TE,28.0,1114 KNC no F1


In [28]:
# Number of training rows per encoder.
# NOTE(review): the counts are not equal across encoders, so presumably some
# factor combinations lack a result for some encoders — confirm how
# sample_listwise handles incomplete lists.
df['encoder'].value_counts()

encoder
OE              869
DTEM10          867
DTEM5           867
CV5TE           866
TE              865
BUCV2TE         865
CBE             865
CV10TE          865
CV2TE           865
DTEM2           864
CE              863
DE              863
WOEE            861
BE              857
BUCV5TE         857
PBTE001         852
PBTE01          849
BUCV10TE        847
ME10E           844
ME01E           842
ME1E            840
CV2RGLMME       839
RGLMME          837
BUCV2RGLMME     836
CV5RGLMME       833
CV10RGLMME      827
BUCV5RGLMME     822
BUCV10RGLMME    819
PBTE0001        800
OHE             795
MHE             779
SE              769
Name: count, dtype: int64

In [29]:
# Wrap both frames as tf.data datasets: one element per row, keyed by column name.
df_tf = tf.data.Dataset.from_tensor_slices(
    {column: df[column] for column in df.columns}
)
df_tf_test = tf.data.Dataset.from_tensor_slices(
    {column: df_test[column] for column in df_test.columns}
)
print(type(df_tf))

<class 'tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset'>


In [30]:
# Convert the row-wise datasets into listwise ones (training split first, then
# test split); the resulting elements are inspected in the cells that follow.
df_listwise, df_listwise_test = map(sample_listwise, (df_tf, df_tf_test))

In [31]:
# Display the element spec of the listwise training dataset.
df_listwise

<_TensorSliceDataset element_spec={'features': TensorSpec(shape=(), dtype=tf.string, name=None), 'encoder': TensorSpec(shape=(32,), dtype=tf.string, name=None), 'rank': TensorSpec(shape=(32,), dtype=tf.float64, name=None)}>

In [32]:
# Display the element spec of the listwise test dataset.
df_listwise_test

<_TensorSliceDataset element_spec={'features': TensorSpec(shape=(), dtype=tf.string, name=None), 'encoder': TensorSpec(shape=(32,), dtype=tf.string, name=None), 'rank': TensorSpec(shape=(32,), dtype=tf.float64, name=None)}>

In [33]:
# Pretty-print the first element of the listwise training dataset to inspect
# its structure (one 'features' string plus the 'encoder' and 'rank' tensors).
import pprint
for example in df_listwise.take(1):
  pprint.pprint(example)

{'encoder': <tf.Tensor: shape=(32,), dtype=string, numpy=
array([b'BUCV2RGLMME', b'CV5TE', b'CV5RGLMME', b'OHE', b'OE', b'DTEM5',
       b'DE', b'SE', b'ME10E', b'DTEM2', b'BUCV10RGLMME', b'CV10TE',
       b'TE', b'PBTE0001', b'CV2RGLMME', b'BUCV5TE', b'CE', b'ME1E',
       b'DTEM10', b'CV2TE', b'MHE', b'BUCV10TE', b'WOEE', b'BE',
       b'BUCV5RGLMME', b'ME01E', b'CBE', b'PBTE01', b'BUCV2TE', b'RGLMME',
       b'CV10RGLMME', b'PBTE001'], dtype=object)>,
 'features': <tf.Tensor: shape=(), dtype=string, numpy=b'56 LGBMC no F1'>,
 'rank': <tf.Tensor: shape=(32,), dtype=float64, numpy=
array([0., 0., 0., 0., 2., 0., 3., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0.,
       2., 0., 0., 0., 0., 2., 0., 0., 2., 1., 0., 0., 0., 0., 0.])>}


In [34]:
# Training input: shuffle with a 100_000-element buffer, batch by 8192, then cache.
# NOTE(review): cache() is applied after shuffle(), so presumably the order
# produced on the first pass is the one replayed on later epochs — confirm
# this is intended.
cached_train = (
    df_listwise
    .shuffle(100_000)
    .batch(8192)
    .cache()
)

# Test input: no shuffling, batches of 4096, cached.
cached_test = (
    df_listwise_test
    .batch(4096)
    .cache()
)

In [35]:
# Display the element spec of the batched, cached training dataset.
cached_train

<CacheDataset element_spec={'features': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'encoder': TensorSpec(shape=(None, 32), dtype=tf.string, name=None), 'rank': TensorSpec(shape=(None, 32), dtype=tf.float64, name=None)}>

In [36]:
# Sorted arrays of the unique values seen in the training frame, converted to
# byte strings; both are handed to RankingModel when the model is built.

# Every distinct combined "dataset model tuning scoring" string.
unique_factor_combinations = np.unique(df['features'].to_numpy()).astype('S')

# Every distinct encoder name.
unique_encoder_rankings = np.unique(df['encoder'].to_numpy()).astype('S')

In [37]:
# Build the ranking model with a ListMLE loss and the two unique-value arrays
# computed from the training frame, then compile it with Adagrad at a
# learning rate of 0.1.
listwise_model = RankingModel(
    tfr.keras.losses.ListMLELoss(),
    unique_factor_combinations,
    unique_encoder_rankings,
)
listwise_model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
)

In [38]:
# Train for 200 epochs on the cached training batches. The call is left as the
# last expression, so the returned History object is shown as the cell output.
listwise_model.fit(cached_train, epochs=200, verbose=True)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x1a7c2140390>

In [42]:
# Evaluate on the cached test batches; return_dict=True gives the metrics by name.
listwise_model_result = listwise_model.evaluate(cached_test, return_dict=True)
# The model in this notebook is built with ListMLELoss, so the report is
# labelled accordingly (the message previously said "MSE Model").
print("NDCG of the ListMLE Model: {:.4f}".format(listwise_model_result["ndcg_metric"]))

NDCG of the MSE Model: 0.6821


## Average Spearman

The following snippets were run on the server infrastructure.

In [None]:
import warnings
import os
warnings.filterwarnings('ignore')
os.environ['PYTHONWARNINGS'] = 'ignore'

from src.configuration import load_traindata_for_pointwise
from src.pipeline.neural_network_pipeline import pipeline

# Load the pointwise training table that the pipeline runs below consume.
train_df = load_traindata_for_pointwise()

In [None]:
# Run 1 of 5 identical pipeline calls; their results are collected by hand at the end.
# NOTE(review): 500 presumably is the epoch count (the log ends at "Epoch 500/500");
# the meaning of the None argument is not visible here — confirm in
# src.pipeline.neural_network_pipeline.
pipeline(train_df, None, 500)

Epoch 500/500

1/1 [==============================] - ETA: 0s - ndcg_metric: 0.9470 - root_mean_squared_error: 16.0610 - loss: 40.7926 - regularization_loss: 0.0000e+00 - total_loss: 40.7926
1/1 [==============================] - 0s 27ms/step - ndcg_metric: 0.9470 - root_mean_squared_error: 16.0610 - loss: 40.7926 - regularization_loss: 0.0000e+00 - total_loss: 40.7926
Evaluating the model

1/1 [==============================] - ETA: 0s - ndcg_metric: 0.6524 - root_mean_squared_error: 14.4686 - loss: 255.5971 - regularization_loss: 0.0000e+00 - total_loss: 255.5971
1/1 [==============================] - 1s 676ms/step - ndcg_metric: 0.6524 - root_mean_squared_error: 14.4686 - loss: 255.5971 - regularization_loss: 0.0000e+00 - total_loss: 255.5971
NDCG of the MSE Model: 0.6524

1/1 [==============================] - ETA: 0s
1/1 [==============================] - 0s 205ms/step
Average Spearman of the MSE Model: 0.1504

In [None]:
# Run 2 of 5: same call as the other pipeline cells, repeated to obtain another result.
pipeline(train_df, None, 500)

Epoch 500/500

1/1 [==============================] - ETA: 0s - ndcg_metric: 0.9484 - root_mean_squared_error: 15.7722 - loss: 39.5223 - regularization_loss: 0.0000e+00 - total_loss: 39.5223
1/1 [==============================] - 0s 42ms/step - ndcg_metric: 0.9484 - root_mean_squared_error: 15.7722 - loss: 39.5223 - regularization_loss: 0.0000e+00 - total_loss: 39.5223
Evaluating the model

1/1 [==============================] - ETA: 0s - ndcg_metric: 0.6985 - root_mean_squared_error: 14.1739 - loss: 198.9219 - regularization_loss: 0.0000e+00 - total_loss: 198.9219
1/1 [==============================] - 1s 544ms/step - ndcg_metric: 0.6985 - root_mean_squared_error: 14.1739 - loss: 198.9219 - regularization_loss: 0.0000e+00 - total_loss: 198.9219
NDCG of the MSE Model: 0.6985

1/1 [==============================] - ETA: 0s
1/1 [==============================] - 0s 181ms/step
Average Spearman of the MSE Model: 0.1739

In [None]:
# Run 3 of 5: same call as the other pipeline cells, repeated to obtain another result.
pipeline(train_df, None, 500)

Epoch 500/500

1/1 [==============================] - ETA: 0s - ndcg_metric: 0.9548 - root_mean_squared_error: 11.6011 - loss: 38.1700 - regularization_loss: 0.0000e+00 - total_loss: 38.1700
1/1 [==============================] - 0s 54ms/step - ndcg_metric: 0.9548 - root_mean_squared_error: 11.6011 - loss: 38.1700 - regularization_loss: 0.0000e+00 - total_loss: 38.1700
Evaluating the model

1/1 [==============================] - ETA: 0s - ndcg_metric: 0.6875 - root_mean_squared_error: 13.1623 - loss: 208.3167 - regularization_loss: 0.0000e+00 - total_loss: 208.3167
1/1 [==============================] - 1s 652ms/step - ndcg_metric: 0.6875 - root_mean_squared_error: 13.1623 - loss: 208.3167 - regularization_loss: 0.0000e+00 - total_loss: 208.3167
NDCG of the MSE Model: 0.6875

1/1 [==============================] - ETA: 0s
1/1 [==============================] - 0s 186ms/step
Average Spearman of the MSE Model: 0.1246

In [None]:
# Run 4 of 5: same call as the other pipeline cells, repeated to obtain another result.
pipeline(train_df, None, 500)

Epoch 500/500

1/1 [==============================] - ETA: 0s - ndcg_metric: 0.9561 - root_mean_squared_error: 12.4717 - loss: 39.4902 - regularization_loss: 0.0000e+00 - total_loss: 39.4902
1/1 [==============================] - 0s 42ms/step - ndcg_metric: 0.9561 - root_mean_squared_error: 12.4717 - loss: 39.4902 - regularization_loss: 0.0000e+00 - total_loss: 39.4902
Evaluating the model

1/1 [==============================] - ETA: 0s - ndcg_metric: 0.6890 - root_mean_squared_error: 12.6760 - loss: 268.1295 - regularization_loss: 0.0000e+00 - total_loss: 268.1295
1/1 [==============================] - 1s 662ms/step - ndcg_metric: 0.6890 - root_mean_squared_error: 12.6760 - loss: 268.1295 - regularization_loss: 0.0000e+00 - total_loss: 268.1295
NDCG of the MSE Model: 0.6890

1/1 [==============================] - ETA: 0s
1/1 [==============================] - 0s 191ms/step
Average Spearman of the MSE Model: 0.1511

In [None]:
# Run 5 of 5: same call as the other pipeline cells, repeated to obtain another result.
pipeline(train_df, None, 500)

Epoch 500/500

1/1 [==============================] - ETA: 0s - ndcg_metric: 0.9498 - root_mean_squared_error: 11.2547 - loss: 37.4401 - regularization_loss: 0.0000e+00 - total_loss: 37.4401
1/1 [==============================] - 0s 44ms/step - ndcg_metric: 0.9498 - root_mean_squared_error: 11.2547 - loss: 37.4401 - regularization_loss: 0.0000e+00 - total_loss: 37.4401
Evaluating the model

1/1 [==============================] - ETA: 0s - ndcg_metric: 0.6790 - root_mean_squared_error: 11.3083 - loss: 197.4894 - regularization_loss: 0.0000e+00 - total_loss: 197.4894
1/1 [==============================] - 1s 633ms/step - ndcg_metric: 0.6790 - root_mean_squared_error: 11.3083 - loss: 197.4894 - regularization_loss: 0.0000e+00 - total_loss: 197.4894
NDCG of the MSE Model: 0.6790

1/1 [==============================] - ETA: 0s
1/1 [==============================] - 0s 196ms/step
Average Spearman of the MSE Model: 0.1319

In [None]:
# Test-set NDCG of the five pipeline runs, transcribed by hand from the
# "NDCG of the MSE Model" lines in their logged output, in run order.
ndcg_scores = [0.6524, 0.6985, 0.6875, 0.6890, 0.6790]
# Backward-compatible alias: the list was originally bound to the misspelled
# name `ncdg_scores`; it is kept so any code using that name keeps working.
ncdg_scores = ndcg_scores

# Average Spearman of the same five runs, in the same order.
average_spearman_scores = [0.1504, 0.1739, 0.1246, 0.1511, 0.1319]