In [7]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import ltn
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
from examples import commons
from collections import defaultdict

In [8]:
# Dataset: fetch the poker-hand data from the UCI repository (id 158).
SEED = 42                # shared by both splits and by the training shuffle
BATCH_SIZE = 32
SHUFFLE_BUFFER = 10_000  # rows are already shuffled once by train_test_split

poker = fetch_ucirepo(id=158)
X, y = poker.data.features, poker.data.targets

# Split into train (80 %), validation (10 %) and test (10 %), stratified on
# the target so each split keeps the class proportions.
X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=SEED)
X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=SEED)


def make_dataset(features, targets, batch_size=BATCH_SIZE, shuffle=False):
    """Wrap one split in a batched ``tf.data.Dataset``.

    Args:
        features: object exposing ``to_numpy()`` (here: the feature frame of
            one split).
        targets: object exposing ``to_numpy()`` (here: the target frame of
            the same split).
        batch_size: number of rows per batch.
        shuffle: when True, rows are reshuffled on every pass over the
            dataset (seeded with ``SEED``); leave False for validation and
            test data so evaluation order stays fixed.

    Returns:
        A dataset yielding ``(features, targets)`` batches.
    """
    ds = tf.data.Dataset.from_tensor_slices(
        (features.to_numpy(), targets.to_numpy()))
    if shuffle:
        # Shuffle before batching so batch composition changes every epoch.
        ds = ds.shuffle(SHUFFLE_BUFFER, seed=SEED,
                        reshuffle_each_iteration=True)
    return ds.batch(batch_size)


# Create tf datasets to stay as close as possible to the digit-classification
# pipeline. Only the training split is shuffled.
ds_train = make_dataset(X_train, y_train, shuffle=True)
ds_val   = make_dataset(X_val, y_val)
ds_test  = make_dataset(X_test, y_test)

In [9]:
# Hyper-parameters used by the preprocessing, model and training cells.
NUM_CLASSES = 10  # hand classes: id 0 ("Nothing") through id 9 ("Royal flush")
EMBED_DIM = 8     # width of the embedding learned for each categorical feature
BATCH_SIZE = 32   # same value the dataset cell uses when batching
EPOCHS = 10       # upper bound on the number of training epochs

# 1 · Pre-process ds_train / ds_val / ds_test

In [10]:
def prep(features, label):
    """Make card codes zero-based and one-hot encode the class label.

    The function is shape-agnostic apart from the label's trailing axis. In
    this notebook it is mapped over datasets that were batched beforehand,
    so it receives whole batches rather than single rows.

    Args:
        features: numeric tensor of card codes. It is cast to int32 and every
            entry is decremented by one (assumes the raw codes are 1-based,
            i.e. suits 1-4 and ranks 1-13 -- TODO confirm against the data).
        label: tensor of class ids whose last axis has size 1; that axis is
            squeezed away before encoding.

    Returns:
        Tuple ``(x, y)``: ``x`` is the shifted int32 feature tensor and ``y``
        is the one-hot encoding of the labels with ``NUM_CLASSES`` entries on
        its last axis.
    """
    # Drop the trailing singleton axis first, then force an integer dtype so
    # the ids are valid indices for tf.one_hot.
    class_ids = tf.cast(tf.squeeze(label, axis=-1), tf.int32)
    zero_based = tf.cast(features, tf.int32) - 1
    return zero_based, tf.one_hot(class_ids, NUM_CLASSES)

# Apply `prep` to every split and let tf.data overlap preprocessing with
# training via prefetch.
# NOTE: the names are rebound in place, so running this cell a second time
# would apply `prep` to already-preprocessed data.
ds_train, ds_val, ds_test = [
    split.map(prep).prefetch(tf.data.AUTOTUNE)
    for split in (ds_train, ds_val, ds_test)
]

# 2 · Build the model

In [11]:
# Vocabulary sizes of the two categorical feature kinds (zero-based codes).
NUM_SUITS = 4
NUM_RANKS = 13

# One hand = 10 integer codes.
inputs = tf.keras.Input(shape=(10,), dtype=tf.int32)

# Split the 10-long vector into suits (even positions) and ranks (odd positions).
suits = inputs[:, ::2]     # (B, 5)
ranks = inputs[:, 1::2]    # (B, 5)

# Embed separately because the two vocabularies have different sizes.
# Explicit names keep model.summary() readable.
suit_embed = layers.Embedding(NUM_SUITS, EMBED_DIM, name="suit_embedding")(suits)
rank_embed = layers.Embedding(NUM_RANKS, EMBED_DIM, name="rank_embedding")(ranks)

# Join each card's suit and rank embeddings, then flatten the hand.
x = layers.Concatenate(axis=-1)([suit_embed, rank_embed])  # (B, 5, 2*EMBED_DIM)
x = layers.Flatten()(x)                                    # (B, 5*2*EMBED_DIM)

# Two hidden layers with dropout in between.
x = layers.Dense(128, activation="relu")(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(64, activation="relu")(x)

# Softmax over the hand classes; pairs with the one-hot labels and the
# categorical cross-entropy loss below.
outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)
model = tf.keras.Model(inputs, outputs)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

model.summary()

# 3 · Train

In [12]:
# Stop when the callback's monitored quantity (Keras default: validation loss)
# has not improved for 5 consecutive epochs, and roll the model back to the
# best weights seen during training.
early_stop = tf.keras.callbacks.EarlyStopping(
    patience=5,
    restore_best_weights=True,
)

history = model.fit(
    ds_train,
    epochs=EPOCHS,
    validation_data=ds_val,
    callbacks=[early_stop],
)

# 4 · Evaluate

In [13]:
# The model was compiled with one loss and one metric ("accuracy"), so
# evaluate() yields exactly two values: loss first, then accuracy.
test_metrics = model.evaluate(ds_test)
loss, acc = test_metrics
print(f"Test accuracy: {acc:.3%}")