In [64]:
!pip install -q tensorflow-recommenders
import pandas as pd
import tensorflow as tf
import numpy as np
import tensorflow_recommenders as tfrs
from typing import Dict, Text

In [65]:
product_data = pd.read_csv('product_data.csv', on_bad_lines='warn')
product_data['product_itemid'] = product_data['product_itemid'].astype('string')
product_data.head(5)

Unnamed: 0,product_itemid,product_category,product_name,product_price,status,shop_location
0,7919342724,Short Sleeves,NKD Korean Fashion Erich Basic Daily Polo Tie ...,300.0,1,"Taytay, Rizal"
1,7140015494,Short Sleeves,OnlyYouth Classic knitted blouse korean top sh...,,1,"San Nicolas, Metro Manila"
2,4843903203,Short Sleeves,SS Oversized Ringer Top Plus Size Korean Fashi...,200.0,1,"Taytay, Rizal"
3,11405348262,Short Sleeves,Women's T-shirt Short Sleeve Collar with small...,391.0,1,Mainland China
4,2553628745,Short Sleeves,SS Korean Tops Square Neck Fashion Blouse Semi...,300.0,1,"Taytay, Rizal"


In [66]:
rating_data = pd.read_csv('rating_data.csv', on_bad_lines='warn')
rating_data['product_itemid'] = rating_data['product_itemid'].astype('string')
rating_data['user_id'] = rating_data['user_id'].astype('string')
rating_data.head(5)

Unnamed: 0,rating_star,user_id,product_itemid
0,5,11077,3550379942
1,5,599,3550379942
2,5,18042,3550379942
3,5,18255,3550379942
4,5,19699,3550379942


In [67]:
product_data_ds = tf.data.Dataset.from_tensor_slices(dict(product_data))
rating_data_ds = tf.data.Dataset.from_tensor_slices(dict(rating_data))

In [68]:
rating_data_ds = rating_data_ds.map(lambda x: {
    "product_itemid": x["product_itemid"],
    "user_id": x["user_id"],
})
product_data_ds = product_data_ds.map(lambda x: x["product_itemid"])

print(rating_data_ds)
print(product_data_ds)

<MapDataset element_spec={'product_itemid': TensorSpec(shape=(), dtype=tf.string, name=None), 'user_id': TensorSpec(shape=(), dtype=tf.string, name=None)}>
<MapDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>


In [69]:
tf.random.set_seed(42)
shuffled = rating_data_ds.shuffle(len(rating_data_ds), seed=42, reshuffle_each_iteration=False)

train = shuffled.take(int(len(shuffled)*0.8))
test = shuffled.skip(int(len(shuffled)*0.8)).take(int(len(shuffled)*0.2))

In [70]:
product_itemid = product_data_ds.batch(1_000)
user_id = rating_data_ds.batch(10_000).map(lambda x: x["user_id"])

unique_product = np.unique(np.concatenate(list(product_itemid)))
unique_user = np.unique(np.concatenate(list(user_id)))

In [71]:
embedding_dimension = 32

user_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(vocabulary=unique_user, mask_token=None),
  tf.keras.layers.Embedding(len(unique_user) + 1, embedding_dimension)
])
product_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(vocabulary=unique_product, mask_token=None),
  tf.keras.layers.Embedding(len(unique_product) + 1, embedding_dimension)
])

In [72]:
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(
      candidates=product_data_ds.batch(128).map(product_model)
    )
)

In [73]:
class ProductModel(tfrs.Model):

  def __init__(self, user_model, product_model):
    super().__init__()
    self.product_model: tf.keras.Model = product_model
    self.user_model: tf.keras.Model = user_model
    self.task: tf.keras.layers.Layer = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    user_embeddings = self.user_model(features["user_id"])
    product_embeddings = self.product_model(features["product_itemid"])

    return self.task(user_embeddings, product_embeddings)

In [74]:
model = ProductModel(user_model, product_model)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [75]:
cached_train = train.shuffle(len(train)).batch(int(len(train)*0.1)).cache()
cached_test = test.batch(int(len(train)*0.02)).cache()

In [76]:
model.fit(cached_train, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x281fe492a60>

In [77]:
model.evaluate(cached_test, return_dict=True)



{'factorized_top_k/top_1_categorical_accuracy': 0.19882984459400177,
 'factorized_top_k/top_5_categorical_accuracy': 0.34412482380867004,
 'factorized_top_k/top_10_categorical_accuracy': 0.3767918050289154,
 'factorized_top_k/top_50_categorical_accuracy': 0.46894198656082153,
 'factorized_top_k/top_100_categorical_accuracy': 0.5191614031791687,
 'loss': 1721.0037841796875,
 'regularization_loss': 0,
 'total_loss': 1721.0037841796875}

In [79]:
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model, k=10)
index.index_from_dataset(
  tf.data.Dataset.zip((product_data_ds.batch(100), product_data_ds.batch(100).map(model.product_model)))
)
n="10795"
_, product = index(tf.constant([n]))
product = np.unique(product)
list_product = []
for x in product:
    x = str(int(x))
    product_name = product_data.loc[product_data['product_itemid'] == x].iloc[0]
    product_name = product_name['product_name']
    list_product.append(product_name)
for x in list_product:
    print(x)

xiaozhainv Korean Casual knitted cardigan long sleeve top
【COD & Ready Stock】Korean Women's Puff Sleeve Crop Top Sexy Square Neck Short Chiffon Long Sleeve Blouse
Knit crop top lettuce edge STRIPES Tops Korean top REA
NKD Korean Fashion Erich Basic Daily Polo Tie Top 1104
Yihua 2021 summer Korean version of the new square collar lace wood ears floral chiffon short shirt top women
