# import libraries

In [28]:
import os
import pprint 
import tempfile

from typing import Dict, Text 

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import tensorflow_recommenders as tfrs
import pandas as pd

# Prepare Dataset

First, we have to prepare the dataset and convert it to tensor format.

In [29]:
class PrepareDataset:
    """Turn the raw retail DataFrame into TensorFlow datasets.

    Call ``feature_selection()`` first, then ``create_tensor_dataset()``.
    """

    def __init__(self, df, max_rows=100_000):
        """Keep the leading slice of ``df`` that will be processed.

        Args:
            df: Raw DataFrame. ``feature_selection`` reads its 'Quantity',
                'Customer ID' and 'Description' columns.
            max_rows: Number of leading records to keep. The default of
                100,000 was chosen because the original dataset was too
                large to train on; pass ``None`` to keep every row.
        """
        self.df_ = df[:max_rows]
        # Start from an empty frame (an instance, not the DataFrame class) so
        # the attribute always behaves like a DataFrame.
        self.users_df = pd.DataFrame()

    def feature_selection(self):
        """Build the (user_id, product_id) interaction table.

        Rows with ``Quantity < 1`` or with any missing value are discarded.
        Every distinct 'Description' receives an integer ``product_id``
        starting at 1, numbered in order of first appearance.

        Returns:
            DataFrame with the columns 'user_id' and 'product_id'; it is also
            stored on ``self.users_df``.
        """
        self.df_ = self.df_[self.df_['Quantity'] >= 1].dropna().reset_index(drop=True)

        # Select essential features
        interactions = self.df_[['Customer ID', 'Description']]

        # `assign` returns a new frame, so no column is written into a slice
        # of another DataFrame.
        catalogue = interactions[['Description']].drop_duplicates()
        catalogue = catalogue.assign(product_id=range(1, len(catalogue) + 1))

        self.users_df = (
            interactions.merge(catalogue, on='Description')
            .drop('Description', axis=1)
            .rename(columns={'Customer ID': 'user_id'})
        )
        return self.users_df

    def create_tensor_dataset(self):
        """Convert the interaction table to ``tf.data`` datasets.

        Returns:
            Tuple ``(users_dataset, products_dataset)``. The first yields
            dicts with string 'user_id' and 'product_id' entries in shuffled
            row order; the second yields dicts with a 'product_id' entry, one
            per distinct product.

        Raises:
            RuntimeError: If ``feature_selection()`` has not been run yet.
        """
        if not {'user_id', 'product_id'}.issubset(self.users_df.columns):
            raise RuntimeError(
                "feature_selection() must be called before create_tensor_dataset()"
            )

        # Ids are handled as strings downstream; going through int first
        # turns a float id such as 13085.0 into '13085'.
        as_strings = self.users_df.assign(
            user_id=self.users_df['user_id'].astype('int64').astype(str),
            product_id=self.users_df['product_id'].astype(str),
        )
        self.users_df = as_strings.sample(frac=1).reset_index(drop=True)

        # The product dataset serves as the candidate corpus, so each product
        # should appear once; one row per interaction would put duplicate
        # candidates into the retrieval index and metrics.
        self.products_df = (
            self.users_df[['product_id']].drop_duplicates().reset_index(drop=True)
        )

        self.users_dataset = tf.data.Dataset.from_tensor_slices(dict(self.users_df))
        self.products_dataset = tf.data.Dataset.from_tensor_slices(dict(self.products_df))
        return self.users_dataset, self.products_dataset

# Implement recommender system

After converting the dataset, it's time to implement the recommender system on the generated dataset.

In [50]:
class data_manipulation:
    """Helpers that reshape the interaction and product datasets for training."""

    def __init__(self, users, products):
        """Store the interaction dataset (``users``) and the product dataset."""
        self.users, self.products = users, products

    def keep_useful_elements(self):
        """Reduce both datasets to the fields that are used later on.

        Interaction elements keep only 'product_id' and 'user_id'; product
        elements are replaced by their bare 'product_id' value.

        Returns:
            Tuple ``(users, products)`` of the mapped datasets, which are
            also stored back on the instance.
        """
        def _interaction_fields(row):
            return {
                'product_id': row['product_id'],
                'user_id': row['user_id'],
            }

        def _product_id(row):
            return row['product_id']

        self.users = self.users.map(_interaction_fields)
        self.products = self.products.map(_product_id)
        return self.users, self.products

    def train_test_generator(self, train_range=80_000, all_range=100_000):
        """Split the interactions into train and test subsets.

        The dataset is shuffled once with a fixed seed (no reshuffling per
        iteration); the first ``train_range`` elements form the train set and
        the following ``all_range - train_range`` elements the test set.

        Returns:
            Tuple ``(train, test)``.
        """
        tf.random.set_seed(42)
        test_range = all_range - train_range
        shuffled = self.users.shuffle(all_range, seed=42, reshuffle_each_iteration=False)
        return shuffled.take(train_range), shuffled.skip(train_range).take(test_range)

    def pass_unique(self):
        """Collect the distinct product ids and user ids.

        Returns:
            Tuple ``(unique_product_ids, unique_user_ids)`` as produced by
            ``np.unique`` over the concatenated batches.
        """
        product_batches = list(self.products.batch(1_000))
        user_batches = list(
            self.users.batch(1_000_000).map(lambda batch: batch["user_id"])
        )
        return (
            np.unique(np.concatenate(product_batches)),
            np.unique(np.concatenate(user_batches)),
        )

In [31]:
class modelAndLoss:
    """Build the two embedding towers and the retrieval task."""

    def __init__(self, unique_product_ids, unique_user_ids, products):
        """Store the id vocabularies and the product dataset.

        Args:
            unique_product_ids: Vocabulary for the product tower.
            unique_user_ids: Vocabulary for the user tower.
            products: Dataset of product ids used as retrieval candidates.
        """
        self.unique_product_ids = unique_product_ids
        self.unique_user_ids = unique_user_ids
        self.products = products
        # Populated by implement_model(); None means "not built yet".
        self.user_model = None
        self.product_model = None

    @staticmethod
    def _build_tower(vocabulary, embedding_dimension):
        """Return a StringLookup -> Embedding model for one id vocabulary."""
        return tf.keras.Sequential([
            tf.keras.layers.StringLookup(vocabulary=vocabulary, mask_token=None),
            # Add an additional embedding to account for unknown tokens.
            tf.keras.layers.Embedding(len(vocabulary) + 1, embedding_dimension),
        ])

    def implement_model(self, embedding_dimension=32):
        """Create the user (query) tower and the product (candidate) tower.

        Keras preprocessing layers first convert the string ids to integers,
        which an Embedding layer then turns into embeddings.

        Args:
            embedding_dimension: Size of the embedding vectors of both towers.

        Returns:
            Tuple ``(user_model, product_model)``; both are also stored on
            the instance.
        """
        self.user_model = self._build_tower(self.unique_user_ids, embedding_dimension)
        self.product_model = self._build_tower(self.unique_product_ids, embedding_dimension)
        return self.user_model, self.product_model

    def metrics_loss(self, batch_size=128):
        """Create the retrieval task with a FactorizedTopK metric.

        Args:
            batch_size: Batch size used when embedding the candidate products.

        Returns:
            The ``tfrs.tasks.Retrieval`` task.

        Raises:
            RuntimeError: If ``implement_model()`` has not been called, since
                the candidates are embedded with the product tower.
        """
        if self.product_model is None:
            raise RuntimeError("implement_model() must be called before metrics_loss()")

        metrics = tfrs.metrics.FactorizedTopK(
            candidates=self.products.batch(batch_size).map(self.product_model)
        )
        return tfrs.tasks.Retrieval(metrics=metrics)

In [32]:
class userProductModel(tfrs.Model):
    """Two-tower retrieval model combining a user tower, a product tower and a task."""

    def __init__(self, user_model, product_model, task=None):
        """Assemble the full model.

        Args:
            user_model: Model that maps user ids to embeddings.
            product_model: Model that maps product ids to embeddings.
            task: Layer that computes the loss and metrics from the two sets
                of embeddings. When omitted, the module-level variable
                ``task`` is used, so existing two-argument callers keep
                working.

        Raises:
            ValueError: If ``task`` is omitted and no module-level ``task``
                exists.
        """
        super().__init__()
        if task is None:
            try:
                task = globals()["task"]
            except KeyError:
                raise ValueError(
                    "no task given and no module-level 'task' is defined"
                ) from None
        self.product_model: tf.keras.Model = product_model
        self.user_model: tf.keras.Model = user_model
        self.task: tf.keras.layers.Layer = task

    # Now it's time to implement the full model
    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Return the task loss for one batch.

        Args:
            features: Batch containing the "user_id" and "product_id" entries.
            training: Accepted for interface compatibility; not used.
        """
        # We pick out the user features and pass them into the user model.
        # The embeddings stay local: per-batch tensors kept on `self` would
        # only be stale leftovers of the last traced call.
        user_embeddings = self.user_model(features["user_id"])
        # And pick out the product features and pass them into the product model,
        # getting embeddings back.
        positive_product_embeddings = self.product_model(features["product_id"])

        # The task computes loss and the metrics.
        return self.task(user_embeddings, positive_product_embeddings)

In [52]:
# fitting and evaluating
class fitAndEvaluateModel:
    """Create, compile, fit and evaluate the recommender model."""

    # Batch size used for the test set when evaluate_model() has to build it.
    _DEFAULT_TEST_BATCH_SIZE = 4096

    def __init__(self, user_model, product_model, train, test):
        """Store the two towers and the train/test datasets.

        Args:
            user_model: User tower passed on to ``userProductModel``.
            product_model: Product tower passed on to ``userProductModel``.
            train: Dataset used for fitting.
            test: Dataset used for evaluation.
        """
        self.user_model = user_model
        self.product_model = product_model
        self.model = None
        self.train = train
        self.test = test
        self.cached_test = None
        # Result of the most recent evaluate_model() call.
        self.evaluation = None

    def _require_model(self):
        """Return the model, or raise if create_model() has not been called."""
        if self.model is None:
            raise RuntimeError("create_model() must be called first")
        return self.model

    def create_model(self):
        """Instantiate the ``userProductModel`` from the two towers."""
        self.model = userProductModel(self.user_model, self.product_model)

    def compile_model(self):
        """Compile the model with an Adagrad optimizer (learning rate 0.1)."""
        self._require_model().compile(
            optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1)
        )

    def fit_model(self, epochs=10, train_batch_size=8192,
                  test_batch_size=_DEFAULT_TEST_BATCH_SIZE):
        """Train the model on the instance's own train dataset.

        Args:
            epochs: Number of training epochs.
            train_batch_size: Batch size of the training set.
            test_batch_size: Batch size of the cached test set prepared for
                ``evaluate_model``.

        Returns:
            The fitted model.
        """
        model = self._require_model()
        # Use the datasets handed to the constructor rather than whatever
        # module-level `train` / `test` variables happen to exist.
        cached_train = self.train.shuffle(200_000).batch(train_batch_size).cache()
        self.cached_test = self.test.batch(test_batch_size).cache()
        model.fit(cached_train, epochs=epochs)
        return model

    def evaluate_model(self):
        """Evaluate the model on the test dataset.

        The metrics dictionary is kept on ``self.evaluation``.

        Returns:
            The model (unchanged return value for existing callers).
        """
        model = self._require_model()
        if self.cached_test is None:
            # Allow evaluation without a preceding fit_model() call.
            self.cached_test = self.test.batch(self._DEFAULT_TEST_BATCH_SIZE).cache()
        self.evaluation = model.evaluate(self.cached_test, return_dict=True)
        return model

# Recommend items to users

After fitting and evaluating the model it's time to make suggestions to users.
In this example we use user "15865" as our sample; however, any user that exists in the dataset could be used.

In [53]:
def make_predictions(model, products, product_model, user_id='15865'):
    """Recommend products for one user with a brute-force retrieval index.

    Args:
        model: Trained model exposing ``user_model`` and ``product_model``.
        products: Dataset of product ids to recommend from.
        product_model: Unused; the candidate embeddings are computed with
            ``model.product_model``. Kept so existing callers keep working.
        user_id: Id of the user to recommend for. It is converted to ``str``
            because the ids are looked up as strings.

    Returns:
        Tuple ``(ids, index)``: the recommended product ids for the user and
        the index that produced them.
    """
    # Create a model that takes in raw query features, and
    # recommends products out of the entire products dataset.
    index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

    # Pair every batch of product ids with the embeddings of that batch.
    index.index_from_dataset(
        tf.data.Dataset.zip(
            (products.batch(100), products.batch(100).map(model.product_model))
        )
    )

    _, ids = index(tf.constant([str(user_id)]))
    return ids, index

# Main

In [36]:
# Load the raw spreadsheet into a DataFrame.
path = './Datasets/online_retail_II.xlsx'
df = pd.read_excel(path)

# Build the (user_id, product_id) table and convert it to tf.data datasets.
dataset = PrepareDataset(df)
users_df = dataset.feature_selection()
users, products = dataset.create_tensor_dataset()

# Keep only the needed fields, split into train/test and collect the id
# vocabularies for the lookup layers.
data = data_manipulation(users, products)
users, products = data.keep_useful_elements()
train, test = data.train_test_generator()
unique_product_ids, unique_user_ids = data.pass_unique()

# Build the two towers and the retrieval task.
# NOTE: `task` has to stay a module-level name, because userProductModel
# picks it up from module scope when create_model() runs below.
pre_model = modelAndLoss(unique_product_ids, unique_user_ids, products)
user_model, product_model = pre_model.implement_model()
task = pre_model.metrics_loss()
# Bare expression with no effect here (presumably a leftover notebook inspection).
product_model

# Create, compile and fit the model; `model_` is the fitted model returned by
# fit_model(), while `model` is the fitAndEvaluateModel wrapper.
model = fitAndEvaluateModel(user_model, product_model, train, test)
model.create_model()
model.compile_model()
model_ = model.fit_model()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Recommend products to users

In [45]:
# Despite its name, `titles` holds the product ids returned by
# make_predictions (the first element of its `(ids, index)` result).
titles, index = make_predictions(model_, products, product_model)
# Print the first three recommended ids for the sample user.
print(f"Recommendations for user 15865: {titles[0, :3]}")

Recommendations for user 15865: [b'2741' b'2740' b'1196']
