# import libraries

In [1]:
import os
import pprint 
import tempfile

from typing import Dict, Text 

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import tensorflow_recommenders as tfrs
import pandas as pd

# data preprocessing

## read data

In [2]:
path = '../Datasets/online_retail_II.xlsx'

In [3]:
users_df = pd.read_excel(path)

## drop unnecessory columns

In [4]:
users_df = users_df.dropna()
users_df = users_df[['Customer ID', 'Description']]
help_ = users_df[['Description']].drop_duplicates()
help_['product_id'] = [i+1 for i in range(help_.shape[0])]
users_df = users_df.merge(help_, on='Description'
                         ).drop('Description', axis=1
                         ).rename(columns = {'Description' : 'product_id', 'Customer ID': 'user_id'})
users_df.user_id = users_df.user_id.apply(lambda x: str(int(x)))
users_df.product_id = users_df.product_id.apply(lambda x: str(x))

## convert float nan to str

In [5]:
users_df = users_df.sample(frac=1).reset_index(drop=True)
products_df = users_df[['product_id']]

In [6]:
users_dataset = tf.data.Dataset.from_tensor_slices(dict(users_df))
products_dataset = tf.data.Dataset.from_tensor_slices(dict(products_df))

In [7]:
for x in users_dataset.take(1000).as_numpy_iterator():
    if x['user_id'] == b'14606':
        pprint.pprint(x)

{'product_id': b'267', 'user_id': b'14606'}
{'product_id': b'1601', 'user_id': b'14606'}
{'product_id': b'324', 'user_id': b'14606'}
{'product_id': b'2824', 'user_id': b'14606'}
{'product_id': b'2506', 'user_id': b'14606'}
{'product_id': b'196', 'user_id': b'14606'}
{'product_id': b'918', 'user_id': b'14606'}
{'product_id': b'88', 'user_id': b'14606'}
{'product_id': b'686', 'user_id': b'14606'}
{'product_id': b'631', 'user_id': b'14606'}
{'product_id': b'363', 'user_id': b'14606'}
{'product_id': b'1547', 'user_id': b'14606'}
{'product_id': b'3208', 'user_id': b'14606'}
{'product_id': b'2781', 'user_id': b'14606'}
{'product_id': b'237', 'user_id': b'14606'}
{'product_id': b'3402', 'user_id': b'14606'}
{'product_id': b'1220', 'user_id': b'14606'}


# implement recommender system

# prepare dataset

In [8]:
class prepare_data:
    
    # Init
    def __init__(self, users, products):
        self.users = users
        self.products = products
    # keep useful elements
    def keep_useful_elements(self):
        self.users = self.users.map(lambda x: {
                         'product_id' : x['product_id'],
                         'user_id' : x['user_id'],
                    })
        self.products = self.products.map(lambda x: x['product_id'])
        return self.users, self.products 
    
    def train_test_generator(self, train_range=80_000, all_range=100_000):
        tf.random.set_seed(42)
        shuffled = self.users.shuffle(all_range, seed=42, reshuffle_each_iteration=False)
        train = shuffled.take(train_range)
        test = shuffled.skip(train_range).take(all_range - train_range)
        return train, test
    
    def pass_unique(self):
        product_ids = self.products.batch(1_000)
        user_ids = self.users.batch(1_000_000).map(lambda x: x["user_id"])

        unique_product_ids = np.unique(np.concatenate(list(product_ids)))
        unique_user_ids = np.unique(np.concatenate(list(user_ids)))
        return unique_product_ids, unique_user_ids

In [9]:
class modelAndLoss:
    
    def __init__(self, unique_product_ids, unique_user_ids, products):
        self.unique_product_ids = unique_product_ids
        self.unique_user_ids = unique_user_ids
        self.products = products
        # self.product_model = None
    def implement_model(self, embedding_dimension = 32):
            
        # Here, we're going to use Keras preprocessing layers to first convert user ids to integers, and then convert those
        # to user embeddings via an Embedding layer.
        user_model = tf.keras.Sequential([
          tf.keras.layers.StringLookup(
              vocabulary=self.unique_user_ids, mask_token=None),
          # We add an additional embedding to account for unknown tokens.
          tf.keras.layers.Embedding(len(self.unique_user_ids) + 1, embedding_dimension)
        ])
        
        # the candidate tower
        self.product_model = tf.keras.Sequential([
        tf.keras.layers.StringLookup(
          vocabulary=self.unique_product_ids, mask_token=None),
        tf.keras.layers.Embedding(len(self.unique_product_ids) + 1, embedding_dimension)
        ])
        return user_model, self.product_model
    
    def metrics_loss(self, batch_size = 128):
        metrics = tfrs.metrics.FactorizedTopK(
          candidates= self.products.batch(batch_size).map(self.product_model)
        )
        task = tfrs.tasks.Retrieval(
            metrics=metrics)
        return task

# full model

In [10]:
class userProductModel(tfrs.Model):

    def __init__(self, user_model, product_model):
        super().__init__()
        self.product_model: tf.keras.Model = product_model
        self.user_model: tf.keras.Model = user_model
        self.task: tf.keras.layers.Layer = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        # We pick out the user features and pass them into the user model.
        self.user_embeddings = self.user_model(features["user_id"])
        # And pick out the product features and pass them into the product model,
        # getting embeddings back.
        self.positive_product_embeddings = self.product_model(features["product_id"])

        # The task computes the loss and the metrics.
        return self.task(self.user_embeddings, self.positive_product_embeddings)

## fit and evaluating model

In [11]:
# fitting and evaluating
class fitAndEvaluateModel:
    
    def __init__(self, user_model, product_model, train, test):
        self.user_model = user_model
        self.product_model = product_model
        self.model = None
        self.train = train
        self.test = test
        
    def create_model(self):
        self.model = userProductModel(self.user_model, self.product_model) 
    
    def compile_model(self):
        self.model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))
    
    def fit_model(self):
        cached_train = train.shuffle(200_000).batch(8192).cache()
        self.cached_test = test.batch(4096).cache()
        self.model.fit(cached_train, epochs=10)
        return self.model
    
    def evaluate_model(self):
        self.model.evaluate(self.cached_test, return_dict=True)
        return self.model

# recommend item to user

In [12]:
def make_predictions(model, products, product_model):
    # Create a model that takes in raw query features, and
    index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
    # recommends products out of the entire products dataset.
    index.index_from_dataset(
      tf.data.Dataset.zip((products.batch(100), products.batch(100).map(model.product_model)))
    )

    # Get recommendations.
    _, titles = index(tf.constant([b"13085"]))
    return titles, index
    # print(f"Recommendations for user 42: {titles[0, :3]}")    

# call classes and functions to see the results

In [13]:
ratings, info = tfds.load("movielens/100k-ratings", split="train", with_info = True)
# for x in ratings.take(10000).as_numpy_iterator():
#     if x['user_id'] == b'357':
#         pprint.pprint(x)

In [None]:
users = users_dataset
products = products_dataset

data = prepare_data(users, products) # delete
users, products = data.keep_useful_elements()
train, test = data.train_test_generator()
unique_product_ids, unique_user_ids = data.pass_unique()

pre_model = modelAndLoss(unique_product_ids, unique_user_ids, products)
user_model, product_model = pre_model.implement_model()
task = pre_model.metrics_loss()
product_model

model = fitAndEvaluateModel(user_model, product_model, train, test)
model.create_model()
model.compile_model()
model_ = model.fit_model()
# model_ = model.evaluate_model()
# model_

Epoch 1/10
Epoch 2/10


In [15]:
titles, index = make_predictions(model_, products, product_model)
print(f"Recommendations for user 42: {titles[0, :3]}")

Recommendations for user 42: [b'2' b'2' b'2']


# Model saving

In [26]:
# Export the query model.
x = ''
with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "model")

    # Save the index.
    tf.saved_model.save(index, p)

    # Load it back; can also be done in TensorFlow Serving.
    loaded = tf.saved_model.load(p)

    # Pass a user id in, get top predicted movie titles back.
    scores, titles = loaded(["90"])
    x = path
    print(f"Recommendations: {titles[0][:3]}")



INFO:tensorflow:Assets written to: C:\Users\Lenovo\Desktop\intern\assets


INFO:tensorflow:Assets written to: C:\Users\Lenovo\Desktop\intern\assets


Recommendations: [b'243' b'243' b'243']


In [25]:
p = 'C:\\Users\\Lenovo\\Desktop\\intern'
path

'C:\\Users\\Lenovo\\AppData\\Local\\Temp\\tmp4o57w0gs\\model'

In [25]:
# Load it back; can also be done in TensorFlow Serving.

# path = 'C:\\Users\\Lenovo\\AppData\\Local\\Temp\\tmput2dmwc9\\model'
loaded = tf.saved_model.load(path)

# Pass a user id in, get top predicted movie titles back.
scores, titles = loaded(["9"])
x = path
print(f"Recommendations: {titles[0][:3]}")

OSError: SavedModel file does not exist at: C:\Users\Lenovo\AppData\Local\Temp\tmph0hfa9g7\model\{saved_model.pbtxt|saved_model.pb}

In [22]:
path

'C:\\Users\\Lenovo\\AppData\\Local\\Temp\\tmput2dmwc9\\model'

In [29]:
model.evaluate_model()



<__main__.userProductModel at 0x21597962370>