In [2]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets
!pip install -q scann

[K     |████████████████████████████████| 85 kB 3.7 MB/s 
[K     |████████████████████████████████| 462 kB 18.7 MB/s 
[K     |████████████████████████████████| 4.2 MB 7.9 MB/s 
[K     |████████████████████████████████| 10.9 MB 10.2 MB/s 
[?25h

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, Text

import tensorflow_recommenders as tfrs
import tensorflow_datasets as tfds

Load the data, use the necessary columns, and convert the data to tensorflow dataset

In [2]:
user_profile_df = pd.read_csv('dataset\\user_activities.csv')
foods_df = pd.read_csv('dataset\\dataset.csv')
user_profile_df = user_profile_df[['Disease','food_name']]
foods_df = foods_df.drop(['description','Price'],axis=1)
user_profile_ds = tf.data.Dataset.from_tensor_slices(dict(user_profile_df))
foods_ds = tf.data.Dataset.from_tensor_slices(dict(foods_df))

In [3]:
foods_ds = foods_ds.map(lambda x: x["Name"])

Split the training and test into 80:20

In [4]:
tf.random.set_seed(42)
shuffled = user_profile_ds.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.take(20_000)

In [None]:
for tensor in train:
    print(tensor)

Get the unique disease and food names

In [8]:
food_names = foods_ds.batch(20)
disease_names = user_profile_ds.batch(20).map(lambda x: x['Disease'])
#for tensor in list(disease_names):
    #print(tensor)
unique_food_names = np.unique(np.concatenate(list(food_names)))
unique_disease_names = np.unique(np.concatenate(list(disease_names)))

unique_disease_names[:100]

array([b'anemia kidney_disease goitre rickets diabeties pregnancy',
       b'anemia kidney_disease goitre rickets pregnancy', b'cancer',
       b'cancer anemia obesity kidney_disease goitre pregnancy',
       b'cancer anemia obesity kidney_disease pregnancy',
       b'cancer hypertension goitre heart_disease scurvy',
       b'cancer kidney_disease obesity anemia diabeties scurvy pregnancy',
       b'cancer kidney_disease obesity anemia heart_disease diabeties',
       b'cancer kidney_disease obesity hypertension anemia goitre heart_disease diabeties rickets pregnancy',
       b'diabeties goitre', b'diabeties hypertension', b'goitre',
       b'goitre hypertension', b'goitre kidney_disease', b'hypertension',
       b'hypertension heart_disease',
       b'hypertension obesity anemia goitre heart_disease diabeties rickets pregnancy',
       b'kidney_disease goitre hypertension',
       b'kidney_disease hypertension anemia goitre heart_disease diabeties scurvy pregnancy',
       b'kidney_di

In [9]:
embedding_dimension = 32

Create the embedding model for disease and food names

In [10]:
disease_model = tf.keras.Sequential([
    tf.keras.layers.StringLookup(vocabulary= unique_disease_names, mask_token=None),
    tf.keras.layers.Embedding(len(unique_disease_names)+1, embedding_dimension)
    ])
#disease_vocab.adapt(user_profile_ds.map(lambda x: x['Disease']))

food_model = tf.keras.Sequential([
    tf.keras.layers.StringLookup(vocabulary=unique_food_names,mask_token=None),
    tf.keras.layers.Embedding(len(unique_food_names)+1, embedding_dimension)
    ])

In [12]:
metrics = tfrs.metrics.FactorizedTopK(
    candidates=foods_ds.batch(20).map(food_model)
)

In [13]:
task = tfrs.tasks.Retrieval(
    metrics=metrics
)

In [14]:
class DRSModel(tfrs.Model):
    def __init__(self, user_model, query_model, task):
        super().__init__()

        self.user_model: tf.keras.Model = user_model
        self.query_model: tf.keras.Model = query_model

        self.task: tf.keras.layers.Layer = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        disease_embeddings = self.user_model(features["Disease"])
        diet_embeddings = self.query_model(features["food_name"])

        return self.task(disease_embeddings, diet_embeddings)

Compile the model

In [15]:
model = DRSModel(disease_model, food_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [16]:
cached_train = train.shuffle(100).batch(50).cache()
cached_test = test.batch(4).cache()

In [17]:
model.fit(cached_train, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x1d8a7a9f250>

In [18]:
model.evaluate(cached_test, return_dict=True)



{'factorized_top_k/top_1_categorical_accuracy': 0.23762376606464386,
 'factorized_top_k/top_5_categorical_accuracy': 0.6435643434524536,
 'factorized_top_k/top_10_categorical_accuracy': 0.8811880946159363,
 'factorized_top_k/top_50_categorical_accuracy': 0.9900990128517151,
 'factorized_top_k/top_100_categorical_accuracy': 0.9900990128517151,
 'loss': 0.0,
 'regularization_loss': 0,
 'total_loss': 0.0}

Create a bruteforce layer to make prediction

In [19]:
# Create a model that takes in raw query features, and
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
# recommends movies out of the entire movies dataset.
index.index_from_dataset(
  foods_ds.batch(20).map(lambda  name: (name, model.query_model(name)))
)

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x1d8a879e7a0>

In [31]:
_, diets = index(np.array(["kidney_disease"]))
print(f"Top 5 recommendation for user: {diets[0, :10]}")

Top 5 recommendation for user: [b'gluten-free christmas cake' b'andhra pan fried pomfret'
 b'spinach and feta crepes' b'fried rice with soya chunks'
 b'baked namakpara with roasted almond dip'
 b'saewoo bokumbop (shrimp fried rice)' b'summer squash salad'
 b'grilled almond barfi (sugar free)'
 b'cajun spiced turkey wrapped with bacon' b'spicy chicken masala']
