In [3]:
# IMPORT

import numpy as np
import pandas as pd
# import matplotlib as plt

import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

from tensorflow.keras import layers

In [4]:
# Data raw interaction user & recipes
interaction_data = pd.read_csv("../../Dataset/RAW_interactions.csv")
recipes_data = pd.read_csv("../../Dataset/RAW_recipes.csv")

# Data Training & Validation
interaction_train = pd.read_csv("../../Dataset/interactions_train.csv")
interaction_test = pd.read_csv("../../Dataset/interactions_test.csv")

In [5]:
interaction_data.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."


In [6]:
recipes_data.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


In [7]:
# Convert Int => String
interaction_data = interaction_data.astype({'user_id': 'string', 'recipe_id':'string'})
interaction_train = interaction_train.astype({'user_id': 'string', 'recipe_id':'string'})
interaction_test = interaction_test.astype({'user_id': 'string', 'recipe_id':'string'})

In [8]:
# Jadi Array & membuang duplikat
unique_userId = interaction_data.user_id.unique()
unique_recipeId = interaction_data.recipe_id.unique()

In [9]:
class RankingModel(tf.keras.Model):

    def __init__(self):

        super().__init__()
        embedding_dimension = 32

        self.user_embeddings = tf.keras.Sequential([
            layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_userId, mask_token=None
            ),
            # additional embedding for to account for unknown tokens
            layers.Embedding(len(unique_userId)+1, embedding_dimension)
        ])

        self.recipe_embeddings = tf.keras.Sequential([
            layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_recipeId, mask_token=None
            ),
            # additional embedding for to account for unknown tokens
            layers.Embedding(len(unique_recipeId)+1, embedding_dimension)
        ])

        self.ratings = tf.keras.Sequential([
            layers.Dense(256, activation="relu"),
            layers.Dense(64, activation="relu"),
            layers.Dense(1)
        ])

    
    def call(self, inputs):

        user_id, recipe_id = inputs

        user_embedding = self.user_embeddings(user_id)
        recipe_embedding = self.recipe_embeddings(recipe_id)

        return self.ratings(tf.concat([user_embedding, recipe_embedding], axis=1))


In [10]:
class RecipesModel(tfrs.models.Model):

    def __init__(self):
        super().__init__()
        self.ranking_model: tf.keras.Model = RankingModel()
        self.task: tf.keras.layers.layer = tfrs.tasks.Ranking(
            loss = tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()]
        )

    def call(self, features):
        return self.ranking_model(
            (features["user_id"], features["recipe_id"])
        )
    
    def compute_loss(self, features, training=False):
        labels = features.pop("user_rating")
        rating_prediction = self(features)

    # def compute_loss(self, features, training=False):
    #     rating_predictions = self.ranking_model(
    #         (features["user_id"], features["recipe_id"])
    #     )

        # Task computes the loss and the metrics
        # return self.task(labels=features["user_rating"], predictions=rating_predictions)
        return self.task(labels=labels, predictions=rating_prediction)

In [11]:
train_data = tf.data.Dataset.from_tensor_slices(
{
    "user_id":tf.cast(interaction_train.user_id.values, tf.string),
    "recipe_id":tf.cast(interaction_train.recipe_id.values, tf.string),
    "user_rating":tf.cast(interaction_train.rating.values, tf.float32)
})

test_data = tf.data.Dataset.from_tensor_slices(
{
    "user_id":tf.cast(interaction_test.user_id.values, tf.string),
    "recipe_id":tf.cast(interaction_test.recipe_id.values, tf.string),
    "user_rating":tf.cast(interaction_test.rating.values, tf.float32)
})

In [12]:
model = RecipesModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.5))

cached_train = train_data.shuffle(100_000).batch(8192).cache()
cached_test = test_data.batch(5000).cache()

# Train modelnya
epoch = 8
history = model.fit(cached_train, epochs=epoch)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [13]:
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.3021416664123535,
 'loss': 1.9944878816604614,
 'regularization_loss': 0,
 'total_loss': 1.9944878816604614}

In [14]:
test_user_id = unique_userId[200]
model({
    "user_id" : np.array([test_user_id]),
    "recipe_id" : np.array(["40893"])
})

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[4.5858045]], dtype=float32)>

In [15]:
recipes_data.loc[recipes_data["id"] == 40893]

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
225877,white bean green chile pepper soup,40893,495,1533,2002-09-21,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9


In [16]:
# Test modelnya
test_user_id = unique_userId[200]

test_ratings = {}
for recipe in test_data.take(10):

    recipeid = int(recipe["recipe_id"].numpy())
    
    test_ratings[recipe["recipe_id"].numpy()] = model({
        "user_id" : np.array([test_user_id]),
        "recipe_id" : np.array([str(recipeid)])
    })

print(f"Top 10 Recipes for {test_user_id} :")
for m, score in sorted(test_ratings.items(), key=lambda x :x[1], reverse=True):
    title = recipes_data.loc[recipes_data["id"] == int(m)]["name"].item()
    print(f"{title}, {score}")

Top 10 Recipes for 450004 :
breakfast biscuit sandwiches, [[4.9134293]]
sweet and sour pork ribs, [[4.824863]]
skinny style chocolatey hot cocoa, [[4.8178554]]
lemon herb chicken w mushrooms low carb, [[4.8086576]]
ww 3 points   grilled beef fajitas, [[4.7357306]]
strawberry fudge, [[4.702412]]
ranch egg salad croissant sandwiches, [[4.6703973]]
peanut butter spirals, [[4.6430655]]
blueberry pancake syrup   low carb, [[4.6401615]]
azumaya pot stickers, [[4.625444]]


In [18]:
tf.saved_model.save(model, "export")

INFO:tensorflow:Assets written to: export\assets


INFO:tensorflow:Assets written to: export\assets


In [19]:
converter = tf.lite.TFLiteConverter.from_saved_model("export")
tflite_model = converter.convert()
open("converted_model.tflite", "wb").write(tflite_model)

67310348