In [None]:
!pip install tensorflow-recommenders
!pip install mysql-connector-python

In [None]:
import os
import json
from typing import Dict, Text
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs
import mysql.connector
import tempfile

# Load Data

In [None]:
def get_mysql_credentials():
    """Read the MySQL connection settings from ``config2.json``.

    The file is opened relative to the current working directory (upload
    ``config2.json`` to the root folder before running this).

    Returns:
        The value stored under the top-level ``"mysql"`` key of the JSON
        document, or an empty dict when that key is absent.
    """
    with open("config2.json") as handle:
        settings = json.load(handle)
    return settings.get("mysql", {})

def connect_to_database():
    """Open a MySQL connection using the settings from ``get_mysql_credentials``.

    Only the ``user``, ``password``, ``host`` and ``database`` entries are
    forwarded; an entry missing from the settings is passed as ``None``.

    Returns:
        The connection object returned by ``mysql.connector.connect``.
    """
    settings = get_mysql_credentials()
    connect_kwargs = {
        key: settings.get(key) for key in ("user", "password", "host", "database")
    }
    return mysql.connector.connect(**connect_kwargs)

In [None]:
# Pull every distinct (id_user, id_umkm) pair from tbl_userview into a DataFrame.
query_tbl_userview = "SELECT DISTINCT id_user, id_umkm from tbl_userview"
col_userview = ["id_user", "id_umkm"]

cnx = connect_to_database()
try:
    cursor = cnx.cursor()
    try:
        cursor.execute(query_tbl_userview)
        user = pd.DataFrame(cursor.fetchall(), columns=col_userview)
    finally:
        cursor.close()
finally:
    # The rows are fully materialised in `user` by now, so release the
    # connection instead of leaving it open for the rest of the session.
    cnx.close()

# One row per distinct id_umkm that appears in the fetched pairs.
umkm = pd.DataFrame({"id_umkm": user["id_umkm"].unique()})

# Preparing Data

In [None]:
# Cast both id columns of the interaction table to strings so they can be fed
# to the string-based lookup layers later on.
user["id_user"] = user["id_user"].astype(str)
user["id_umkm"] = user["id_umkm"].astype(str)


def map_function(row):
    """Return a dict holding only the ``id_user`` and ``id_umkm`` entries of ``row``."""
    return {"id_user": row["id_user"], "id_umkm": row["id_umkm"]}


# Interaction dataset: one {"id_user", "id_umkm"} dict per row of `user`.
# The projection is applied once; repeating it (or mapping the identity
# function) only adds no-op stages to the pipeline.
ratings = tf.data.Dataset.from_tensor_slices(dict(user)).map(map_function)

# Candidate dataset: the distinct id_umkm strings, in order of first
# appearance. It is derived from `user` rather than from the previous value of
# `umkm`, so this cell can be re-run without failing on an already-converted
# `umkm`.
umkm = tf.data.Dataset.from_tensor_slices(user["id_umkm"].drop_duplicates())

# Preprocessing Data

In [None]:
# Shuffle the dataset once, with a fixed order.
# reshuffle_each_iteration=False is required here: with the default (True) the
# order changes every time the dataset is iterated, so the take()/skip() calls
# below would slice two *different* permutations and test rows could also end
# up in the training split.
shuffled = ratings.shuffle(
    buffer_size=len(ratings), seed=42, reshuffle_each_iteration=False
)

# Take first 4000 elements for train
train = shuffled.take(4000)

# Skip 4000 elements and take next 320 for test
test = shuffled.skip(4000).take(320)

# Batch the ids so they can be concatenated into flat arrays below.
ID_umkm = umkm.batch(100)
ID_user = ratings.batch(1000).map(lambda x: x["id_user"])

# Sorted, de-duplicated id arrays; used as lookup vocabularies by the model.
unique_ID_umkm = np.unique(np.concatenate(list(ID_umkm)))
unique_ID_user = np.unique(np.concatenate(list(ID_user)))

# Build Model

In [None]:
embedding_dimension = 32


def _id_embedding_tower(vocabulary):
    """Return a Sequential model: string lookup over ``vocabulary``, then an embedding."""
    return tf.keras.Sequential(
        [
            tf.keras.layers.StringLookup(vocabulary=vocabulary, mask_token=None),
            # One extra embedding row accounts for unknown tokens.
            tf.keras.layers.Embedding(len(vocabulary) + 1, embedding_dimension),
        ]
    )


# Query tower (users) and candidate tower (UMKM), built the same way.
user_model = _id_embedding_tower(unique_ID_user)
umkm_model = _id_embedding_tower(unique_ID_umkm)

# Top-K metrics are computed against the embeddings of every UMKM candidate.
candidate_embeddings = umkm.batch(128).map(umkm_model)
metrics = tfrs.metrics.FactorizedTopK(candidates=candidate_embeddings)
task = tfrs.tasks.Retrieval(metrics=metrics)

In [None]:
class UmkmlensModel(tfrs.Model):
    """Retrieval model pairing a user tower with a UMKM tower.

    Subclasses ``tfrs.Model`` and supplies ``compute_loss``.

    Args:
        user_model: Model applied to ``features["id_user"]``.
        umkm_model: Model applied to ``features["id_umkm"]``.
        retrieval_task: Optional task layer used to compute the loss. When
            omitted, the notebook-level ``task`` object is used, which keeps
            existing two-argument calls working unchanged.
    """

    def __init__(self, user_model, umkm_model, retrieval_task=None):
        super().__init__()
        self.umkm_model: tf.keras.Model = umkm_model
        self.user_model: tf.keras.Model = user_model
        # Accept the task explicitly so the class does not have to rely on a
        # global; fall back to the module-level `task` for backward compatibility.
        self.task: tf.keras.layers.Layer = (
            task if retrieval_task is None else retrieval_task
        )

    def compute_loss(
        self, features: Dict[Text, tf.Tensor], training=False
    ) -> tf.Tensor:
        """Compute the task loss for one batch.

        Args:
            features: Mapping with ``"id_user"`` and ``"id_umkm"`` entries.
            training: Accepted for interface compatibility; not used here.

        Returns:
            The value returned by calling ``self.task`` on the two embeddings.
        """
        # We pick out the user features and pass them into the user model.
        user_embeddings = self.user_model(features["id_user"])
        # And pick out the umkm features and pass them into the umkm model,
        # getting embeddings back.
        positive_umkm_embeddings = self.umkm_model(features["id_umkm"])

        # The task computes the loss and the metrics.
        return self.task(user_embeddings, positive_umkm_embeddings)

In [None]:
class NoBaseClassUmkmlensModel(tf.keras.Model):
    """Variant of the retrieval model built directly on ``tf.keras.Model``.

    Uses the same two towers and task as ``UmkmlensModel`` but spells out
    ``train_step`` and ``test_step`` by hand.

    Args:
        user_model: Model applied to ``features["id_user"]``.
        umkm_model: Model applied to ``features["id_umkm"]``.
        retrieval_task: Optional task layer used to compute the loss. When
            omitted, the notebook-level ``task`` object is used, which keeps
            existing two-argument calls working unchanged.
    """

    def __init__(self, user_model, umkm_model, retrieval_task=None):
        super().__init__()
        self.umkm_model: tf.keras.Model = umkm_model
        self.user_model: tf.keras.Model = user_model
        # Accept the task explicitly so the class does not have to rely on a
        # global; fall back to the module-level `task` for backward compatibility.
        self.task: tf.keras.layers.Layer = (
            task if retrieval_task is None else retrieval_task
        )

    def _retrieval_losses(self, features: Dict[Text, tf.Tensor]):
        """Return ``(loss, regularization_loss, total_loss)`` for one batch."""
        user_embeddings = self.user_model(features["id_user"])
        positive_umkm_embeddings = self.umkm_model(features["id_umkm"])
        loss = self.task(user_embeddings, positive_umkm_embeddings)

        # Handle regularization losses as well.
        regularization_loss = sum(self.losses)

        return loss, regularization_loss, loss + regularization_loss

    def _step_logs(self, loss, regularization_loss, total_loss):
        """Return the current metric results plus the three loss values."""
        logs = {metric.name: metric.result() for metric in self.metrics}
        logs["loss"] = loss
        logs["regularization_loss"] = regularization_loss
        logs["total_loss"] = total_loss
        return logs

    def train_step(self, features: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
        """Run one optimisation step and return the metric/loss dictionary."""
        # Set up a gradient tape to record gradients.
        with tf.GradientTape() as tape:
            loss, regularization_loss, total_loss = self._retrieval_losses(features)

        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        return self._step_logs(loss, regularization_loss, total_loss)

    def test_step(self, features: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
        """Compute losses for one batch without updating weights; return the dictionary."""
        loss, regularization_loss, total_loss = self._retrieval_losses(features)
        return self._step_logs(loss, regularization_loss, total_loss)

In [None]:
# Wire the two towers into the retrieval model and attach an Adagrad optimizer.
adagrad = tf.keras.optimizers.Adagrad(learning_rate=0.1)
model = UmkmlensModel(user_model, umkm_model)
model.compile(optimizer=adagrad)

# Training split: shuffle, batch, then cache. Test split: batch, then cache.
train_batches = train.shuffle(1000).batch(100)
cached_train = train_batches.cache()
cached_test = test.batch(100).cache()

In [None]:
# Train on the cached training batches for three epochs.
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x789fcff50be0>

# Evaluation

In [None]:
# Evaluate on the cached test batches; return_dict=True makes evaluate()
# return the results as a metric-name -> value dictionary.
model.evaluate(cached_test, return_dict=True)



{'factorized_top_k/top_1_categorical_accuracy': 0.06562499701976776,
 'factorized_top_k/top_5_categorical_accuracy': 0.4937500059604645,
 'factorized_top_k/top_10_categorical_accuracy': 0.831250011920929,
 'factorized_top_k/top_50_categorical_accuracy': 1.0,
 'factorized_top_k/top_100_categorical_accuracy': 1.0,
 'loss': 17.88996124267578,
 'regularization_loss': 0,
 'total_loss': 17.88996124267578}

In [None]:
# Create a brute-force retrieval index whose query side is the trained user model.
index_model = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

# Index every UMKM id together with its embedding, so that queries are answered
# with recommendations drawn from the entire UMKM dataset.
umkm_batches = umkm.batch(100)
index_model.index_from_dataset(
    tf.data.Dataset.zip((umkm_batches, umkm_batches.map(model.umkm_model)))
)

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x789fcc0e3280>

## Prediction Test

In [None]:
# Get recommendations.
# The user id and the number of results are named once so the query and the
# printed message cannot drift apart.
query_user_id = "44"
top_n = 5

# The first return value is unused. It is bound to `_scores` rather than `_`,
# because assigning to `_` shadows IPython's last-output variable.
# (presumably the per-candidate scores — confirm against the TFRS docs)
_scores, ID_umkm = index_model(tf.constant([query_user_id]))
print(f"Recommendations for user {query_user_id}: {ID_umkm[0, :top_n]}")

Recommendations for user 44: [b'134' b'54' b'132' b'2' b'137']


# Save Model

In [None]:
# Resolve the export directory: "<current working directory>/model".
current_directory = os.getcwd()
specific_directory = "model"
full_path = os.path.join(current_directory, specific_directory)
print(full_path)

# Save the index directly to full_path.
# The earlier version wrapped this in tempfile.TemporaryDirectory() and joined
# the temp dir with full_path, but os.path.join discards every component that
# precedes an absolute path, so the model was always written to full_path and
# the temporary directory was created and deleted without being used.
path = full_path
tf.saved_model.save(index_model, path)

/content/model


