<a href="https://colab.research.google.com/github/NVREND/Coursera/blob/main/Colab_Capstone_NusaGo_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q tensorflow-recommenders

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/96.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m92.2/96.2 kB[0m [31m2.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import tensorflow_recommenders as tfrs
from typing import Dict, Text

In [None]:
# Import PyDrive and associated libraries.
# This only needs to be done once per notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import drive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Download a file based on its file ID.
#
# A file ID looks like: laggVyWshwcyP6kEI-y_W3P8D26sz
file_id_1 = '1kFmj22am8p8_PkXpZjiZeLrIc_2V4hmX'
download_1 = drive.CreateFile({'id': file_id_1})
download_1.GetContentFile('tourism_with_id.csv')

file_id_2 = '1hzr76LlqTeCziP34GAA57uQheva6Md7v'
download_2 = drive.CreateFile({'id': file_id_2})
download_2.GetContentFile('user.csv')

file_id_3 = '1kGo_JryF9qfcV_zeSmA6MNq5vQPDqtLC'
download_3 = drive.CreateFile({'id': file_id_3})
download_3.GetContentFile('tourism_rating.csv')

# Load data
rating = pd.read_csv("tourism_with_id.csv")
place = pd.read_csv("tourism_rating.csv")
user = pd.read_csv("user.csv")




In [None]:
# Drop unnecessary columns
place = place.drop(["Unnamed: 11", "Unnamed: 12", "Time_Minutes"], axis=1)

In [None]:
# Merge dataframes
merged_df = pd.merge(rating, place, how="outer", on="Place_Id")
merged_df = pd.merge(merged_df, user, how="outer", on="User_Id")

# Convert "User_Id" and "Place_Id" to strings
merged_df["User_Id"] = merged_df["User_Id"].astype(str)
merged_df["Place_Id"] = merged_df["Place_Id"].astype(str)
merged_df["Place_Name"] = merged_df["Place_Name"].astype(str)

# Split the data
training_size = 0.8
rating_merge_place = merged_df.sample(frac=1).reset_index(drop=True)
train_df, test_df = train_test_split(rating_merge_place, test_size=1 - training_size)

In [None]:
# Define the model
embedding_dimension = 32
user_model = tf.keras.Sequential(
    [
        tf.keras.layers.StringLookup(
            vocabulary=merged_df["User_Id"].unique(),
            mask_token=None,
            name="user_id_lookup",
        ),
        tf.keras.layers.Embedding(
            input_dim=len(merged_df["User_Id"].unique()) + 1,
            output_dim=embedding_dimension,
            name="user_embedding",
        ),
    ]
)

place_model = tf.keras.Sequential(
    [
        tf.keras.layers.StringLookup(
            vocabulary=merged_df["Place_Name"].unique(),
            mask_token=None,
            name="Place_Name_lookup",
        ),
        tf.keras.layers.Embedding(
            input_dim=len(merged_df["Place_Name"].unique()) + 1,
            output_dim=embedding_dimension,
            name="place_embedding",
        ),
    ]
)

# Define the task
task = tfrs.tasks.Ranking(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)


# Build the model
class RecommenderModel(tfrs.Model):
    def __init__(self, user_model, place_model, task):
        super().__init__()
        self.place_model: tf.keras.Model = place_model
        self.user_model: tf.keras.Model = user_model
        self.task: tf.keras.layers.Layer = task

    def compute_loss(
        self, features: Dict[Text, tf.Tensor], training=False
    ) -> tf.Tensor:
        user_embeddings = self.user_model(features["User_Id"])
        positive_place_embeddings = self.place_model(features["Place_Name"])
        return self.task(user_embeddings, positive_place_embeddings)


# Create the model instance
model = RecommenderModel(user_model, place_model, task)

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

# Create input pipelines
train_batch_size = 64
train_data = tf.data.Dataset.from_tensor_slices(dict(train_df))
train_data = train_data.batch(train_batch_size)

test_data = tf.data.Dataset.from_tensor_slices(dict(test_df))
test_data = test_data.batch(train_batch_size)

# Train the model
model.fit(train_data, epochs=10)

# Evaluate the model
model.evaluate(test_data)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.03800969198346138, 0.001397767337039113, 0, 0.001397767337039113]

In [None]:
model.save('model.h5')

In [None]:
# Make recommendations
user_ids = merged_df["User_Id"].unique()
for user_id in user_ids[:5]:  # Make recommendations for the first 5 users
    user_id_str = str(user_id)  # Convert user ID to string
    user_id = np.array([[user_id_str]])

    # Ensure the dtype is consistent with the StringLookup layer
    user_id = tf.convert_to_tensor(user_id, dtype=tf.string)

    user_embeddings = model.user_model(user_id)

    # Convert embeddings to string dtype to match the StringLookup layer in the recommendation model
    user_embeddings_str = tf.strings.as_string(user_embeddings)

    recommended_place_ids = model.place_model.predict(user_embeddings_str)
    print(f"Recommendations for User {user_id_str}: {recommended_place_ids.flatten()}")

Recommendations for User 1: [-0.00042299  0.04967078  0.03912396 ... -0.03964134 -0.02665569
  0.04725445]
Recommendations for User 22: [-0.00042299  0.04967078  0.03912396 ... -0.03964134 -0.02665569
  0.04725445]
Recommendations for User 40: [-0.00042299  0.04967078  0.03912396 ... -0.03964134 -0.02665569
  0.04725445]
Recommendations for User 49: [-0.00042299  0.04967078  0.03912396 ... -0.03964134 -0.02665569
  0.04725445]
Recommendations for User 74: [-0.00042299  0.04967078  0.03912396 ... -0.03964134 -0.02665569
  0.04725445]
