In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs
from sklearn.preprocessing import LabelEncoder

In [4]:
# Sample data
user_data = pd.DataFrame({
    "user_id": np.arange(1000),
    "user_name": [f"user_{i}" for i in range(1000)],
    "full_name": [f"Fullname_{i}" for i in range(1000)],
    "password": "123456",
    "age": np.random.randint(18, 65, size=1000),
    "gender": np.random.choice(["Male", "Female"], size=1000),
})
print(user_data)
location_data = pd.DataFrame({
    "location_id": [i for i in range(1000)],
    "location_name": [f"Location_{i}" for i in range(1000)],
    "category": np.random.choice(["Restaurant", "Cafe", "Bar", "Park"], size=1000),
    "description" :"It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English.",
})
print(location_data)
ratings = pd.DataFrame({
    "user_id": np.random.choice(user_data["user_id"], size=5000),
    "location_id": np.random.choice(location_data["location_id"], size=5000),
    "rating": np.random.randint(1, 6, size=5000)
})
print(ratings)

     user_id user_name     full_name password  age  gender
0          0    user_0    Fullname_0   123456   39    Male
1          1    user_1    Fullname_1   123456   41    Male
2          2    user_2    Fullname_2   123456   43    Male
3          3    user_3    Fullname_3   123456   45  Female
4          4    user_4    Fullname_4   123456   22  Female
..       ...       ...           ...      ...  ...     ...
995      995  user_995  Fullname_995   123456   43    Male
996      996  user_996  Fullname_996   123456   39  Female
997      997  user_997  Fullname_997   123456   26  Female
998      998  user_998  Fullname_998   123456   31    Male
999      999  user_999  Fullname_999   123456   18    Male

[1000 rows x 6 columns]
     location_id location_name    category  \
0              0    Location_0  Restaurant   
1              1    Location_1  Restaurant   
2              2    Location_2         Bar   
3              3    Location_3  Restaurant   
4              4    Location_4  Resta

In [5]:
location_encoder = LabelEncoder()
location_data["location_index"] = location_encoder.fit_transform(location_data["location_id"])
ratings["location_index"] = location_encoder.transform(ratings["location_id"])

In [6]:
# Create train and test datasets
train = tf.data.Dataset.from_tensor_slices((
    {"user_id": ratings["user_id"], "location_id": ratings["location_index"]}, 
    ratings["rating"]
)).shuffle(len(ratings))

In [7]:
test = tf.data.Dataset.from_tensor_slices((
    {"user_id": ratings["user_id"], "location_id": ratings["location_index"]}, 
    ratings["rating"]
)).batch(len(location_data))

In [13]:
# Model architecture
user_id = tf.keras.Input(shape=(), name="user_id", dtype=tf.int32)
location_id = tf.keras.Input(shape=(), name="location_id", dtype=tf.int32)
user_embedding = tf.keras.layers.Embedding(input_dim=len(user_data), output_dim=32)(user_id)
location_embedding = tf.keras.layers.Embedding(input_dim=len(location_data), output_dim=32)(location_id)
dot_product = tf.keras.layers.Dot(axes=1)([user_embedding, location_embedding])
model = tf.keras.Model(inputs=[user_id, location_id], outputs=dot_product)

In [14]:
# Compile and evaluate the model
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1), loss=tf.keras.losses.MeanSquaredError(), metrics=["accuracy"])

In [15]:
model.evaluate(test)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.0000e+00 - loss: 10.9548  


[10.966470718383789, 0.0]

In [22]:
specific_user_id = 0

In [23]:
# Create a dataset for the specific userId
locations_for_specific_user = tf.data.Dataset.from_tensor_slices({
    "user_id": np.repeat(specific_user_id, len(location_data)),
    "location_id": np.arange(len(location_data))  # Use location_index instead of location_name
})

# Reshape the input tensors to match the expected shapes
locations_for_specific_user = locations_for_specific_user.map(lambda x: {
    "user_id": tf.reshape(x["user_id"], (1,)),
    "location_id": tf.reshape(x["location_id"], (1,))
})

# Predict ratings for locations for the specific userId
predicted_ratings = model.predict(locations_for_specific_user)

# Combine location_index with predicted ratings
predicted_ratings_with_indexes = list(zip(np.arange(len(location_data)), predicted_ratings))

# Sort by predicted ratings
recommended_locations_indexes = sorted(predicted_ratings_with_indexes, key=lambda x: x[1], reverse=True)

# Choose the desired number of recommended locations
num_recommendations = 10
top_recommendations_indexes = recommended_locations_indexes[:num_recommendations]

# Decode location indexes back to location names
top_recommendations = [(location_encoder.inverse_transform([index])[0], rating) for index, rating in top_recommendations_indexes]

# Print the top recommended locations
print(f"Top {num_recommendations} recommended locations for user {specific_user_id}:")
for location, rating in top_recommendations:
    print(f"{location}: Predicted rating - {rating}")



[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 753us/step
Top 10 recommended locations for user 0:
200: Predicted rating - [0.01997084]
335: Predicted rating - [0.01312447]
522: Predicted rating - [0.01312197]
159: Predicted rating - [0.0128419]
339: Predicted rating - [0.01268517]
851: Predicted rating - [0.01267172]
733: Predicted rating - [0.01252961]
774: Predicted rating - [0.01228201]
130: Predicted rating - [0.01205788]
527: Predicted rating - [0.01180554]
