In [1]:
from dotenv import load_dotenv
import MySQLdb
import numpy as np
import os
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Connect to the MySQL database
load_dotenv()

connection = MySQLdb.connect(
  host=os.getenv("HOST"),
  user=os.getenv("USERNAME"),
  passwd=os.getenv("PASSWORD"),
  db=os.getenv("DATABASE"),
  autocommit=True,
  ssl_mode="VERIFY_IDENTITY",
  ssl={"ca": "/etc/ssl/cert.pem"},
)

cursor = connection.cursor()

In [3]:
# Fetch all posts
query = "SELECT id, content FROM Post"
cursor.execute(query)

posts = pd.DataFrame(cursor.fetchall(), columns=["id", "content"])

print(posts)

                                     id  \
0  00dd2dc5-2596-4f5a-8686-269fbaec8a37   
1  07c738cd-bb63-43ab-b2c5-3039ff0df9fb   
2  0ae3dfce-c863-4e4d-ba4e-e0546c280c82   
3  11a44b0e-12e5-4348-843a-438acb10c1a8   
4  1aecd20a-3658-43a7-bf4d-8463c0754219   
5  5be3d665-d85c-4b96-b637-17bfb3848f06   
6  8d5f5dac-605a-4dfe-87a7-244c85b71c75   
7  b364a0b7-cb8b-42d8-8d94-0e65fa1df257   
8  e96f6ff7-9947-47ac-b139-ccbddb1d8a33   

                                   content  
0                                     yooo  
1   this is an epic website, 10/10 stars\n  
2                                very cool  
3  Nathan's dad makes good chicken papaya!  
4                                   Hello!  
5                  https://chat.openai.com  
6                                  epicccc  
7                        ¯\_( ͡❛ ͜ʖ ͡❛)_/¯  
8                                ~('v'~)\n  


In [4]:
# The function to embed the text
def embed(texts):
  hub_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
  with tf.device("/cpu:0"):
    embedding = hub.load(hub_url)(texts)

  embedding = np.array(embedding)

  return embedding

In [5]:
post_embeddings = embed(posts["content"].tolist())

print(post_embeddings.shape)
print(post_embeddings)

Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2023-06-07 21:50:30.830271: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


(9, 512)
[[ 0.00106538 -0.00854104  0.06823314 ... -0.09808023  0.01103214
  -0.01372389]
 [-0.04650597  0.00439887 -0.01854666 ... -0.04169654  0.04049069
  -0.03430885]
 [ 0.01422869  0.04488054  0.02140874 ... -0.04481297 -0.00456409
  -0.00886978]
 ...
 [-0.02576295  0.03394373  0.02542277 ... -0.05315149  0.08471154
  -0.06853922]
 [ 0.00813886  0.03888463  0.02779601 ... -0.02551527  0.06516523
  -0.0162978 ]
 [ 0.00890347 -0.01208538  0.04783745 ... -0.07256813  0.0883611
  -0.03279964]]


In [6]:
user_id = "clic1dtrw0004la08gavufafb"

query = f"""
  SELECT p.id, p.content
  FROM Post p
  JOIN `Like` l ON p.id = l.postId
  WHERE l.userId = '{user_id}';
"""
cursor.execute(query)

liked_posts = pd.DataFrame(cursor.fetchall(), columns=["id", "content"])

print(liked_posts)

                                     id  \
0  07c738cd-bb63-43ab-b2c5-3039ff0df9fb   
1  11a44b0e-12e5-4348-843a-438acb10c1a8   

                                   content  
0   this is an epic website, 10/10 stars\n  
1  Nathan's dad makes good chicken papaya!  


In [7]:
# Embed the liked posts
liked_post_embeddings = embed(liked_posts["content"].tolist())

print(liked_post_embeddings.shape)
print(liked_post_embeddings)

(2, 512)
[[-0.04650597  0.00439885 -0.01854667 ... -0.04169655  0.04049069
  -0.03430884]
 [ 0.04226284  0.0172637   0.02388385 ...  0.01653581  0.04116938
  -0.05893382]]


In [8]:
# Calculate the similarity scores
similarity_scores = cosine_similarity(liked_post_embeddings, post_embeddings)

recommend_index = np.argsort(-similarity_scores)[0]

print(recommend_index)

[1 2 0 7 4 8 5 6 3]


In [9]:
for index in recommend_index:
  print(posts.iloc[index]["id"], '|' ,posts.iloc[index]["content"])

07c738cd-bb63-43ab-b2c5-3039ff0df9fb | this is an epic website, 10/10 stars

0ae3dfce-c863-4e4d-ba4e-e0546c280c82 | very cool
00dd2dc5-2596-4f5a-8686-269fbaec8a37 | yooo
b364a0b7-cb8b-42d8-8d94-0e65fa1df257 | ¯\_( ͡❛ ͜ʖ ͡❛)_/¯
1aecd20a-3658-43a7-bf4d-8463c0754219 | Hello!
e96f6ff7-9947-47ac-b139-ccbddb1d8a33 | ~('v'~)

5be3d665-d85c-4b96-b637-17bfb3848f06 | https://chat.openai.com
8d5f5dac-605a-4dfe-87a7-244c85b71c75 | epicccc
11a44b0e-12e5-4348-843a-438acb10c1a8 | Nathan's dad makes good chicken papaya!
