In [1]:
import pandas as pd

# User-Post Interaction Data
interactions_data = {
    "user_id": [1, 1, 2, 2, 3, 3, 4],
    "post_id": [1, 2, 2, 3, 4, 5, 1],
    "interaction_type": ["like", "view", "like", "comment", "like", "view", "share"]
}

# Post Metadata
post_data = {
    "post_id": [1, 2, 3, 4, 5],
    "content": [
        "Artificial Intelligence in healthcare is transformative.",
        "Deep learning advances in image recognition.",
        "AI and data science are the future.",
        "How to use Python for data visualization.",
        "Understanding neural networks in machine learning."
    ],
    "tags": [
        "AI, healthcare",
        "Deep learning, AI",
        "AI, data science",
        "Python, data visualization",
        "Machine learning, neural networks"
    ]
}

# Convert to DataFrame
interactions = pd.DataFrame(interactions_data)
posts = pd.DataFrame(post_data)


In [2]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Load pre-trained model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Create embeddings for posts
posts['embedding'] = posts['content'].apply(lambda x: model.encode(x))


  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [3]:
from sklearn.metrics.pairwise import cosine_similarity

# Create user-item interaction matrix
interaction_matrix = interactions.pivot_table(index='user_id', columns='post_id', aggfunc='size', fill_value=0)

# Compute cosine similarity between users
user_similarity = cosine_similarity(interaction_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=interaction_matrix.index, columns=interaction_matrix.index)

print(user_similarity_df)


user_id         1    2    3         4
user_id                              
1        1.000000  0.5  0.0  0.707107
2        0.500000  1.0  0.0  0.000000
3        0.000000  0.0  1.0  0.000000
4        0.707107  0.0  0.0  1.000000


In [4]:
def recommend_posts(user_id, top_n=3):
    # Get similar users
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).index[1:]

    # Get posts liked by similar users
    similar_user_posts = interactions[interactions['user_id'].isin(similar_users)]['post_id'].unique()

    # Compute content similarity
    user_interacted_posts = interactions[interactions['user_id'] == user_id]['post_id'].unique()
    user_embeddings = np.array([posts[posts['post_id'] == pid]['embedding'].values[0] for pid in user_interacted_posts])

    recommended_posts = []
    for pid in similar_user_posts:
        post_embedding = posts[posts['post_id'] == pid]['embedding'].values[0]
        similarity = cosine_similarity([post_embedding], user_embeddings).mean()
        recommended_posts.append((pid, similarity))

    # Sort and return top N
    recommended_posts = sorted(recommended_posts, key=lambda x: x[1], reverse=True)
    recommended_ids = [rec[0] for rec in recommended_posts[:top_n]]
    return posts[posts['post_id'].isin(recommended_ids)][['post_id', 'content']]

# Example Recommendation for User 1
recommendations = recommend_posts(user_id=1)
print("Recommendations for User 1:")
print(recommendations)


Recommendations for User 1:
   post_id                                            content
0        1  Artificial Intelligence in healthcare is trans...
1        2       Deep learning advances in image recognition.
4        5  Understanding neural networks in machine learn...
