# 📌 Interest-Based Content Recommendation System
---
Assignment Notebook

In [1]:

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


## 1. Load and Explore Data

In [2]:

# Read the three input tables from CSV files in the working directory.
users_df = pd.read_csv("Users.csv")
posts_df = pd.read_csv("Posts.csv")
eng_df = pd.read_csv("Engagements.csv")

# Preview each table: its (rows, columns) shape followed by the first rows.
# The first two previews end with a blank line to separate them visually.
for label, frame in (("Users: ", users_df), ("Posts: ", posts_df)):
    print(label, frame.shape)
    print(frame.head(), "\n")
print("Engagements: ", eng_df.shape)
print(eng_df.head())


Users:  (50, 5)
  user_id  age gender          top_3_interests  past_engagement_score
0      U1   24      F      sports, art, gaming                   0.61
1      U2   32      F    travel, food, fashion                   0.93
2      U3   28  Other  sports, travel, fashion                   0.40
3      U4   25      M     fashion, music, tech                   0.53
4      U5   24      M   fashion, food, fitness                   0.80 

Posts:  (100, 4)
  post_id creator_id content_type            tags
0      P1        U44        video    sports, food
1      P2        U26        video   music, travel
2      P3        U32         text  sports, travel
3      P4         U6        image   music, gaming
4      P5        U32        image   food, fashion 

Engagements:  (1000, 3)
  user_id post_id  engagement
0      U1     P52           1
1      U1     P44           0
2      U1      P1           1
3      U1      P4           1
4      U1     P65           0


## 2. Preprocessing

In [3]:

# Replace missing text with an empty string in the two columns that are
# vectorized later, so the vectorizer only ever receives strings.
for frame, text_col in ((users_df, 'top_3_interests'), (posts_df, 'tags')):
    frame[text_col] = frame[text_col].fillna('')


## 3. Represent Users and Posts with TF-IDF

In [4]:

# Vectorize user interests
user_vectorizer = TfidfVectorizer()
user_tfidf = user_vectorizer.fit_transform(users_df['top_3_interests'])

# Vectorize post tags
post_vectorizer = TfidfVectorizer()
post_tfidf = post_vectorizer.fit_transform(posts_df['tags'])


## 4. Compute User-Post Similarity

In [5]:

# Compute cosine similarity
similarity_matrix = cosine_similarity(user_tfidf, post_tfidf)
similarity_matrix.shape


(50, 100)

## 5. Incorporate Past Engagement Score

In [6]:

# Adjust similarity by past engagement
engagement_scores = users_df['past_engagement_score'].values.reshape(-1,1)
final_scores = similarity_matrix * (1 + engagement_scores)
final_scores.shape


(50, 100)

## 6. Generate Top-3 Recommendations

In [7]:

# Build {user_id: [post_id, post_id, post_id]} holding each user's three
# highest-scoring posts, best first.
recommendations = {}
post_ids = posts_df['post_id'].tolist()  # positional lookup: column index -> post_id

for row, user_id in enumerate(users_df['user_id']):
    row_scores = final_scores[row]
    # Column indices ordered by descending score. Python's sort is stable even
    # with reverse=True, so tied posts keep their original (ascending) order.
    by_score = sorted(
        range(len(row_scores)),
        key=lambda col: row_scores[col],
        reverse=True,
    )
    recommendations[user_id] = [post_ids[col] for col in by_score[:3]]

# Show example recommendations
for user_id, top_posts in list(recommendations.items())[:5]:
    print(f"{user_id} -> {top_posts}")


U1 -> ['P22', 'P78', 'P10']
U2 -> ['P42', 'P80', 'P5']
U3 -> ['P3', 'P34', 'P39']
U4 -> ['P37', 'P53', 'P96']
U5 -> ['P74', 'P7', 'P26']


## 7. Evaluate with Precision@3

In [8]:

def precision_at_k(user_id, k=3):
    """Return the fraction of a user's top-k recommendations they engaged with.

    Reads the module-level ``eng_df`` (columns ``user_id``, ``post_id``,
    ``engagement``) and ``recommendations`` (``user_id`` -> list of post ids).

    Args:
        user_id: Key to look up in ``recommendations`` and ``eng_df``.
        k: Number of leading recommendations to evaluate.

    Returns:
        ``hits / k``, where hits is the number of the first ``k`` recommended
        posts that appear in ``eng_df`` for this user with ``engagement == 1``.
        The divisor is always ``k``, even if fewer than ``k`` recommendations
        exist. Returns ``0`` when there are no recommendations to evaluate.

    Raises:
        KeyError: If ``user_id`` is not a key of ``recommendations``.
    """
    is_positive = (eng_df['user_id'] == user_id) & (eng_df['engagement'] == 1)
    liked_posts = set(eng_df.loc[is_positive, 'post_id'])

    top_k = set(recommendations[user_id][:k])
    if not top_k:
        return 0

    hits = top_k.intersection(liked_posts)
    return len(hits) / k

# Score every user at k=3, then report the mean across all users.
precisions = [precision_at_k(user_id, k=3) for user_id in users_df['user_id']]
mean_precision = np.mean(precisions)
print("Average Precision@3:", mean_precision)


Average Precision@3: 0.06666666666666667
