In [1]:
# importing libraries and making a new directory
import pandas as pd, numpy as np, random, os
from sklearn.metrics import ndcg_score
from sklearn.decomposition import TruncatedSVD
os.makedirs("data", exist_ok=True)


In [2]:
# Users Data
users = pd.DataFrame({
    "user_id": [201,202,203],
    "preferred_categories": [["tutorial","case-study"],["opinion"],["tutorial","opinion"]],
    "historical_avg_sentiment": [0.8,-0.3,0.5],
    "avg_engagement_time": [200,120,250]
})
users.to_csv("data/users.csv", index=False)


In [3]:
# Loading Scored Posts from assignment1 folder
posts = pd.read_csv(r"C:\Users\jhasn\OneDrive\Desktop\Projects\dl-intern-tasks\assignment1\scored_posts.csv")

In [4]:
# creating recommendation Logic
def recommend_for_user(user, posts, top_k=5):
    df = posts.copy()
    df = df[df["category"].isin(user["preferred_categories"])]
    df["personalization_score"] = (
        -abs(df["sentiment_score"] - user["historical_avg_sentiment"])
        -abs(df["avg_read_time_seconds"] - user["avg_engagement_time"]) / 400
    )
    df["final_score"] = df["relevance_score"] + 0.3*df["personalization_score"]
    df = df.sort_values("final_score", ascending=False)
    return df.groupby("category").head(3).head(top_k)

In [5]:
# Evaluation Metrics
def precision_at_k(recommended, relevant, k=5):
    return len(set(recommended[:k]) & set(relevant)) / k

def ndcg_at_k(recommended, relevant, k=5):
    y_true = [[1 if pid in relevant else 0 for pid in recommended[:k]]]
    y_score = [[1]*len(recommended[:k])]
    return ndcg_score(y_true, y_score)



In [6]:
# Simulate relevant posts for evaluation
relevant_items = {201:[1,3], 202:[2,4], 203:[3,5]}

for _, user in users.iterrows():
    recs = recommend_for_user(user, posts)
    recommended = recs["post_id"].tolist()
    prec = precision_at_k(recommended, relevant_items[user["user_id"]])
    ndcg = ndcg_at_k(recommended, relevant_items[user["user_id"]])
    print(f"\n🔹 User {user['user_id']} → P@5={prec:.2f}, NDCG@5={ndcg:.2f}")
    print(recs[["post_id","category","final_score"]])



🔹 User 201 → P@5=0.40, NDCG@5=1.00
     post_id    category  final_score
98         3    tutorial      1.27125
67         3    tutorial      1.27125
136        3    tutorial      1.27125
94         1  case-study      0.79200
88         1  case-study      0.79200

🔹 User 202 → P@5=0.20, NDCG@5=1.00
    post_id category  final_score
6         2  opinion       0.1095
7         2  opinion       0.1095
16        2  opinion       0.1095

🔹 User 203 → P@5=0.20, NDCG@5=0.83
     post_id  category  final_score
1          3  tutorial      1.21875
13         3  tutorial      1.21875
132        3  tutorial      1.21875
7          2   opinion      0.34200
134        2   opinion      0.34200


In [7]:
# Collaborative Filtering (Matrix Factorization)
interaction_df = pd.DataFrame({
    "user_id": random.choices(users["user_id"], k=50),
    "post_id": random.choices(posts["post_id"], k=50),
    "clicks": random.choices([0,1], k=50)
})

pivot = interaction_df.pivot_table(index="user_id", columns="post_id", values="clicks", fill_value=0)
svd = TruncatedSVD(n_components=3)
latent_matrix = svd.fit_transform(pivot)
latent_posts = svd.components_

pred_matrix = np.dot(latent_matrix, latent_posts)
pred_df = pd.DataFrame(pred_matrix, index=pivot.index, columns=pivot.columns)

for user in pred_df.index:
    print(f"\n🔹 CF Recommendations for User {user}: {pred_df.loc[user].sort_values(ascending=False).head(5).index.tolist()}")



🔹 CF Recommendations for User 201: [2, 5, 1, 4, 3]

🔹 CF Recommendations for User 202: [1, 3, 2, 5, 4]

🔹 CF Recommendations for User 203: [5, 3, 4, 2, 1]
