In [4]:
pip install faker

Collecting faker
  Downloading faker-37.4.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.4.0-py3-none-any.whl (1.9 MB)
   ---------------------------------------- 0.0/1.9 MB ? eta -:--:--
    --------------------------------------- 0.0/1.9 MB 1.9 MB/s eta 0:00:01
   ----------- ---------------------------- 0.5/1.9 MB 8.6 MB/s eta 0:00:01
   ----------------------------- ---------- 1.4/1.9 MB 11.3 MB/s eta 0:00:01
   ---------------------------------------- 1.9/1.9 MB 13.7 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-37.4.0
Note: you may need to restart the kernel to use updated packages.




In [14]:
from faker import Faker
import random
from datetime import datetime, timedelta
import pandas as pd

# --- Simulation parameters (tune here) ---
SEED = 42            # fixed seed so users/events are identical on every Restart & Run All
N_USERS = 100        # number of simulated user names
N_POSTS = 50         # number of simulated post ids
N_EVENTS = 1000      # total interaction events to generate
HISTORY_HOURS = 48   # events are spread over the last HISTORY_HOURS hours

faker = Faker()
# Seed both random sources: Faker drives the user names, `random` drives everything else.
Faker.seed(SEED)
random.seed(SEED)

# Simulate users and posts
users = [faker.user_name() for _ in range(N_USERS)]
posts = [f"post_{i}" for i in range(N_POSTS)]

# Simulate feed interactions
feed_data = []

# Read the clock once so every event is offset from the same instant
# (timestamps then differ only by whole hours).
generated_at = datetime.now()

for _ in range(N_EVENTS):
    entry = {
        "user": random.choice(users),
        "post": random.choice(posts),
        "likes": random.randint(0, 50),
        "comments": random.randint(0, 15),
        # Whole-hour age between 0 and HISTORY_HOURS; timestamps stay relative to the
        # wall clock because the ranking functions compare them against "now".
        "timestamp": generated_at - timedelta(hours=random.randint(0, HISTORY_HOURS)),
    }
    feed_data.append(entry)

# Convert to a DataFrame for easier analysis
df = pd.DataFrame(feed_data)
df.head()

Unnamed: 0,user,post,likes,comments,timestamp
0,adamburnett,post_21,6,4,2025-06-26 15:47:17.788805
1,janet84,post_35,32,8,2025-06-26 05:47:17.788805
2,sandra63,post_34,0,8,2025-06-26 17:47:17.788805
3,adamburnett,post_44,45,3,2025-06-25 12:47:17.788805
4,ojohnson,post_46,43,14,2025-06-26 20:47:17.788805


In [79]:
import heapq

def _rank_recent_posts(events, top_n, now=None, window_hours=24):
    """Score the events inside the time window and return the top-N posts.

    Shared implementation behind ``rank_posts_with_decay_and_ratio`` and
    ``personalized_feed``.

    Each event contributes ``likes / (comments + 1)`` multiplied by a freshness
    factor ``max(1, window_hours - age_in_hours) / window_hours``; contributions
    are summed per post.

    Args:
        events: DataFrame with ``post``, ``likes``, ``comments`` and
            ``timestamp`` columns.
        top_n: Maximum number of posts to return.
        now: Reference time used for the cutoff and for event age. Defaults to
            ``datetime.now()`` at call time.
        window_hours: Only events with ``timestamp >= now - window_hours`` are
            scored. Must be positive.

    Returns:
        List of ``(post, score)`` tuples, highest score first; equal scores are
        ordered by ascending post value. At most ``top_n`` entries.

    Raises:
        ValueError: If ``window_hours`` is not positive.
    """
    if window_hours <= 0:
        raise ValueError("window_hours must be positive")
    if now is None:
        now = datetime.now()
    cutoff = now - timedelta(hours=window_hours)

    # Step 1: keep only events inside the time window
    recent = events[events["timestamp"] >= cutoff]

    # Step 2: aggregate scores per post (column-wise zip avoids the per-row
    # Series construction that iterrows() performs)
    post_scores = {}
    for post, likes, comments, timestamp in zip(
        recent["post"], recent["likes"], recent["comments"], recent["timestamp"]
    ):
        # Time decay: newer events weigh more; the weight never drops below 1 hour's worth
        hours_old = (now - timestamp).total_seconds() / 3600
        freshness_weight = max(1, window_hours - hours_old)

        # Ratio-based score; the +1 avoids division by zero when there are no comments
        engagement_score = likes / (comments + 1)

        final_score = engagement_score * (freshness_weight / window_hours)
        post_scores[post] = post_scores.get(post, 0.0) + final_score

    # Step 3: top-N selection. Taking the N smallest (-score, post) pairs yields the
    # highest scores first, with ties broken by ascending post value.
    best = heapq.nsmallest(
        top_n, ((-score, post) for post, score in post_scores.items())
    )
    return [(post, -negated_score) for negated_score, post in best]


def rank_posts_with_decay_and_ratio(df, top_n =10, now=None, window_hours=24):
    """Rank posts by freshness-weighted likes-per-comment over all users.

    Args:
        df: DataFrame with ``post``, ``likes``, ``comments`` and ``timestamp``
            columns.
        top_n: Maximum number of posts to return (default 10).
        now: Optional reference time; defaults to ``datetime.now()``. Pass a
            fixed value to get reproducible results.
        window_hours: Size of the look-back window in hours (default 24).

    Returns:
        List of ``(post, score)`` tuples sorted from highest to lowest score.

    Raises:
        ValueError: If ``window_hours`` is not positive.
    """
    return _rank_recent_posts(df, top_n, now=now, window_hours=window_hours)


def personalized_feed(user_id, df,top_n =5, now=None, window_hours=24):
    """Rank the posts a single user interacted with inside the time window.

    Only rows whose ``user`` column equals ``user_id`` are scored; the scoring
    itself is the same as in ``rank_posts_with_decay_and_ratio``.

    Args:
        user_id: Value to match against the ``user`` column.
        df: DataFrame with ``user``, ``post``, ``likes``, ``comments`` and
            ``timestamp`` columns.
        top_n: Maximum number of posts to return (default 5).
        now: Optional reference time; defaults to ``datetime.now()``.
        window_hours: Size of the look-back window in hours (default 24).

    Returns:
        List of ``(post, score)`` tuples sorted from highest to lowest score;
        empty when the user has no events inside the window.

    Raises:
        ValueError: If ``window_hours`` is not positive.
    """
    user_events = df[df["user"] == user_id]
    return _rank_recent_posts(user_events, top_n, now=now, window_hours=window_hours)

In [81]:
# Print the five highest-scoring posts returned by rank_posts.
# NOTE(review): rank_posts is not defined in any cell visible in this notebook —
# presumably it lived in a cell that was since removed; confirm it exists, otherwise
# this cell raises NameError on a fresh kernel (Restart & Run All).
top_trending = rank_posts(df, top_n=5)
for post, score in top_trending:
    print(f"{post} → Score: {score}")

post_31 → Score: 1348
post_9 → Score: 1201
post_28 → Score: 1141
post_34 → Score: 1059
post_48 → Score: 1055


In [83]:
# Print the five highest-scoring posts returned by rank_posts_with_decay.
# NOTE(review): rank_posts_with_decay is not defined in any cell visible in this
# notebook — presumably an earlier variant of the ranking function; confirm it is
# still defined somewhere, otherwise this cell fails on a fresh kernel.
top_decay = rank_posts_with_decay(df, top_n=5)
for post, score in top_decay:
    print(f"{post} → Score: {score}")

post_9 → Score: 676.5562273917014
post_31 → Score: 675.5817745134724
post_27 → Score: 598.2510529352085
post_34 → Score: 586.6593836867708
post_21 → Score: 574.7769768530903


In [85]:
# Print the top five posts across all users, ranked by the freshness-weighted
# likes-per-comment score computed by rank_posts_with_decay_and_ratio.
top_quality_posts = rank_posts_with_decay_and_ratio(df, top_n=5)
for post, score in top_quality_posts:
    print(f"{post} → Score: {score}")

post_44 → Score: 53.73369508713443
post_45 → Score: 52.01374250845293
post_18 → Score: 47.90948815600835
post_40 → Score: 44.37867891847404
post_23 → Score: 43.75765054150933


In [87]:
# Print the top five posts for one user, scored only from that user's own events.
# NOTE(review): "janet84" is hard-coded here and in the message below; user names are
# generated randomly by Faker in the simulation cell, so this name may not exist after
# a re-run (the feed would then simply be empty) — verify or pick a name from `users`.
personal_feed = personalized_feed("janet84", df, top_n=5)
for post, score in personal_feed:
    print(f"📥 {post} (for user 'janet84') → Score: {score}")

📥 post_5 (for user 'janet84') → Score: 22.953204355324075
📥 post_20 (for user 'janet84') → Score: 3.6302979648553237
📥 post_28 (for user 'janet84') → Score: 2.1685666264768515
📥 post_44 (for user 'janet84') → Score: 1.8771922613194443
📥 post_1 (for user 'janet84') → Score: 1.7530691658472224
