#### Importing the required libraries

In [3]:
import requests
import os
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict
from datetime import datetime
import traceback

In [4]:
# Root URL that every endpoint path is appended to.
BASE_URL = 'https://api.socialverseapp.com'

# SECURITY: the API token below is committed in source. Prefer supplying it via
# the FLIC_TOKEN environment variable; the literal is kept only as a fallback
# so existing runs keep working, and should be rotated and removed.
HEADERS = {
    'Flic-Token': os.environ.get('FLIC_TOKEN')
    or 'flic_6e2d8d25dc29a4ddd382c2383a903cf4a688d1a117f6eb43b35a1e7fadbb84b8',
}

#### Fetching the data

In [5]:
def fetch_paginated_data(endpoint, params=None, page_size=1000, timeout=30):
    """Fetch every page of a paginated endpoint and concatenate the results.

    Pages are requested starting at 1 until the API reports a non-success
    status, returns an empty page, returns a page shorter than ``page_size``,
    or a request fails. Fetching is best-effort: on a request error the
    records collected so far are returned.

    Args:
        endpoint: Path appended to ``BASE_URL`` (e.g. ``'/posts/view'``).
        params: Optional extra query parameters. The dict is not modified.
        page_size: Number of records requested per page.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        ``{'data': [...]}`` holding the concatenated records of all pages.
    """
    # Copy so the key added below never leaks into the caller's dict.
    base_params = dict(params) if params else {}
    base_params['resonance_algorithm'] = 'resonance_algorithm_cjsvervb7dbhss8bdrj89s44jfjdbsjd0xnjkbvuire8zcjwerui3njfbvsujc5if'

    url = f"{BASE_URL}{endpoint}"
    all_data = []
    page = 1

    while True:
        current_params = {**base_params, 'page': page, 'page_size': page_size}
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=HEADERS, params=current_params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"Error fetching page {page} from {endpoint}: {str(e)}")
            break

        if data.get('status') != 'success':
            break

        # NOTE(review): every endpoint is read through the 'posts' key; confirm
        # that non-post endpoints (e.g. '/users/get_all') use the same key.
        posts = data.get('posts', [])
        if not posts:
            break

        all_data.extend(posts)

        # A short page means this was the last one.
        if len(posts) < page_size:
            break

        page += 1

    return {'data': all_data}

In [6]:
def fetch_data():
    """Download all datasets used by the recommender.

    Returns:
        A 5-tuple ``(posts, views, likes, ratings, users)`` where each item is
        the result of ``fetch_paginated_data`` for the matching endpoint.

    Raises:
        Exception: Any error raised while fetching is reported and re-raised.
    """
    # Order matters: it defines the order of the returned tuple.
    endpoints = (
        '/posts/summary/get',
        '/posts/view',
        '/posts/like',
        '/posts/rating',
        '/users/get_all',
    )
    try:
        datasets = tuple(fetch_paginated_data(path) for path in endpoints)
    except Exception as e:
        print(f"Error fetching data: {str(e)}")
        raise
    return datasets

#### Preprocess data

In [7]:
def _as_number(value):
    """Coerce ``value`` to float, mapping None, NaN and non-numeric input to 0.0."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return 0.0
    # NaN is the only float that does not compare equal to itself.
    return number if number == number else 0.0


def preprocess_data(posts):
    """Build the interaction matrix, content features and popularity scores.

    Args:
        posts: Dict with a ``'data'`` list of post records (dicts).

    Returns:
        A tuple ``(interaction_matrix, content_features, popularity_scores)``:

        * ``interaction_matrix`` - DataFrame indexed by each post's
          ``username`` with one column per post id, holding a weighted
          engagement total (0 where there is no entry).
        * ``content_features`` - DataFrame indexed by post id whose numeric
          columns are standardised (zero mean, unit standard deviation);
          empty when there are no posts.
        * ``popularity_scores`` - result of ``calculate_popularity_scores``.

    Raises:
        Exception: Any failure is printed with a traceback and re-raised.
    """
    try:
        print("Starting data preprocessing...")

        posts_data = pd.DataFrame(posts.get('data', []))
        interactions = defaultdict(lambda: defaultdict(float))
        features = []

        if not posts_data.empty:
            for _, post in posts_data.iterrows():
                # presumably 'username' is the post's author; verify that this
                # is the intended notion of "user interaction".
                user_id = post.get('username')
                post_id = post.get('id')
                if user_id and post_id:
                    # A field missing from one record shows up as NaN in the
                    # DataFrame row; coerce it to 0 so a single gap does not
                    # turn the whole total into NaN (later zeroed by fillna).
                    view_weight = _as_number(post.get('view_count')) * 1
                    upvote_weight = _as_number(post.get('upvote_count')) * 3
                    rating_weight = (
                        _as_number(post.get('average_rating'))
                        * _as_number(post.get('rating_count'))
                        * 5
                    )
                    interactions[user_id][post_id] = view_weight + upvote_weight + rating_weight

                category = post.get('category')
                features.append({
                    'id': post.get('id'),
                    # First category's id when a non-empty list is present, else 0.
                    'category_id': category[0]['id'] if isinstance(category, list) and category else 0,
                    'comment_count': post.get('comment_count', 0),
                    'upvote_count': post.get('upvote_count', 0),
                    'view_count': post.get('view_count', 0),
                    'rating_count': post.get('rating_count', 0),
                    'average_rating': post.get('average_rating', 0),
                    'share_count': post.get('share_count', 0),
                    'is_locked': 1 if post.get('is_locked') else 0,
                    'timestamp': post.get('created_at', 0)
                })

        interaction_matrix = pd.DataFrame.from_dict(interactions, orient='index').fillna(0)

        content_features = pd.DataFrame(features).set_index('id') if features else pd.DataFrame()

        if not content_features.empty:
            # Z-score every numeric column; constant columns produce NaN
            # (division by a zero std), which is replaced by 0 below.
            numeric_columns = content_features.select_dtypes(include=[np.number]).columns
            content_features[numeric_columns] = (
                content_features[numeric_columns] - content_features[numeric_columns].mean()
            ) / content_features[numeric_columns].std()
            content_features = content_features.fillna(0)

        popularity_scores = calculate_popularity_scores(posts)
        return interaction_matrix, content_features, popularity_scores

    except Exception as e:
        print(f"Error preprocessing data: {str(e)}")
        traceback.print_exc()
        raise

#### Calculate popularity scores

In [8]:
def _post_metric(post, key):
    """Return ``post[key]`` as a float, treating missing/None/NaN/non-numeric as 0.0."""
    try:
        number = float(post.get(key))
    except (TypeError, ValueError):
        return 0.0
    # NaN is the only float that does not compare equal to itself.
    return number if number == number else 0.0


def _to_epoch_seconds(created_at, fallback):
    """Convert ``created_at`` to a numeric timestamp, or ``fallback`` if unusable."""
    if isinstance(created_at, str):
        try:
            return pd.to_datetime(created_at).timestamp()
        except (ValueError, TypeError, OverflowError):
            # Unparseable or out-of-range date string.
            return fallback
    try:
        seconds = float(created_at)
    except (TypeError, ValueError):
        return fallback
    return seconds if seconds == seconds else fallback


def calculate_popularity_scores(posts):
    """Score each post by recency-weighted engagement, normalised to [0, 1].

    Engagement is ``views + 3 * upvotes + 0.05 * rating_count * average_rating
    + 4 * shares``. It is multiplied by a recency factor that falls linearly
    from 1 (created now) to 0 (seven or more days old). Scores are divided by
    the maximum score when that maximum is positive.

    Args:
        posts: Dict with a ``'data'`` list of post records. Any other input
            yields an empty result.

    Returns:
        Mapping of post id to popularity score. Posts with a falsy ``id`` are
        skipped.
    """
    popularity = defaultdict(float)

    if isinstance(posts, dict) and 'data' in posts:
        current_time = datetime.now().timestamp()
        RECENCY_WINDOW = 7 * 24 * 60 * 60

        for post in posts['data']:
            post_id = post.get('id')
            if not post_id:
                continue

            # NOTE(review): numeric created_at values are taken to be epoch
            # seconds; confirm the API does not send milliseconds.
            created_at = _to_epoch_seconds(post.get('created_at'), current_time)

            time_diff = current_time - created_at
            # Clamp to [0, 1] so a timestamp in the future cannot boost a post
            # beyond "created right now".
            recency_factor = min(1.0, max(0.0, 1 - (time_diff / RECENCY_WINDOW)))

            engagement = (
                _post_metric(post, 'view_count') * 1 +
                _post_metric(post, 'upvote_count') * 3 +
                _post_metric(post, 'rating_count') * _post_metric(post, 'average_rating') * 0.05 +
                _post_metric(post, 'share_count') * 4
            )

            popularity[post_id] = engagement * recency_factor

        if popularity:
            max_score = max(popularity.values())
            if max_score > 0:
                popularity = {k: v / max_score for k, v in popularity.items()}

    return popularity

#### Get recommendations

In [9]:
def get_recommendations(user_id, interaction_matrix, content_features, popularity_scores, n_recommendations=10):
    """Return the top recommendations for ``user_id``.

    Users that appear in ``interaction_matrix`` get personalised
    recommendations; everyone else, and any case where an error occurs, gets
    the popularity-based list.

    Returns:
        List of ``(post_id, score)`` tuples, at most ``n_recommendations`` long.
    """
    try:
        has_history = (not interaction_matrix.empty) and user_id in interaction_matrix.index
        if has_history:
            return get_personalized_recommendations(
                user_id, interaction_matrix, content_features, popularity_scores, n_recommendations
            )
        return get_popular_recommendations(popularity_scores, n_recommendations)
    except Exception as e:
        print(f"Error getting recommendations: {str(e)}")
        # Fall back to the non-personalised ranking.
        return get_popular_recommendations(popularity_scores, n_recommendations)

#### Popular recommendations

In [10]:
def get_popular_recommendations(popularity_scores, n_recommendations):
    """Return the ``n_recommendations`` highest-scoring ``(post_id, score)`` pairs.

    Pairs are ordered by descending score; ties keep the mapping's own order.
    """
    ranked = list(popularity_scores.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:n_recommendations]

#### Personalized recommendations

In [11]:
def get_personalized_recommendations(user_id, interaction_matrix, content_features, popularity_scores, n_recommendations):
    """Rank posts the user has not interacted with.

    Each candidate starts from its popularity score and gains
    ``similarity * interaction_weight`` for every post the user did interact
    with, where similarity comes from ``get_similar_posts``.

    Args:
        user_id: Row label in ``interaction_matrix``.
        interaction_matrix: DataFrame of interaction weights (users x posts).
        content_features: DataFrame of post features indexed by post id; may
            be empty, in which case only popularity is used.
        popularity_scores: Mapping of post id to popularity score; its keys
            define the candidate set.
        n_recommendations: Maximum number of results.

    Returns:
        List of ``(post_id, score)`` tuples sorted by descending score.
    """
    user_interactions = interaction_matrix.loc[user_id]

    # Build the user's non-zero interactions once as a plain dict. Zero-weight
    # posts contribute nothing to a score, and dict lookups avoid a pandas
    # label lookup for every (candidate, post) pair in the loop below.
    interacted = {
        post_id: weight
        for post_id, weight in user_interactions.items()
        if weight != 0
    }
    has_features = not content_features.empty

    recommendations = []
    for post_id, base_score in popularity_scores.items():
        if post_id in interacted:
            continue  # already seen by this user

        score = base_score
        if has_features and post_id in content_features.index:
            for similar_post_id, similarity in get_similar_posts(post_id, content_features):
                weight = interacted.get(similar_post_id)
                if weight is not None:
                    score += similarity * weight

        recommendations.append((post_id, score))

    return sorted(recommendations, key=lambda x: x[1], reverse=True)[:n_recommendations]

#### Get similar posts

In [12]:
def get_similar_posts(post_id, content_features):
    """Return every other post ranked by cosine similarity to ``post_id``.

    Args:
        post_id: Index label of the reference post.
        content_features: DataFrame of post feature vectors indexed by post id.

    Returns:
        List of ``(other_post_id, similarity)`` tuples in descending
        similarity order, or an empty list when ``post_id`` has no features.
    """
    index = content_features.index
    if post_id not in index:
        return []

    # cosine_similarity expects 2-D input, hence the single-row reshape.
    query_vector = content_features.loc[post_id].values.reshape(1, -1)
    scores = cosine_similarity(query_vector, content_features.values)[0]

    ranked = []
    for other_id, score in zip(index, scores):
        if other_id != post_id:
            ranked.append((other_id, score))
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

#### Save recommendations to CSV

In [13]:
def save_recommendations_to_csv(user_id, recommendations, posts, filename=None, output_dir='recommendations'):
    """Write a user's recommendations, enriched with post metadata, to a CSV file.

    Args:
        user_id: Identifier stored in the ``user_id`` column and used in the
            default file name.
        recommendations: Iterable of ``(post_id, score)`` tuples.
        posts: Dict with a ``'data'`` list of post records, used to look up
            each post's ``title`` and ``link``.
        filename: Target file name; defaults to
            ``recommendations_<user_id>_<YYYYmmdd_HHMMSS>.csv``.
        output_dir: Directory the file is written to; created if missing.

    Returns:
        Path of the written file, or ``None`` if anything went wrong (the
        error is printed rather than raised).
    """
    try:
        post_map = {post.get('id'): post for post in posts.get('data', [])}

        enriched_recommendations = []
        for post_id, score in recommendations:
            post = post_map.get(post_id, {})
            enriched_recommendations.append({
                'id': post_id,
                'name': post.get('title', 'Unknown Title'),
                'link': post.get('link', ''),
                'score': score
            })

        # Fixing the columns keeps the header complete even when there are no
        # recommendations to write.
        recommendations_df = pd.DataFrame(
            enriched_recommendations, columns=['id', 'name', 'link', 'score']
        )

        # Take one clock reading so the timestamp column and the default file
        # name can never disagree across a second boundary.
        now = datetime.now()
        recommendations_df['user_id'] = user_id
        recommendations_df['timestamp'] = now.strftime('%Y-%m-%d %H:%M:%S')

        if filename is None:
            filename = f"recommendations_{user_id}_{now.strftime('%Y%m%d_%H%M%S')}.csv"

        os.makedirs(output_dir, exist_ok=True)
        filepath = os.path.join(output_dir, filename)
        recommendations_df.to_csv(filepath, index=False)

        print(f"Recommendations saved to {filepath}")
        return filepath
    except Exception as e:
        print(f"Error saving recommendations to CSV: {str(e)}")
        return None

#### Main execution

In [14]:
# Script entry point: fetch data, build the model inputs, recommend for one
# user, save the result to CSV and print the saved file.
if __name__ == "__main__":
    try:
        # Only `posts` is used below; views, likes, ratings and users are
        # fetched but not consumed in this cell.
        posts, views, likes, ratings, users = fetch_data()
        interaction_matrix, content_features, popularity_scores = preprocess_data(posts)

        # Hard-coded example user.
        user_id = "kinha"
        recommendations = get_recommendations(user_id, interaction_matrix, content_features, popularity_scores)

        # save_recommendations_to_csv returns None on failure, in which case
        # nothing is printed.
        filepath = save_recommendations_to_csv(user_id, recommendations, posts)
        if filepath:
            print(pd.read_csv(filepath))

    except Exception as e:
        # Top-level boundary: report the message only (no traceback).
        print(f"An error occurred: {str(e)}")

Starting data preprocessing...
Recommendations saved to recommendations\recommendations_kinha_20241212_032908.csv
    id                      name  link          score user_id  \
0   19      WE ARE MADE OF STARS   NaN  147912.448795   kinha   
1   31               Don't laugh   NaN  145898.205835   kinha   
2   44         escape the matrix   NaN  142970.798560   kinha   
3   38              meditate🧘‍♂️   NaN  139709.978549   kinha   
4   59   push through the pain 🫡   NaN  138551.356533   kinha   
5   43         why are you here?   NaN  134350.861654   kinha   
6   33          Enough is enough   NaN  133811.645844   kinha   
7  633  The Perspective Limiter.   NaN  133241.484843   kinha   
8   40     trials to test you...   NaN  131774.222755   kinha   
9   52             Matthew 25:29   NaN  131515.618389   kinha   

             timestamp  
0  2024-12-12 03:29:08  
1  2024-12-12 03:29:08  
2  2024-12-12 03:29:08  
3  2024-12-12 03:29:08  
4  2024-12-12 03:29:08  
5  2024-12-12 03:29:

In [15]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

def evaluate_recommendations(interaction_matrix, recommendations, test_data):
    """Compare predicted recommendation scores against held-out actual scores.

    Only ``(user, post)`` pairs that appear in ``test_data`` with a positive
    score are evaluated.

    Args:
        interaction_matrix: Not used by this function; kept so existing
            callers do not break.
        recommendations: Mapping of user id to a list of
            ``(post_id, predicted_score)`` tuples.
        test_data: Mapping of ``(user_id, post_id)`` to the actual score.

    Returns:
        ``{"MAE": ..., "RMSE": ...}``, or ``None`` when there is nothing to
        evaluate or an error occurs (the reason is printed).
    """
    try:
        predicted_scores = []
        actual_scores = []

        for user_id, user_recommendations in recommendations.items():
            for post_id, predicted_score in user_recommendations:
                # Pairs missing from the test set default to 0 and are skipped.
                actual_score = test_data.get((user_id, post_id), 0)
                if actual_score > 0:
                    predicted_scores.append(predicted_score)
                    actual_scores.append(actual_score)

        # The metric functions reject empty input; report that case directly
        # instead of surfacing it as an opaque library error.
        if not actual_scores:
            print("Error in evaluation: no recommended (user, post) pair has a positive score in test_data")
            return None

        mae = mean_absolute_error(actual_scores, predicted_scores)
        rmse = np.sqrt(mean_squared_error(actual_scores, predicted_scores))

        return {"MAE": mae, "RMSE": rmse}

    except Exception as e:
        print(f"Error in evaluation: {str(e)}")
        return None

# Demo: evaluate hand-written recommendations against hand-written ground truth.
if __name__ == "__main__":
    # Simulated interaction matrix (ground truth). It is passed through to
    # evaluate_recommendations, which does not read it.
    interaction_matrix = pd.DataFrame([
        {"user_id": "u1", "post_id": "p1", "actual_score": 5},
        {"user_id": "u1", "post_id": "p2", "actual_score": 3},
        {"user_id": "u2", "post_id": "p3", "actual_score": 4},
        {"user_id": "u3", "post_id": "p1", "actual_score": 1}
    ])

    # Simulated recommendations: user id -> [(post id, predicted score), ...]
    recommendations = {
        "u1": [("p1", 4.8), ("p2", 3.2)],
        "u2": [("p3", 3.9)],
        "u3": [("p1", 1.2)]
    }

    # Ground truth test data: (user id, post id) -> actual score
    test_data = {
        ("u1", "p1"): 5,
        ("u1", "p2"): 3,
        ("u2", "p3"): 4,
        ("u3", "p1"): 1
    }

    metrics = evaluate_recommendations(interaction_matrix, recommendations, test_data)
    # evaluate_recommendations returns None on failure; subscripting None
    # would raise TypeError, so only print metrics when they exist.
    if metrics is not None:
        print("Evaluation Metrics:")
        print(f"MAE: {metrics['MAE']:.4f}")
        print(f"RMSE: {metrics['RMSE']:.4f}")


Evaluation Metrics:
MAE: 0.1750
RMSE: 0.1803
