In [1]:
import requests
import pandas as pd

# Constants
BASE_URL = "https://api.socialverseapp.com"
HEADERS = {"Flic-Token": "flic_6e2d8d25dc29a4ddd382c2383a903cf4a688d1a117f6eb43b35a1e7fadbb84b8"}
PAGE_SIZE = 1000

def fetch_data(endpoint, page=1):
    url = f"{BASE_URL}{endpoint}?page={page}&page_size={PAGE_SIZE}"
    response = requests.get(url, headers=HEADERS)
    if response.status_code == 200:
        return response.json()
    else:
        response.raise_for_status()

def fetch_all_data(endpoint):
    page = 1
    all_data = []
    while True:
        data = fetch_data(endpoint, page)
        if not data['posts']:
            break
        all_data.extend(data['posts'])
        page += 1
    return all_data

def fetch_all_users(endpoint):
    page = 1
    all_data = []
    while True:
        data = fetch_data(endpoint, page)
        if not data['users']:
            break
        all_data.extend(data['users'])
        page += 1
    return all_data
    
# Fetch all data
viewed_posts = fetch_all_data("/posts/view")
liked_posts =fetch_all_data("/posts/like")
inspired_posts= fetch_all_data("/posts/inspire")
rated_posts =fetch_all_data("/posts/rating")
all_posts =fetch_all_data("/posts/summary/get")
all_users =fetch_all_users("/users/get_all")

# Convert to DataFrames
df_viewed = pd.DataFrame(viewed_posts)
df_liked= pd.DataFrame(liked_posts)
df_inspired = pd.DataFrame(inspired_posts)
df_rated= pd.DataFrame(rated_posts)
df_posts= pd.DataFrame(all_posts)
df_users =pd.DataFrame(all_users)

# Handle missing values 
df_viewed.fillna(0, inplace=True)
df_liked.fillna(0, inplace=True)
df_inspired.fillna(0, inplace=True)
df_rated.fillna(0, inplace=True)
df_posts.fillna(0, inplace=True)
df_users.fillna(0, inplace=True)




In [2]:
required_columns = ['slug', 'title', 'id', 'username', 'upvoted', 'bookmarked', 'following', 'post_summary']
engagement_columns = ['comment_count', 'upvote_count', 'view_count', 'exit_count', 'rating_count', 'share_count']
# Create a new DataFrame with the required columns
df_selected = df_posts[required_columns].copy()

# Create a new column that sums up the engagement metrics
df_selected['engagement_score'] = df_posts[engagement_columns].sum(axis=1)

In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel



# Ensure the post_summary column contains only string values
df_selected['post_summary'] = df_selected['post_summary'].apply(lambda x: x['description'] if isinstance(x, dict) and 'description' in x else str(x))

# Fit the TfidfVectorizer on the cleaned post_summary column
tfidf =TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_selected['post_summary'])

# Compute the cosine similarity matrix
cosine_sim= linear_kernel(tfidf_matrix, tfidf_matrix)

def get_recommendations(title, cosine_sim=cosine_sim):
    # Get the index of the video that matches the title
    idx =df_selected[df_selected['title'] == title].index[0]

    # Get the pairwise similarity scores of all videos with that video
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the videos based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar videos
    sim_scores = sim_scores[1:11]

    # Get the video indices
    video_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar videos
    return df_selected['title'].iloc[video_indices]

# Example usage
recommendations = get_recommendations('Peak Performance?')
print(recommendations)


0                                Recipe for a flow state
830    Let them be… #melrobbins #inspiration #dailymo...
30                              Change your environment!
17                                        Learn to think
875    “You can’t just make it happen…” 💔 - Speaker_ ...
925    “No one’s coming…” 🤍 - Ed Mylett - #lifeadvice...
845    Rest isn’t a luxury—it’s an absolute NECESSITY...
858    I cant make what it is… #dailywisdom #motivati...
516                                            John 3-16
511                        tiktok removed my last vid  -
Name: title, dtype: object


In [4]:

# Consolidate user interaction data
df_interactions = pd.concat([df_viewed, df_inspired, df_rated, df_liked], ignore_index=True)

#Create a composite rating by summing engagement metrics
df_interactions['rating']= df_interactions[['comment_count', 'upvote_count', 'view_count', 'exit_count', 'rating_count', 'share_count']].sum(axis=1)

# Ensure user IDs and video IDs are of the correct type
df_interactions['username'] =df_interactions['username'].astype(str)
df_interactions['id'] = df_interactions['id'].astype(str)

# Create a user-item interaction matrix
user_item_matrix= df_interactions[['username', 'id', 'rating']]

# Display the user-item interaction matrix
print(user_item_matrix.head())


  username    id  rating
0    kinha  1102     111
1    kinha   774     107
2    kinha   948     111
3    kinha   817     136
4    kinha   499     128


In [6]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate

# Create a Reader object
reader = Reader(rating_scale=(df_selected['engagement_score'].min(), df_selected['engagement_score'].max()))

# Load the data into the surprise Dataset
data = Dataset.load_from_df(df_selected[['username', 'id', 'engagement_score']], reader)

# Use the SVD algorithm
algo = SVD() #singular value decomposition 

# Evaluate the algorithm
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

# Train the algorithm on the whole dataset
trainset = data.build_full_trainset()
algo.fit(trainset)

def get_collaborative_recommendations(user_id, num_recommendations=10):
    #Get all video ids
    all_video_ids=df_selected['id'].unique()

    #Get the video ids that the user has already interacted with
    viewed_video_ids=df_selected[df_selected['username']==user_id]['id'].unique()

    #Get the video ids that the user has not interacted with
    candidate_video_ids=[video_id for video_id in all_video_ids if video_id not in viewed_video_ids]

    #Predict the ratings for the candidate videos
    predictions=[(video_id, algo.predict(user_id, video_id).est) for video_id in candidate_video_ids]

    #Sort the predictions by the estimated rating
    predictions=sorted(predictions, key=lambda x: x[1], reverse=True)

    #Get the top N recommendations
    top_n_recommendations=predictions[:num_recommendations]

    #Get the video titles for the top N recommendations
    recommended_video_titles=df_selected[df_selected['id'].isin([video_id for video_id, _ in top_n_recommendations])]['title']

    return recommended_video_titles

# Example usage
collaborative_recommendations = get_collaborative_recommendations('q_03f1i5c6l')
print(collaborative_recommendations)


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    474.4545459.1939468.9852481.4687479.1163472.64377.9666  
MAE (testset)     443.1001426.2732433.9508456.3736455.1869442.976911.7393 
Fit time          0.01    0.00    0.01    0.01    0.01    0.01    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    
54                how are the 2023 fitness goals going?
55                     Decide to be extraodinary and do
56                                Clarity brings power.
57                                 Just focus on today.
58    Fun fact: Oscar Wilde was a homosexual. I thin...
59    Revolutionizing Men's Mental Health: Your $0.9...
68                                                     
69                                                     
70                                                     
71                                                     
Name: title

In [7]:
def get_hybrid_recommendations(user_id, title, num_recommendations=10, cosine_sim=cosine_sim):
    # Get content-based recommendations
    content_recommendations = get_recommendations(title, cosine_sim)

    # Get collaborative filtering recommendations
    collaborative_recommendations = get_collaborative_recommendations(user_id, num_recommendations)

    # Combine and deduplicate recommendations
    hybrid_recommendations = pd.concat([content_recommendations, collaborative_recommendations]).drop_duplicates()

    # Return the top N recommendations
    return hybrid_recommendations.head(num_recommendations)

# Example usage
hybrid_recommendations = get_hybrid_recommendations('q_03f1i5c6l', 'Peak Performance?')
print(hybrid_recommendations)

0                                Recipe for a flow state
830    Let them be… #melrobbins #inspiration #dailymo...
30                              Change your environment!
17                                        Learn to think
875    “You can’t just make it happen…” 💔 - Speaker_ ...
925    “No one’s coming…” 🤍 - Ed Mylett - #lifeadvice...
845    Rest isn’t a luxury—it’s an absolute NECESSITY...
858    I cant make what it is… #dailywisdom #motivati...
516                                            John 3-16
511                        tiktok removed my last vid  -
Name: title, dtype: object


In [8]:
mood_keywords = {
    'happy': ['joy', 'happy', 'fun', 'laugh', 'smile', 'excited'],
    'sad': ['sad', 'cry', 'tear', 'depressed', 'sorrow'],
    'excited': ['excited', 'thrill', 'eager', 'enthusiastic'],
    'relaxed': ['relax', 'calm', 'peace', 'serene', 'chill'],
    'energetic': ['energetic', 'vibrant', 'lively', 'dynamic', 'active']
}

# Function to assign mood based on post_summary
def assign_mood(post_summary, mood_keywords):
    for mood, keywords in mood_keywords.items():
        if any(keyword in post_summary.lower() for keyword in keywords):
            return mood
    return 'neutral'  # Default mood if no keywords are found

# Apply the function to add the mood column
df_selected['mood'] = df_selected['post_summary'].apply(lambda x: assign_mood(x, mood_keywords))

# Display the DataFrame
print(df_selected.head())


                        slug                      title  id   username  \
0    recipe-for-a-flow-state    Recipe for a flow state  11  afrobeezy   
1                 why-fit-in              Why fit in..?  12  afrobeezy   
2   transcending-singularity   Transcending Singularity  13  afrobeezy   
3           peak-performance          Peak Performance?  14  afrobeezy   
4  our-existential-situation  Our Existential Situation  15  afrobeezy   

   upvoted  bookmarked  following  \
0     True       False       True   
1    False       False       True   
2    False       False       True   
3    False       False       True   
4    False       False       True   

                                        post_summary  engagement_score  \
0  The video unfolds as a dynamic exploration of ...               124   
1  The video is structured as an emotional and in...                96   
2  The video presents a compelling visual narrati...               140   
3  The video presents a seamless nar

In [9]:
def recommend_for_new_user(user_mood, num_recommendations=10):
    # Filter videos based on the user's mood
    mood_videos = df_selected[df_selected['mood'] == user_mood]

    # Sort the videos by engagement score (or any other relevant metric)
    mood_videos = mood_videos.sort_values(by='engagement_score', ascending=False)

    # Get the top N recommendations
    recommended_video_titles = mood_videos['title'].head(num_recommendations)

    return recommended_video_titles

# Example usage
new_user_recommendations = recommend_for_new_user('energetic')
print(new_user_recommendations)


1039                             Follow your obsessions 🌀
1040                         Dream the impossible dream 🌀
1044    Spiritual ego 👀. You can’t be in a good, “high...
1084    Don’t be afraid of the hard work 🙏\n#motivatio...
957                            Solana Coins vs Base Coins
1085                                                Game2
345                                                      
1073                  Bullet Train _ Sleepwalker _ 4K.mp4
878     This is why we live in a simulation 🤯 - Speake...
920     Am I traumitized_ #dailyinspiration #wisdom #m...
Name: title, dtype: object


In [10]:
# Save the DataFrame to a CSV file
df_selected.to_csv('df_selected.csv', index=False)
df_interactions.to_csv('df_interactions.csv',index=False)

In [16]:
def get_hybrid_recommendations(user_id, title,mood, num_recommendations=10,cosine_sim=cosine_sim):
    # Get content-based recommendations
    content_recommendations = get_recommendations(title, cosine_sim)
    mood_based=recommend_for_new_user(mood)
    # Get collaborative filtering recommendations
    collaborative_recommendations = get_collaborative_recommendations(user_id, num_recommendations)

    # Combine and deduplicate recommendations
    hybrid_recommendations = pd.concat([content_recommendations, collaborative_recommendations,mood_based]).drop_duplicates()

    # Return the top N recommendations
    return hybrid_recommendations.head(num_recommendations)

# Example usage
hybrid_recommendations = get_hybrid_recommendations('q_03f1i5c6l', 'Peak Performance?','happy')
print(hybrid_recommendations)

0                                Recipe for a flow state
830    Let them be… #melrobbins #inspiration #dailymo...
30                              Change your environment!
17                                        Learn to think
875    “You can’t just make it happen…” 💔 - Speaker_ ...
925    “No one’s coming…” 🤍 - Ed Mylett - #lifeadvice...
845    Rest isn’t a luxury—it’s an absolute NECESSITY...
858    I cant make what it is… #dailywisdom #motivati...
516                                            John 3-16
511                        tiktok removed my last vid  -
Name: title, dtype: object
