## Importing Libraries

In [1]:
import ast

import numpy as np
import pandas as pd
import requests
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## Data Extraction

In [2]:
def fetch_video_data(page=1, timeout=10):
    """Fetch one page of posts from the Socialverse feed API.

    Args:
        page: 1-based page number passed as the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The value stored under the ``'posts'`` key of the JSON response, or an
        empty list when the request fails, the body is not valid JSON, or the
        payload does not have the expected shape. Errors are reported with
        ``print`` rather than raised, so callers only ever see a list-like
        result.
    """
    url = f'https://api.socialverseapp.com/feed?page={page}'
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return []
    except ValueError as e:
        # Older requests versions raise a plain ValueError subclass for a
        # non-JSON body, which RequestException does not cover.
        print(f"Request failed: {e}")
        return []

    # Guard against non-dict payloads (e.g. a bare list or an error string),
    # where indexing with 'posts' would raise instead of reporting the problem.
    if isinstance(data, dict) and 'posts' in data:
        return data['posts']

    print(f"Unexpected data structure: {data}")
    return []

def fetch_all_video_data(max_pages=None):
    """Collect posts from consecutive feed pages into a single DataFrame.

    Pages are requested starting at 1 until ``fetch_video_data`` returns an
    empty result. Because ``fetch_video_data`` also returns an empty list on
    request errors, a failed page is indistinguishable from the end of the
    feed and simply stops the crawl.

    Args:
        max_pages: Optional upper bound on the number of pages to request.
            ``None`` (the default) keeps the original behaviour of paging
            until an empty page is seen; pass a number to protect against an
            endpoint that never returns an empty page.

    Returns:
        A ``pandas.DataFrame`` with one row per post (empty if nothing was
        fetched).
    """
    all_posts = []
    page = 1
    while max_pages is None or page <= max_pages:
        posts = fetch_video_data(page)
        if not posts:
            break
        all_posts.extend(posts)
        page += 1
    return pd.DataFrame(all_posts)

# Pull every page of the feed into one DataFrame
post_data = fetch_all_video_data()

# Only write the CSV when the API actually returned posts
if not post_data.empty:
    post_data.to_csv('post_data.csv', index=False)
    print("Data saved to 'post_data.csv'")
else:
    print("No data fetched. Please check the API response or data fetching code.")


Data saved to 'post_data.csv'


In [3]:
# Preview the first five fetched posts (head() defaults to n=5)
post_data.head()

Unnamed: 0,id,category,slug,title,identifier,comment_count,upvote_count,view_count,exit_count,rating_count,...,created_at,first_name,last_name,username,upvoted,bookmarked,thumbnail_url,gif_thumbnail_url,following,picture_url
0,1202,"{'id': 2, 'name': 'Vible', 'count': 486, 'desc...",fd7077974fcb7be24d83b05e18843ddb30f64ac3,The Higher Self Podcast + TAG your personal he...,1aON1eV,0,0,1,441,0,...,1724760272000,Sachin,Kinha,kinha,False,False,https://video-cdn.socialverseapp.com/kinha_530...,https://video-cdn.socialverseapp.com/kinha_530...,False,https://assets.socialverseapp.com/profile/kinh...
1,1198,"{'id': 2, 'name': 'Vible', 'count': 486, 'desc...",19ee6d8c100f305f5adbb8be3f230faecc638b3d,“That’s not pride…” 🤍 - Jordan Peterson - #rea...,ytkXm9g,0,0,4,455,2,...,1724671753000,Sanjana,Yadav,sanjana,False,False,https://video-cdn.socialverseapp.com/sanjana_7...,,False,https://assets.socialverseapp.com/profile/2.png
2,1196,"{'id': 2, 'name': 'Vible', 'count': 486, 'desc...",98d3a3a36536df8a0e0c9944cf06c16066988848,This hits different as a parent… #dailymotivat...,82T9MUh,0,1,3,481,0,...,1724671702000,Sanjana,Yadav,sanjana,False,False,https://video-cdn.socialverseapp.com/sanjana_6...,,False,https://assets.socialverseapp.com/profile/2.png
3,1195,"{'id': 2, 'name': 'Vible', 'count': 486, 'desc...",a9868532b4b975ee1fa00609b0e923ab0ee9ba19,They are never going to be able to find anyone...,BExS9nU,0,0,3,230,0,...,1724671591000,Sachin,Kinha,kinha,False,False,https://video-cdn.socialverseapp.com/kinha_ddd...,,False,https://assets.socialverseapp.com/profile/kinh...
4,1194,"{'id': 2, 'name': 'Vible', 'count': 486, 'desc...",23e30acd0710b702881ebcb386e6e254414cc831,“If a girl really likes you…” ❤️_🩹 - Justin Wa...,pVxAh3Q,0,0,2,340,0,...,1724671542000,Sachin,Kinha,kinha,False,False,https://video-cdn.socialverseapp.com/kinha_11a...,,False,https://assets.socialverseapp.com/profile/kinh...


## Creating Dummy Dataset

In [4]:
# Build a small hand-written user table, one record per user: an id, the
# titles the user has watched, and the user's language and location
user_data = pd.DataFrame([
    {
        'user_id': 1,
        'watch_history': ['The Higher Self Podcast', '“That’s not pride…” 🤍'],
        'language': 'English',
        'location': 'US',
    },
    {
        'user_id': 2,
        'watch_history': ['They are never going to be able to find anyone like you.'],
        'language': 'Spanish',
        'location': 'IN',
    },
    {
        'user_id': 3,
        'watch_history': ['“If a girl really likes you…” ❤️_🩹', 'The Higher Self Podcast'],
        'language': 'Hindi',
        'location': 'UK',
    },
])

# Persist the dummy users so later cells can reload them from disk
user_data.to_csv('user_data.csv', index=False)
print("User data saved to 'user_data.csv'")


User data saved to 'user_data.csv'


## Data Preprocessing

In [5]:
# Load video data
post_data = pd.read_csv('post_data.csv')

# Titles: replace missing values, then lowercase so they match the lowercased
# user watch-history text that is later fed to the same vectorizer
post_data['title'] = post_data['title'].fillna('Unknown Title').str.lower()

# 'category' comes back from the CSV as the text repr of a dict. Parse it with
# ast.literal_eval rather than eval: the text originates from an external API,
# and literal_eval only accepts Python literals instead of executing code.
# Non-string cells (e.g. NaN for a missing category) are passed through as-is.
post_data['category'] = post_data['category'].apply(
    lambda x: ast.literal_eval(x)['name'] if isinstance(x, str) else x
)

# Initialize TF-IDF Vectorizer and fit it on the video titles
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(post_data['title'])

# Prepare numerical features; fill gaps with 0 because cosine_similarity
# rejects inputs containing NaN
numerical_columns = ['comment_count', 'rating_count', 'view_count', 'share_count', 'average_rating', 'exit_count']
numerical_features_matrix = post_data[numerical_columns].fillna(0).values

# Combine TF-IDF features with numerical features (one row per video).
# NOTE(review): the raw counts are on a much larger scale than TF-IDF weights,
# so they likely dominate each row's norm in cosine similarity - consider
# scaling them before stacking; confirm the intended weighting.
features_matrix = np.hstack([tfidf_matrix.toarray(), numerical_features_matrix])


## Recommendation System

In [6]:
# Candidate languages and locations for one-hot encoding
languages = ['English', 'Spanish', 'Hindi']
locations = ['US', 'IN', 'UK']

# Read the user table back from the CSV file
user_data = pd.read_csv('user_data.csv')

def create_user_features_vector(user_watch_history, user_language, user_location):
    """Build a user vector in the same feature space as ``features_matrix``.

    The watch-history titles are joined, lowercased and projected through the
    fitted ``tfidf_vectorizer``; every remaining column (the numerical video
    statistics) is left at zero.

    Args:
        user_watch_history: Iterable of watched video titles. A single string
            is treated as one title rather than being joined per character.
        user_language: Accepted for interface compatibility; currently unused.
        user_location: Accepted for interface compatibility; currently unused.

    Returns:
        A 1-D float array of length ``features_matrix.shape[1]``.

    Note:
        The video feature matrix has no language or location columns, so
        one-hot encodings of these values have nothing to be compared with.
        The previous implementation appended them and then truncated the
        vector back to the matrix width, which discarded them again; building
        the vector at the right width makes that explicit and avoids writing
        them into unrelated columns when the widths differ.
    """
    # ' '.join on a bare string would interleave spaces between characters
    if isinstance(user_watch_history, str):
        user_watch_history = [user_watch_history]

    # Convert user's watch history to a single text feature
    user_history_text = ' '.join(user_watch_history).lower()
    user_text_vector = tfidf_vectorizer.transform([user_history_text]).toarray()[0]

    # Start from zeros at the exact width of the video features, then copy the
    # text weights into the leading TF-IDF columns
    n_features = features_matrix.shape[1]
    user_features_vector = np.zeros(n_features)
    n_text = min(user_text_vector.shape[0], n_features)
    user_features_vector[:n_text] = user_text_vector[:n_text]

    return user_features_vector

def recommend_videos(user_watch_history, user_language, user_location, top_n=5):
    """Return the titles of the videos most similar to a user's profile.

    Args:
        user_watch_history: Iterable of titles the user has watched.
        user_language: Forwarded to ``create_user_features_vector``.
        user_location: Forwarded to ``create_user_features_vector``.
        top_n: Maximum number of titles to return. Values of zero or less
            yield an empty result.

    Returns:
        A NumPy array of titles from ``post_data``, ordered from most to least
        similar by cosine similarity against ``features_matrix``.
    """
    # Create user features vector
    user_features_vector = create_user_features_vector(user_watch_history, user_language, user_location)

    # Debug shapes
    print("user_features_vector shape:", user_features_vector.shape)
    print("features_matrix shape:", features_matrix.shape)

    # Compute cosine similarity between user features and video content
    cosine_similarities = cosine_similarity([user_features_vector], features_matrix).flatten()

    # Get top N video indices, best first. Reversing before slicing gives the
    # same order as argsort()[-top_n:][::-1] for positive top_n, but avoids the
    # [-0:] pitfall where top_n=0 selected every video instead of none.
    top_indices = cosine_similarities.argsort()[::-1][:max(top_n, 0)]

    # Return recommended video titles
    return post_data.iloc[top_indices]['title'].values

## Testing

In [7]:
user_id = 1

# Look the user up once instead of filtering the frame for every column
user_row = user_data.loc[user_data['user_id'] == user_id].iloc[0]
user_language = user_row['language']
user_location = user_row['location']

# watch_history was written to the CSV as the repr of a Python list; parse it
# as a literal so titles containing apostrophes or ", " are kept intact
user_watch_history = ast.literal_eval(user_row['watch_history'])

# Get recommendations
recommended_videos = recommend_videos(user_watch_history, user_language, user_location)

# Display recommendations
print("Recommended Videos for User ID", user_id, ":")
for video in recommended_videos:
    print(video)

user_features_vector shape: (1440,)
features_matrix shape: (892, 1440)
Recommended Videos for User ID 1 :
interesting podcast with dr bret in the diary of a ceo hosted by @steven bartlett 🤯 - #lifeadvices #careeradvice #careertips #thediaryofaceo
when we become like little children we let go of our pride and follow god with our heart
as you love yourself more, you’ll grow intimacy with your natural genius, discover your truest powers and honor your highest self.#leadership #productivity #service #selflove #lo#loveappiness
share this potent principle from my recent appearance on lewis howes' podcast. worldly success without a joyful heart is fool’s gold.mp4
interesting podcast with james in the diary of a ceo hosted by @steven bartlett 🤯 - #relationshipadvice #relationshiptip #loveadvice


In [8]:
user_id = 2

# Look the user up once instead of filtering the frame for every column
user_row = user_data.loc[user_data['user_id'] == user_id].iloc[0]
user_language = user_row['language']
user_location = user_row['location']

# watch_history was written to the CSV as the repr of a Python list; parse it
# as a literal so titles containing apostrophes or ", " are kept intact
user_watch_history = ast.literal_eval(user_row['watch_history'])

# Get recommendations
recommended_videos = recommend_videos(user_watch_history, user_language, user_location)

# Display recommendations
print("Recommended Videos for User ID", user_id, ":")
for video in recommended_videos:
    print(video)


user_features_vector shape: (1440,)
features_matrix shape: (892, 1440)
Recommended Videos for User ID 2 :
are you going to make it?
you are like the lost shepherd needing to be guided by god
when we become like little children we let go of our pride and follow god with our heart
if you’re going through something uncomfortable…remember this
words are like keys


In [9]:
user_id = 3

# Look the user up once instead of filtering the frame for every column
user_row = user_data.loc[user_data['user_id'] == user_id].iloc[0]
user_language = user_row['language']
user_location = user_row['location']

# watch_history was written to the CSV as the repr of a Python list; parse it
# as a literal so titles containing apostrophes or ", " are kept intact
user_watch_history = ast.literal_eval(user_row['watch_history'])

# Get recommendations
recommended_videos = recommend_videos(user_watch_history, user_language, user_location)

# Display recommendations
print("Recommended Videos for User ID", user_id, ":")
for video in recommended_videos:
    print(video)


user_features_vector shape: (1440,)
features_matrix shape: (892, 1440)
Recommended Videos for User ID 3 :
this proves that the ramayana really happened
interesting podcast with dr bret in the diary of a ceo hosted by @steven bartlett 🤯 - #lifeadvices #careeradvice #careertips #thediaryofaceo
as you love yourself more, you’ll grow intimacy with your natural genius, discover your truest powers and honor your highest self.#leadership #productivity #service #selflove #lo#loveappiness
share this potent principle from my recent appearance on lewis howes' podcast. worldly success without a joyful heart is fool’s gold.mp4
interesting podcast with james in the diary of a ceo hosted by @steven bartlett 🤯 - #relationshipadvice #relationshiptip #loveadvice
