In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the raw Instagram post metrics; the path is relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer="Instagram_data.csv")
# Preview the first three rows (last expression, so the notebook renders it)
df.head(n=3)

Unnamed: 0,Date,Impressions,From Home,From Hashtags,From Explore,From Other,Saves,Comments,Shares,Likes,Profile Visits,Follows,Conversion Rate,Caption,Hashtags
0,2021-12-10,3920,2586,1028,619,56,98,9,5,162,35,2,5.714286,Here are some of the most important data visua...,#finance #money #business #investing #investme...
1,2021-12-11,5394,2727,1838,1174,78,194,7,14,224,48,10,20.833333,Here are some of the best data science project...,#healthcare #health #covid #data #datascience ...
2,2021-12-12,4021,2085,1188,0,533,41,11,1,131,62,12,19.354839,Learn how to train a machine learning model an...,#data #datascience #dataanalysis #dataanalytic...


In [3]:
# Synthetic user id: cycles with the row index modulo 5, shifted to start at 1
df['user_id'] = (df.index % 5) + 1

In [4]:
# Synthetic post id: the row index offset by 101
df['post_id'] = 101 + df.index

In [5]:
# Single engagement score per post: likes + comments + shares
df['interaction'] = df['Likes'].add(df['Comments']).add(df['Shares'])

In [6]:
# Build the user x post matrix of interaction scores.
# (user, post) pairs with no row in df are filled with 0.
user_item_matrix = pd.pivot_table(
    df,
    values='interaction',
    index='user_id',
    columns='post_id',
    fill_value=0,
)

In [7]:
# Pairwise cosine similarity between the rows (users) of the user-item matrix
user_similarity = cosine_similarity(user_item_matrix)

# Label both axes with the user ids so similarities can be looked up by user
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

In [8]:
# Function to get recommendations for a user
def recommend_posts(user_id, user_item_matrix, user_similarity_df, num_recommendations=2):
    """Recommend posts that other users interacted with but `user_id` has not.

    Parameters
    ----------
    user_id : hashable
        Row label of the target user in `user_item_matrix` and column label
        in `user_similarity_df`.
    user_item_matrix : pandas.DataFrame
        Users as rows, posts as columns; a value of 0 is treated as
        "no interaction".
    user_similarity_df : pandas.DataFrame
        Square user-by-user similarity table labelled with the same user ids.
    num_recommendations : int, default 2
        Maximum number of post labels to return.

    Returns
    -------
    list
        Up to `num_recommendations` post labels, ordered by the summed
        interaction score of the other users (highest first).

    Raises
    ------
    KeyError
        If `user_id` is missing from `user_similarity_df` columns or from
        the `user_item_matrix` index.
    """
    # Exclude the target user by label, not by position. Slicing off the first
    # sorted entry assumes the user ranks first in their own similarity column,
    # which fails for an all-zero row (self-similarity 0) or tied scores; a
    # different user would then be dropped and the target's own row summed in.
    similar_users = (
        user_similarity_df[user_id]
        .drop(labels=user_id, errors='ignore')
        .sort_values(ascending=False)
        .index
    )

    # Aggregate the other users' interaction scores per post. The sum is
    # unweighted, so the similarity ordering above does not affect the totals.
    similar_users_posts = user_item_matrix.loc[similar_users].sum(axis=0)
    user_posts = user_item_matrix.loc[user_id]

    # Keep only posts the target user has not interacted with, best first
    recommendations = similar_users_posts[user_posts == 0].sort_values(
        ascending=False).head(num_recommendations)

    return recommendations.index.tolist()

## Collaborative-Based Recommendations

# Create synthetic user-post interaction data: five placeholder users
user_ids = [f'user_{n}' for n in range(1, 6)]
# NOTE(review): final_data is not defined in the cells visible here - confirm it is
# loaded earlier, otherwise this cell fails on a fresh kernel.
content_ids = final_data['url']  # Use URLs or post IDs from the final_data dataframe

In [None]:
# Seed NumPy's global RNG so the synthetic likes are the same on every run
np.random.seed(42)
# One row per user, one column per content item; each cell is 0 or 1 (high=2 is exclusive)
interaction_shape = (len(user_ids), len(content_ids))
interactions = np.random.randint(low=0, high=2, size=interaction_shape)

In [None]:
# Wrap the raw interaction array in a DataFrame: rows are users, columns are content ids
interaction_df = pd.DataFrame(
    data=interactions,
    index=user_ids,
    columns=content_ids,
)

# Show the first rows of the interaction matrix
print(interaction_df.head())

In [None]:
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Perform SVD on the user x item interaction matrix.
# random_state pins the default randomized solver so reruns yield the same factors.
svd = TruncatedSVD(n_components=5, random_state=42)  # You can tune the number of components
# fit_transform returns the user factors, shape (n_users, n_components)
svd_matrix = svd.fit_transform(interaction_df)

# Compute cosine similarity between content items.
# Item latent factors live in svd.components_ (n_components, n_items), so transpose
# it to get one row per item. Using svd_matrix.T would instead compare the latent
# components with each other and give an (n_components x n_components) matrix
# that cannot be labelled with content_ids.
item_similarity = cosine_similarity(svd.components_.T)

# Convert the similarity matrix to a DataFrame for ease of use
item_sim_df = pd.DataFrame(
    item_similarity, index=content_ids, columns=content_ids)
print(item_sim_df.head())

In [None]:
def get_collaborative_recommendations(content_id, num_recommendations=5, similarity_df=None):
    """Return the items most similar to `content_id`.

    Parameters
    ----------
    content_id : hashable
        Column label of the query item in the similarity table.
    num_recommendations : int, default 5
        Maximum number of item labels to return.
    similarity_df : pandas.DataFrame, optional
        Square item-by-item similarity table. When omitted, the notebook-level
        `item_sim_df` is used.

    Returns
    -------
    pandas.Index
        Up to `num_recommendations` item labels, most similar first; the
        query item itself is never included.

    Raises
    ------
    KeyError
        If `content_id` is not a column of the similarity table.
    """
    if similarity_df is None:
        # Fall back to the similarity matrix built earlier in the notebook
        similarity_df = item_sim_df

    # Get similarity scores for the given content ID
    sim_scores = similarity_df[content_id]

    # Remove the query item by label before ranking. Skipping the first sorted
    # entry is unsafe: another item with similarity 1.0 can sort ahead of the
    # query item, which would then be returned as its own recommendation.
    ranked = sim_scores.drop(labels=content_id, errors='ignore').sort_values(
        ascending=False)

    # Get the top N recommendations
    top_recommendations = ranked.head(num_recommendations).index

    return top_recommendations


# Example usage:
# content_ids is taken from a DataFrame column (presumably a pandas Series - confirm),
# where [0] looks up the *label* 0 and fails if the index is not the default 0..n-1.
# .iloc[0] always selects the first item by position.
print(get_collaborative_recommendations(content_ids.iloc[0], num_recommendations=5))

In [None]:
from sklearn.metrics import mean_squared_error
from math import sqrt

# Ground truth: the raw values of the interaction DataFrame
interaction_matrix = interaction_df.values

# Reconstruct the full matrix from the SVD factors (user factors @ components)
predicted_matrix = svd_matrix @ svd.components_

# Root-mean-squared error between actual and reconstructed interactions
mse = mean_squared_error(interaction_matrix, predicted_matrix)
rmse = sqrt(mse)
print(f"RMSE: {rmse}")

In [None]:
import pickle

# Persist the fitted SVD model as a pickle in the working directory
with open('svd_model.pkl', mode='wb') as model_file:
    pickle.dump(svd, model_file)

# Persist the item similarity matrix as CSV (labels are written with it)
item_sim_df.to_csv(path_or_buf='item_similarity_matrix.csv')

In [None]:
# Restore the SVD model from disk.
# NOTE: unpickling executes code from the file - only load pickles you created or trust.
with open('svd_model.pkl', mode='rb') as model_file:
    svd = pickle.load(model_file)

# Restore the item similarity matrix; the first CSV column holds the row labels
item_sim_df = pd.read_csv(
    filepath_or_buffer='item_similarity_matrix.csv',
    index_col=0,
)

In [None]:
# Get recommendations for a specific post or reel
# Select the first item by position: on a pandas Series, [0] is a label lookup and
# raises KeyError when the index is not the default 0..n-1 (presumably content_ids
# is a Series taken from a DataFrame column - confirm).
content_id = content_ids.iloc[0]  # Example: First post
recommendations = get_collaborative_recommendations(
    content_id, num_recommendations=5)
print(f"Top 5 recommendations for {content_id}: {recommendations}")