In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
file_path = 'instagram_reach.csv'
df = pd.read_csv(file_path)

# Step 1: Combine Captions and Hashtags into a single "Item" column for simplicity
df['Item'] = df['Caption'].fillna('') + " " + df['Hashtags'].fillna('')

# Step 2: Create a pivot table (User-Item matrix) where rows are users and columns are items (captions + hashtags)
user_item_matrix = df.pivot_table(index='USERNAME', columns='Item', values='Likes', fill_value=0)

# Step 3: Use SVD for collaborative filtering
svd = TruncatedSVD(n_components=50)  # Reduce to 50 latent factors
user_factors = svd.fit_transform(user_item_matrix)
item_factors = svd.components_

# Step 4: Initialize the TF-IDF vectorizer for content-based filtering
vectorizer = TfidfVectorizer(stop_words='english')
item_matrix = vectorizer.fit_transform(df['Item'].values.astype('U'))  # Fit on entire dataset

# Function to recommend captions and hashtags based on a clicked caption
def recommend_based_on_click(clicked_caption, df, user_item_matrix, user_factors, item_factors, vectorizer, top_n=5):
    # Step 1: Transform the clicked caption using the TF-IDF vectorizer
    clicked_caption_vector = vectorizer.transform([clicked_caption])

    # Step 2: Calculate content similarity between the clicked caption and the items in the dataset
    content_similarity_scores = cosine_similarity(clicked_caption_vector, item_matrix).flatten()

    # Step 3: Get the indices of the most similar captions based on content
    content_top_indices = content_similarity_scores.argsort()[-(top_n+1):-1][::-1]
    content_based_recommendations = df.iloc[content_top_indices][['Caption', 'Hashtags', 'Item']]

    # Step 4: Collaborative filtering recommendations using SVD
    collaborative_scores = {}

    for item in content_based_recommendations['Item']:
        if item in user_item_matrix.columns:
            # Find the index of the item in the item_factors matrix
            item_index = user_item_matrix.columns.get_loc(item)

            # Calculate similarity with other items using collaborative filtering
            item_vector = item_factors[:, item_index]
            item_similarities = cosine_similarity(item_vector.reshape(1, -1), item_factors.T).flatten()
            top_item_indices = item_similarities.argsort()[-(top_n+1):-1][::-1]

            for idx in top_item_indices:
                recommended_item = user_item_matrix.columns[idx]
                collaborative_scores[recommended_item] = collaborative_scores.get(recommended_item, 0) + item_similarities[idx]

    # Step 5: Combine content-based and collaborative recommendations
    final_recommendations = {}

    for item in content_based_recommendations['Item']:
        final_recommendations[item] = 0.7 * content_similarity_scores[content_top_indices[content_based_recommendations['Item'].values == item][0]] + \
                                       0.3 * collaborative_scores.get(item, 0)

    # Sort final recommendations based on combined scores
    sorted_recommendations = sorted(final_recommendations.items(), key=lambda x: x[1], reverse=True)

    # Prepare the final recommendation output
    recommended_items = [(item, df.loc[df['Item'] == item, ['Caption', 'Hashtags']].values[0]) for item, _ in sorted_recommendations]

    return recommended_items

# Example usage: New user clicked on a caption
clicked_caption = "what is life"
recommendations = recommend_based_on_click(
    clicked_caption, df, user_item_matrix, user_factors, item_factors, vectorizer
)

# Display the recommendations
print("Recommended Captions and Hashtags:")
for item, details in recommendations:
    # print(f"Caption: {details[0]}\n Hashtags: {details[1]}\n Item: {item}")
     print(f"Caption: {details[0]}\n\nHashtags: {details[1]}\n")

Recommended Captions and Hashtags:
Caption: We are coming up with the Best 21 Books that will change your mind about Life, Money and Your self Campaign this week!!We will post this books with genuine reviews from Amazon.com ! Hope you will enjoy this and try to read this amazing books!! Stay Tuned !! 😉❤️

Hashtags: #books #book #motivation #inspiration #life#booklover #lifebook2018 #love #finance#personality #training #growth#development #musthave #instadaily#trending #sales #happy #knowledge#knowledgeispower #amazon #fiction #scifi#hotsale #art #biography  #autobiography#selfhelp  #offers

Caption: Life is all about the next step💎

Hashtags: #Entrepreneur#Business#Entrepreneurship#WontStop#Mindset#Success#Hustle#Freedom#BusinessOwner#OnlineBusiness#Coaching#Ambition#Inspire#ThinkBig#Startup#HardWork#Businessman#BeYourOwnBoss#SmallBusiness#Believe#Motivate#Mentor or #mentoring#Givingback#InternetBusiness#Successe

Caption: Interesting, most of them prefer that because all need high acc

In [3]:
import pickle

# Save SVD model
with open('svd_model.pkl', 'wb') as svd_file:
    pickle.dump(svd, svd_file)

# Save TF-IDF vectorizer
with open('tfidf_vectorizer_cr.pkl', 'wb') as tfidf_file:
    pickle.dump(vectorizer, tfidf_file)

# Save User-Item Matrix
user_item_matrix.to_pickle('user_item_matrix.pkl')

# Save User Factors and Item Factors
with open('user_factors.pkl', 'wb') as user_factors_file:
    pickle.dump(user_factors, user_factors_file)

with open('item_factors.pkl', 'wb') as item_factors_file:
    pickle.dump(item_factors, item_factors_file)

In [4]:
# Load SVD model
with open('svd_model.pkl', 'rb') as svd_file:
    svd = pickle.load(svd_file)

# Load TF-IDF vectorizer
with open('tfidf_vectorizer_cr.pkl', 'rb') as tfidf_file:
    vectorizer = pickle.load(tfidf_file)

# Load User-Item Matrix
user_item_matrix = pd.read_pickle('user_item_matrix.pkl')

# Load User Factors and Item Factors
with open('user_factors.pkl', 'rb') as user_factors_file:
    user_factors = pickle.load(user_factors_file)

with open('item_factors.pkl', 'rb') as item_factors_file:
    item_factors = pickle.load(item_factors_file)

In [None]:
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
file_path = 'instagram_reach.csv'
df = pd.read_csv(file_path)

# Step 1: Create a User-Item matrix where rows are users and columns are items (captions + hashtags)
user_item_matrix = df.pivot_table(index='USERNAME', columns='Item', values='Likes', fill_value=0)

# Step 2: Use SVD for collaborative filtering
svd = TruncatedSVD(n_components=50)  # Reduce to 50 latent factors
user_factors = svd.fit_transform(user_item_matrix)
item_factors = svd.components_

# Function to recommend captions and hashtags based on a clicked caption
def recommend_based_on_clicked_caption(clicked_caption, df, user_item_matrix, item_factors, top_n=5):
    # Step 1: Find the index of the clicked caption in the user-item matrix
    if clicked_caption not in user_item_matrix.columns:
        print("Caption not found in the dataset.")
        return []

    item_index = user_item_matrix.columns.get_loc(clicked_caption)

    # Step 2: Calculate similarity scores of the clicked item with all other items
    item_vector = item_factors[:, item_index]
    item_similarities = cosine_similarity(item_vector.reshape(1, -1), item_factors.T).flatten()

    # Step 3: Get the indices of the most similar items based on collaborative filtering
    similar_item_indices = item_similarities.argsort()[-(top_n + 1):-1][::-1]

    # Prepare the final recommendation output
    recommended_items = []

    for idx in similar_item_indices:
        recommended_item = user_item_matrix.columns[idx]
        recommended_items.append((recommended_item, df.loc[df['Item'] == recommended_item, ['Caption', 'Hashtags']].values[0]))

    return recommended_items

# Example usage: New user clicked on a caption
clicked_caption = "Learning about AI and Data Science!"
recommendations = recommend_based_on_clicked_caption(
    clicked_caption, df, user_item_matrix, item_factors
)

# Display the recommendations
print("Recommended Captions and Hashtags:")
for item, details in recommendations:
    print(f"Caption: {details[0]}\nHashtags: {details[1]}\n")

In [3]:
# Detailed Explanation of How It Works
# Data Preparation:

# Combining Captions and Hashtags: The first step combines the Caption and Hashtags into a single column named Item. This allows for a unified approach when calculating similarities later on.
# Creating the User-Item Matrix:

# A pivot table is created where each row represents a user, each column represents an item (a combination of captions and hashtags), and the values represent the likes for those items. If a user has not liked a particular item, it is filled with 0.
# Collaborative Filtering Using SVD:

# Truncated SVD: Singular Value Decomposition reduces the dimensions of the user-item matrix into latent factors. This helps identify patterns in user preferences without needing to analyze the entire matrix directly.
# User Factors: This results in a matrix that represents users in a reduced latent space.
# Item Factors: The item factors matrix represents items in the same latent space. These factors capture underlying relationships between users and items.
# Content-Based Filtering Using TF-IDF:

# TF-IDF Vectorization: The Item column is converted into a TF-IDF matrix. TF-IDF scores reflect how important a word is to a document in a collection, emphasizing more distinctive terms.
# Cosine Similarity Calculation: The cosine similarity is computed between the clicked caption (transformed into TF-IDF) and the item matrix, allowing us to find how similar other items are to the clicked caption.
# Generating Recommendations:

# Content-Based Recommendations: The top similar items are identified based on content similarity. This gives us a starting point for recommendations based on the user's interest.
# Collaborative Recommendations: For each content-based recommended item:
# The index of the item is found in the user-item matrix.
# The corresponding item vector from the item factors is retrieved.
# Similarities to other items are calculated, and scores are aggregated.
# Combining Scores:

# Final Recommendations: The final recommendations are generated by combining scores from both content and collaborative filtering. The content score is weighted more heavily (0.7) than the collaborative score (0.3) in this example.
# Sorting Recommendations: The items are then sorted based on the combined score, ensuring that the most relevant recommendations are presented first.
# Output: The function returns a list of recommended items, including their captions and hashtags, formatted for easy readability.