In [1]:
from flask import Flask, request, jsonify, session
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

In [2]:
# The stemmer needs no corpus data, so it can be created up front
stemmer = PorterStemmer()

# Fetch the stop-word corpus (reported as already up-to-date when cached),
# then keep the English list as a set for fast membership tests
nltk.download('stopwords')
stop_words = {word for word in stopwords.words('english')}

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [15]:
# Read the dataset a single time and blank out missing text fields so the
# string processing below never sees NaN
df = pd.read_csv('Instagram_data.csv')
df[['Caption', 'Hashtags']] = df[['Caption', 'Hashtags']].fillna('')

In [16]:
# Text preprocessing function
def preprocess_text(text):
    """Normalise free text into a space-joined string of stemmed tokens.

    The text is lower-cased, every character that is neither a letter nor
    whitespace is replaced by a space, the result is split on whitespace,
    tokens found in the module-level ``stop_words`` set are dropped, and the
    remaining tokens are stemmed with the module-level ``stemmer``.

    Args:
        text: Raw caption or hashtag string. Any non-string value
            (e.g. ``None`` or a NaN float) is treated as empty text.

    Returns:
        str: The cleaned tokens joined by single spaces ('' if none remain).
    """
    # Missing values would otherwise fail on .lower()
    if not isinstance(text, str):
        return ''

    # Substitute a space rather than deleting the character: removing it
    # outright glues neighbouring words together ("sun&sea" -> "sunsea",
    # "#a#b" -> "ab"), which produces tokens that match nothing.
    text = re.sub(r'[^a-z\s]', ' ', text.lower())

    tokens = [stemmer.stem(word)
              for word in text.split()
              if word not in stop_words]
    return ' '.join(tokens)

In [17]:
# Build the text used for vectorisation from cleaned copies of the caption
# and hashtags. The original 'Caption' and 'Hashtags' columns are left
# untouched so that recommendations can show readable text instead of
# stemmed tokens, and so that re-running this cell does not re-stem
# already-stemmed text.
df['content'] = (
    df['Caption'].apply(preprocess_text)
    + ' '
    + df['Hashtags'].apply(preprocess_text)
)

In [18]:
# Turn the cleaned text into a TF-IDF matrix (max_features=1000 limits the
# vocabulary size; sklearn's English stop-word list is applied as well)
tfidf = TfidfVectorizer(
    max_features=1000,
    stop_words='english',
)
tfidf_matrix = tfidf.fit_transform(df['content'])

In [19]:
# Rescale every engagement column with MinMaxScaler.
# NOTE(review): the scaled values are written back over the raw counts in
# df, so any later display of these columns shows scaled numbers.
engagement_metrics = [
    'Likes', 'Comments', 'Shares',
    'Saves', 'Profile Visits', 'Follows',
]
scaler = MinMaxScaler()
scaler.fit(df[engagement_metrics])
df[engagement_metrics] = scaler.transform(df[engagement_metrics])

In [20]:
# Place the dense TF-IDF columns and the engagement columns side by side,
# one row per post
engagement_features = df[engagement_metrics].to_numpy()
combined_features = np.concatenate(
    (tfidf_matrix.toarray(), engagement_features), axis=1)

In [21]:
# Function to recommend posts
def recommend_by_caption(input_caption, num_recommendations=5, engagement_weight=0.3):
    """Return the posts that best match a caption, blended with engagement.

    The caption is cleaned with ``preprocess_text`` and vectorised with the
    fitted ``tfidf``. Its cosine similarity to every row of ``tfidf_matrix``
    is combined with a per-post engagement score (the row mean of
    ``engagement_features``) as::

        score = (1 - engagement_weight) * text_similarity
                + engagement_weight * engagement_score

    Args:
        input_caption: Caption text to match against the dataset.
        num_recommendations: Maximum number of rows to return; values below
            zero are treated as zero.
        engagement_weight: Share of the final score taken from engagement;
            clamped to the range [0, 1]. 0 ranks on text alone.

    Returns:
        pandas.DataFrame: Rows of ``df`` ordered from highest to lowest score.
    """
    # Vectorise the query with the vocabulary learned from the dataset
    query_vector = tfidf.transform([preprocess_text(input_caption)])

    # Compare text with text only. The query has no engagement values, so
    # putting engagement columns inside the cosine (with zeros on the query
    # side) merely enlarges each post's norm and *lowers* the score of
    # highly engaged posts. cosine_similarity accepts the sparse matrix
    # directly, so no dense copy is needed.
    text_similarity = cosine_similarity(query_vector, tfidf_matrix).ravel()

    # One engagement score per post: mean over the engagement columns
    engagement_score = np.asarray(engagement_features, dtype=float).mean(axis=1)

    weight = min(max(float(engagement_weight), 0.0), 1.0)
    final_scores = (1.0 - weight) * text_similarity + weight * engagement_score

    # Stable sort on the negated scores: descending order, with ties kept
    # in dataset order so results are reproducible
    top_n = max(int(num_recommendations), 0)
    ranked_indices = np.argsort(-final_scores, kind='stable')[:top_n]
    return df.iloc[ranked_indices]

In [22]:
# app = Flask(__name__)
# app.secret_key = 'your_secret_key'  # Used for session management


# @app.route('/recommend', methods=['GET'])
# def recommend():
#     user_caption = request.json.get('caption')
#     if not user_caption:
#         return jsonify({'error': 'Caption is required'}), 400

#     # Get recommendations for the user input caption
#     recommended_posts = recommend_by_caption(user_caption)

#     # Prepare response
#     result = recommended_posts[['Date', 'Caption', 'Hashtags',
#                                 'Likes', 'Comments', 'Shares']].to_dict(orient='records')

#     return jsonify(result), 200


# # Run the Flask app
# if __name__ == '__main__':
#     app.run(debug=True)

In [27]:
# Rebuild the engagement array and the combined feature matrix
# (TF-IDF columns followed by engagement columns, one row per post)
engagement_features = df[engagement_metrics].to_numpy()
combined_features = np.concatenate(
    (tfidf_matrix.toarray(), engagement_features), axis=1)

# Function to recommend posts based on input caption


def recommend_by_caption(input_caption, num_recommendations=5, engagement_weight=0.3):
    """Return the posts that best match a caption, blended with engagement.

    The caption is cleaned with ``preprocess_text`` and vectorised with the
    fitted ``tfidf``. Its cosine similarity to every row of ``tfidf_matrix``
    is combined with a per-post engagement score (the row mean of
    ``engagement_features``) as::

        score = (1 - engagement_weight) * text_similarity
                + engagement_weight * engagement_score

    Args:
        input_caption: Caption text to match against the dataset.
        num_recommendations: Maximum number of rows to return; values below
            zero are treated as zero.
        engagement_weight: Share of the final score taken from engagement;
            clamped to the range [0, 1]. 0 ranks on text alone.

    Returns:
        pandas.DataFrame: Rows of ``df`` ordered from highest to lowest score.
    """
    # Vectorise the query with the vocabulary learned from the dataset
    query_vector = tfidf.transform([preprocess_text(input_caption)])

    # Compare text with text only. The query has no engagement values, so
    # putting engagement columns inside the cosine (with zeros on the query
    # side) merely enlarges each post's norm and *lowers* the score of
    # highly engaged posts. cosine_similarity accepts the sparse matrix
    # directly, so no dense copy is needed.
    text_similarity = cosine_similarity(query_vector, tfidf_matrix).ravel()

    # One engagement score per post: mean over the engagement columns
    engagement_score = np.asarray(engagement_features, dtype=float).mean(axis=1)

    weight = min(max(float(engagement_weight), 0.0), 1.0)
    final_scores = (1.0 - weight) * text_similarity + weight * engagement_score

    # Stable sort on the negated scores: descending order, with ties kept
    # in dataset order so results are reproducible
    top_n = max(int(num_recommendations), 0)
    ranked_indices = np.argsort(-final_scores, kind='stable')[:top_n]
    return df.iloc[ranked_indices]


# Main logic: read a caption from the user and print matching posts
if __name__ == '__main__':
    # Strip surrounding whitespace so a blank entry is detected reliably
    input_caption = input("Enter a caption to get recommendations: ").strip()

    if not input_caption:
        # An empty caption carries no text to match against, so skip the lookup
        print("\nNo caption entered; nothing to recommend.")
    else:
        # Get recommendations for the input caption
        recommended_posts = recommend_by_caption(
            input_caption=input_caption, num_recommendations=5)

        # Print the recommended posts
        print("\nRecommended Posts:")
        print(recommended_posts[['Date', 'Caption',
              'Hashtags', 'Likes', 'Comments', 'Shares']])


Recommended Posts:
