In [1]:
import pandas as pd

# Load the data
data = pd.read_csv('Instagram_data.csv')

# Remove duplicates and handle missing values
data = data.drop_duplicates().dropna()

# Ensure that captions and hashtags are in the correct format
data['Caption'] = data['Caption'].str.replace('[^a-zA-Z0-9 #]', '', regex=True).str.lower()
data['Hashtags'] = data['Hashtags'].str.replace('[^a-zA-Z0-9 #]', '', regex=True).str.lower()

# Function to get top posts based on likes, shares, comments, and saves
def get_top_posts(num_recommendations=5):
    # Check if the required columns are in the dataset
    required_columns = ['Likes', 'Shares', 'Comments', 'Saves', 'Caption', 'Hashtags']
    for col in required_columns:
        if col not in data.columns:
            raise ValueError(f"Column '{col}' is not present in the data.")

    # Calculate total engagement
    data['total_engagement'] = data['Likes'] + data['Shares'] + data['Comments'] + data['Saves']

    # Sort the data based on total engagement
    top_posts = data[['Caption', 'Hashtags', 'Likes', 'Shares', 'Comments', 'Saves', 'total_engagement']].sort_values(
        by='total_engagement', ascending=False
    ).head(num_recommendations)

    return top_posts[['Caption', 'Hashtags', 'Likes', 'Shares']]

# Example usage
recommendations = get_top_posts(num_recommendations=5)
print(recommendations)


                                               Caption  \
134  here are some of the best python certification...   
137  178 python projects with source code solved an...   
163  heres how to create an age calculator using py...   
158  180 python projects with source code solved an...   
172  a data science project report is a document us...   

                                              Hashtags  Likes  Shares  
134  #python #pythonprogramming #pythoncode #python...   1623     332  
137  #python#pythonprogramming#pythoncode#pythonlea...   1798     472  
163  #python#pythonprogramming#pythoncode#pythonlea...   2091     175  
158  #python#pythonprogramming#pythoncode#pythonlea...   1421     205  
172  #datascience#datasciencejobs#datasciencetraini...   1013     148  


In [5]:
from surprise import accuracy
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
import pickle

# Load the data
data = pd.read_csv('Instagram_data.csv')

# Remove duplicates and handle missing values
data = data.drop_duplicates().dropna()

# Ensure that captions and hashtags are in the correct format
data['Caption'] = data['Caption'].str.replace(
    '[^a-zA-Z0-9 #]', '', regex=True).str.lower()
data['Hashtags'] = data['Hashtags'].str.replace(
    '[^a-zA-Z0-9 #]', '', regex=True).str.lower()
data['UserID'] = np.arange(len(data))
data['PostID'] = data.index

def prepare_data_for_cf(data):
    # Assuming 'UserID' and 'PostID' are columns in the dataset
    interactions = data[['UserID', 'PostID', 'Likes', 'Comments']].copy()
    # Create a simple rating based on engagement
    interactions['Rating'] = interactions['Likes'] + interactions['Comments']

    return interactions


# Prepare interaction data
interaction_data = prepare_data_for_cf(data)

# Set up Surprise dataset
reader = Reader(rating_scale=(0, interaction_data['Rating'].max()))
surprise_data = Dataset.load_from_df(
    interaction_data[['UserID', 'PostID', 'Rating']], reader)

# Train-test split
trainset, testset = train_test_split(surprise_data, test_size=0.2)

# Build SVD model
model = SVD()
model.fit(trainset)

# Evaluate the model

# Predict ratings on the test set
predictions = model.test(testset)
accuracy.rmse(predictions)

# Save the model
with open('collaborative_filtering_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Function to get top posts based on collaborative filtering


def get_top_posts_cf(user_id, num_recommendations=5):
    # Get a list of all PostIDs
    post_ids = data['PostID'].unique()

    # Predict ratings for all posts for the user
    predictions = [model.predict(user_id, post_id) for post_id in post_ids]

    # Sort predictions based on estimated ratings
    top_posts = sorted(predictions, key=lambda x: x.est,
                       reverse=True)[:num_recommendations]

    # Get post details
    top_post_ids = [post[1] for post in top_posts]
    recommended_posts = data[data['PostID'].isin(
        top_post_ids)][['Caption', 'Hashtags', 'Likes', 'Comments']]

    return recommended_posts


# Example usage
user_id = 'example_user_id'  # Replace with actual User ID
recommendations = get_top_posts_cf(user_id, num_recommendations=5)
print(recommendations)

RMSE: 354.7900
                                               Caption  \
128  here are some of the best data science certifi...   
131  30 data analysis projects solved and explained...   
134  here are some of the best python certification...   
137  178 python projects with source code solved an...   
148  here are some of the best machine learning cer...   

                                              Hashtags  Likes  Comments  
128  #datascience#datasciencejobs#datasciencetraini...    728         5  
131  #dataanalysis#dataanalytics#dataanalyst#python...    955        16  
134  #python #pythonprogramming #pythoncode #python...   1623        20  
137  #python#pythonprogramming#pythoncode#pythonlea...   1798        15  
148  #machinelearning#machinelearningalgorithms#dat...   1059        17  


In [6]:
data.head(4)

Unnamed: 0,Date,Impressions,From Home,From Hashtags,From Explore,From Other,Saves,Comments,Shares,Likes,Profile Visits,Follows,Conversion Rate,Caption,Hashtags,UserID,PostID
0,2021-12-10,3920,2586,1028,619,56,98,9,5,162,35,2,5.714286,here are some of the most important data visua...,#finance#money#business#investing#investment#t...,0,0
1,2021-12-11,5394,2727,1838,1174,78,194,7,14,224,48,10,20.833333,here are some of the best data science project...,#healthcare#health#covid#data#datascience#data...,1,1
2,2021-12-12,4021,2085,1188,0,533,41,11,1,131,62,12,19.354839,learn how to train a machine learning model an...,#data#datascience#dataanalysis#dataanalytics#d...,2,2
3,2021-12-13,4528,2700,621,932,73,172,10,7,213,23,8,34.782609,heres how you can write a python program to de...,#python#pythonprogramming#pythonprojects#pytho...,3,3


In [7]:
from surprise import accuracy
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
import pickle
import numpy as np
import uuid

# Load the data
data = pd.read_csv('Instagram_data.csv')

# Remove duplicates and handle missing values
data = data.drop_duplicates().dropna()

# Ensure that captions and hashtags are in the correct format
data['Caption'] = data['Caption'].str.replace(
    '[^a-zA-Z0-9 #]', '', regex=True).str.lower()
data['Hashtags'] = data['Hashtags'].str.replace(
    '[^a-zA-Z0-9 #]', '', regex=True).str.lower()

# Create unique UserID
# Generating a unique UserID for each entry
data['UserID'] = np.arange(len(data))

# Create unique random PostID using UUID
data['PostID'] = [uuid.uuid4().hex for _ in range(len(data))
                  ]  # Generate random PostID

# Function to prepare dataset for collaborative filtering


def prepare_data_for_cf(data):
    # Prepare interaction data with UserID, PostID, and engagement score
    interactions = data[['UserID', 'PostID', 'Likes', 'Comments']].copy()
    # Create a simple rating based on engagement
    interactions['Rating'] = interactions['Likes'] + interactions['Comments']

    return interactions


# Prepare interaction data
interaction_data = prepare_data_for_cf(data)

# Set up Surprise dataset
reader = Reader(rating_scale=(0, interaction_data['Rating'].max()))
surprise_data = Dataset.load_from_df(
    interaction_data[['UserID', 'PostID', 'Rating']], reader)

# Train-test split
trainset, testset = train_test_split(surprise_data, test_size=0.2)

# Build SVD model
model = SVD()
model.fit(trainset)

# Evaluate the model

# Predict ratings on the test set
predictions = model.test(testset)
print(f"RMSE: {accuracy.rmse(predictions)}")

# Save the model
with open('collaborative_filtering_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Function to get top posts based on collaborative filtering


def get_top_posts_cf(user_id, num_recommendations=5):
    # Get a list of all PostIDs
    post_ids = data['PostID'].unique()

    # Predict ratings for all posts for the user
    predictions = [model.predict(user_id, post_id) for post_id in post_ids]

    # Sort predictions based on estimated ratings
    top_posts = sorted(predictions, key=lambda x: x.est,
                       reverse=True)[:num_recommendations]

    # Get post details
    top_post_ids = [post[1] for post in top_posts]
    recommended_posts = data[data['PostID'].isin(
        top_post_ids)][['Caption', 'Hashtags', 'Likes', 'Comments']]

    return recommended_posts


# Example usage
user_id = 0  # Since we've created a unique UserID, we can use 0 for the first user
recommendations = get_top_posts_cf(user_id, num_recommendations=5)
print(recommendations)

RMSE: 205.8469
RMSE: 205.8468584473736
                                               Caption  \
131  30 data analysis projects solved and explained...   
134  here are some of the best python certification...   
137  178 python projects with source code solved an...   
148  here are some of the best machine learning cer...   
154  here are some of the best and unique data anal...   

                                              Hashtags  Likes  Comments  
131  #dataanalysis#dataanalytics#dataanalyst#python...    955        16  
134  #python #pythonprogramming #pythoncode #python...   1623        20  
137  #python#pythonprogramming#pythoncode#pythonlea...   1798        15  
148  #machinelearning#machinelearningalgorithms#dat...   1059        17  
154  #dataanalysis#dataanalytics#dataanalyst#python...    887        15  
