In [2]:
import pandas as pd

In [11]:
# Collaboration Filtering Recommandation
data = pd.read_csv('Ricky_Data.csv')
input_data = pd.DataFrame({
    'user_id': [190, 190,190,190],
    'post_id': [1690,3588,1489,1855]
})

In [45]:
import pandas as pd
from collections import Counter
import pandas as pd
from scipy.stats import pearsonr


def calculate_similarity(input_data, data):
    # Combine input_data and data to ensure all users and posts are included
    combined_data = pd.concat([input_data, data])
    
    # Create a user-post interaction matrix
    interaction_matrix = combined_data.pivot_table(index='user_id', columns='post_id', aggfunc='size', fill_value=0)
    
    similar_users = {}

    for input_user_id in input_data['user_id'].unique():
        input_user_vector = interaction_matrix.loc[input_user_id]

        similar_users[input_user_id] = []
        
        for user_id in interaction_matrix.index:
            if user_id != input_user_id:
                user_vector = interaction_matrix.loc[user_id]
                
                # Calculate Pearson Correlation Coefficient
                if len(input_user_vector) > 1 and len(user_vector) > 1:
                    correlation, _ = pearsonr(input_user_vector, user_vector)
                    if not pd.isna(correlation):  # Ensure the correlation is not NaN
                        similar_users[input_user_id].append((user_id, correlation))
    
    return similar_users


def find_top_similar_user(similar_users, top_n):
    top_similar_users = {}

    for input_user_id, user_similarities in similar_users.items():
        user_similarities.sort(key=lambda x: x[1], reverse=True)
        top_similar_users[input_user_id] = user_similarities[:top_n]

    return top_similar_users

def recommend_posts(input_data, similar_users, data, top_n=1, min_recommendations=5):
    recommendations = {}
    
    # Counting the frequency of each post in the dataset
    post_frequency = Counter(data['post_id'])

    for input_user_id, similar_user_list in similar_users.items():
        recommendations[input_user_id] = []
        
        # Posts already seen by the input user
        input_user_posts = set(input_data[input_data['user_id'] == input_user_id]['post_id'])
        
        # Extend the similar user list if necessary
        additional_similar_users = []
        if len(similar_user_list) < top_n:
            additional_similar_users = similar_users[input_user_id][top_n:top_n*2]

        combined_similar_users = similar_user_list + additional_similar_users
        
        for user_id, similarity_score in combined_similar_users:
            user_posts = set(data[data['user_id'] == user_id]['post_id'])
            recommended_posts = user_posts - input_user_posts
            recommendations[input_user_id].extend(recommended_posts)
            
            if len(recommendations[input_user_id]) >= min_recommendations:
                break
        
        # If there are still not enough recommendations, add the most frequent posts
        if len(recommendations[input_user_id]) < min_recommendations:
            frequent_posts = [post for post, _ in post_frequency.most_common() if post not in input_user_posts]
            recommendations[input_user_id].extend(frequent_posts[:min_recommendations - len(recommendations[input_user_id])])
    
    return recommendations

# Example usage:
# input_data, similar_users, and data should be defined as per your specific dataset and context
similarities = calculate_similarity(input_data, data)
top_similar_users = find_top_similar_user(similarities, top_n=1)
recommendations = recommend_posts(input_data, top_similar_users, data)

print("------------------Top similar users-----------------------\n")
for input_user_id, similar_user_list in top_similar_users.items():
    print(f"user_id {input_user_id} has top similar to:")
    for user_id, similarity_score in similar_user_list:
        print(f"  user_id {user_id} with similarity score: {similarity_score},\n")
# Additional print statements
print("------------------Matching similar personalize-----------------------")
for input_user_id, similar_user_list in top_similar_users.items():
    for user_id, similarity_score in similar_user_list:
        print(f"Input user_id {input_user_id} has post_ids: {set(input_data[input_data['user_id'] == input_user_id]['post_id'])}")
        print(f"Matching user_id {user_id} has post_ids: {set(data[data['user_id'] == user_id]['post_id'])}")
        print("\n")
print("Algorithm User Based ------>  Recommendations personalize:\n")
recommended_posts_listed = []

for input_user_id, recommended_posts in recommendations.items():
    print(f"user_id {input_user_id} should recommend the following posts:")
    print(recommended_posts)
    recommended_posts_listed.extend(recommended_posts)

print("\nRecommended posts All listed:", recommended_posts_listed)
from collections import Counter   
# Dictionary to store matching user's post_ids
matching_post_ids = {}

# Collect matching user's post_ids
for input_user_id, similar_user_list in top_similar_users.items():
    for user_id, similarity_score in similar_user_list:
        input_user_post_ids = set(input_data[input_data['user_id'] == input_user_id]['post_id'])
        matching_user_post_ids = set(data[data['user_id'] == user_id]['post_id'])
        matching_post_ids[input_user_id] = input_user_post_ids.intersection(matching_user_post_ids)

# Find the post_id with the most users among matching users
most_common_post_id = Counter([post_id for post_ids in matching_post_ids.values() for post_id in post_ids]).most_common(1)[0][0]




In [32]:
import pandas as pd
from collections import Counter
from scipy.stats import pearsonr

def calculate_similarity(input_data, data):
    # Combine input_data and data to ensure all users and posts are included
    combined_data = pd.concat([input_data, data])
    
    # Create a user-post interaction matrix
    interaction_matrix = combined_data.pivot_table(index='finger_print_id', columns='post_id', aggfunc='size', fill_value=0)
    
    similar_users = {}

    for input_finger_print_id in input_data['finger_print_id'].unique():
        input_user_vector = interaction_matrix.loc[input_finger_print_id]

        similar_users[input_finger_print_id] = []
        
        for finger_print_id in interaction_matrix.index:
            if finger_print_id != input_finger_print_id:
                finger_print_vector = interaction_matrix.loc[finger_print_id]
                
                # Calculate Pearson Correlation Coefficient
                if len(input_user_vector) > 1 and len(finger_print_vector) > 1:
                    correlation, _ = pearsonr(input_user_vector, finger_print_vector)
                    if not pd.isna(correlation):  # Ensure the correlation is not NaN
                        similar_users[input_finger_print_id].append((finger_print_id, correlation))
    
    return similar_users

def find_top_similar_user(similar_users, top_n):
    top_similar_users = {}

    for input_finger_print_id, user_similarities in similar_users.items():
        user_similarities.sort(key=lambda x: x[1], reverse=True)
        top_similar_users[input_finger_print_id] = user_similarities[:top_n]

    return top_similar_users

def recommend_posts(input_data, top_similar_users, data, top_n=5, min_recommendations=5):
    recommendations = {}
    
    # Counting the frequency of each post in the dataset
    post_frequency = Counter(data['post_id'].dropna())  # Drop NaN values from post_id

    for input_finger_print_id, similar_user_list in top_similar_users.items():
        recommendations[input_finger_print_id] = []
        
        # Posts already seen by the input user
        input_user_posts = set(input_data[input_data['finger_print_id'] == input_finger_print_id]['post_id'].dropna())
        
        # Extend the similar user list if necessary
        additional_similar_users = []
        if len(similar_user_list) < top_n:
            additional_similar_users = similar_users[input_finger_print_id][top_n:top_n*2]

        combined_similar_users = similar_user_list + additional_similar_users
        
        for finger_print_id, similarity_score in combined_similar_users:
            user_posts = set(data[data['finger_print_id'] == finger_print_id]['post_id'].dropna())
            recommended_posts = user_posts - input_user_posts
            recommendations[input_finger_print_id].extend(recommended_posts)
            
            if len(recommendations[input_finger_print_id]) >= min_recommendations:
                break
        
        # If there are still not enough recommendations, add the most frequent posts
        if len(recommendations[input_finger_print_id]) < min_recommendations:
            frequent_posts = [post for post, _ in post_frequency.most_common() if post not in input_user_posts]
            recommendations[input_finger_print_id].extend(frequent_posts[:min_recommendations - len(recommendations[input_finger_print_id])])
        
        # Remove placeholder values from recommendations
        recommendations[input_finger_print_id] = [post for post in recommendations[input_finger_print_id] if post != -1]
    
    return recommendations

# Function to get personalized recommendations for a given finger_print_id
def get_personalized_recommendations(finger_print_id, data, top_n=5, min_recommendations=5):
    user_posts = data[data['finger_print_id'] == finger_print_id]['post_id'].dropna().tolist()
    
    if not user_posts:
        print(f"finger_print_id {finger_print_id} has no posts.")
        return

    input_data = pd.DataFrame({
        'finger_print_id': [finger_print_id] * len(user_posts),
        'post_id': user_posts
    })

    similarities = calculate_similarity(input_data, data)
    top_similar_users = find_top_similar_user(similarities, top_n)
    recommendations = recommend_posts(input_data, top_similar_users, data, top_n, min_recommendations)

    # Print results
    print("------------------Top similar users-----------------------")
    for input_finger_print_id, similar_user_list in top_similar_users.items():
        print(f"finger_print_id {input_finger_print_id} has top similar users:")
        for finger_print_id, similarity_score in similar_user_list:
            print(f"  finger_print_id {finger_print_id} with similarity score: {similarity_score}")

    print("\n------------------Recommendations-----------------------")
    for input_finger_print_id, recommended_posts in recommendations.items():
        print(f"finger_print_id {input_finger_print_id} should recommend the following posts:")
        print(recommended_posts)

# Example usage:
# Load the data from CSV (assuming 'Ricky_Data.csv' is your dataset file)
data = pd.read_csv('Ricky_Data.csv')

# Drop rows where both post_id and topic_id are NaN
data = data.dropna(subset=['post_id', 'topic_id'], how='all')

# Fill NaN values in 'post_id' and 'topic_id' with a placeholder (e.g., -1)
data['post_id'].fillna(-1, inplace=True)
data['topic_id'].fillna(-1, inplace=True)

# Input the finger_print_id you want to get recommendations for
input_finger_print_id = input("Enter the finger_print_id you want to get recommendations for: ")

# Get personalized recommendations
get_personalized_recommendations(input_finger_print_id, data)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['post_id'].fillna(-1, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['topic_id'].fillna(-1, inplace=True)


------------------Top similar users-----------------------
finger_print_id db6ef45bef0e081d12fc79f7bc869cca110 has top similar users:
  finger_print_id db6ef45bef0e081d12fc79f7bc869cca109 with similarity score: 0.6266666666666669
  finger_print_id db6ef45bef0e081d12fc79f7bc869cca108 with similarity score: 0.2533333333333334
  finger_print_id db6ef45bef0e081d12fc79f7bc869cca100 with similarity score: 0.18856180831641267
  finger_print_id db6ef45bef0e081d12fc79f7bc869cca104 with similarity score: -0.1414213562373095
  finger_print_id db6ef45bef0e081d12fc79f7bc869cca102 with similarity score: -0.14142135623730953

------------------Recommendations-----------------------
finger_print_id db6ef45bef0e081d12fc79f7bc869cca110 should recommend the following posts:
[1855, 3601, 2557, 3602, 1715, 3570]


In [7]:
import pandas as pd
from collections import Counter
from scipy.stats import pearsonr

def clean_data(data):
    # Remove rows with NaN values in either 'post_id' or 'finger_print_id'
    data = data.dropna(subset=['post_id', 'finger_print_id'])
    return data

def calculate_similarity(input_data, data):
    interaction_matrix = data.pivot_table(index='finger_print_id', columns='post_id', aggfunc='size', fill_value=0)
    
    similar_users = {}

    for input_finger_print_id in input_data['finger_print_id'].unique():
        input_user_vector = interaction_matrix.loc[input_finger_print_id]

        similar_users[input_finger_print_id] = []
        
        for finger_print_id in interaction_matrix.index:
            if finger_print_id != input_finger_print_id:
                finger_print_vector = interaction_matrix.loc[finger_print_id]
                if len(input_user_vector) > 1 and len(finger_print_vector) > 1:
                    correlation, _ = pearsonr(input_user_vector, finger_print_vector)
                    if not pd.isna(correlation):
                        similar_users[input_finger_print_id].append((finger_print_id, correlation))
    return similar_users

def find_top_similar_user(similar_users, top_n):
    top_similar_users = {}
    for input_finger_print_id, user_similarities in similar_users.items():
        user_similarities.sort(key=lambda x: x[1], reverse=True)
        top_similar_users[input_finger_print_id] = user_similarities[:top_n]
    return top_similar_users

def recommend_posts(input_data, top_similar_users, data, top_n=12, min_recommendations=12):
    recommendations = {}
    post_frequency = Counter(data['post_id'].dropna())
    for input_finger_print_id, similar_user_list in top_similar_users.items():
        recommendations[input_finger_print_id] = []
        input_user_posts = set(input_data[input_data['finger_print_id'] == input_finger_print_id]['post_id'].dropna())
        additional_similar_users = []
        combined_similar_users = similar_user_list + additional_similar_users
        for finger_print_id, similarity_score in combined_similar_users:
            user_posts = set(data[data['finger_print_id'] == finger_print_id]['post_id'].dropna())
            recommended_posts = user_posts - input_user_posts
            recommendations[input_finger_print_id].extend(recommended_posts)
            if len(recommendations[input_finger_print_id]) >= min_recommendations:
                break
        if len(recommendations[input_finger_print_id]) < min_recommendations:
            frequent_posts = [post for post, _ in post_frequency.most_common() if post not in input_user_posts]
            recommendations[input_finger_print_id].extend(frequent_posts[:min_recommendations - len(recommendations[input_finger_print_id])])
        recommendations[input_finger_print_id] = [int(post) for post in recommendations[input_finger_print_id] if post != -1]
    return recommendations

def get_personalized_recommendations(finger_print_id, data, top_n=12, min_recommendations=12):
    user_posts = data[data['finger_print_id'] == finger_print_id]['post_id'].dropna().tolist()
    
    if not user_posts:
        print(f"finger_print_id {finger_print_id} has no posts.")
        return

    input_data = pd.DataFrame({
        'finger_print_id': [finger_print_id] * len(user_posts),
        'post_id': user_posts
    })

    similarities = calculate_similarity(input_data, data)
    top_similar_users = find_top_similar_user(similarities, top_n)
    recommendations = recommend_posts(input_data, top_similar_users, data, top_n, min_recommendations)

    # Print results
    print("------------------Top similar users-----------------------")
    for input_finger_print_id, similar_user_list in top_similar_users.items():
        print(f"finger_print_id {input_finger_print_id} has top similar users:")
        for finger_print_id, similarity_score in similar_user_list:
            print(f"  finger_print_id {finger_print_id} with similarity score: {similarity_score}")


    
    # Additional print statements
    print("------------------Matching similar personalize-----------------------")
    for input_finger_print_id, similar_user_list in top_similar_users.items():
        for finger_print_id, similarity_score in similar_user_list:
            input_user_posts = set(map(int, input_data[input_data['finger_print_id'] == input_finger_print_id]['post_id']))
            matching_user_posts = set(map(int, data[data['finger_print_id'] == finger_print_id]['post_id']))
            print(f"Input finger_print_id {input_finger_print_id} has post_ids: {input_user_posts}")
            print(f"Matching finger_print_id {finger_print_id} has post_ids: {matching_user_posts}")
            print("\n")
            
            
    print("\n------------------Recommendations-----------------------")
    for input_finger_print_id, recommended_posts in recommendations.items():
        print(f"finger_print_id {input_finger_print_id} should recommend the following posts:")
        print(recommended_posts)
            
            

    print("Algorithm User Based ------>  Recommendations personalize:\n")
    recommended_posts_listed = []

    for input_finger_print_id, recommended_posts in recommendations.items():
        print(f"finger_print_id {input_finger_print_id} should recommend the following posts:")
        print(recommended_posts)
        recommended_posts_listed.extend(recommended_posts)

    print("\nRecommended posts All listed:", recommended_posts_listed)

    # Dictionary to store matching user's post_ids
    matching_post_ids = {}

    # Collect matching user's post_ids
    for input_finger_print_id, similar_user_list in top_similar_users.items():
        for finger_print_id, similarity_score in similar_user_list:
            input_user_post_ids = set(input_data[input_data['finger_print_id'] == input_finger_print_id]['post_id'])
            matching_user_post_ids = set(data[data['finger_print_id'] == finger_print_id]['post_id'])
            matching_post_ids[input_finger_print_id] = input_user_post_ids.intersection(matching_user_post_ids)

    # Check if there are any matching post IDs
    all_matching_post_ids = [post_id for post_ids in matching_post_ids.values() for post_id in post_ids]
    if all_matching_post_ids:
        most_common_post_id = Counter(all_matching_post_ids).most_common(1)[0][0]
        print(f"\nMost common post_id among matching users: {most_common_post_id}")
    else:
        print("\nNo common post_id found among matching users.")

# Example usage:
data = pd.read_csv('blog_viewerpreference_202406210935.csv')

# Clean data
data = clean_data(data)

# Ensure that NaN values are not present by dropping rows with -1 values
data = data[(data['post_id'] != -1) & (data['topic_id'] != -1)]

input_finger_print_id = input("Enter the finger_print_id you want to get recommendations for: ")
get_personalized_recommendations(input_finger_print_id, data)


------------------Top similar users-----------------------
finger_print_id db6ef45bef0e081d12fc79f7bc869cca has top similar users:
  finger_print_id 23104850fb8bc7ae33b214e987ac4faf with similarity score: 0.9999999999999958
  finger_print_id 4595d1784cfb4ee905107645d0f198b0 with similarity score: 0.9999999999999958
  finger_print_id 6d60a9f0bf14983bcf3dcdebb3694e14 with similarity score: 0.9999999999999958
  finger_print_id 5375e122a655216adc45ff4a4aaa2ed1 with similarity score: 0.18792108214906364
  finger_print_id 00416f297f6237ceb5d1dc964ecd84d7 with similarity score: -0.00041476565740356105
  finger_print_id 066b18dd277ad04ac4ae8e0c2e75b870 with similarity score: -0.00041476565740356105
  finger_print_id 06dec961f762901ba45ff58ecc587d00 with similarity score: -0.00041476565740356105
  finger_print_id 08d2438d2a19abf980dd792334ed41ad with similarity score: -0.00041476565740356105
  finger_print_id 0d73ad0942b185c77fb1184c9c25e075 with similarity score: -0.00041476565740356105
  fing