<a href="https://colab.research.google.com/github/SanjayBukka/Anti-Money-Laundering/blob/main/Group_Recommendation_Using_MutiCriteriaPreferences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scikit-fuzzy

import pandas as pd
import numpy as np
import skfuzzy as fuzz
from scipy.stats import pearsonr
import os

def read_dataset(dataset_path):
    """Load the ratings table from a CSV source.

    Args:
        dataset_path: Path (or any other source accepted by
            ``pandas.read_csv``) of the CSV data; forwarded unchanged.

    Returns:
        The ``pandas.DataFrame`` parsed from the source.
    """
    ratings_table = pd.read_csv(dataset_path)
    return ratings_table

def calculate_preference(user_id, data):
    """Measure how far one user's ratings sit from the dataset-wide averages.

    Args:
        user_id: Value to look up in the ``'User.ID'`` column.
        data: DataFrame containing ``'User.ID'`` and the seven rating columns
            named in ``rating_columns`` below.

    Returns:
        A Series indexed by rating column holding
        ``column mean - user's rating``, or ``None`` when no row matches
        ``user_id``. When the user has several rows, only the first matching
        row is used.
    """
    rows_for_user = data.loc[data['User.ID'] == user_id]
    if rows_for_user.empty:
        # Unknown user: signal it to the caller instead of raising.
        return None

    rating_columns = ['Value.Rating', 'Rooms.Rating', 'Location.Rating', 'Cleanliness.Rating',
                      'Front.Desk.Rating', 'Service.Rating', 'Business.Service.Rating']

    # Averages are taken over every row of the dataset, not only this user's rows.
    column_means = data[rating_columns].mean()

    # First matching row stands in for the user.
    first_review = rows_for_user[rating_columns].iloc[0]

    return column_means.sub(first_review)

def normalize_preferences(preferences):
    """Rescale preferences to the [0, 1] range with Min-Max scaling.

    For a DataFrame the scaling is done independently per column. The
    reduction axis is spelled out explicitly because ``np.min``/``np.max`` on
    a DataFrame forward ``axis=None``, whose meaning differs between pandas
    versions (per-column vs. one global scalar). Any other input (Series,
    ndarray, list) is scaled with its single overall minimum and maximum.

    A zero range (all values equal) would divide by zero; such values are
    mapped to 0 instead of NaN/inf.

    Args:
        preferences: DataFrame, Series, ndarray or list of numeric values.

    Returns:
        The scaled values, in the same container type that
        ``preferences - scalar`` produces for the input.
    """
    if isinstance(preferences, pd.DataFrame):
        min_pref = preferences.min(axis=0)
        max_pref = preferences.max(axis=0)
        # Constant columns have zero span; dividing by 1 keeps them at 0.
        span = (max_pref - min_pref).replace(0, 1)
        return (preferences - min_pref) / span

    min_pref = np.min(preferences)
    max_pref = np.max(preferences)
    span = max_pref - min_pref
    if span == 0:
        # All values identical: everything collapses to 0.
        return preferences - min_pref
    return (preferences - min_pref) / span

def perform_fuzzy_cmeans_clustering(normalized_preferences, c, m, max_iter):
    """Run fuzzy c-means on the preferences and return a hard label per user.

    Args:
        normalized_preferences: Table of preferences; it is transposed before
            being handed to ``fuzz.cluster.cmeans``.
        c: Number of clusters.
        m: Fuzzifier exponent passed to ``cmeans``.
        max_iter: Maximum number of iterations (``maxiter``).

    Returns:
        Array holding, for every column of the membership matrix, the index of
        the cluster with the highest membership.
    """
    fcm_result = fuzz.cluster.cmeans(
        data=normalized_preferences.T, c=c, m=m, error=0.01, maxiter=max_iter, init=None
    )

    # Second element of the result is the final fuzzy membership matrix.
    membership_matrix = fcm_result[1]

    # Harden the fuzzy memberships: pick the strongest cluster per column.
    return np.argmax(membership_matrix, axis=0)

def filter_users_by_cluster(preferences, cluster_membership, target_cluster):
    """Keep only the rows of ``preferences`` assigned to ``target_cluster``.

    Args:
        preferences: DataFrame with one row per user.
        cluster_membership: Cluster label for each row of ``preferences``, in
            row order.
        target_cluster: Label of the cluster to keep.

    Returns:
        A DataFrame with the matching rows and the original columns (the
        temporary ``"Cluster Label"`` column is removed again).
    """
    labels = pd.DataFrame(cluster_membership, index=preferences.index, columns=["Cluster Label"])
    labelled = pd.concat([preferences, labels], axis=1)

    in_target = labelled["Cluster Label"] == target_cluster
    return labelled.loc[in_target].drop(columns="Cluster Label")

def calculate_pcc(user1_preferences, user2_preferences):
    """Return the Pearson correlation of two preference vectors, if defined.

    Args:
        user1_preferences: Sequence of numeric preferences for the first user.
        user2_preferences: Sequence of numeric preferences for the second user.

    Returns:
        The correlation coefficient, or ``None`` when it is undefined: the
        vectors differ in length, have fewer than two elements, either one is
        constant, or the result is NaN. Returning ``None`` (rather than NaN or
        raising) lets callers filter these pairs out before ranking.
    """
    x = np.array(user1_preferences)
    y = np.array(user2_preferences)

    # pearsonr needs two equally long vectors with at least two points.
    if len(x) != len(y) or len(x) < 2:
        return None

    # A constant vector has zero variance, so the correlation is undefined.
    if np.all(x == x[0]) or np.all(y == y[0]):
        return None

    coefficient = pearsonr(x, y)[0]
    # NaN can still appear, e.g. when the inputs themselves contain NaN.
    if np.isnan(coefficient):
        return None
    return coefficient

def select_top_similar_users(cluster_data, group_size):
    """Build a group from the users most correlated with a random seed user.

    A seed user is drawn at random from the cluster; every other user is
    ranked by Pearson correlation with the seed, and the ``group_size`` best
    are returned. The seed user itself is not part of the returned group.

    Args:
        cluster_data: DataFrame of preferences, one row per user, indexed by
            user ID.
        group_size: Maximum number of users to return.

    Returns:
        The rows of ``cluster_data`` for the selected users, most similar
        first.

    Raises:
        ValueError: If ``cluster_data`` has no rows.
    """
    candidate_ids = cluster_data.index.unique()
    if len(candidate_ids) == 0:
        raise ValueError("cluster_data is empty: no users to build a group from")

    # Select a random user from the cluster
    random_user = np.random.choice(candidate_ids)
    # The seed's preferences do not change inside the loop; look them up once.
    seed_preferences = cluster_data.loc[random_user]

    pcc_values = []
    for user_id in candidate_ids:
        if user_id == random_user:
            continue
        pcc = calculate_pcc(seed_preferences, cluster_data.loc[user_id])
        # Skip undefined correlations: NaN would make the sort order arbitrary.
        if pcc is not None and not np.isnan(pcc):
            pcc_values.append((user_id, pcc))

    # Most similar users first.
    pcc_values.sort(key=lambda pair: pair[1], reverse=True)
    similar_user_ids = [user_id for user_id, _ in pcc_values[:group_size]]

    return cluster_data.loc[similar_user_ids]

# Main function to orchestrate the entire process
def main(dataset_path='/content/TripAdvisor_Filled.csv', target_cluster=1, group_size=5, c=3, m=50, max_iter=1000):
    """Run the group-formation pipeline and write the resulting group to disk.

    Steps: read the dataset, compute a preference vector per user, normalize,
    cluster with fuzzy c-means, keep the users of ``target_cluster``, pick the
    users most similar to a random seed, and save them to
    ``Groups/Group_data.csv``.

    Args:
        dataset_path: CSV file with a ``'User.ID'`` column and rating columns.
        target_cluster: Label of the cluster the group is drawn from.
        group_size: Maximum number of users in the group.
        c: Number of clusters for fuzzy c-means.
        m: Fuzzifier exponent for fuzzy c-means.
        max_iter: Iteration limit for fuzzy c-means.
    """
    ratings = read_dataset(dataset_path)

    # One preference vector per distinct user; unknown users are skipped.
    per_user = {}
    for uid in ratings['User.ID'].unique():
        user_pref = calculate_preference(uid, ratings)
        if user_pref is None:
            continue
        per_user[uid] = user_pref
    preferences_df = pd.DataFrame.from_dict(per_user, orient='index')

    # Clustering runs on the normalized values ...
    scaled = normalize_preferences(preferences_df)
    labels = perform_fuzzy_cmeans_clustering(scaled, c, m, max_iter)

    # ... while the group itself is built from the un-normalized preferences.
    members = filter_users_by_cluster(preferences_df, labels, target_cluster)
    group = select_top_similar_users(members, group_size)

    # Make sure the output directory exists, then persist the group.
    os.makedirs('Groups', exist_ok=True)
    group.to_csv('Groups/Group_data.csv')

# Example usage:
if __name__ == "__main__":
    main()


KeyboardInterrupt: 

In [None]:
!pip install scikit-fuzzy

Collecting scikit-fuzzy
  Downloading scikit-fuzzy-0.4.2.tar.gz (993 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/994.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.8/994.0 kB[0m [31m4.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m532.5/994.0 kB[0m [31m7.8 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m993.3/994.0 kB[0m [31m9.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m994.0/994.0 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-fuzzy
  Building wheel for scikit-fuzzy (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-fuzzy: filename=scikit_fuzzy-0.4.2-py3-none-any.whl size=894078 sha256=9ca

In [None]:
import pandas as pd
import numpy as np
import skfuzzy as fuzz
from scipy.stats import pearsonr
import os

In [None]:
def read_dataset(dataset_path):
    """Load the ratings table from a CSV source.

    Args:
        dataset_path: Path (or any other source accepted by
            ``pandas.read_csv``) of the CSV data; forwarded unchanged.

    Returns:
        The ``pandas.DataFrame`` parsed from the source.
    """
    ratings_table = pd.read_csv(dataset_path)
    return ratings_table

In [None]:
def calculate_preference(user_id, data):
    """Measure how far one user's ratings sit from the dataset-wide averages.

    Args:
        user_id: Value to look up in the ``'User.ID'`` column.
        data: DataFrame containing ``'User.ID'`` and the seven rating columns
            named in ``rating_columns`` below.

    Returns:
        A Series indexed by rating column holding
        ``column mean - user's rating``, or ``None`` when no row matches
        ``user_id``. When the user has several rows, only the first matching
        row is used.
    """
    rows_for_user = data.loc[data['User.ID'] == user_id]
    if rows_for_user.empty:
        # Unknown user: signal it to the caller instead of raising.
        return None

    rating_columns = ['Value.Rating', 'Rooms.Rating', 'Location.Rating', 'Cleanliness.Rating',
                      'Front.Desk.Rating', 'Service.Rating', 'Business.Service.Rating']

    # Averages are taken over every row of the dataset, not only this user's rows.
    column_means = data[rating_columns].mean()

    # First matching row stands in for the user.
    first_review = rows_for_user[rating_columns].iloc[0]

    return column_means.sub(first_review)

In [None]:
def normalize_preferences(preferences):
    """Rescale preferences to the [0, 1] range with Min-Max scaling.

    For a DataFrame the scaling is done independently per column. The
    reduction axis is spelled out explicitly because ``np.min``/``np.max`` on
    a DataFrame forward ``axis=None``, whose meaning differs between pandas
    versions (per-column vs. one global scalar). Any other input (Series,
    ndarray, list) is scaled with its single overall minimum and maximum.

    A zero range (all values equal) would divide by zero; such values are
    mapped to 0 instead of NaN/inf.

    Args:
        preferences: DataFrame, Series, ndarray or list of numeric values.

    Returns:
        The scaled values, in the same container type that
        ``preferences - scalar`` produces for the input.
    """
    if isinstance(preferences, pd.DataFrame):
        min_pref = preferences.min(axis=0)
        max_pref = preferences.max(axis=0)
        # Constant columns have zero span; dividing by 1 keeps them at 0.
        span = (max_pref - min_pref).replace(0, 1)
        return (preferences - min_pref) / span

    min_pref = np.min(preferences)
    max_pref = np.max(preferences)
    span = max_pref - min_pref
    if span == 0:
        # All values identical: everything collapses to 0.
        return preferences - min_pref
    return (preferences - min_pref) / span

In [None]:
def perform_fuzzy_cmeans_clustering(normalized_preferences, c, m, max_iter):
    """Run fuzzy c-means on the preferences and return a hard label per user.

    Args:
        normalized_preferences: Table of preferences; it is transposed before
            being handed to ``fuzz.cluster.cmeans``.
        c: Number of clusters.
        m: Fuzzifier exponent passed to ``cmeans``.
        max_iter: Maximum number of iterations (``maxiter``).

    Returns:
        Array holding, for every column of the membership matrix, the index of
        the cluster with the highest membership.
    """
    fcm_result = fuzz.cluster.cmeans(
        data=normalized_preferences.T, c=c, m=m, error=0.01, maxiter=max_iter, init=None
    )

    # Second element of the result is the final fuzzy membership matrix.
    membership_matrix = fcm_result[1]

    # Harden the fuzzy memberships: pick the strongest cluster per column.
    return np.argmax(membership_matrix, axis=0)

In [None]:
def filter_users_by_cluster(preferences, cluster_membership, target_cluster):
    """Keep only the rows of ``preferences`` assigned to ``target_cluster``.

    Args:
        preferences: DataFrame with one row per user.
        cluster_membership: Cluster label for each row of ``preferences``, in
            row order.
        target_cluster: Label of the cluster to keep.

    Returns:
        A DataFrame with the matching rows and the original columns (the
        temporary ``"Cluster Label"`` column is removed again).
    """
    labels = pd.DataFrame(cluster_membership, index=preferences.index, columns=["Cluster Label"])
    labelled = pd.concat([preferences, labels], axis=1)

    in_target = labelled["Cluster Label"] == target_cluster
    return labelled.loc[in_target].drop(columns="Cluster Label")

In [None]:
def calculate_pcc(user1_preferences, user2_preferences):
    """Return the Pearson correlation of two preference vectors, if defined.

    Args:
        user1_preferences: Sequence of numeric preferences for the first user.
        user2_preferences: Sequence of numeric preferences for the second user.

    Returns:
        The correlation coefficient, or ``None`` when it is undefined: the
        vectors differ in length, have fewer than two elements, either one is
        constant, or the result is NaN. Returning ``None`` (rather than NaN or
        raising) lets callers filter these pairs out before ranking.
    """
    x = np.array(user1_preferences)
    y = np.array(user2_preferences)

    # pearsonr needs two equally long vectors with at least two points.
    if len(x) != len(y) or len(x) < 2:
        return None

    # A constant vector has zero variance, so the correlation is undefined.
    if np.all(x == x[0]) or np.all(y == y[0]):
        return None

    coefficient = pearsonr(x, y)[0]
    # NaN can still appear, e.g. when the inputs themselves contain NaN.
    if np.isnan(coefficient):
        return None
    return coefficient

In [None]:
    # Calculate PCC with the randomly selected user for all users in the same cluster
    pcc_values = []

    for user_id in cluster_data.index.unique():
        if user_id != random_user:
            user1_preferences = cluster_data.loc[random_user]
            user2_preferences = cluster_data.loc[user_id]

            pcc = calculate_pcc(user1_preferences, user2_preferences)
            if pcc is not None:
                pcc_values.append((user_id, pcc))

    # Sort users by PCC values in descending order
    pcc_values.sort(key=lambda x: x[1], reverse=True)

    # Extract the user IDs of the top similar users
    similar_user_ids = [user_id for user_id, _ in pcc_values][:group_size]

    # Create a new DataFrame containing data of the top similar users as a group
    group_data = cluster_data.loc[similar_user_ids]

    return group_data

In [None]:
# Main function to orchestrate the entire process
def main(dataset_path='/content/TripAdvisor_Filled.csv', target_cluster=1, group_size=5, c=3, m=50, max_iter=1000):
    """Run the group-formation pipeline and write the resulting group to disk.

    Steps: read the dataset, compute a preference vector per user, normalize,
    cluster with fuzzy c-means, keep the users of ``target_cluster``, pick the
    users most similar to a random seed, and save them to
    ``Groups/Group_data.csv``.

    Args:
        dataset_path: CSV file with a ``'User.ID'`` column and rating columns.
        target_cluster: Label of the cluster the group is drawn from.
        group_size: Maximum number of users in the group.
        c: Number of clusters for fuzzy c-means.
        m: Fuzzifier exponent for fuzzy c-means.
        max_iter: Iteration limit for fuzzy c-means.
    """
    ratings = read_dataset(dataset_path)

    # One preference vector per distinct user; unknown users are skipped.
    per_user = {}
    for uid in ratings['User.ID'].unique():
        user_pref = calculate_preference(uid, ratings)
        if user_pref is None:
            continue
        per_user[uid] = user_pref
    preferences_df = pd.DataFrame.from_dict(per_user, orient='index')

    # Clustering runs on the normalized values ...
    scaled = normalize_preferences(preferences_df)
    labels = perform_fuzzy_cmeans_clustering(scaled, c, m, max_iter)

    # ... while the group itself is built from the un-normalized preferences.
    members = filter_users_by_cluster(preferences_df, labels, target_cluster)
    group = select_top_similar_users(members, group_size)

    # Make sure the output directory exists, then persist the group.
    os.makedirs('Groups', exist_ok=True)
    group.to_csv('Groups/Group_data.csv')

# Example usage:
if __name__ == "__main__":
    main()


NameError: name 'fuzz' is not defined

In [None]:
import numpy as np
import pandas as pd
from scipy.spatial import distance

In [None]:
def calculate_trust(Group):
    """Compute the pairwise trust matrix between the members of a group.

    For members ``u`` and ``v`` the trust of ``u`` in ``v`` is the harmonic
    mean of two values:

    * partnership: the share of ``u``'s rated items (rating > 0) that ``v``
      has rated as well;
    * closeness: ``1 / (1 + euclidean distance)`` between the two full rating
      rows.

    Args:
        Group: DataFrame of ratings, one row per member and one column per
            item; values ``<= 0`` are treated as "not rated".

    Returns:
        A square DataFrame indexed and labelled by member. Entry ``[u, v]`` is
        the trust of ``u`` in ``v``. The diagonal, and every row of a member
        without any rated item, stays at 0.0.
    """
    members = Group.index
    Trust_matrix = pd.DataFrame(0.0, index=members, columns=members)

    # Per-member data is independent of the pairing, so gather it once
    # instead of once per (u, v) pair.
    rows = {member: Group.loc[member] for member in members}
    rated_labels = {member: row.index[row > 0] for member, row in rows.items()}
    rated_sets = {member: set(labels) for member, labels in rated_labels.items()}

    for u in members:
        count_rated_u = len(rated_labels[u])
        if count_rated_u == 0:
            continue  # Skip if there are no rated items for user u

        for v in members:
            if u == v:
                continue

            # Share of u's rated items that v has rated too.
            partnership_uv = len(rated_sets[u] & rated_sets[v]) / count_rated_u

            # Distance mapped into (0, 1]; identical rows give 1.
            dst_uv = 1 / (1 + distance.euclidean(rows[u], rows[v]))

            # Harmonic mean of partnership and closeness.
            Trust_matrix.at[u, v] = (2 * partnership_uv * dst_uv) / (partnership_uv + dst_uv)

    return Trust_matrix

In [None]:
def calculate_similarity(Group):
    """Compute the member-by-member Pearson correlation matrix.

    Args:
        Group: DataFrame of ratings, one row per member.

    Returns:
        A DataFrame indexed and labelled by member whose entry ``[u, v]`` is
        the Pearson correlation between the rating rows of ``u`` and ``v``.
    """
    member_labels = Group.index

    # Each row is one variable (member), each column one observation (item).
    correlation = np.corrcoef(Group.to_numpy(), rowvar=True)

    return pd.DataFrame(correlation, index=member_labels, columns=member_labels)

In [None]:
# Function to identify leader within a group based on Trust and Similarity matrices
def identify_leader(Trust_matrix, Similarity_matrix, total_members):
    """Pick the member who receives the most trust and similarity from others.

    For every member the trust and similarity received from all *other*
    members (column sums without the diagonal) are added up; the member with
    the highest total is the leader.

    The self-entry is removed by subtracting the actual diagonal rather than a
    literal 1: the trust matrix built in this file has 0.0 on its diagonal, so
    subtracting 1 understated every total. Undefined (NaN) entries count as 0
    so they cannot decide the ``argmax``.

    Args:
        Trust_matrix: Square DataFrame, entry ``[u, v]`` = trust of u in v.
        Similarity_matrix: Square DataFrame aligned with ``Trust_matrix``.
        total_members: Number of members in the group.

    Returns:
        Tuple ``(leader_label, leader_impact)`` where ``leader_impact`` is the
        leader's total divided by ``total_members - 1`` (0.0 for a group with
        a single member, where no other member exists).
    """
    trust = np.asarray(Trust_matrix.values, dtype=float)
    similarity = np.asarray(Similarity_matrix.values, dtype=float)
    trust = np.where(np.isnan(trust), 0.0, trust)
    similarity = np.where(np.isnan(similarity), 0.0, similarity)

    # Column sums minus the diagonal = what each member receives from others.
    trust_sum = trust.sum(axis=0) - np.diag(trust)
    similarity_sum = similarity.sum(axis=0) - np.diag(similarity)
    ts_sumation = trust_sum + similarity_sum

    LeaderId = int(np.argmax(ts_sumation))
    if total_members > 1:
        LeaderImpact = ts_sumation[LeaderId] / (total_members - 1)
    else:
        LeaderImpact = 0.0
    return Trust_matrix.index[LeaderId], LeaderImpact

In [None]:
# Function to calculate influence weight based on leader's impact, similarity, and trust
def calculate_influence_weight(leader_id, leader_impact, similarity_uv, trust_uv, v):
    """Return how strongly member ``v`` influences another member ``u``.

    For an ordinary member the weight is
    ``similarity * trust / (similarity + trust)``. When ``v`` is the leader,
    the leader's impact is added to the numerator and the result is halved.

    Args:
        leader_id: Label of the group leader.
        leader_impact: Impact score of the leader.
        similarity_uv: Similarity between ``u`` and ``v``.
        trust_uv: Trust of ``u`` in ``v``.
        v: Label of the influencing member.

    Returns:
        The influence weight, or 0.0 when it is undefined because
        ``similarity_uv + trust_uv`` is zero or NaN (no usable relationship
        means no influence, instead of NaN/inf or a ZeroDivisionError).
    """
    denominator = similarity_uv + trust_uv
    # `x != x` is true only for NaN; avoids needing an extra import.
    if denominator == 0 or denominator != denominator:
        return 0.0

    product = similarity_uv * trust_uv
    if v == leader_id:
        return (1 / 2) * ((leader_impact + product) / denominator)
    return product / denominator

In [None]:
def influenced_rating(group):
    """Adjust every member's ratings by the influence of the other members.

    For each rating ``score_ui > 0`` the result is
    ``score_ui + sum_v weight_vu * (score_vi - score_ui)``, taken over the
    other members ``v`` who rated item ``i`` (``score_vi > 0``). Items a
    member has not rated (``<= 0``) stay at 0.0.

    Args:
        group: DataFrame of ratings, one row per member and one column per
            item.

    Returns:
        A DataFrame with the same index and columns as ``group`` holding the
        influenced ratings.
    """
    members = group.index
    movies = group.columns

    # Calculate trust and similarity matrices
    trust_matrix = calculate_trust(group)
    similarity_matrix = calculate_similarity(group)

    # Identify the leader and their impact
    leader_id, leader_impact = identify_leader(trust_matrix, similarity_matrix, len(members))

    influenced_ratings = pd.DataFrame(0.0, index=members, columns=movies)

    for u in members:
        # The weight depends only on the pair (u, v), not on the item, so it
        # is computed once per pair instead of once per item.
        weights = {
            v: calculate_influence_weight(
                leader_id, leader_impact, similarity_matrix.at[u, v], trust_matrix.at[u, v], v
            )
            for v in members
            if v != u
        }

        for i in movies:
            score_ui = group.at[u, i]
            if not score_ui > 0:
                continue  # unrated item: leave the 0.0 placeholder

            influence = 0
            for v, weight_vu in weights.items():
                score_vi = group.at[v, i]
                if score_vi > 0:
                    influence += weight_vu * (score_vi - score_ui)

            influenced_ratings.at[u, i] = score_ui + influence

    return influenced_ratings



In [None]:
def evaluate_recommendations(Group, Group_Rating, rec_size, satisfied_Tr):
    """Score a ranked recommendation list against the members' own ratings.

    Items are ranked by ``Group_Rating`` (descending); items with a group
    rating of exactly 0 are ignored. The first ``rec_size`` items count as
    recommended. For every (member, item) pair the member "likes" the item
    when their rating in ``Group`` is ``>= satisfied_Tr``:

    * recommended and liked -> TP, recommended and not liked -> FP
    * not recommended and liked -> FN, not recommended and not liked -> TN

    Args:
        Group: DataFrame of member ratings (rows = members, columns = items).
        Group_Rating: Series of group ratings indexed by item.
        rec_size: Number of top-ranked items treated as recommended.
        satisfied_Tr: Rating threshold from which a member is satisfied.

    Returns:
        Dict with ``Satisfaction`` (satisfied recommended pairs divided by
        ``members * rec_size``; 0 when that product is not positive), the
        percentage metrics ``Accuracy``, ``Precision``, ``Recall``,
        ``Specificity``, ``Balanced_Accuracy``, ``F1_Score`` and the raw
        ``Confusion_counters``.
    """
    ranked = Group_Rating.sort_values(ascending=False)
    ranked = ranked[ranked != 0]

    no_member = len(Group.index)

    TP = TN = FP = FN = 0
    for rank, item in enumerate(ranked.index):
        # Count the members satisfied by this item in one pass over its column.
        liked = int((Group[item] >= satisfied_Tr).sum())
        not_liked = no_member - liked

        if rank < rec_size:
            TP += liked
            FP += not_liked
        else:
            FN += liked
            TN += not_liked

    # Every satisfied (member, recommended item) pair is exactly one TP. The
    # counter must start from zero, otherwise a list that satisfies nobody
    # still reports a positive satisfaction.
    satisfied = TP
    max_satisfied = no_member * rec_size

    total_count = TP + FP + TN + FN

    accuracy = ((TP + TN) / total_count) * 100 if total_count > 0 else 0
    precision = (TP / (TP + FP)) * 100 if TP + FP > 0 else 0
    recall = (TP / (TP + FN)) * 100 if TP + FN > 0 else 0
    specificity = (TN / (TN + FP)) * 100 if TN + FP > 0 else 0
    f1_score = (2 * precision * recall) / (precision + recall) if precision + recall > 0 else 0
    balanced_accuracy = (specificity + recall) / 2

    results = {
        "Satisfaction": satisfied / max_satisfied if max_satisfied > 0 else 0,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "Specificity": specificity,
        "Balanced_Accuracy": balanced_accuracy,
        "F1_Score": f1_score,
        "Confusion_counters": {"TP": TP, "FP": FP, "TN": TN, "FN": FN}
    }

    return results


In [None]:
# Main function to execute the recommendation system
def main():
    """
    Run the group recommendation pipeline and print its evaluation.

    Loads the group's ratings from ``/content/grouped data.csv`` (first column
    ``Unnamed: 0`` holds the member IDs), computes the influenced ratings,
    averages them per item into a group rating, identifies the group leader,
    evaluates the top-1 recommendation with a satisfaction threshold of 3, and
    prints the leader and the evaluation results.
    """
    raw = pd.read_csv('/content/grouped data.csv')

    # Use the member IDs from the first column as row labels.
    member_ids = raw["Unnamed: 0"].unique()
    Group = raw.drop(columns=['Unnamed: 0']).set_axis(member_ids, axis='rows')

    # Group rating per item = mean of the members' influenced ratings.
    Group_Rating = influenced_rating(Group).mean(axis=0).fillna(0)

    # Leader is recomputed here because influenced_rating does not return it.
    trust_matrix = calculate_trust(Group)
    similarity_matrix = calculate_similarity(Group)
    leader_id, _ = identify_leader(trust_matrix, similarity_matrix, len(Group))

    # Evaluate a single recommended item with satisfaction threshold 3.
    Evaluation_Results = evaluate_recommendations(Group, Group_Rating, 1, 3)

    print("Leader ID:", leader_id)
    print("Evaluation Results:", Evaluation_Results)

if __name__ == "__main__":
    main()


0
0
Leader ID: gomezaddams
Evaluation Results: {'Satisfaction': 0.2, 'Accuracy': 50.0, 'Precision': 0.0, 'Recall': 0, 'Specificity': 50.0, 'Balanced_Accuracy': 25.0, 'F1_Score': 0, 'Confusion_counters': {'TP': 0, 'FP': 5, 'TN': 5, 'FN': 0}}


In [None]:
!pip install scikit-fuzzy


[31mERROR: Could not find a version that satisfies the requirement skifuzzy (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for skifuzzy[0m[31m
[0m