In [71]:
import random
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import os

In [72]:
def filter_func(id,fileName):
    mangas_df = pd.read_csv('../Collaborative Filtering/mangas.csv')
    behaviors_df = pd.read_csv(f'../Collaborative Filtering/behaviors_{fileName}.csv')

    if fileName == "isFollow":
        behaviors_df['isFollow'] = behaviors_df['isFollow'].replace({True: 5, False: 0}).infer_objects(copy=False)

    behaviors_df[fileName].fillna(0, inplace=True)
    user_id_list = behaviors_df['userId'].unique()
    manga_id_list = mangas_df['_id'].unique()

    expanded_data = [{'userId': user_id, '_id': manga_id} for user_id in user_id_list for manga_id in manga_id_list]
    expanded_df = pd.DataFrame(expanded_data)
    expanded_df.rename(columns={'_id': 'mangaId'}, inplace=True)

    merged_df = pd.merge(expanded_df, behaviors_df, on=['userId', 'mangaId'], how='left')
    merged_df[fileName] = merged_df[fileName].fillna(0)
    merged_df.drop(columns=['updatedAt'], inplace=True)

    mangas_df.rename(columns={'_id': 'mangaId'}, inplace=True)
    datas = pd.merge(mangas_df, merged_df, on='mangaId').drop(['author','genre'],axis=1)

    userDatas = datas.pivot_table(index=['userId'],columns=['mangaId'],values=fileName)

    def standardize(row):
        return (row - row.mean()) / (row.max() - row.min())
    df_std = userDatas.apply(standardize).T.fillna(0)

    sparse_df = sparse.csr_matrix(df_std.values)
    cosine_sim_matrix = pd.DataFrame(cosine_similarity(sparse_df), index=userDatas.columns, columns=userDatas.columns)

    corrMatrix = userDatas.corr(method='pearson')

    def get_recommendations(user_id, userViews, corrMatrix, num_recommendations=36):
        user_data = userViews.loc[user_id]
        watched_items = user_data[user_data > 0].index.tolist()

        recommendations = {}
        for item in watched_items:
            similar_items = corrMatrix[item].drop(item).sort_values(ascending=False)
            for similar_item, score in similar_items.items():
                if np.isnan(score):  # Kiểm tra và loại bỏ giá trị NaN
                    continue
                if similar_item in recommendations:
                    recommendations[similar_item] += score
                else:
                    recommendations[similar_item] = score

        for item in watched_items:
            if item in recommendations:
                recommendations[item] += 1.0
            else:
                recommendations[item] = 1.0 

        sorted_recommendations = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
        return sorted_recommendations[:num_recommendations]

    recommendations = get_recommendations(id, userDatas, corrMatrix)

    # filtered_recommendations = [item_id for item_id, score in recommendations if not math.isnan(score)]
    filtered_recommendations = [item_id for item_id, score in recommendations if not np.isnan(score)]
    return filtered_recommendations

In [73]:
def filter_func_rating(id):
    csv_path = '../Collaborative Filtering/Recommendation_Rating.csv'
    if os.path.exists(csv_path):
        recommendation_df = pd.read_csv(csv_path)
        if id in recommendation_df.columns:
            manga_id_list = recommendation_df[id].tolist()
        else:
            random_column = random.choice(recommendation_df.columns[0:])
            manga_id_list = recommendation_df[random_column].tolist()
    else:
        manga_id_list = []
    # In ra danh sách mangaId
    return manga_id_list[:36]

In [95]:
user_id = '5f892400948be104b0830fde'
filter = ['view','sumTimeRead','readingFrequency','rating','numOfComment','isFollow']
length = len(filter)
random_num = random.randint(1, length)
filter_chose = random.sample(filter, random_num)
result_dict = {}
for chosen_filter in filter_chose:
    if chosen_filter == 'rating':
        result_dict[chosen_filter] = filter_func_rating(user_id)
        if len(filter_func_rating(user_id)) == 0:
            result_dict[chosen_filter] = filter_func(user_id,'isFollow')
    else:
        result_dict[chosen_filter] = filter_func(user_id,chosen_filter)

unique_values_set = set()
for values in result_dict.values():
    unique_values_set.update(values)
unique_values_list = sorted(unique_values_set)
print(unique_values_list[:36])
# first_key, first_value = next(iter(result_dict.items()))
# print("Phần tử đầu tiên trong result_dict:")
# print("Key:", first_key)
# print("Value:", first_value)

['662a9e73d2a9173d53c01b9d', '662a9e74d2a9173d53c01b9e', '662a9e75d2a9173d53c01b9f', '662a9e76d2a9173d53c01ba0', '662a9e77d2a9173d53c01ba1', '662a9e78d2a9173d53c01ba2', '662a9e79d2a9173d53c01ba3', '662a9e7ad2a9173d53c01ba4', '662a9e7bd2a9173d53c01ba5', '662a9e7cd2a9173d53c01ba6', '662a9e7dd2a9173d53c01ba7', '662a9e7ed2a9173d53c01ba8', '662a9e7fd2a9173d53c01ba9', '662a9e80d2a9173d53c01baa', '662a9e81d2a9173d53c01bab', '662a9e82d2a9173d53c01bac', '662a9e84d2a9173d53c01bae', '662a9e85d2a9173d53c01baf', '662a9e87d2a9173d53c01bb1', '662a9e89d2a9173d53c01bb3', '662a9e8ad2a9173d53c01bb4']


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  behaviors_df[fileName].fillna(0, inplace=True)


In [92]:
result_dict1 = {
    'view': ['abd', 'abc', '111'], 
    'rating': ['abk', 'abe', '111'],
    'sumTimeRead': ['abd', 'abc', '181']
}


['111', '181', 'abc', 'abd', 'abe', 'abk']
