In [1]:
from preprocessing.preprocess_pipline import final_preprocess, get_all_cleaned_data, split_profile
# Load the cleaned tables and hand them straight to the final preprocessing
# pass; the loader's three return values are forwarded positionally.
animes, profiles, reviews = final_preprocess(*get_all_cleaned_data())

# Split the profiles, requesting train_size=0.5 and test_size=0.5.
train_profiles, test_profiles = split_profile(
    profiles,
    train_size=0.5,
    test_size=0.5,
)

# Partition the training profiles on their 'is_cold_start' flag. The explicit
# comparisons are kept so the selection does not depend on the column being a
# strict boolean dtype.
cold_start_flag = train_profiles['is_cold_start']
cold_start_profiles = train_profiles[cold_start_flag == True]
normal_profiles = train_profiles[cold_start_flag == False]

# Report the size of each partition.
print(f"Number of cold start profiles: {len(cold_start_profiles)}")
print(f"Number of normal profiles: {len(normal_profiles)}")

[load_anime_data] Loaded 19311 anime records.
[load_profile_data] Loaded 81727 user profiles.
[load_review_data] Loaded 192112 reviews.
[preprocessing] Processing anime data...
[preprocessing] Anime data processed successfully.
[preprocessing] Processing profile data...
[preprocessing] Profile data processed successfully.
[preprocessing] Processing review data...
[preprocessing] Review data processed successfully.
Number of cold start profiles: 37322
Number of normal profiles: 136


In [2]:
# Inspect the 'genre' column of the anime table. This prints the column itself
# (each row is a list of genre labels), not a per-row length.
genre_column = animes['genre']
print("Genres length in animes:")
print(genre_column)

Genres length in animes:
0                 [Comedy, Sports, Drama, School, Shounen]
1                 [Drama, Music, Romance, School, Shounen]
2             [Sci-Fi, Adventure, Mystery, Drama, Fantasy]
3        [Action, Military, Adventure, Comedy, Drama, M...
4                 [Action, Mystery, Supernatural, Vampire]
                               ...                        
19306                   [Sci-Fi, Adventure, Comedy, Magic]
19307    [Adventure, Fantasy, Magic, Martial Arts, Come...
19308                      [Mystery, Horror, Supernatural]
19309    [Action, Adventure, Comedy, Historical, Demons...
19310              [Military, Sci-Fi, Space, Drama, Mecha]
Name: genre, Length: 19311, dtype: object


In [3]:
from evaluation.evaluation_pipline import evaluation_pipeline

In [4]:
from methods.cold_start import recommend_for_cold_start_profiles
# Produce recommendations for the cold-start users, then score them against
# test_profiles. The literal 10 is passed to both calls (presumably the
# list length / evaluation cutoff -- confirm against the callee signatures).
cold_start_results = recommend_for_cold_start_profiles(
    cold_start_profiles,
    10,
)
eva_cold_start = evaluation_pipeline(
    cold_start_results,
    test_profiles,
    10,
)

# Each print emits the label and its value on separate lines.
print("Cold Start Recommendations Length:", len(cold_start_results), sep="\n")
print("Cold Start Evaluation Results:", eva_cold_start["overall"], sep="\n")


[load_anime_data] Loaded 19311 anime records.
Cold Start Recommendations Length:
37322
Cold Start Evaluation Results:
{'precision': 0.04412946787417609, 'ndcg': 0.11688877988602143, 'recall': 0.13690384751153198}


In [5]:
from methods.content_based import ContentBasedRecommender, content_based_recommend
# Build the content-based recommender from the anime table and generate
# recommendations for the non-cold-start training users.
content_based_recommender = ContentBasedRecommender(animes)
content_based_results = content_based_recommend(
    content_based_recommender,
    animes,
    normal_profiles,
)
print("Content-Based Recommendations Length:", len(content_based_results), sep="\n")

# Score the recommendations against test_profiles and show the "overall" entry.
eva_content_based = evaluation_pipeline(
    content_based_results,
    test_profiles,
    10,
)
print("Content-Based Evaluation Results:", eva_content_based["overall"], sep="\n")

Successfully built a unified Content-Based Recommender.


Generating Content_based recommendations: 100%|██████████| 136/136 [00:06<00:00, 21.92it/s]


Content-Based Recommendations Length:
136
Content-Based Evaluation Results:
{'precision': 0.07279411764705883, 'ndcg': 0.21165219350882825, 'recall': 0.07279411764705883}


In [6]:
from methods.collaborative_basic import collaborative_recommend
# Run the basic collaborative-filtering recommender on the non-cold-start
# training users and report how many entries it returned.
collaborative_results = collaborative_recommend(
    normal_profiles,
    10,
)
print("Collaborative Filtering Recommendations:", len(collaborative_results), sep="\n")

Processing users: 100%|██████████| 136/136 [00:00<00:00, 1045.62it/s]

Collaborative Filtering Recommendations:
136





In [7]:
# Score the basic collaborative-filtering recommendations against
# test_profiles and show the "overall" entry of the result.
CF_b_eva = evaluation_pipeline(
    collaborative_results,
    test_profiles,
    10,
)
print("Collaborative Filtering Evaluation Results:", CF_b_eva["overall"], sep="\n")

Collaborative Filtering Evaluation Results:
{'precision': 0.07647058823529412, 'ndcg': 0.28552277792437547, 'recall': 0.07647058823529412}


In [8]:
from methods.collaborative_jaccard import collaborative_recommend as collaborative_jaccard_recommend
# Run the Jaccard variant of the collaborative recommender (imported under the
# alias collaborative_jaccard_recommend) and report how many entries it returned.
collaborative_jaccard_results = collaborative_jaccard_recommend(
    normal_profiles,
    10,
)
print("Collaborative Jaccard Recommendations:", len(collaborative_jaccard_results), sep="\n")

Processing users: 100%|██████████| 136/136 [00:00<00:00, 277.48it/s]

Collaborative Jaccard Recommendations:
136





In [9]:
# Score the Jaccard collaborative recommendations against test_profiles and
# show the "overall" entry of the result.
eva_jaccard = evaluation_pipeline(
    collaborative_jaccard_results,
    test_profiles,
    10,
)
print("Collaborative Jaccard Evaluation Results:", eva_jaccard["overall"], sep="\n")

Collaborative Jaccard Evaluation Results:
{'precision': 0.06470588235294118, 'ndcg': 0.24902086212488642, 'recall': 0.06470588235294118}


In [10]:
from methods.CF import SVD_recommend
# Generate SVD-based recommendations for the non-cold-start training users.
# NOTE(review): test_profiles is passed to the recommender itself, not only to
# the evaluator -- confirm it is not used to fit the model, otherwise the
# evaluation scores for this method would be affected by leakage.
svd_results = SVD_recommend(
    normal_profiles,
    test_profiles,
    reviews,
    10,
)
print("SVD Recommendations:", len(svd_results), sep="\n")

Original rating count: 192112
Training user rating count: 2088
Enhanced rating count: 2730
Cleaned rating count after conflict removal: 2668
Final SVD training set size: 2668
Generated Top-10 recommendations for 136 users in total
SVD Recommendations:
136


In [11]:
# Score the SVD recommendations against test_profiles and show the "overall"
# entry of the result.
SVD_result = evaluation_pipeline(
    svd_results,
    test_profiles,
    10,
)
print("SVD Evaluation Results:", SVD_result["overall"], sep="\n")

SVD Evaluation Results:
{'precision': 0.06544117647058824, 'ndcg': 0.2311561835546887, 'recall': 0.06544117647058824}


In [12]:
from methods.weighted_hybrid import hybrid_recommendations
# Generate weighted-hybrid recommendations for the non-cold-start training
# users. No list length is passed here; the callee's default applies.
hybrid_results = hybrid_recommendations(
    animes,
    normal_profiles,
    reviews,
)
print("Hybrid Recommendations:", len(hybrid_results), sep="\n")

Processing users: 100%|██████████| 136/136 [00:00<00:00, 1194.04it/s]


Successfully built a unified Content-Based Recommender.


Generating Content_based recommendations: 100%|██████████| 136/136 [00:06<00:00, 22.05it/s]

Hybrid Recommendations:
136





In [13]:
# Score the weighted-hybrid recommendations against test_profiles and show the
# "overall" entry of the result.
eva_hybrid = evaluation_pipeline(
    hybrid_results,
    test_profiles,
    10,
)
print("Hybrid Evaluation Results:", eva_hybrid["overall"], sep="\n")

Hybrid Evaluation Results:
{'precision': 0.06838235294117648, 'ndcg': 0.255468768640833, 'recall': 0.06838235294117648}


In [14]:
from methods.cascading_hybrid import cascading_hybrid_recommendations
# Generate cascading-hybrid recommendations for the non-cold-start training
# users. No list length is passed here; the callee's default applies.
cascading_results = cascading_hybrid_recommendations(
    animes,
    normal_profiles,
    reviews,
)
print("Cascading Hybrid Recommendations:", len(cascading_results), sep="\n")

Processing users: 100%|██████████| 136/136 [00:00<00:00, 1086.83it/s]


Successfully built a unified Content-Based Recommender.


Generating Content_based recommendations: 100%|██████████| 136/136 [00:06<00:00, 22.15it/s]

Cascading Hybrid Recommendations:
136





In [15]:
# Score the cascading-hybrid recommendations against test_profiles and show
# the "overall" entry of the result.
eva_cascading = evaluation_pipeline(
    cascading_results,
    test_profiles,
    10,
)
print("Cascading Hybrid Evaluation Results:", eva_cascading["overall"], sep="\n")

Cascading Hybrid Evaluation Results:
{'precision': 0.07500000000000001, 'ndcg': 0.2874771164594664, 'recall': 0.07500000000000001}


In [16]:
from itertools import islice

from preprocessing.preprocess_pipline import save_recommendations

# Preview limits: how many users to list, and how many recommendations to
# show for each of them. These are independent knobs that both happened to be
# the literal 10.
PREVIEW_USERS = 10
PREVIEW_RECS = 10

# NOTE(review): the basic collaborative-filtering output is hard-coded as the
# "best" result. In the outputs recorded with this notebook it has the highest
# precision among the non-cold-start methods, while the cascading hybrid has a
# slightly higher NDCG -- confirm which metric is meant to drive this choice.
best_recommendations = collaborative_results
save_recommendations(best_recommendations, out_dir="data/recommendations")

# Show a few example users. islice stops after PREVIEW_USERS entries, which
# replaces the hand-maintained counter and break of a manual loop.
for user, recs in islice(best_recommendations.items(), PREVIEW_USERS):
    print(f"User {user} recommendations: {recs[:PREVIEW_RECS]}")


[preprocessing] Recommendations saved to: data/recommendations\recommendations.json
User DesolatePsyche recommendations: [(5525, 0.17496355305594125), (849, 0.17496355305594125), (15227, 0.16495721976846456), (24833, 0.16495721976846456), (9874, 0.16495721976846456), (177, 0.16495721976846456), (18179, 0.16495721976846456), (235, 0.16495721976846456), (7592, 0.16495721976846456), (251, 0.16495721976846456)]
User ZephSilver recommendations: [(263, 0.1887128390240994), (9969, 0.18490006540840975), (3297, 0.15408338784034148), (820, 0.133440128170906), (1827, 0.13074409009212273), (2921, 0.13074409009212273), (1210, 0.1232667102722732), (31933, 0.11322770341445965), (5941, 0.10675210253672485), (33049, 0.10675210253672485)]
User usaking recommendations: [(32281, 0.2726392706363988), (9253, 0.27050089040022973), (35247, 0.25767356084130855), (7791, 0.25503068522533523), (35839, 0.2479591495335438), (43, 0.22810637940488032), (30, 0.20389771793513878), (11061, 0.20127153968838662), (2167, 0