In [2]:
import pandas as pd

# Overview

The anime recommendation system is based on the concept of collaborative filtering. It works by executing these steps:

1. Within a sample of 500 users, find 10 users with similar preference profiles (genres of interest) as the current user.
2.1 If the recommendation specifies highly rated shows only, then only the 5 highest rated animes per user will be included in the list.
2.2 If not, then every anime that the 10 users have watched will be included in the list in random order.
3. Remove anime from the list that the current user has already seen.
4. Get the first 20 animes from the list as a recommendation for the user.

However, the method is not failproof:

- Since the number of anime that a user watches greatly varies, the number of anime included within the recommendations also depends on what the users with the most similar preferences have watched.

- Consequently, there may be the possibility that there will be fewer than 20 animes in the recommendation list.

- In the case where two animes have the same rating, the ordering will depend on how Pandas has sorted the list beforehand.

In [3]:
# required datasets/faux knowledge base
preference_df = pd.read_csv("user_preference.csv")
anime_df = pd.read_csv("anime.csv")
top5_peruser_df = pd.read_csv("top5_amime_per_user.csv")
ratings_df = pd.read_csv("rating.csv")

In [4]:
def get_similar_users(preference_df, user_id):
    """
    Gets 10 users with the most similar preference from the given user. Uses hamming distance for this, where
    the lower the hamming score, the more similar they are.
    
    Input: preference_df - dataframe
    Output: dataframe with the top 5 most similar users
    """
    
    user_pref = preference_df.loc[preference_df.user_id == user_id].preference.values[0]
    users_sample = preference_df.sample(n=1000, random_state=420)
    users_sample['hamming_score'] = preference_df.preference.apply(
                                    lambda val: sum(c1 != c2 for c1, c2 in zip(val, user_pref)))
    return users_sample.sort_values("hamming_score")[:10]

In [9]:
def get_recommended_anime(user_id, preference_df, top5_peruser_df, anime_df, highly_rated=False):
    """
    Gets 20 recommeded anime for a certain user.
    
    Input: user_id (int)
           preference_df (dataframe) - preference dataframe mined in previous notebook
           top5_peruser_df (dataframe) - top 5 animes per user mined in previous notebook
           anime_df - dataframe - anime_df in the original dataset
           
    Output: dataframe with 20 recommended animes
    """
    
    # getting top 5 users with similar prefrences
    similar_users = get_similar_users(preference_df, user_id)
    
    # getting watched anime of similar users
    if highly_rated is True:
        # top 5 animes (max) per user
        # optional: to add randomizing factor
        anime_ids = similar_users.merge(top5_peruser_df, how="left")
    else:
        # gets all of the animes watched by similar users ordered randomly
        anime_ids = similar_users.merge(ratings_df, how="left")
        anime_ids = anime_ids.sample(frac=1, random_state=42)
    
    watched_by_similar = anime_ids[["anime_id"]].merge(anime_df, 
                                      how="left").drop_duplicates().sort_values(by="rating", ascending=False)
    
    # getting user's watched anime, used to remove possibility of recommending anime that user has watched
    watched_anime = ratings_df[ratings_df.user_id == user_id][["anime_id"]]
    anime_recomms = watched_anime.merge(watched_by_similar, how="outer", on="anime_id", indicator=True)
    
    # return 20 anime recommendations
    return anime_recomms[anime_recomms["_merge"] == "right_only"][["name", "genre", 
                                                           "type", "episodes"]].reset_index()[:20]

In [10]:
get_recommended_anime(2666, preference_df, top5_peruser_df, anime_df)

Unnamed: 0,index,name,genre,type,episodes
0,110,Gintama,"Action, Comedy, Historical, Parody, Samurai, S...",TV,201
1,111,Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...,"Action, Drama, Historical, Martial Arts, Roman...",OVA,4
2,112,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26
3,113,Mononoke Hime,"Action, Adventure, Fantasy",Movie,1
4,114,Suzumiya Haruhi no Shoushitsu,"Comedy, Mystery, Romance, School, Sci-Fi, Supe...",Movie,1
5,115,Monogatari Series: Second Season,"Comedy, Mystery, Romance, Supernatural, Vampire",TV,26
6,116,Tengen Toppa Gurren Lagann,"Action, Adventure, Comedy, Mecha, Sci-Fi",TV,27
7,117,Natsume Yuujinchou Shi,"Drama, Fantasy, Shoujo, Slice of Life, Superna...",TV,13
8,118,Howl no Ugoku Shiro,"Adventure, Drama, Fantasy, Romance",Movie,1
9,119,Kara no Kyoukai 5: Mujun Rasen,"Action, Drama, Mystery, Romance, Supernatural,...",Movie,1


In [11]:
get_recommended_anime(2666, preference_df, top5_peruser_df, anime_df, highly_rated=True)

Unnamed: 0,index,name,genre,type,episodes
0,110,Mononoke Hime,"Action, Adventure, Fantasy",Movie,1
1,111,Ano Hi Mita Hana no Namae wo Bokutachi wa Mada...,"Drama, Slice of Life, Supernatural",TV,11
2,112,Hotaru no Haka,"Drama, Historical",Movie,1
3,113,Samurai Champloo,"Action, Adventure, Comedy, Historical, Samurai...",TV,26
4,114,Mahou Shoujo Madoka★Magica Movie 2: Eien no Mo...,"Drama, Magic, Psychological, Thriller",Movie,1
5,115,Toradora!,"Comedy, Romance, School, Slice of Life",TV,25
6,116,Danshi Koukousei no Nichijou,"Comedy, School, Shounen, Slice of Life",TV,12
7,117,Mahou Shoujo Madoka★Magica Movie 1: Hajimari n...,"Drama, Magic, Psychological, Thriller",Movie,1
8,118,Majo no Takkyuubin,"Adventure, Comedy, Drama, Fantasy, Magic, Romance",Movie,1
9,119,Kurenai no Buta,"Action, Adventure, Comedy, Drama, Historical, ...",Movie,1
