In [93]:
# Import required libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [103]:
# Load the dataset
data = pd.read_csv('anime_with_synopsis.csv')
# Remove None values from sypnopsis
idx = [i for i, val in enumerate(data['sypnopsis'].isna()) if val]
data.loc[idx, 'sypnopsis'] = ""
# Create a description column, summarizing genres and sypnopsis
data["description"] = data["Genres"] + data["sypnopsis"]
display(data.head())

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,description
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever...","Action, Adventure, Comedy, Drama, Sci-Fi, Spac..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ...","Action, Drama, Mystery, Sci-Fi, Spaceother day..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0...","Action, Sci-Fi, Adventure, Comedy, Drama, Shou..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...,"Action, Mystery, Police, Supernatural, Drama, ..."
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...,"Adventure, Fantasy, Shounen, SupernaturalIt is..."


In [104]:
# Create a TF-IDF vectorizer object
vectorizer = TfidfVectorizer()

# Fit the vectorizer on the item description column
tfidf_matrix = vectorizer.fit_transform(data['description'])

# Function to recommend hotels based on user requirements and availability on a given date
def get_recommendations(genres, n_recommendations=5):
    # Lower case all genres inserted by the user
    genres = [genres[i].lower() for i in range(len(genres))]
    # Filter animes that contains a subset of genres specified by the user
    cond_vec = []
    for i in range(0,len(data["Genres"].values)):
        cond_vec.append(all(item in data["Genres"].values[i].lower().split(", ") for item in genres))
    cond_idx = [i for i, x in enumerate(cond_vec) if x]
    available_animes = data.iloc[cond_idx]
    
    # Create a TF-IDF vector for the user requirements
    user_requirements = " ".join(genres)
    user_vector = vectorizer.transform([user_requirements])
    
    # Compute the cosine similarity between the user requirements and the hotel descriptions
    sim_scores = cosine_similarity(user_vector, tfidf_matrix[available_animes.index])
    
    # Sort the hotels based on the cosine similarity scores
    sim_scores = list(enumerate(sim_scores[0]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get the indices of the top n_recommendations hotels
    top_indices = [i[0] for i in sim_scores[:n_recommendations]]
    
    # Return the names and locations of the recommended hotels
    return available_animes[['Name', 'Score']].iloc[top_indices]


In [107]:
# Test the recommender system
get_recommendations(["Drama","Shounen","Supernatural"], n_recommendations=10)

Unnamed: 0,Name,Score
10018,Owari no Seraph: The Beginning of the End,7.16
5140,Sora no Otoshimono: Tokeijikake no Angeloid,7.62
14610,Muhyo to Rouji no Mahouritsu Soudan Jimusho 2n...,6.83
8484,Owari no Seraph,7.49
409,Tenjou Tenge: The Ultimate Fight,7.06
121,Kannazuki no Miko,6.87
220,Madlax,7.07
677,Tenjou Tenge: The Past Chapter,7.07
16061,Platinum End,Unknown
2105,Rumiko Takahashi Anthology,7.19
