In [None]:
import pandas as pd

# Load the raw anime catalogue from CSV. Later cells take copies of this frame
# rather than working on it directly.
original_df = pd.read_csv('anime.csv')
original_df.head(5)

In [None]:
# Working copy used for the content-based features; original_df stays as loaded.
anime=original_df.copy()

In [None]:
# Columns that play no part in the combined "tags" text built further down.
columns_to_drop = ['episodes', 'status', 'rank','aired','duration','premiered',]
# Derive `anime` from the untouched source frame instead of dropping in place.
# An in-place drop raises KeyError when this cell is run a second time because
# the columns are already gone; this form gives the same frame on every run.
anime = original_df.drop(columns=columns_to_drop)

In [None]:
def remove_spaces(input_string):
    """Delete every whitespace character from a string.

    Parameters
    ----------
    input_string : str
        Text that may contain spaces, tabs or newlines.

    Returns
    -------
    str
        The non-whitespace pieces of ``input_string`` concatenated in order.
    """
    # split() with no separator breaks on any run of whitespace and discards it.
    pieces = input_string.split()
    return "".join(pieces)

In [None]:
# Whitespace-free titles are kept in a separate Series: anime['title'] itself
# must stay untouched because the recommenders look titles up by exact match.
title = anime['title'].apply(remove_spaces)

# Theme and studio values are overwritten in place with their whitespace-free form.
for column in ('theme', 'studios'):
    anime[column] = anime[column].apply(remove_spaces)

In [None]:
def remove_commas_add_spaces(text):
    """Return ``text`` with every comma replaced by a single space."""
    # Splitting on ',' and re-joining with ' ' swaps each comma for one space.
    return ' '.join(text.split(','))

In [None]:
# Replace the commas inside each genres value with spaces.
anime['genres']=anime['genres'].apply(remove_commas_add_spaces)

In [None]:
# Preview the first rows after the theme, studios and genres clean-up.
anime.head(10)

In [None]:
def remove_words(text, words_to_remove):
    """Delete every occurrence of each given substring from ``text``.

    Parameters
    ----------
    text : str
        The string to clean.
    words_to_remove : iterable of str
        Substrings to delete. They are applied one after another in the given
        order, so an earlier removal can affect what a later one matches.

    Returns
    -------
    str
        ``text`` with all listed substrings removed.
    """
    cleaned = text
    for fragment in words_to_remove:
        cleaned = cleaned.replace(fragment, '')
    return cleaned

# Boilerplate found in the synopsis text: the "[Written by MAL Rewrite]" credit
# (together with the line break in front of it) and the bare line-break sequence.
words_to_remove = ['\n\r\n[Written by MAL Rewrite]','\n\r\n']
mal_credit, line_break = words_to_remove

# The credit is deleted outright, but a line break is replaced by a space:
# deleting it would glue the last word before the break onto the first word
# after it ("end.\n\r\nNext" -> "end.Next"), which the tokeniser then treats as
# a single token.
anime['synopsis'] = anime['synopsis'].apply(
    lambda x: remove_words(x, [mal_credit]).replace(line_break, ' ')
)

In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [None]:
from functools import lru_cache


@lru_cache(maxsize=None)
def _english_stop_words():
    """Return the NLTK English stop-word list as a frozenset, built once."""
    return frozenset(stopwords.words('english'))


@lru_cache(maxsize=None)
def _shared_lemmatizer():
    """Return a single WordNetLemmatizer instance reused across calls."""
    return WordNetLemmatizer()


def preprocess_text(text):
    """Normalise free text into a space-separated string of lemmatised words.

    Steps: lowercase, tokenise with ``word_tokenize``, strip every character
    that is not an ASCII letter or whitespace from each token, drop tokens
    that are empty or English stop words, lemmatise, and join with spaces.

    Parameters
    ----------
    text : str
        Raw text, e.g. a synopsis.

    Returns
    -------
    str
        The cleaned tokens joined by single spaces.
    """
    # Step 1: lowercasing
    text = text.lower()

    # Step 2: tokenisation
    tokens = word_tokenize(text)

    # Step 3: remove special characters (anything but letters and whitespace)
    stripped = (re.sub(r'[^a-zA-Z\s]', '', token) for token in tokens)

    # Step 4: remove stop words. Tokens that consisted only of punctuation or
    # digits are now empty strings; they are dropped too, otherwise each one
    # would leave an extra space in the joined result.
    stop_words = _english_stop_words()
    kept = [token for token in stripped if token and token not in stop_words]

    # Step 5: lemmatisation. The stop-word set and the lemmatiser are cached
    # because this function is applied once per row.
    lemmatize = _shared_lemmatizer().lemmatize
    return ' '.join(lemmatize(token) for token in kept)

In [None]:
# Normalise every synopsis with preprocess_text (lowercase, tokenise, strip
# non-letters, drop stop words, lemmatise).
anime['synopsis'] = anime['synopsis'].apply(preprocess_text)

In [None]:
# Build one free-text "tags" value per row by joining the text features with
# single spaces. `title` is the separate Series of titles, not anime['title'].
anime['tags'] = (
    title
    + " " + anime['theme']
    + " " + anime['demographic']
    + " " + anime['genres']
    + " " + anime['type']
    + " " + anime['synopsis']
    + " " + anime['studios']
)

In [None]:
# Inspect the combined tags of the row with index label 1.
anime['tags'][1]

In [None]:
# Drop the "Unknown" placeholder and normalise whitespace in one pass.
# Filtering whole whitespace-separated tokens (instead of deleting the
# substring 'Unknown ') also removes a placeholder at the very end of the
# string - studios is the last field - and never clips a token that merely
# ends in "Unknown", which would fuse it with the following word.
anime['tags'] = anime['tags'].apply(
    lambda x: ' '.join(token for token in x.split() if token != 'Unknown')
)

In [None]:
# Inspect the same row again after the tags clean-up.
anime['tags'][1]

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF over unigrams and bigrams, capped at 10,000 features.
# NOTE: despite the name `cv`, this is a TfidfVectorizer, not a CountVectorizer.
cv = TfidfVectorizer(max_features=10000,ngram_range=(1,2))

In [None]:
# Keep the TF-IDF matrix sparse. Converting it with .toarray() allocates a
# dense rows x features array of mostly zeros, and cosine_similarity accepts
# the sparse matrix directly.
vector = cv.fit_transform(anime['tags'])
vector.shape

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between all rows of `vector`:
# similarity[i][j] compares row i with row j.
similarity = cosine_similarity(vector)
similarity.shape

In [None]:
# Index label of the first row whose title is exactly 'Jujutsu Kaisen'
# (raises IndexError if there is no such row).
anime[anime['title'] == 'Jujutsu Kaisen'].index[0]

In [None]:
def content_recommendation(anime_title, top_n=10):
    """Print the titles whose tags are most similar to those of ``anime_title``.

    Reads the notebook-level ``anime`` frame and ``similarity`` matrix.

    Parameters
    ----------
    anime_title : str
        Exact value from ``anime['title']``; the first matching row is used.
    top_n : int, default 10
        Number of recommendations to print.

    Raises
    ------
    IndexError
        If no row of ``anime`` has this title.
    """
    # Work with the row *position* of the query, because ``similarity`` is
    # addressed by position, not by index label.
    is_match = (anime['title'] == anime_title).to_numpy()
    if not is_match.any():
        raise IndexError(f"{anime_title!r} not found in the anime dataset.")
    query_pos = int(is_match.argmax())

    scores = similarity[query_pos]
    # Exclude the query explicitly instead of assuming it sorts first: a row
    # with identical tags ties with it at the top and could push it to second
    # place, making the title recommend itself.
    candidates = [pos for pos in range(len(scores)) if pos != query_pos]
    # list.sort is stable, so rows with equal scores keep their row order.
    candidates.sort(key=lambda pos: scores[pos], reverse=True)

    print("Content-Based Recommendation \n")
    for rank, pos in enumerate(candidates[:top_n], start=1):
        recommended_anime_title = anime.iloc[pos]['title']
        print(f"{rank}. {recommended_anime_title}")

In [None]:
# Example: content-based recommendations for a single title.
content_recommendation('Fullmetal Alchemist: Brotherhood')

In [None]:
# Second working copy of the raw data, used below to build the rating features.
new=original_df.copy()
new.head(5)

In [None]:
from sklearn.preprocessing import MinMaxScaler

numerical_columns = ['score', 'members']

# Convert the columns to real numbers before scaling: strip thousands
# separators, then let pd.to_numeric turn anything that is still not a number
# into NaN. This avoids relying on the scaler to coerce strings, and the hybrid
# recommender already skips rows whose rating is missing.
for column in numerical_columns:
    new[column] = pd.to_numeric(
        new[column].astype(str).str.replace(',', '', regex=False),
        errors='coerce',
    )

# Min-max scale both columns so they can be combined in a weighted average.
scaler = MinMaxScaler()
new[numerical_columns] = scaler.fit_transform(new[numerical_columns])

anime_rating = pd.concat([new['title'],new[numerical_columns]], axis=1)

In [None]:
# Preview the title column next to the scaled score and members columns.
anime_rating.head(15)

In [None]:
# Give the scaled columns names that say what they measure.
new_column_names = {'score': 'ratings_scores', 'members': 'popularity_members'}
anime_rating = anime_rating.rename(columns=new_column_names)

# Blend the two scaled columns: 85% rating score, 15% member count.
anime_rating['average_ratings'] = (
    (.85 * anime_rating['ratings_scores'])
    + (.15 * anime_rating['popularity_members'])
)
anime_rating.head(10)

In [None]:
# Rank all titles by the blended rating, best first, and show the top 50.
sorted_df = anime_rating.sort_values('average_ratings', ascending=False)
sorted_df.head(50)

In [None]:
def Hybrid_recommendation(anime_title, top_n=10, pool_divisor=100,
                          similarity_weight=0.65, rating_weight=0.35):
    """Print recommendations that blend tag similarity with the overall rating.

    The most similar rows to ``anime_title`` form a candidate pool; each
    candidate is scored as
    ``similarity_weight * cosine_similarity + rating_weight * average_ratings``
    and the best ``top_n`` are printed. Reads the notebook-level ``anime``,
    ``similarity`` and ``anime_rating`` objects.

    Parameters
    ----------
    anime_title : str
        Exact value from ``anime['title']``; the first matching row is used.
    top_n : int, default 10
        Number of recommendations to print.
    pool_divisor : int, default 100
        The candidate pool holds ``len(anime) // pool_divisor - 1`` rows, i.e.
        roughly the top 1% most similar titles by default. Must be positive.
    similarity_weight, rating_weight : float
        Weights of the similarity and rating terms in the hybrid score.

    Raises
    ------
    IndexError
        If no row of ``anime`` has this title.
    """
    print("Hybrid Recommendation - ",anime_title,)

    # Row position (not index label) of the query; ``similarity`` is positional.
    is_match = (anime['title'] == anime_title).to_numpy()
    if not is_match.any():
        raise IndexError(f"{anime_title!r} not found in the anime dataset.")
    query_pos = int(is_match.argmax())

    scores = similarity[query_pos]
    # Exclude the query explicitly rather than assuming it sorts first; a row
    # with identical tags would tie with it at the top.
    candidates = [pos for pos in range(len(scores)) if pos != query_pos]
    candidates.sort(key=lambda pos: scores[pos], reverse=True)
    pool_size = max(len(scores) // pool_divisor - 1, 0)

    # Title -> rating lookup built once per call (first row wins for a
    # duplicated title) instead of scanning anime_rating for every candidate.
    ratings_by_title = (
        anime_rating.drop_duplicates(subset='title')
        .set_index('title')['average_ratings']
    )

    hybrid_scores = []
    for pos in candidates[:pool_size]:
        similar_anime_title = anime.iloc[pos]['title']
        rating = ratings_by_title.get(similar_anime_title)
        # Skip candidates that have no rating (absent from the table or NaN).
        if pd.isna(rating):
            continue
        hybrid_score = scores[pos] * similarity_weight + rating * rating_weight
        hybrid_scores.append((similar_anime_title, hybrid_score))

    # Highest hybrid score first; the sort is stable for ties.
    hybrid_scores.sort(key=lambda pair: pair[1], reverse=True)

    print(f"Top {top_n} Recommended Anime:\n")
    for rank, (recommended_title, hybrid_score) in enumerate(hybrid_scores[:top_n], start=1):
        print(f"{rank}. {recommended_title} - (Hybrid Score: {hybrid_score:.3f})")

In [None]:
# Compare both recommenders side by side for a handful of titles. One loop
# replaces a separate copy-pasted cell per title, so adding or removing a
# title is a one-line change.
demo_titles = [
    'Chainsaw Man',
    'Death Note',
    "Vivy: Fluorite Eye's Song",
    'Dr. Stone',
    'Tokyo Ghoul',
    'One Punch Man',
    'Spy x Family',
    '86',
]

for demo_title in demo_titles:
    Hybrid_recommendation(demo_title)
    print("\n")
    content_recommendation(demo_title)
    print("\n")

In [None]:
# Further titles to run through both recommenders in the next cell.
# NOTE(review): the first entry contains literal double quotes - presumably
# that is how the title is stored in the dataset; confirm.
anime_list = [
    '"Oshi no Ko"',
    "Charlotte",
    "Jujutsu Kaisen" ]


In [None]:
for anime_title in anime_list:
    # Only titles present in the dataset are passed to the recommenders;
    # anything else is reported and skipped.
    if (anime['title'] == anime_title).any():
        print(f"Recommendations for {anime_title}:\n")
        Hybrid_recommendation(anime_title)
        print("\n")
        content_recommendation(anime_title)
        print("\n" + "="*50 + "\n")
    else:
        print(f"{anime_title} not found in the anime dataset.\n")
