<a href="https://colab.research.google.com/github/AjayBora002/anime_recommendation/blob/main/anime_recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. IMPORTING NECESSARY LIBRARIES

In [None]:
import pandas as pd
import numpy as np
import re
import nltk

In [None]:
# Load the anime catalogue from the CSV hosted in the project's GitHub repository.
ANIME_CSV_URL = "https://raw.githubusercontent.com/AjayBora002/anime_recommendation/refs/heads/main/anime.csv"
anime = pd.read_csv(ANIME_CSV_URL)



In [None]:
def clean_title(title):
    """Normalise free text for matching: lowercase it and strip punctuation.

    Every character other than ASCII letters, digits and spaces is removed
    (it is deleted, not replaced by a space), and leading/trailing whitespace
    is trimmed.

    Parameters
    ----------
    title : str, None or any
        Text to normalise. ``None`` and float NaN (the marker pandas uses for
        missing values) produce an empty string instead of raising; any other
        non-string value is converted with ``str`` first.

    Returns
    -------
    str
        The normalised text, possibly empty.
    """
    if not isinstance(title, str):
        # NaN is the only float that is not equal to itself.
        if title is None or (isinstance(title, float) and title != title):
            return ""
        title = str(title)
    return re.sub(r'[^a-zA-Z0-9 ]', '', title.lower()).strip()

# Now apply the clean_title function

In [None]:
# Build the normalised text columns that feed the TF-IDF model.
# Missing values are replaced by "" before cleaning: a bare astype(str) would
# turn NaN into the literal word "nan", which would then enter the vocabulary
# as if it were real text.
anime["clean_title"] = anime["name"].fillna("").apply(clean_title)  # creates a new clean_title column
anime["clean_synopsis"] = anime["Synopsis"].fillna("").astype(str).apply(clean_title)
# NOTE: this overwrites the original genre column with its normalised form.
anime["genre"] = anime["genre"].fillna("").apply(clean_title)


COMBINING TITLE, GENRE AND SYNOPSIS

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# One text document per anime: cleaned title, genre and synopsis joined by single spaces.
anime["combined"] = anime["clean_title"].str.cat(
    [anime["genre"], anime["clean_synopsis"]],
    sep=" ",
)

# Fit TF-IDF over unigrams and bigrams and keep the resulting document-term matrix.
vectorizer = TfidfVectorizer(min_df=1, ngram_range=(1, 2))
tfidf = vectorizer.fit_transform(anime["combined"])


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
def clean_input(title):
    """Normalise user-typed text by delegating to ``clean_title``."""
    normalised = clean_title(title)
    return normalised


def search(title, top_n=5):
    """Return the anime whose combined text is most similar to ``title``.

    The query is cleaned with ``clean_title``, vectorised with the fitted
    ``vectorizer`` and compared to every row of ``tfidf`` by cosine similarity.

    Parameters
    ----------
    title : str
        Free-text query, e.g. a (partial) anime title.
    top_n : int, default 5
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``anime``, ordered from most to least similar,
        so ``.iloc[0]`` is the best match.
        Assumes the rows of ``tfidf`` are in the same order as the rows of ``anime``.
    """
    title = clean_title(title)  # clean the input title
    query_vec = vectorizer.transform([title])  # vectorize it
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Never ask for more rows than exist; argpartition raises if kth is out of bounds.
    top_n = min(int(top_n), similarity.size)
    if top_n <= 0:
        return anime.iloc[[]]

    # argpartition only guarantees that the last top_n positions hold the
    # largest values, in arbitrary order, so the candidates are sorted explicitly.
    candidates = np.argpartition(similarity, -top_n)[-top_n:]
    ranked = candidates[np.argsort(similarity[candidates])[::-1]]
    return anime.iloc[ranked]


CREATING AN INTERACTIVE BOX FOR INPUT AND OUTPUT

In [None]:
import ipywidgets as widgets
from IPython.display import display  # function used to show diiff things as output on notebook


# Text box in which the title to search for is typed (enabled, pre-filled with an example).
anime_input = widgets.Text(
    description="Movie Title :",
    value="Cowboy Bebop",
    disabled=False,
)

# Output area that will hold the search results; creating it displays nothing yet.
anime_list = widgets.Output()

def on_type(data):
    """Observer callback: refresh ``anime_list`` with matches for the typed text.

    ``data`` is the change dictionary passed by the widget; ``data["new"]`` is
    the current text. Nothing is searched until the text is longer than five
    characters; the previous output is cleared either way.
    """
    with anime_list:
        anime_list.clear_output()

        typed = data["new"]
        if len(typed) <= 5:
            return
        display(search(typed))

# Call on_type every time the text box's value changes, then show the box with its output area.
anime_input.observe(handler=on_type, names="value")
display(anime_input, anime_list)



In [None]:
# Load the user ratings from the CSV hosted in the project's GitHub repository.
RATINGS_CSV_URL = "https://raw.githubusercontent.com/AjayBora002/anime_recommendation/refs/heads/main/rating.csv"
ratings = pd.read_csv(RATINGS_CSV_URL)
ratings.dtypes

FINDING SIMILAR USERS ON THE BASIS OF RATINGS

In [None]:
# anime_id of the reference anime used in the step-by-step walkthrough below.
animeid = 1

In [None]:
# Users who gave the reference anime a rating above 4.
similar_users = ratings.loc[
    (ratings["rating"] > 4) & (ratings["anime_id"] == animeid),
    "user_id",
].unique()

# anime_id of every rating above 4 given by those users (one entry per rating row).
similar_user_recs = ratings.loc[
    (ratings["rating"] > 4) & ratings["user_id"].isin(similar_users),
    "anime_id",
]

In [None]:
# Count the above-4 ratings per anime and divide by the number of similar users,
# giving the share of similar users who liked each anime.
# NOTE: this rebinds similar_user_recs, so the cell must not be re-run on its own.
similar_user_recs = similar_user_recs.value_counts().div(len(similar_users))

# Keep only the anime whose share is greater than 10%.
similar_user_recs = similar_user_recs.loc[similar_user_recs.gt(.10)]

In [None]:
# Ratings above 4, from any user, for the anime shortlisted above.
all_users = ratings.loc[
    ratings["anime_id"].isin(similar_user_recs.index) & ratings["rating"].gt(4)
]

# Share of the distinct users in that subset who liked each shortlisted anime.
all_users_recs = all_users["anime_id"].value_counts() / all_users["user_id"].unique().size

In [None]:
# Side-by-side table, indexed by anime_id: share among similar users vs. share among all users.
rec_percentages = pd.concat(
    [similar_user_recs, all_users_recs],
    axis=1,
    keys=["similar", "all"],
)

In [None]:
# Score = how much more the similar users like an anime than users overall do.
rec_percentages["score"] = rec_percentages["similar"].div(rec_percentages["all"])

# Highest score first: the higher the score, the stronger the recommendation.
rec_percentages = rec_percentages.sort_values(by="score", ascending=False)
rec_percentages

In [None]:
# Attach the anime details to the ten highest-scoring rows (index = anime_id).
pd.merge(
    rec_percentages.head(10),
    anime,
    left_index=True,
    right_on="anime_id",
)

In [None]:
def find_similar_anime(animeid, min_rating=4, min_share=0.10, top_n=10):
    """Recommend anime liked by the users who also liked ``animeid``.

    A rating counts as a "like" when it is strictly greater than ``min_rating``.
    For every anime liked by the users who liked ``animeid`` the function
    computes ``similar`` (share of those users who liked it) and ``all``
    (share of all users who liked any shortlisted anime), and ranks by
    ``score = similar / all``.

    Parameters
    ----------
    animeid : int
        ``anime_id`` of the reference anime.
    min_rating : number, default 4
        Exclusive lower bound for a rating to count as a like.
        NOTE(review): confirm this threshold suits the rating scale of ``ratings``.
    min_share : float, default 0.10
        Only anime liked by more than this fraction of the similar users are kept.
    top_n : int, default 10
        Number of highest-scoring anime to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``score``, ``name``, ``genre`` and ``episodes`` for the
        top-scoring anime; empty when no user liked ``animeid``.
    """
    # Filter to liked ratings once; every later step only looks at these rows.
    liked = ratings[ratings["rating"] > min_rating]

    similar_users = liked.loc[liked["anime_id"] == animeid, "user_id"].unique()
    if len(similar_users) == 0:
        # Nobody liked this anime: nothing to recommend, and avoid dividing by zero.
        return pd.DataFrame(columns=["score", "name", "genre", "episodes"])

    # Share of the similar users who liked each anime, thresholded by min_share.
    similar_user_recs = liked.loc[liked["user_id"].isin(similar_users), "anime_id"]
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    similar_user_recs = similar_user_recs[similar_user_recs > min_share]

    # Share of all users (who liked any shortlisted anime) that liked each one.
    all_users = liked[liked["anime_id"].isin(similar_user_recs.index)]
    all_user_recs = all_users["anime_id"].value_counts() / len(all_users["user_id"].unique())

    rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]

    # A high score means the anime is liked far more by similar users than by users overall.
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
    rec_percentages = rec_percentages.sort_values("score", ascending=False)

    # The index of rec_percentages is the anime_id, hence left_index / right_on.
    merged = rec_percentages.head(top_n).merge(anime, left_index=True, right_on="anime_id")
    return merged[["score", "name", "genre", "episodes"]]

In [None]:
# Text box in which the anime title is typed (enabled, pre-filled with an example).
anime_name_input = widgets.Text(
    description="Movie Title :",
    value="Cowboy Bebop",
    disabled=False,
)

# Output area that will hold the recommendation table.
recommendation_list = widgets.Output()

def on_type(data):
    """Observer callback: show recommendations for the title currently typed.

    ``data["new"]`` is the current text. Once it is longer than five
    characters, the first row returned by ``search`` is taken as the reference
    anime and its ``find_similar_anime`` table is displayed. The previous
    output is cleared either way.
    """
    with recommendation_list:
        recommendation_list.clear_output()  # removes old output
        typed = data["new"]
        if len(typed) <= 5:
            return
        first_hit = search(typed).iloc[0]
        display(find_similar_anime(first_hit["anime_id"]))

# Call on_type every time the text box's value changes, then show the box with its output area.
anime_name_input.observe(handler=on_type, names="value")
display(anime_name_input, recommendation_list)