In [1]:
import pandas as pd
import ipywidgets as widgets
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Load the anime table from Google Drive. The "view" share link is turned
# into a direct-download link by pulling the file id out of the URL path.
url='https://drive.google.com/file/d/15RiMzUI3nasMl9eDVq7JVl0pfmRyOoDO/view?usp=sharing'
url2 = f"https://drive.google.com/uc?id={url.split('/')[-2]}"
df = pd.read_csv(url2)

# Clean the genre column: missing genres become the empty string and the
# comma separators are dropped so each row is a plain run of words.
df["genre"] = df["genre"].fillna("").str.replace(",", "", regex=False)

# Alphabetically sorted anime names (used as the dropdown options).
anime_names = sorted(df["name"].to_list())

# Bag-of-words counts over the genre strings, then the pairwise cosine
# similarity between every pair of rows.
cv = CountVectorizer()
count_matrix = cv.fit_transform(df["genre"])
cosine_sim = cosine_similarity(count_matrix)

# required functions
def get_id_from_name(name,df):
    """Return the ``anime_id`` of the first row of ``df`` whose name equals ``name``.

    Raises:
        IndexError: if no row has that name.
    """
    matching_ids = df.loc[df["name"] == name, "anime_id"]
    return matching_ids.to_numpy()[0]
def get_index_from_name(name,df):
    """Return the index label of the first row of ``df`` whose name equals ``name``.

    Raises:
        IndexError: if no row has that name.
    """
    is_match = (df["name"] == name).to_numpy()
    matching_labels = df.index[is_match].tolist()
    return matching_labels[0]
def get_name_from_index(index,df):
    """Return the name stored in the first row of ``df`` whose index label equals ``index``.

    Raises:
        IndexError: if no row carries that index label.
    """
    names_at_label = df.loc[df.index == index, "name"]
    return names_at_label.tolist()[0]

# ranking list
# Largest member count in the dataset; used to scale "members" into [0, 1].
most_views = df["members"].max()
# Indexing a row of a NumPy array returns a view, not a copy. Take an explicit
# copy so that any later in-place write to ranking_list cannot silently
# overwrite row 0 of the cosine similarity matrix.
ranking_list = cosine_sim[0].copy()
ranking_list_enum = list(enumerate(ranking_list))
# (row position, score) pairs ordered from highest to lowest score.
suggested = sorted(ranking_list_enum,key=lambda x:x[1],reverse=True)

# define the input widgets
# selection panel: dropdown listing every anime name, with a preselected title
select_anime = widgets.Dropdown(
    description='Choose an anime : ',
    options=anime_names,
    value="Kimi no Na wa.",
    disabled=False,
)
# sliders: three weights, each between 0 and 1 in steps of 0.1
_slider_range = dict(min=0, max=1, step=0.1)
sim = widgets.FloatSlider(description='Similarity %', value=1, **_slider_range)
rating = widgets.FloatSlider(description='Rating ', value=0.5, **_slider_range)
views = widgets.FloatSlider(description='Views', value=0.5, **_slider_range)
# output box: disabled (non-editable) text area that holds the suggestion text
output = widgets.Textarea(
    layout=widgets.Layout(width='300px', height='200px'),
    placeholder='Generating suggestions',
    description='',
    value='',
    disabled=True,
)

# update suggestion list
def update_list():
    """Recompute the top-6 suggestions for the selected anime and show them.

    Each row's score is the weighted sum

        sim.value * cosine similarity to the selected anime
        + rating.value * (rating / 10)
        + views.value * (members / most_views)

    The result is written to ``output.value``; nothing is returned.

    Notes:
        * Scores are built in a fresh array, so ``cosine_sim`` is never
          modified by this function.
        * A missing rating contributes 0 instead of turning the whole score
          into NaN, which cannot be ordered reliably.
        * The selected anime itself is excluded from its own suggestions.
        * Assumes ``df`` keeps the default integer index produced by
          ``pd.read_csv``, so index labels equal row positions in
          ``cosine_sim``.

    Raises:
        IndexError: if the selected name is not present in ``df``.
    """
    selected_name = select_anime.value
    selected_idx = get_index_from_name(selected_name, df)

    # Per-row components, computed for all rows at once.
    rating_part = df["rating"].fillna(0).to_numpy() / 10
    views_part = df["members"].to_numpy() / most_views

    # The multiplication allocates a new array, so the similarity matrix row
    # is only read here, never written.
    scores = (
        sim.value * cosine_sim[selected_idx]
        + rating.value * rating_part
        + views.value * views_part
    )

    # Never recommend the anime the user has just picked.
    scores[selected_idx] = float("-inf")

    # Stable sort on the negated scores gives descending order while keeping
    # the original row order between ties.
    top_rows = (-scores).argsort(kind="stable")[:6]

    lines = ['\n Top 6 anime suggestions for ' + selected_name + ':']
    lines.extend(get_name_from_index(int(row), df) for row in top_rows)
    output.value = '\n'.join(lines)

# Fill the output box once up front so it already shows suggestions for the
# preselected anime before any widget change occurs.
update_list()

In [8]:
# report how many rows (anime) the loaded dataset contains
print("Number of animes in dataset : ", len(df))

# on change function
def suggest(change):
    """Widget observer: refresh the suggestions when a widget's ``value`` changes.

    ``change`` is the mapping passed by the widget to its observers; only
    its ``'type'`` and ``'name'`` entries are inspected. Every notification
    other than a ``'change'`` of the ``'value'`` trait is ignored.
    """
    if change['type'] != 'change':
        return
    if change['name'] != 'value':
        return
    update_list()

# Register the same change handler on every input widget ...
for control in (select_anime, sim, rating, views):
    control.observe(suggest)
# ... then render the inputs followed by the output box, in that order.
display(select_anime, sim, rating, views, output)

Number of animes in dataset :  12294


Dropdown(description='Choose an anime : ', index=5412, options=('&quot;0&quot;', '&quot;Aesop&quot; no Ohanash…

FloatSlider(value=1.0, description='Similarity %', max=1.0)

FloatSlider(value=0.5, description='Rating ', max=1.0)

FloatSlider(value=0.5, description='Views', max=1.0)

Textarea(value='', disabled=True, layout=Layout(height='200px', width='300px'), placeholder='Generating sugges…