<a href="https://www.kaggle.com/code/ricksan4ez/anime-recommendation?scriptVersionId=91400345" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

<p style="text-align:center;"><img src="https://images-na.ssl-images-amazon.com/images/I/51NaGWmhJ2L.__AC_SY300_QL70_ML2_.jpg" alt="Notebook cover image" /></p>

<h3 style="text-align:center;"> Import </h3>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

<h3 style="text-align:center;"> Load datasets </h3>

In [None]:
# Read the two CSV files (user ratings and anime metadata) from the Kaggle input directory
ratings_path = '/kaggle/input/anime-recommendations-database/rating.csv'
anime_path = '/kaggle/input/anime-recommendations-database/anime.csv'
ratings = pd.read_csv(ratings_path)
anime = pd.read_csv(anime_path)

<h1 style="text-align:center; color:green" id="EDA"> EDA part </h1>

<h3 style="text-align:center;"> Preview Ratings dataset </h3>

In [None]:
# Preview the first five rows of the ratings table
ratings.head(n=5)

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:#5642C5;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;">

              Rename the "rating" column to "user_rating", because the anime dataset already has its own "rating" column and the two datasets will be merged later 🙃
</p>
</div>

In [None]:
# The anime table has its own "rating" column, so the per-user score gets a
# distinct name before the two tables are merged.
# rename() ignores labels that are not present, so re-running this cell after
# the column has already been renamed is a no-op instead of a KeyError.
ratings = ratings.rename(columns={'rating': 'user_rating'})
ratings.head()

<h3 style="text-align:center;"> Preview Anime dataset </h3>

In [None]:
# Preview the first five rows of the anime table
anime.head(n=5)

<h3 style="text-align:center;"> View the number of rows and columns in our dataset </h3>

In [None]:
# (rows, columns) of each table
ratings_shape, anime_shape = ratings.shape, anime.shape
print(f'Shape of ratings: {ratings_shape}\nShape of anime: {anime_shape}')

<h3 style="text-align:center;"> View the number of unique values for each column </h3>

In [None]:
# Number of distinct values in every column of the anime table
anime.nunique(axis=0)

<h3 style="text-align:center;"> "name" column analysis </h3>

In [None]:
# How many anime have no name
anime['name'].isnull().sum()

In [None]:
# Rows whose name repeats the name of an earlier row
repeated_name = anime.duplicated(subset=['name'])
anime[repeated_name]

In [None]:
# Show every row carrying one of the two names inspected here
names_to_inspect = ['Saru Kani Gassen', 'Shi Wan Ge Leng Xiaohua']
anime[anime['name'].isin(names_to_inspect)]

<h3 style="text-align:center;"> "genre" column analysis </h3>

In [None]:
# How many anime have no genre
anime['genre'].isnull().sum()

In [None]:
# The 20 most frequent values of the genre column
genre_counts = anime['genre'].value_counts()
genre_counts.sort_values(ascending=False).head(20)

In [None]:

# Bar chart of the 15 most frequent values of the genre column
top_genres = anime['genre'].value_counts().iloc[:15].index
fig, ax = plt.subplots(figsize=(15, 8))
ax.set_title('Most popular genres')
sns.countplot(x='genre', data=anime, palette="Greens_d", order=top_genres, ax=ax)
# Tilt the tick labels so they do not overlap
plt.xticks(rotation=45)
plt.show()

<h3 style="text-align:center;"> "type" column analysis </h3>

In [None]:
# How many anime have no type
anime['type'].isnull().sum()

In [None]:
# Horizontal bar chart: number of anime per type
fig, ax = plt.subplots(figsize=(15, 5))
ax.set_title('Number of each type')
sns.countplot(y='type', data=anime, palette='rocket', ax=ax)
plt.show()

<h3 style="text-align:center;"> "rating" column analysis </h3>

In [None]:
# Remove, in place, every row that has a missing value in any column
anime.dropna(axis=0, how='any', inplace=True)

In [None]:
# The 15 most frequent rating values
rating_counts = anime['rating'].value_counts()
rating_counts.sort_values(ascending=False).head(15)

In [None]:
# How many anime have no rating
anime['rating'].isnull().sum()

In [None]:
# Whole-number version of each rating, produced by Python's built-in round()
anime['rounded_rating'] = anime['rating'].map(round)
anime.head()

In [None]:
# Bar chart: number of anime per rounded rating
fig, ax = plt.subplots(figsize=(15, 6))
sns.countplot(data=anime, x='rounded_rating', ax=ax)
ax.set_title('Number of each rating')
plt.show()

In [None]:
# Look at the ratings table again before cleaning it
ratings.head(n=5)

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:#5642C5;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;">

              When a user didn't leave a rating, it was recorded as -1, so let's drop those rows
</p>
</div>

In [None]:
# A user_rating of -1 is turned into NaN so that dropna() removes those rows
ratings['user_rating'] = ratings['user_rating'].replace(-1, np.nan)
# Note: this drops rows with a missing value in any column, not only user_rating
ratings.dropna(inplace=True)
ratings.head()

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:#5642C5;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;">

              Computing the average user rating for each anime
</p>
</div>

In [None]:
# Mean user_rating per anime_id; as_index=False keeps anime_id as a regular column
ratings_by_anime = ratings.groupby(['anime_id'], as_index=False)
user_ratings = ratings_by_anime['user_rating'].mean()

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:#5642C5;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;">

              Merging datasets
</p>
</div>

In [None]:
# Inner join on anime_id: only anime present in both tables are kept
anime_user_rating = anime.merge(user_ratings, on='anime_id', how='inner')

In [None]:
# Preview the first three rows of the merged table
anime_user_rating.head(n=3)

<h1 style="text-align:center; color:blue" id="recommendation"> Recommendation engine </h1>

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:#5642C5;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;">

              Choosing the columns that I think are important
</p>
</div>

In [None]:
# Columns considered important for the recommendation
columns = [
    'name',
    'genre',
    'type',
    'rating',
    'user_rating',
]

In [None]:
# Missing values per selected column of the merged table
anime_user_rating[columns].isnull().sum()

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:#5642C5;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;">

              Create a single column that combines all of these important columns
</p>
</div>

In [None]:
def get_important_features(data):
    """Build one space-separated feature string per row of ``data``.

    Each string has the form ``"<name> <genre> <type> <rating> <user_rating>"``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``name``, ``genre``, ``type``, ``rating`` and
        ``user_rating``. ``name``, ``genre`` and ``type`` must hold strings.

    Returns
    -------
    list of str
        One entry per row, in row order; empty when ``data`` has no rows.

    Raises
    ------
    TypeError
        If ``name``, ``genre`` or ``type`` holds a non-string value (e.g. NaN).
    """
    # Iterate over the underlying arrays so rows are visited by position.
    # Looking values up as data[col][i] goes through the index labels and fails
    # (or picks the wrong row) whenever the index is not exactly 0..n-1, e.g.
    # after rows have been filtered or dropped.
    rows = zip(
        data['name'].to_numpy(),
        data['genre'].to_numpy(),
        data['type'].to_numpy(),
        data['rating'].to_numpy(),
        data['user_rating'].to_numpy(),
    )
    return [
        ' '.join([name, genre, kind, str(rating), str(user_rating)])
        for name, genre, kind, rating, user_rating in rows
    ]

In [None]:
# Store the combined feature string of every row in a new column
feature_strings = get_important_features(anime_user_rating)
anime_user_rating['important_features'] = feature_strings

In [None]:
# Check the new important_features column on the first three rows
anime_user_rating.head(n=3)

In [None]:
# Token-count matrix: one row per anime, built from its feature string
vectorizer = CountVectorizer()
cm = vectorizer.fit_transform(anime_user_rating['important_features'])

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:#5642C5;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;">

              This matrix shows the pairwise similarity between anime. For instance, the similarity between the first anime and the second one is 0.08...
</p>
</div>

In [None]:
# Pairwise cosine similarity between all rows of the count matrix
cs = cosine_similarity(X=cm)
print(cs)

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:#5642C5;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;">

              I have only watched some episodes of Naruto, so I want to find something similar to it.
</p>
</div>

In [None]:
title = 'Naruto'
# `cs` has one row per row of `anime_user_rating`, so it must be indexed by the
# row position of the chosen title. The value of the dataset's `anime_id`
# column is an unrelated identifier and would select the wrong similarity row.
# The variable keeps its original name because the next cell reads it.
title_positions = np.flatnonzero((anime_user_rating['name'] == title).to_numpy())
if title_positions.size == 0:
    raise IndexError(f'No anime named {title!r} in anime_user_rating')
anime_id = int(title_positions[0])

In [None]:
# Pair every similarity value in the selected row of `cs` with its column position
scores = [(position, similarity) for position, similarity in enumerate(cs[anime_id])]

In [None]:
# Highest similarity first, then discard the top entry
scores_descending = sorted(scores, key=lambda pair: pair[1], reverse=True)
sorted_scores = scores_descending[1:]

In [None]:
print('The 5 most recommended anime to', title, 'are:\n' )
for rank, (row_position, _similarity) in enumerate(sorted_scores[:5], start=1):
    # The first element of each pair comes from enumerating a row of `cs`, so
    # it is a row position in `anime_user_rating`. Resolve it positionally;
    # matching it against the `anime_id` column would return an unrelated
    # title, or fail when no anime has that id.
    movie_title = anime_user_rating['name'].iloc[row_position]
    print(rank, movie_title)

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:#5642C5;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 10px;
              color:white;">

              According to the recommendations, Ghost in the Shell will be a nice choice for me🙃
              Maybe I will try it and evaluate the accuracy of this system.
    
              For now, thanks a lot for your time. I hope you found something interesting here. Please let me know if I did something wrong or if something can be improved. Thanks.
</p>
</div>