In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from scipy.sparse import hstack
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the data
anime_df = pd.read_csv('D:\\anime_recomendation_system\\anime.csv')
anime_synopsis_df = pd.read_csv('D:\\anime_recomendation_system\\anime_with_synopsis.csv')
anime_ratings_df = pd.read_csv('D:\\anime_recomendation_system\\rating.csv')

In [3]:
anime_df

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [4]:
anime_synopsis_df

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...
...,...,...,...,...,...
16209,48481,Daomu Biji Zhi Qinling Shen Shu,Unknown,"Adventure, Mystery, Supernatural",No synopsis information has been added to this...
16210,48483,Mieruko-chan,Unknown,"Comedy, Horror, Supernatural",ko is a typical high school student whose life...
16211,48488,Higurashi no Naku Koro ni Sotsu,Unknown,"Mystery, Dementia, Horror, Psychological, Supe...",Sequel to Higurashi no Naku Koro ni Gou .
16212,48491,Yama no Susume: Next Summit,Unknown,"Adventure, Slice of Life, Comedy",New Yama no Susume anime.


In [5]:
anime_synopsis_df = anime_synopsis_df.rename(columns={'Name': 'name'})

In [6]:
# Strip whitespace and convert to lowercase for consistent matching
anime_df['name'] = anime_df['name'].str.strip().str.lower()
anime_synopsis_df['name'] = anime_synopsis_df['name'].str.strip().str.lower()

In [7]:
# Merge the datasets on 'anime_name'
merged_df = pd.merge(anime_df, anime_synopsis_df, on='name', how='inner')

# Display the first few rows to verify the merge

merged_df.head(10
            )

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,MAL_ID,Score,Genres,sypnopsis
0,32281,kimi no na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,32281,8.96,"Romance, Supernatural, School, Drama","suha Miyamizu, a high school girl, yearns to l..."
1,5114,fullmetal alchemist: brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,5114,9.19,"Action, Military, Adventure, Comedy, Drama, Ma...","""In order for something to be obtained, someth..."
2,28977,gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262,28977,9.1,"Action, Comedy, Historical, Parody, Samurai, S...","Gintoki, Shinpachi, and Kagura return as the f..."
3,9253,steins;gate,"Sci-Fi, Thriller",TV,24,9.17,673572,9253,9.11,"Thriller, Sci-Fi",The self-proclaimed mad scientist Rintarou Oka...
4,11061,hunter x hunter (2011),"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855,11061,9.1,"Action, Adventure, Fantasy, Shounen, Super Power",Hunter x Hunter is set in a world where Hunter...
5,820,ginga eiyuu densetsu,"Drama, Military, Sci-Fi, Space",OVA,110,9.11,80679,820,9.07,"Military, Sci-Fi, Space, Drama",The 150-year-long stalemate between the two in...
6,4181,clannad: after story,"Drama, Fantasy, Romance, Slice of Life, Supern...",TV,24,9.06,456749,4181,8.96,"Slice of Life, Comedy, Supernatural, Drama, Ro...","Clannad: After Story , the sequel to the criti..."
7,28851,koe no katachi,"Drama, School, Shounen",Movie,1,9.05,102733,28851,9.0,"Drama, School, Shounen","s a wild youth, elementary school student Shou..."
8,918,gintama,"Action, Comedy, Historical, Parody, Samurai, S...",TV,201,9.04,336376,918,8.96,"Action, Comedy, Historical, Parody, Samurai, S...","The Amanto, aliens from outer space, have inva..."
9,2904,code geass: hangyaku no lelouch r2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",TV,25,8.98,572888,2904,8.91,"Action, Military, Sci-Fi, Super Power, Drama, ...","One year has passed since the Black Rebellion,..."


In [8]:
# Check for any duplicates
print(merged_df.duplicated(subset='name').sum())

# If duplicates are found, remove them
merged_df = merged_df.drop_duplicates(subset='name')

# Final check
merged_df.head()


2


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,MAL_ID,Score,Genres,sypnopsis
0,32281,kimi no na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,32281,8.96,"Romance, Supernatural, School, Drama","suha Miyamizu, a high school girl, yearns to l..."
1,5114,fullmetal alchemist: brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,5114,9.19,"Action, Military, Adventure, Comedy, Drama, Ma...","""In order for something to be obtained, someth..."
2,28977,gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262,28977,9.1,"Action, Comedy, Historical, Parody, Samurai, S...","Gintoki, Shinpachi, and Kagura return as the f..."
3,9253,steins;gate,"Sci-Fi, Thriller",TV,24,9.17,673572,9253,9.11,"Thriller, Sci-Fi",The self-proclaimed mad scientist Rintarou Oka...
4,11061,hunter x hunter (2011),"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855,11061,9.1,"Action, Adventure, Fantasy, Shounen, Super Power",Hunter x Hunter is set in a world where Hunter...


In [9]:
# Drop specific columns from the merged DataFrame
columns_to_drop = ['genre', 'Score','MAL_ID']  # Replace with actual column names you want to drop
merged_df = merged_df.drop(columns=columns_to_drop)

In [10]:
merged_df

Unnamed: 0,anime_id,name,type,episodes,rating,members,Genres,sypnopsis
0,32281,kimi no na wa.,Movie,1,9.37,200630,"Romance, Supernatural, School, Drama","suha Miyamizu, a high school girl, yearns to l..."
1,5114,fullmetal alchemist: brotherhood,TV,64,9.26,793665,"Action, Military, Adventure, Comedy, Drama, Ma...","""In order for something to be obtained, someth..."
2,28977,gintama°,TV,51,9.25,114262,"Action, Comedy, Historical, Parody, Samurai, S...","Gintoki, Shinpachi, and Kagura return as the f..."
3,9253,steins;gate,TV,24,9.17,673572,"Thriller, Sci-Fi",The self-proclaimed mad scientist Rintarou Oka...
4,11061,hunter x hunter (2011),TV,148,9.13,425855,"Action, Adventure, Fantasy, Shounen, Super Power",Hunter x Hunter is set in a world where Hunter...
...,...,...,...,...,...,...,...,...
10041,730,houkago no shokuinshitsu,OVA,2,5.23,4550,"Drama, Romance, Shounen Ai",Two teachers and their ups and downs of being ...
10042,6546,pico: my little summer story,OVA,1,5.21,5551,Yaoi,dited version of the first OVA titled pico~ Bo...
10043,30663,docchi mo maid,ONA,1,5.15,223,"Action, Comedy, Magic, Ecchi, Shoujo Ai","Izumi, a 12-year-old schoolgirl, suddenly gets..."
10044,34382,citrus,,Unknown,,2459,"Drama, Romance, School, Shoujo Ai",During the summer of her freshman year of high...


In [11]:
#checking null values
merged_df.isnull().sum()

anime_id       0
name           0
type          15
episodes       0
rating       152
members        0
Genres         0
sypnopsis      0
dtype: int64

In [12]:
merged_df = merged_df.dropna()
merged_df['type'].fillna('Unknown', inplace=True)
merged_df.isnull().sum().sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_df['type'].fillna('Unknown', inplace=True)


0

In [13]:
# Standardizing text data
merged_df['sypnopsis'] = merged_df['sypnopsis'].str.lower().str.replace('[^\w\s]', '')

# Standardize Genres
merged_df['Genres'] = merged_df['Genres'].apply(lambda x: x.lower().strip())


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_df['sypnopsis'] = merged_df['sypnopsis'].str.lower().str.replace('[^\w\s]', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_df['Genres'] = merged_df['Genres'].apply(lambda x: x.lower().strip())


In [14]:
# TF-IDF Vectorizer for synopsis
tfidf = TfidfVectorizer(stop_words='english')
merged_df['sypnopsis'] = merged_df['sypnopsis'].fillna('')
tfidf_matrix = tfidf.fit_transform(merged_df['sypnopsis'])

# One-Hot Encoding for genres
mlb = MultiLabelBinarizer()
merged_df['Genres'] = merged_df['Genres'].apply(lambda x: x.split(', ') if isinstance(x, str) else [])
genre_matrix = mlb.fit_transform(merged_df['Genres'])


feature_matrix = hstack([tfidf_matrix, genre_matrix])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_df['sypnopsis'] = merged_df['sypnopsis'].fillna('')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_df['Genres'] = merged_df['Genres'].apply(lambda x: x.split(', ') if isinstance(x, str) else [])


In [15]:
# Compute cosine similarity matrix for synopses
anime_similarity = cosine_similarity(feature_matrix, feature_matrix)


In [39]:
def get_anime_recommendations(title, num_recommendations=10):
    try:
        idx = merged_df[merged_df['name'] == title].index[0]
    except IndexError:
        return f"No anime found with the title '{title}'. Please check the title and try again."

    # Get similarity scores
    sim_scores = list(enumerate(anime_similarity[idx]))

    # Sort by similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get top recommendations, excluding the first one since it's the anime itself
    sim_scores = sim_scores[1:num_recommendations + 1]

    # Get anime indices
    anime_indices = [i[0] for i in sim_scores]

    # Return the titles of recommended animes
    return merged_df['name'].iloc[anime_indices]

# Example usage
recommendations = get_anime_recommendations('boruto: naruto the movie')
print(recommendations)


983     boruto: naruto the movie - naruto ga hokage ni...
1306          naruto: shippuuden movie 4 - the lost tower
1396    naruto: shippuuden movie 3 - hi no ishi wo tsu...
2655    naruto soyokazeden movie: naruto to mashin to ...
1189                                          naruto x ut
752                                                naruto
560                                    naruto: shippuuden
157                                katekyo hitman reborn!
7025                        battle spirits: ryuuko no ken
6821                              kyutai panic adventure!
Name: name, dtype: object
