# Recommendation System

## import dependencies

In [3]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder, MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import warnings 
warnings.simplefilter(action='ignore')

In [4]:
# Load the dataset
file_path = "anime.csv"  # Change this path if needed
df = pd.read_csv(file_path)

In [5]:
df

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [7]:
df["episodes"] = pd.to_numeric(df["episodes"], errors="coerce")

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  11954 non-null  float64
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(2), int64(2), object(3)
memory usage: 672.5+ KB


In [9]:
df.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes    340
rating      230
members       0
dtype: int64

In [10]:
# Handle missing values
df["genre"].fillna("Unknown", inplace=True)
df["type"].fillna("Unknown", inplace=True)
df["rating"].fillna(df["rating"].median(), inplace=True)
df["episodes"] = df.groupby("type")["episodes"].transform(lambda x: x.fillna(x.median()))

In [11]:
# Process genre: One-hot encoding
df["genre"] = df["genre"].apply(lambda x: x.split(", ") if x != "Unknown" else ["Unknown"])
mlb = MultiLabelBinarizer()
genre_encoded = pd.DataFrame(mlb.fit_transform(df["genre"]), columns=mlb.classes_, index=df.index)


In [12]:
# Encode 'type' column
label_encoder = LabelEncoder()
df["type_encoded"] = label_encoder.fit_transform(df["type"])

In [13]:
# Transform and normalize numerical features
df["members"] = np.log1p(df["members"])  # Log-transform to reduce skewness

scaler = MinMaxScaler()
df[["rating", "members", "episodes"]] = scaler.fit_transform(df[["rating", "members", "episodes"]])

# Combine features for similarity calculation
df_encoded = pd.concat([df[["rating", "members", "episodes", "type_encoded"]], genre_encoded], axis=1)

In [14]:
# Ensure no NaN values before computing similarity
df_encoded.fillna(0, inplace=True)

In [15]:
# Compute cosine similarity matrix
cosine_sim = cosine_similarity(df_encoded)

In [16]:
# Recommendation function
def get_recommendations(anime_name, top_n=5):
    if anime_name not in df["name"].values:
        return f"Anime '{anime_name}' not found in dataset."
    
    # Get index of the anime
    idx = df[df["name"] == anime_name].index[0]
    
    # Get similarity scores and sort
    sim_scores = sorted(enumerate(cosine_sim[idx]), key=lambda x: x[1], reverse=True)[1:top_n+1]
    
    # Get recommended anime indices
    anime_indices = [i[0] for i in sim_scores]
    
    return df["name"].iloc[anime_indices].tolist()

In [48]:
# Interactive input loop
while True:
    selected_anime = input("Enter anime name (or type 'exit' to quit): ")
    if selected_anime.lower() == 'exit':
        break
    
    recommendations = get_recommendations(selected_anime)
    print("Recommended anime:", recommendations)

Enter anime name (or type 'exit' to quit):  Dragon Ball Z


Recommended anime: ['Dragon Ball Kai', 'Dragon Ball Super', 'Dragon Ball Kai (2014)', 'Dragon Ball GT: Goku Gaiden! Yuuki no Akashi wa Suushinchuu', 'Dragon Ball Z: Summer Vacation Special']


Enter anime name (or type 'exit' to quit):  exit
