In [1]:
import pandas as pd
import numpy as np

In [2]:
# Location of the anime catalogue CSV.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists.
ANIME_CSV_PATH = r"C:\Users\NIHAL_MIRAJ\Desktop\Recommendation System\anime.csv"
df = pd.read_csv(ANIME_CSV_PATH)

In [3]:
# Preview the first five rows of the loaded catalogue.
df.head(n=5)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [4]:
# Count missing values per column.
df.isna().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [20]:
from sklearn.preprocessing import MultiLabelBinarizer, MinMaxScaler
import numpy as np

# Data preprocessing: handle missing values.
#
# Each filled column is assigned back to the frame rather than calling
# `df[col].fillna(..., inplace=True)`. The in-place form operates on a
# selected column object (chained assignment): pandas 2.x emits a
# FutureWarning for it, and with Copy-on-Write enabled it leaves `df`
# unchanged, so the NaNs would survive into the later genre split.
df['genre'] = df['genre'].fillna('Unknown')    # missing genres -> 'Unknown'
df['type'] = df['type'].fillna('Unknown')      # missing types  -> 'Unknown'
df['rating'] = df['rating'].fillna(df['rating'].mean())  # missing ratings -> column mean

In [21]:
# Coerce episode counts to numbers; entries that cannot be parsed become NaN
# and are then replaced with 0.
episodes_numeric = pd.to_numeric(df['episodes'], errors='coerce')
df['episodes'] = episodes_numeric.fillna(0)

In [22]:
def _split_genres(genre_text):
    """Break a ', '-delimited genre string into a list of genre names."""
    return genre_text.split(', ')


# Turn each title's genre string into a list of individual genres.
df['genre_list'] = df['genre'].apply(_split_genres)

In [23]:
# Multi-hot encode the per-title genre lists: one indicator column per
# distinct genre label found in the data (library defaults spelled out).
mlb = MultiLabelBinarizer(classes=None, sparse_output=False)
genre_matrix = mlb.fit_transform(df['genre_list'])

In [24]:
# Rescale the numeric columns to the [0, 1] range so they are comparable
# with the 0/1 genre indicators.
numeric_columns = ['rating', 'members']
scaler = MinMaxScaler(feature_range=(0, 1))
normalized_features = scaler.fit_transform(df[numeric_columns])

In [25]:
# Build the final feature matrix: genre indicators followed by the scaled
# numeric columns, joined column-wise (rows stay aligned by position).
combined_features = np.concatenate([genre_matrix, normalized_features], axis=1)

In [26]:
# Sanity check: (number of titles, number of feature columns).
np.shape(combined_features)

(12210, 45)

In [27]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between every pair of rows of the feature
# matrix; with Y=None the matrix is compared against itself.
# NOTE(review): the result is a dense (n_titles x n_titles) array, which is
# large for a catalogue of this size.
cosine_sim = cosine_similarity(combined_features, Y=None)

In [28]:
def recommend_anime(anime_title, df, cosine_sim, top_n=10):
    """
    Recommend the titles most similar to ``anime_title``.

    Parameters
    ----------
    anime_title : str
        Exact title to look up in ``df['name']``. If several rows share the
        title, the first one (by row position) is used.
    df : pandas.DataFrame
        Catalogue containing at least the columns 'name', 'genre', 'rating'
        and 'members'. Row ``i`` *by position* must correspond to row and
        column ``i`` of ``cosine_sim``; the index labels of ``df`` are
        irrelevant.
    cosine_sim : array-like of shape (len(df), len(df))
        Precomputed pairwise similarity matrix.
    top_n : int, default 10
        Maximum number of recommendations; values <= 0 yield an empty frame.

    Returns
    -------
    pandas.DataFrame or str
        The 'name', 'genre', 'rating' and 'members' columns of the most
        similar rows, best match first, or a "not found" message string when
        the title is absent from ``df``.
    """
    title_positions = np.flatnonzero((df['name'] == anime_title).to_numpy())
    if title_positions.size == 0:
        return f"Anime '{anime_title}' not found in the dataset."

    # Use the row *position*, not the index label: both `cosine_sim` and the
    # `.iloc` lookup below are positional, so a label would select the wrong
    # row (or raise) whenever `df` does not carry a default RangeIndex.
    idx = int(title_positions[0])

    scores = np.asarray(cosine_sim[idx], dtype=float)

    # Stable descending sort: ties keep their original row order.
    ranked = np.argsort(-scores, kind='stable')

    # Drop the query row explicitly. Skipping "the first result" is not safe,
    # because another title with a tied score can sort ahead of it.
    ranked = ranked[ranked != idx]

    top_indices = ranked[:max(int(top_n), 0)]

    return df.iloc[top_indices][['name', 'genre', 'rating', 'members']]

In [29]:
# Demo: the ten closest titles to "Kimi no Na wa." from the precomputed matrix.
recommendations = recommend_anime(
    "Kimi no Na wa.",
    df,
    cosine_sim,
    top_n=10,
)
recommendations

Unnamed: 0,name,genre,rating,members
5805,Wind: A Breath of Heart OVA,"Drama, Romance, School, Supernatural",6.35,2043
6394,Wind: A Breath of Heart (TV),"Drama, Romance, School, Supernatural",6.14,7778
1111,Aura: Maryuuin Kouga Saigo no Tatakai,"Comedy, Drama, Romance, School, Supernatural",7.67,22599
504,"Clannad: After Story - Mou Hitotsu no Sekai, K...","Drama, Romance, School",8.02,138364
208,Kokoro ga Sakebitagatterunda.,"Drama, Romance, School",8.32,59652
1201,Angel Beats!: Another Epilogue,"Drama, School, Supernatural",7.63,134180
1435,True Tears,"Drama, Romance, School",7.55,118644
1907,Myself; Yourself,"Drama, Romance, School",7.41,115075
1631,Kimikiss Pure Rouge,"Drama, Romance, School",7.48,58211
2300,Koi to Senkyo to Chocolate,"Drama, Romance, School",7.3,91552


In [30]:
# On-demand similarity computation
def recommend_anime_ondemand(anime_title, df, features, top_n=10):
    """
    Recommend the titles most similar to ``anime_title``, computing cosine
    similarity for that single title only (no full pairwise matrix needed).

    Parameters
    ----------
    anime_title : str
        Exact title to look up in ``df['name']``. If several rows share the
        title, the first one (by row position) is used.
    df : pandas.DataFrame
        Catalogue containing at least the columns 'name', 'genre', 'rating'
        and 'members'. Row ``i`` *by position* must correspond to row ``i``
        of ``features``; the index labels of ``df`` are irrelevant.
    features : numpy.ndarray of shape (len(df), n_features)
        Feature matrix, one row per title.
    top_n : int, default 10
        Maximum number of recommendations; values <= 0 yield an empty frame.

    Returns
    -------
    pandas.DataFrame or str
        The 'name', 'genre', 'rating' and 'members' columns of the most
        similar rows, best match first, or a "not found" message string when
        the title is absent from ``df``.
    """
    title_positions = np.flatnonzero((df['name'] == anime_title).to_numpy())
    if title_positions.size == 0:
        return f"Anime '{anime_title}' not found in the dataset."

    # Use the row *position*, not the index label: `features` and the `.iloc`
    # lookup below are positional, so a label would select the wrong row (or
    # raise) whenever `df` does not carry a default RangeIndex.
    idx = int(title_positions[0])

    # Keep the target as a 2-D (1, n_features) array, as the pairwise API expects.
    target_features = features[idx].reshape(1, -1)

    # Similarity of the target against every row; take the single result row.
    sim_scores = np.asarray(cosine_similarity(target_features, features)[0], dtype=float)

    # Stable descending sort: ties keep their original row order.
    ranked = np.argsort(-sim_scores, kind='stable')

    # Drop the query row explicitly. Skipping "the first result" is not safe,
    # because another title with identical features ties at the top score.
    ranked = ranked[ranked != idx]

    top_indices = ranked[:max(int(top_n), 0)]

    return df.iloc[top_indices][['name', 'genre', 'rating', 'members']]

In [31]:
# Demo: the ten closest titles to "Kimi no Na wa.", scored on demand from the
# feature matrix instead of the precomputed similarity matrix.
recommendations = recommend_anime_ondemand(
    "Kimi no Na wa.",
    df,
    combined_features,
    top_n=10,
)
recommendations

Unnamed: 0,name,genre,rating,members
5805,Wind: A Breath of Heart OVA,"Drama, Romance, School, Supernatural",6.35,2043
6394,Wind: A Breath of Heart (TV),"Drama, Romance, School, Supernatural",6.14,7778
1111,Aura: Maryuuin Kouga Saigo no Tatakai,"Comedy, Drama, Romance, School, Supernatural",7.67,22599
504,"Clannad: After Story - Mou Hitotsu no Sekai, K...","Drama, Romance, School",8.02,138364
208,Kokoro ga Sakebitagatterunda.,"Drama, Romance, School",8.32,59652
1201,Angel Beats!: Another Epilogue,"Drama, School, Supernatural",7.63,134180
1435,True Tears,"Drama, Romance, School",7.55,118644
1907,Myself; Yourself,"Drama, Romance, School",7.41,115075
1631,Kimikiss Pure Rouge,"Drama, Romance, School",7.48,58211
2300,Koi to Senkyo to Chocolate,"Drama, Romance, School",7.3,91552
