In [None]:
# Import libraries for data handling and recommendation system

import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

In [None]:
# Read the anime catalogue from CSV into a DataFrame and preview the first rows

ANIME_CSV_PATH = "/content/anime.csv"
df = pd.read_csv(ANIME_CSV_PATH)
df.head()


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [None]:
# Check dataset shape: the result is a (rows, columns) tuple

df.shape


(12294, 7)

In [None]:
# Summarise column dtypes and non-null counts to spot columns with missing values
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [None]:
# Fill missing values with appropriate defaults:
#   genre   -> the placeholder label "Unknown"
#   rating  -> the mean of the known ratings
#   members -> the median member count
# NOTE(review): the df.info() output above reports no missing `members`
# values, so the last fill looks like a defensive no-op; `type` also has
# missing values but is left as-is here.
df["genre"] = df["genre"].fillna("Unknown")
df["rating"] = df["rating"].fillna(df["rating"].mean())
df["members"] = df["members"].fillna(df["members"].median())

df.head()


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [None]:
# Select relevant features for recommendation
# NOTE(review): `features` is only previewed here; the later cells visible in
# this notebook build the model inputs from `df` and `genre_dummies` directly.

features = df[["genre", "rating", "members"]]
features.head()


Unnamed: 0,genre,rating,members
0,"Drama, Romance, School, Supernatural",9.37,200630
1,"Action, Adventure, Drama, Fantasy, Magic, Mili...",9.26,793665
2,"Action, Comedy, Historical, Parody, Samurai, S...",9.25,114262
3,"Sci-Fi, Thriller",9.17,673572
4,"Action, Comedy, Historical, Parody, Samurai, S...",9.16,151266


In [None]:

# Convert categorical genre feature into numerical form (one 0/1 column per genre).
# Genres are stored as a single string per anime, separated by ", " (comma + space).
# Splitting on a bare "," keeps the leading space on every genre after the first,
# which yields duplicate columns such as "Drama" and " Drama" and makes the
# similarity depend on where a genre appears in the list. Normalising the
# whitespace around commas first guarantees exactly one column per genre.

genre_clean = (
    df["genre"]
    .str.replace(r"\s*,\s*", ",", regex=True)
    .str.strip()
)
genre_dummies = genre_clean.str.get_dummies(sep=",")
genre_dummies.head()


Unnamed: 0,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,Harem,...,Shounen,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Unknown,Vampire,Yaoi
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Build the model input: the encoded genre columns placed side by side with
# the numeric rating and members columns

rating_members = df[["rating", "members"]]
final_features = pd.concat([genre_dummies, rating_members], axis="columns")

final_features.head()


Unnamed: 0,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,Harem,...,Space,Sports,Super Power,Supernatural,Thriller,Unknown,Vampire,Yaoi,rating,members
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,9.37,200630
1,1,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,9.26,793665
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,9.25,114262
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,9.17,673572
4,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,9.16,151266


In [None]:
# Rescale rating and members with MinMaxScaler so they sit on a scale
# comparable to the 0/1 genre indicator columns

scaled_cols = ["rating", "members"]
scaler = MinMaxScaler()
final_features[scaled_cols] = scaler.fit_transform(final_features[scaled_cols])

final_features.head()


Unnamed: 0,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,Harem,...,Space,Sports,Super Power,Supernatural,Thriller,Unknown,Vampire,Yaoi,rating,members
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.92437,0.197872
1,1,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0.911164,0.78277
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.909964,0.112689
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.90036,0.664325
4,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.89916,0.149186


In [None]:

# Compute cosine similarity between anime
# Passing a single matrix compares every row with every other row, so the
# result is square: one row and one column per anime.
# NOTE(review): at 12294 x 12294 this is roughly 1.2 GB if stored as float64 —
# confirm the runtime has enough memory.

similarity_matrix = cosine_similarity(final_features)
similarity_matrix.shape


(12294, 12294)

In [None]:
# NOTE(review): this cell repeats the previous one. cosine_similarity is already
# imported in the first cell, and similarity_matrix is recomputed from the same
# final_features, so the cell is redundant.
from sklearn.metrics.pairwise import cosine_similarity

similarity_matrix = cosine_similarity(final_features)
similarity_matrix.shape


(12294, 12294)

In [None]:
# Function to recommend similar anime

def recommend_anime(anime_title, top_n=5):
    """Return the names of the anime most similar to ``anime_title``.

    Similarity scores are read from the module-level ``similarity_matrix``;
    row ``i`` of that matrix is taken to describe the ``i``-th row of ``df``.

    Parameters
    ----------
    anime_title : str
        Exact value to look up in ``df["name"]``. If several rows share the
        name, the first one is used as the query.
    top_n : int, default 5
        Maximum number of recommendations. Values <= 0 give an empty list.

    Returns
    -------
    list of str or str
        Recommended names ordered from most to least similar, or the string
        ``"Anime not found"`` when no row matches ``anime_title``.
    """
    # Work with row positions (not index labels) so the lookup always lines up
    # with the rows of similarity_matrix, whatever index df carries.
    matches = np.flatnonzero((df["name"] == anime_title).to_numpy())

    if matches.size == 0:
        return "Anime not found"

    idx = int(matches[0])
    scores = np.asarray(similarity_matrix[idx])

    # Stable descending sort: ties keep their original row order.
    ranked = np.argsort(-scores, kind="stable")

    # Drop the query explicitly instead of assuming it is ranked first; an
    # anime with an identical feature vector can tie with (or, through
    # floating-point noise, outrank) the query itself.
    ranked = ranked[ranked != idx][:max(top_n, 0)]

    return df["name"].iloc[ranked].tolist()



In [None]:
# Call the recommend_anime function to get recommendations
# The first title in df is used as the query; the bare `recommendations`
# expression on the last line is what the notebook shows as the cell result.
test_anime = df["name"].iloc[0]
print("Selected anime:", test_anime)

recommendations = recommend_anime(test_anime, 5)
print("Recommended anime:")
recommendations


Selected anime: Kimi no Na wa.
Recommended anime:


['Wind: A Breath of Heart OVA',
 'Wind: A Breath of Heart (TV)',
 'Clannad: After Story - Mou Hitotsu no Sekai, Kyou-hen',
 'Kokoro ga Sakebitagatterunda.',
 'Angel Beats!: Another Epilogue']

In [None]:
# NOTE(review): exact repeat of the previous cell — same query (first title in
# df), same top-5 request.
test_anime = df["name"].iloc[0]
print("Selected anime:", test_anime)

recommendations = recommend_anime(test_anime, 5)
print("Recommended anime:")
recommendations

Selected anime: Kimi no Na wa.
Recommended anime:


['Wind: A Breath of Heart OVA',
 'Wind: A Breath of Heart (TV)',
 'Clannad: After Story - Mou Hitotsu no Sekai, Kyou-hen',
 'Kokoro ga Sakebitagatterunda.',
 'Angel Beats!: Another Epilogue']

In [None]:
# Combine numerical and encoded categorical features
# NOTE(review): repeats an earlier cell. It rebuilds final_features with the
# raw (unscaled) rating and members columns, so the normalisation step has to
# be applied again afterwards.

final_features = pd.concat(
    [genre_dummies, df[["rating", "members"]]],
    axis=1
)

final_features.head()

Unnamed: 0,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,Harem,...,Space,Sports,Super Power,Supernatural,Thriller,Unknown,Vampire,Yaoi,rating,members
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,9.37,200630
1,1,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,9.26,793665
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,9.25,114262
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,9.17,673572
4,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,9.16,151266


In [None]:
# Normalize numerical features to bring them to same scale

scaler = MinMaxScaler()
final_features[["rating", "members"]] = scaler.fit_transform(
    final_features[["rating", "members"]]
)

final_features.head()

Unnamed: 0,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,Harem,...,Space,Sports,Super Power,Supernatural,Thriller,Unknown,Vampire,Yaoi,rating,members
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.92437,0.197872
1,1,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0.911164,0.78277
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.909964,0.112689
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.90036,0.664325
4,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.89916,0.149186


In [None]:
# Compute cosine similarity between anime
# NOTE(review): repeats an earlier cell; recomputes the full pairwise matrix
# from the rebuilt final_features.

similarity_matrix = cosine_similarity(final_features)
similarity_matrix.shape

(12294, 12294)

In [None]:
# Load anime dataset
# NOTE(review): repeats the load near the top of the notebook. pandas is
# already imported in the first cell, and re-reading the CSV replaces df with
# the raw data, undoing the earlier fillna step until it is applied again.
import pandas as pd
df = pd.read_csv("/content/anime.csv")
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [None]:
# Fill missing values with defaults: "Unknown" genre, mean rating, median members.
# NOTE(review): repeats an earlier cell; needed again because df was just re-read
# from the CSV.
df["genre"] = df["genre"].fillna("Unknown")
df["rating"] = df["rating"].fillna(df["rating"].mean())
df["members"] = df["members"].fillna(df["members"].median())

df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


A notebook only auto-displays the value of the last expression in a cell. Wrapping each call to `recommend_anime` in `print()` ensures its result is always displayed, regardless of how or where you call it within your code.

In [None]:
# Show both outcomes of recommend_anime: a list of names for a title that is
# present in df, and the "Anime not found" message for one that is not.
print(recommend_anime("One Piece", top_n=3))
print("\n---\n") # Separator for clarity
print(recommend_anime("Anime not in database"))

['One Piece: Episode of Nami - Koukaishi no Namida to Nakama no Kizuna', 'One Piece: Episode of Merry - Mou Hitori no Nakama no Monogatari', 'One Piece: Episode of Sabo - 3 Kyoudai no Kizuna Kiseki no Saikai to Uketsugareru Ishi']

---

Anime not found


In [None]:
# Test the recommendation system
# Uses the first title in df as the query and prints its five recommendations.
test_name = df["name"].iloc[0]
print("Anime selected:", test_name)
print(recommend_anime(test_name, 5))


Anime selected: Kimi no Na wa.
['Wind: A Breath of Heart OVA', 'Wind: A Breath of Heart (TV)', 'Clannad: After Story - Mou Hitotsu no Sekai, Kyou-hen', 'Kokoro ga Sakebitagatterunda.', 'Angel Beats!: Another Epilogue']


In [None]:



# Recommend the five titles most similar to "Naruto" (the name must match exactly)
recommend_anime("Naruto", top_n=5)


['Naruto: Shippuuden',
 'Naruto: Shippuuden Movie 4 - The Lost Tower',
 'Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsugu Mono',
 'Boruto: Naruto the Movie',
 'Naruto x UT']

In [None]:
# Split data into train and test (index based)
# 80/20 split of the row index with a fixed random_state so it is reproducible.
# NOTE(review): in the cells visible here only len(test_idx) is used afterwards;
# similarity_matrix is built from all rows, not just the training rows.

train_idx, test_idx = train_test_split(
    df.index, test_size=0.2, random_state=42
)

print(train_idx.shape, test_idx.shape)

(9835,) (2459,)


In [None]:
# Dummy evaluation example (for academic purpose)
# NOTE(review): y_true and y_pred are both all-ones arrays of the same length,
# so precision, recall and F1 are 1.0 by construction. recommend_anime and
# similarity_matrix are never consulted here, so these numbers say nothing
# about the quality of the recommendations.

y_true = np.ones(len(test_idx))
y_pred = np.ones(len(test_idx))

print("Precision:", precision_score(y_true, y_pred))
print("Recall:", recall_score(y_true, y_pred))
print("F1 Score:", f1_score(y_true, y_pred))


Precision: 1.0
Recall: 1.0
F1 Score: 1.0
