# Anime Rcommender System

In [29]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix

In [2]:
# Load the dataset
anime = pd.read_csv('anime.csv')
ratings = pd.read_csv('rating.csv')

In [13]:
# Merge the datasets
data = ratings.merge(anime, on='anime_id', how='inner', suffixes=('_user', '_anime'))

In [14]:
# Resolve duplicates by averaging the ratings
data = data.groupby(['user_id', 'name'], as_index=False).agg({
    'rating_user': 'mean',
    'anime_id': 'first',  # Retain the first anime_id (since it's consistent per anime)
    'genre': 'first',     # Retain other columns if needed
    'type': 'first',
    'episodes': 'first',
    'rating_anime': 'first',
    'members': 'first'
})

In [15]:
# Verify that duplicates are resolved
print("Data after resolving duplicates:")
print(data[data.duplicated(subset=['user_id', 'name'], keep=False)])  # Should be empty

Data after resolving duplicates:
Empty DataFrame
Columns: [user_id, name, rating_user, anime_id, genre, type, episodes, rating_anime, members]
Index: []


In [23]:
# Replace -1.0 with NaN in the rating column
data['rating_user'] = data['rating_user'].replace(-1.0, np.nan)

In [24]:
# Check the distribution after replacing -1.0
print(data['rating_user'].value_counts())


rating_user
8.0     1646014
7.0     1375288
9.0     1254093
10.0     955714
6.0      637773
5.0      282805
4.0      104291
3.0       41453
2.0       23150
1.0       16649
6.5           1
8.5           1
Name: count, dtype: int64


In [38]:
# Create the user-item matrix
# user_item_matrix = data.pivot(index='user_id', columns='name', values='rating_user').fillna(0)
user_item_matrix = data.pivot(index='user_id', columns='name', values='rating_user')

In [39]:
# Convert to a NumPy array
ratings_matrix = user_item_matrix.to_numpy()

# Check the shape of the ratings matrix
print("\nShape of the Ratings Matrix:", ratings_matrix.shape)


Shape of the Ratings Matrix: (73515, 11196)


In [30]:
# Replace NaN values with 0 (for the purpose of applying SVD)
user_item_matrix_filled = user_item_matrix.fillna(0)

# Convert the user-item matrix to a sparse matrix format
ratings_matrix_sparse = csr_matrix(user_item_matrix_filled.values)

print("Sparse Ratings Matrix:")
print(ratings_matrix_sparse)

Sparse Ratings Matrix:
  (0, 3695)	10.0
  (0, 3699)	10.0
  (0, 3711)	10.0
  (0, 9517)	10.0
  (1, 5345)	10.0
  (2, 130)	7.0
  (2, 152)	6.0
  (2, 264)	8.0
  (2, 304)	7.0
  (2, 445)	10.0
  (2, 454)	8.0
  (2, 473)	8.0
  (2, 474)	7.0
  (2, 975)	6.0
  (2, 1026)	10.0
  (2, 1128)	8.0
  (2, 1174)	8.0
  (2, 1561)	6.0
  (2, 1729)	4.0
  (2, 1828)	6.0
  (2, 1835)	10.0
  (2, 1837)	6.0
  (2, 1936)	5.0
  (2, 1962)	7.0
  (2, 1963)	8.0
  :	:
  (73513, 9779)	9.0
  (73513, 9985)	9.0
  (73513, 9992)	9.0
  (73513, 9994)	8.0
  (73513, 10056)	10.0
  (73513, 10057)	10.0
  (73513, 10086)	8.0
  (73513, 10191)	8.0
  (73513, 10200)	7.0
  (73513, 10215)	10.0
  (73513, 10217)	9.0
  (73513, 10257)	8.0
  (73513, 10496)	9.0
  (73513, 10497)	9.0
  (73513, 10557)	8.0
  (73513, 10703)	9.0
  (73513, 10711)	10.0
  (73513, 10713)	7.0
  (73513, 10750)	7.0
  (73513, 10754)	7.0
  (73513, 11111)	9.0
  (73513, 11151)	9.0
  (73513, 11188)	9.0
  (73514, 2383)	9.0
  (73514, 3711)	9.0


In [44]:
# Apply Singular Value Decomposition (SVD)
svd = TruncatedSVD(n_components=50, random_state=42)
svd.fit(ratings_matrix_sparse)

# Get the decomposed matrices
U = svd.transform(ratings_matrix_sparse)  # User matrix
Sigma = np.diag(svd.singular_values_)     # Sigma matrix
Vt = svd.components_                      # Item matrix (transposed)

# Reconstruct the ratings matrix
ratings_pred = np.dot(U, np.dot(Sigma, Vt))

# print("\nReconstructed Ratings Matrix (Predictions):")
# print(ratings_pred)

In [34]:
import html

# Decode HTML entities in column names
user_item_matrix.columns = [html.unescape(col) for col in user_item_matrix.columns]

# Now print the columns to see the decoded anime names
print(user_item_matrix.columns.tolist())

# Search for the specific anime name in the columns
anime_name = '"Aesop" no Ohanashi yori: Ushi to Kaeru, Yokubatta Inu'
matching_animes = [col for col in user_item_matrix.columns if anime_name.lower() in col.lower()]
print(f"Found matching anime names: {matching_animes}")


['"0"', '"Aesop" no Ohanashi yori: Ushi to Kaeru, Yokubatta Inu', '"Bungaku Shoujo" Kyou no Oyatsu: Hatsukoi', '"Bungaku Shoujo" Memoire', '"Bungaku Shoujo" Movie', '"Eiji"', '.hack//G.U. Returner', '.hack//G.U. Trilogy', '.hack//G.U. Trilogy: Parody Mode', '.hack//Gift', '.hack//Intermezzo', '.hack//Liminality', '.hack//Quantum', '.hack//Quantum: Sore ike! Bokura no Chimuchimu-chan!!', '.hack//Roots', '.hack//Sign', '.hack//Tasogare no Udewa Densetsu', '.hack//Tasogare no Udewa Densetsu: Offline de Aimashou', '.hack//The Movie: Sekai no Mukou ni', '.hack//Unison', '.hack//Versus: The Thanatos Report', '0-sen Hayato', '0-sen Hayato Pilot', '001', '009 Re:Cyborg', '009-1', '009-1: R&B', '00:08', '07-Ghost', '1+2=Paradise', '100%', '100-man-nen Chikyuu no Tabi: Bander Book', '1000-nen Joou: Queen Millennia', '1001 Nights', '11-nin Iru!', '11eyes', '11eyes Picture Drama', '11eyes: Momoiro Genmutan', '12-gatsu no Uta', '12-sai.', '12-sai. 2nd Season', '12-sai.: Chicchana Mune no Tokimeki',

In [37]:
def predict_rating(user_id, item_name):
    # Get the index for the user and the item
    user_index = user_item_matrix.index.get_loc(user_id)
    item_index = user_item_matrix.columns.get_loc(item_name)
    
    # Return the predicted rating
    return ratings_pred[user_index, item_index]

# Example prediction
predicted_rating = predict_rating(1, '"Aesop" no Ohanashi yori: Ushi to Kaeru, Yokubatta Inu')

# Check if the prediction was successful
if predicted_rating is not None:
    print(f"Predicted rating for user 1 on the anime 'Aesop no Ohanashi': {predicted_rating}")
else:
    print("Could not predict the rating for this anime.")


Predicted rating for user 1 on the anime 'Aesop no Ohanashi': 0.009562033935853777


In [40]:
def predict_rating(user_id, anime_name):
    # Get the index of the user_id in the user-item matrix
    user_index = user_item_matrix.index.get_loc(user_id)
    
    # Get the index of the anime_name in the user-item matrix
    item_index = user_item_matrix.columns.get_loc(anime_name)
    
    # Predict the rating
    return ratings_pred[user_index, item_index]

def recommend_anime(user_id, top_n=5):
    # Get the list of all anime titles the user has rated
    rated_animes = data[data['user_id'] == user_id]['name'].values
    
    # Get a list of all anime titles
    all_animes = user_item_matrix.columns
    
    # Find all anime that the user has not rated
    unseen_animes = [anime for anime in all_animes if anime not in rated_animes]
    
    # Predict the rating for each unseen anime
    predictions = {}
    for anime in unseen_animes:
        predicted_rating = predict_rating(user_id, anime)
        predictions[anime] = predicted_rating
    
    # Sort the predictions by rating (highest first)
    recommended_animes = sorted(predictions.items(), key=lambda x: x[1], reverse=True)
    
    # Get the top N recommended anime titles
    top_recommended_animes = [anime for anime, _ in recommended_animes[:top_n]]
    
    return top_recommended_animes


In [49]:
# Example usage: Recommend top anime for user_id
user_id = 7  # user_id you want to recommend for
top_n = 10  # Number of top anime recommendations you want

recommended_animes = recommend_anime(user_id, top_n)

# Display recommended anime titles
print(f"Top {top_n} recommended anime for user {user_id}:")
for i, anime in enumerate(recommended_animes, 1):
    print(f"{i}. {anime}")

Top 10 recommended anime for user 7:
1. Shingeki no Kyojin
2. Angel Beats!
3. Toradora!
4. Elfen Lied
5. Steins;Gate
6. Fullmetal Alchemist: Brotherhood
7. Highschool of the Dead
8. Tengen Toppa Gurren Lagann
9. Yahari Ore no Seishun Love Comedy wa Machigatteiru.
10. Sakurasou no Pet na Kanojo
