In [16]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
anime_data = pd.read_csv('anime.csv')
rating_data = pd.read_csv('rating.csv')

In [10]:
anime_data.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [11]:
rating_data.tail()

Unnamed: 0,user_id,anime_id,rating
7813732,73515,16512,7
7813733,73515,17187,9
7813734,73515,22145,10
7813735,73516,790,9
7813736,73516,8074,9


In [3]:
# Remove animes with missing ratings (-1)
rating_data = rating_data[rating_data['rating'] != -1]

# Merge anime and rating data based on anime ID
merged_data = pd.merge(anime_data, rating_data, on='anime_id')

In [4]:
anime_data = []
rating_data = []

In [15]:
merged_data.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating_x,members,user_id,rating_y
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,99,5
1,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,152,10
2,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,244,10
3,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,271,10
4,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,322,10


In [5]:
# rating y => user rating rating x => anime rating
ratings_matrix = merged_data.pivot_table(index='user_id', columns='anime_id', values='rating_y')

In [6]:
ratings_matrix.shape

(69600, 9926)

In [9]:
#only use half of the data
ratings_matrix = ratings_matrix.iloc[:30000, :]

In [10]:
ratings_matrix.shape

(30000, 9926)

In [8]:
merged_data = []

In [17]:
#mem lack
user_similarity = cosine_similarity(ratings_matrix.fillna(0))

In [13]:
# Convert ratings matrix to sparse format
sparse_ratings = ratings_matrix.fillna(0).values

# Create Nearest Neighbors model
nn_model = NearestNeighbors(metric='cosine', algorithm='brute')
nn_model.fit(sparse_ratings)

In [18]:
def get_similar_users(user_id, n_neighbors=5):
    # Find the nearest neighbors for the given user
    distances, indices = nn_model.kneighbors([sparse_ratings[user_id]], n_neighbors+1)
    # Exclude the first nearest neighbor (itself)
    distances = distances.flatten()[1:]
    indices = indices.flatten()[1:]
    return indices, distances

def make_recommendations(user_id, ratings_matrix, n_recommendations=5):
    similar_users, _ = get_similar_users(user_id)
    
    user_ratings = ratings_matrix.loc[user_id]
    recommendations = []
    
    for similar_user in similar_users:
        similar_user_ratings = ratings_matrix.loc[similar_user]
        unrated_animes = similar_user_ratings[similar_user_ratings == 0].index
        
        for anime_id in unrated_animes:
            rating = similar_user_ratings[anime_id]
            recommendations.append((anime_id, rating))
            
            if len(recommendations) == n_recommendations:
                return recommendations
    
    return recommendations

In [22]:
user_id = 20  # Example user ID
recommendations = make_recommendations(user_id, ratings_matrix, user_similarity)

# Print the recommended animes
print(f"Recommended animes for user {user_id}:")
for anime_id, rating in recommendations:
    anime_name = anime_data[anime_data['id_anime'] == anime_id]['name'].values[0]
    print(f"Anime ID: {anime_id}, Name: {anime_name}, Similarity Rating: {rating}")

Recommended animes for user 20:
