In [1]:
import pandas as pd
rating_df = pd.read_csv('rating.csv')
print(rating_df.shape)
print(rating_df.head())

(7813737, 3)
   user_id  anime_id  rating
0        1        20      -1
1        1        24      -1
2        1        79      -1
3        1       226      -1
4        1       241      -1


In [2]:
#The data is too large. So, some users are removed to create a more simpler and easier to compute data.
rating_df = rating_df[rating_df['user_id'] <= 1000]
#Normalizing the range of rating.
rating_df['rating'] = rating_df['rating'].apply(lambda x: 0 if x == -1 else x // 2)
rating_df['rating'].unique()
#rating_df = rating_df.reset_index()

array([0, 5, 4, 3, 1, 2], dtype=int64)

In [3]:
#The rating data now has 5000 movies and 1000 users
rating_df['anime_id'].unique().shape

(5143,)

In [4]:
rating_df[rating_df['user_id'] == 101]

Unnamed: 0,user_id,anime_id,rating
8278,101,20,5
8279,101,269,5


In [5]:
rating_df.shape

(96479, 3)

In [6]:
rating_df['rating'].unique()

array([0, 5, 4, 3, 1, 2], dtype=int64)

In [7]:
rating_df['anime_id'].unique().shape

(5143,)

In [8]:
import numpy as np
# Create User-Item Matrix
user_item_matrix = rating_df.pivot_table(index='user_id', columns='anime_id', values='rating').fillna(0).values

# Apply SVD
U, sigma, Vt = np.linalg.svd(user_item_matrix, full_matrices=False)

# Convert Sigma to diagonal matrix
sigma_diag = np.diag(sigma)

# Reconstruct the matrix
predicted_ratings = np.dot(np.dot(U, sigma_diag), Vt)

In [10]:
print(f'Shape of the user-movie matrix: {predicted_ratings.shape}')
predicted_ratings

Shape of the user-movie matrix: (1000, 5143)


array([[ 1.00887181e-14, -5.41233725e-16,  3.52669283e-15, ...,
        -2.60208521e-17, -3.00107161e-16,  2.40779618e-15],
       [ 7.42461648e-15, -5.86336535e-15, -2.44110288e-14, ...,
         2.95336672e-15,  3.99322502e-15,  5.71331177e-15],
       [-1.65623808e-14, -6.82090560e-15, -4.43417005e-15, ...,
        -5.54948882e-16, -4.13623129e-17,  3.98818348e-15],
       ...,
       [-2.63244287e-15,  1.10653674e-15, -2.99825269e-15, ...,
         4.21646225e-16, -8.15753715e-16, -1.44849410e-16],
       [-3.48852891e-15, -3.27602528e-15, -1.96457434e-15, ...,
         2.02745806e-16, -7.11236625e-17, -1.78329573e-15],
       [ 4.53716925e-15, -1.85170889e-15,  2.97743601e-15, ...,
         2.74899461e-16,  8.61832307e-16, -5.66820896e-16]])

In [11]:
'''The obtained data is a 2d matrix. It has user id and movie id as it rows and columns.
We can use the matrix to predict a rating score of a movie for an given individual regardless of whether
he/she has watched the movie or not. Here, we created mappings for user ids and anime ids to gain some insights about the 
predicted ratings generated via SVD.
'''
user_ids = rating_df['user_id'].unique()
anime_ids = rating_df['anime_id'].unique()
user_id_to_index = {user_id: idx for idx, user_id in enumerate(user_ids)}
anime_id_to_index = {anime_id: idx for idx, anime_id in enumerate(anime_ids)}
print(anime_id_to_index[269])
#print(user_id_to_index)
#print(anime_id_to_index)
predicted_rating_example = predicted_ratings[ user_id_to_index[101],anime_id_to_index[269]]
print(predicted_rating_example)

298
1.2394599235854287e-15


In [12]:
import pandas as pd
df = pd.read_csv('anime.csv')
anime_ids_set = set(rating_df['anime_id'])
df = df[df['anime_id'].apply(lambda x: x in anime_ids_set)]
#df = df[df['anime_id'].apply(lambda x: )]
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5143 entries, 0 to 12223
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  5143 non-null   int64  
 1   name      5143 non-null   object 
 2   genre     5143 non-null   object 
 3   type      5143 non-null   object 
 4   episodes  5143 non-null   object 
 5   rating    5143 non-null   float64
 6   members   5143 non-null   int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 321.4+ KB


In [14]:
df.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [15]:
#Dropping the rows whose anime ratings are unknown.
df = df.dropna(subset=['rating', 'genre'])

In [16]:
df.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [17]:
df.columns

Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')

In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
df['combined_features'] = df['genre'] + ' ' + df['episodes']
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_features'])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [19]:
def hybrid_recommendation(user_id, anime_id, alpha=0.5):
    # Collaborative filtering prediction
    user_id_to_index = {user_id: idx for idx, user_id in enumerate(user_ids)}
    anime_id_to_index = {anime_id: idx for idx, anime_id in enumerate(anime_ids)}
    # user_index = rating_df[rating_df['user_id'] == user_id].index[0]
    # anime_index = rating_df[rating_df['anime_id'] == anime_id].index[0]
    user_index = user_id_to_index[user_id]
    anime_index = anime_id_to_index[anime_id]
    #print(user_id_to_index)
    #print(anime_id_to_index)
    collab_pred = predicted_ratings[user_index, anime_index]
    
    # Content-based filtering prediction (similarity score for movies)
    content_pred = cosine_sim[anime_index].mean()
    
    # Hybrid score: weighted sum of both
    final_score = alpha * collab_pred + (1 - alpha) * content_pred
    return final_score

In [52]:
# Predict rating for a specific user and movie
def predict_rating(user_id, anime_id):
    return hybrid_recommendation(user_id=user_id, anime_id=anime_id)

def recommend_top_5_movies(user_id, alpha=0.5):
    # Get all movies the user has not rated yet
    user_ratings = rating_df[rating_df['user_id'] == user_id]
    rated_movies_id = user_ratings['anime_id'].tolist()
    all_movies = df['anime_id'].unique()
    
    # Predict ratings for all unrated movies
    predictions = []
    for movie_id in all_movies:
        if movie_id not in rated_movies_id:
            predicted_score = hybrid_recommendation(user_id, movie_id, alpha=alpha)
            
            predictions.append((movie_id, predicted_score))
    
    # Sort predictions by score and select the top 5
    top_5_movies = sorted(predictions, key=lambda x: x[1], reverse=True)[:5]
    return top_5_movies

In [53]:
recommendation = recommend_top_5_movies(1)
print(recommendation[1][0])

109


In [54]:
def movie_name(recommendation):
    movie_with_rating = []
    for i in recommendation:
            movie_name = df[df['anime_id'] == i[0]]['name']
            movie_with_rating.append((movie_name, i[1]))
    print(movie_with_rating)

In [57]:
def main_function():
    try:
        user_id = int(input('Enter the user id'))
        movie_id = int(input('Enter the movie id'))
        print(f'The user with {user_id} may rate the {movie_id} with {predict_rating(user_id, movie_id)} rating')
        print(f'The user with {user_id} may like the movies below:')
        recommendation = recommend_top_5_movies(user_id)
        movie_name(recommendation)
    except:
        print("Error Occured")

In [56]:
main_function()

Enter the user id 101
Enter the movie id 20


The user with 101 may rate the 20 with 0.06868802155784179 rating
The user with 101 may like the movies below:
[(3449    Naruto Movie 2: Dai Gekitotsu! Maboroshi no Ch...
Name: name, dtype: object, 2.571388702171868), (14    Haikyuu!! Second Season
Name: name, dtype: object, 2.556975795018381), (11489    Tokubetsu Byoutou
Name: name, dtype: object, 0.10940045844551316), (90    Evangelion: 2.0 You Can (Not) Advance
Name: name, dtype: object, 0.10940045844551284), (3211    Kimi ga Nozomu Eien: Next Season
Name: name, dtype: object, 0.10940045844551273)]
