In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf


In [3]:
from tensorflow.keras import layers
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation,BatchNormalization,Input,Embedding,Dot,Dense,Flatten
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping


In [4]:
from wordcloud import WordCloud


In [5]:
import os

# Directory holding the raw CSV exports, relative to the notebook's working directory.
INPUT_DIR = os.path.join("..", "artifacts", "raw")

# Only the three columns needed for collaborative filtering are loaded.
# The path is built with os.path.join throughout instead of mixing it with a
# hard-coded "/" separator.
rating_df = pd.read_csv(
    os.path.join(INPUT_DIR, "animelist.csv"),
    low_memory=True,
    usecols=["user_id", "anime_id", "rating"],
)
rating_df.head()

Unnamed: 0,user_id,anime_id,rating
0,0,67,9
1,0,6702,7
2,0,242,10
3,0,4898,0
4,0,21,10


In [6]:
# Number of rating rows recorded for each user_id (value_counts lists the most active users first).
n_ratings = rating_df["user_id"].value_counts()
n_ratings

user_id
11100    14429
10255     8403
4773      5735
6852      5406
16057     5080
         ...  
106          1
6846         1
92           1
26           1
10703        1
Name: count, Length: 15186, dtype: int64

In [7]:
# Keep only the rows of users that have at least 500 rating rows; .copy() makes the
# result an independent frame so later column assignments do not touch a view.
rating_df = rating_df.loc[
    rating_df["user_id"].isin(n_ratings[n_ratings >= 500].index)
].copy()

In [8]:
len(rating_df)

2754096

In [9]:
# Range and mean of the raw ratings. The pandas reductions are vectorised; the
# built-in min()/max()/sum() would iterate the Series one Python object at a time.
minrate = rating_df["rating"].min()
maxrate = rating_df["rating"].max()
avgrate = rating_df["rating"].sum() / len(rating_df)
meanrate = rating_df["rating"].mean()  # same quantity as avgrate, kept as a cross-check

print(minrate, maxrate, avgrate, meanrate)

0 10 4.07798493589185 4.07798493589185


In [10]:
# Min-max scale the ratings into [0, 1] with vectorised arithmetic (no per-row lambda).
# NOTE: this cell is not idempotent - re-running it without re-running the cell that
# computes minrate/maxrate would rescale values that are already scaled.
rating_df["rating"] = ((rating_df["rating"] - minrate) / (maxrate - minrate)).astype(np.float64)

In [11]:
np.mean(rating_df['rating'])

np.float64(0.40779849358918496)

In [12]:
rating_df.duplicated().sum()

np.int64(0)

In [13]:
rating_df.isnull().sum()

user_id     0
anime_id    0
rating      0
dtype: int64

In [14]:
# Distinct user ids, in order of first appearance, as a plain Python list.
user_ids = rating_df["user_id"].unique().tolist()
len(user_ids)

3101

In [15]:
# Bidirectional mapping between raw user ids and contiguous 0-based indices
# (the indices are what the user embedding layer is looked up with).
user2user_encoded = dict(zip(user_ids, range(len(user_ids))))
user2user_decoded = dict(enumerate(user_ids))
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)

In [16]:
# Same encoding scheme for anime: raw anime_id <-> contiguous 0-based index.
anime_ids = rating_df["anime_id"].unique().tolist()
anime2anime_encoded = dict(zip(anime_ids, range(len(anime_ids))))
anime2anime_decoded = dict(enumerate(anime_ids))
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)

In [17]:
len(anime_ids)

17141

In [18]:
rating_df.head(50)

Unnamed: 0,user_id,anime_id,rating,user,anime
1415,6,9062,0.8,0,0
1416,6,9919,0.0,0,1
1417,6,150,0.7,0,2
1418,6,4981,0.0,0,3
1419,6,10793,0.0,0,4
1420,6,249,0.7,0,5
1421,6,1887,0.0,0,6
1422,6,10110,0.0,0,7
1423,6,10165,0.0,0,8
1424,6,355,0.0,0,9


In [19]:
# Shuffle the rows so that the positional train/validation split below is a random sample.
# A fixed random_state makes the shuffle - and therefore the split and the training
# run - reproducible across kernel restarts.
rating_df = rating_df.sample(frac=1, random_state=42).reset_index(drop=True)

In [20]:
rating_df.head()

Unnamed: 0,user_id,anime_id,rating,user,anime
0,10367,3152,0.5,1910,9417
1,7916,6211,0.9,1516,1619
2,7708,22687,0.8,1460,832
3,4514,84,0.6,820,4333
4,15690,586,0.0,2944,1802


In [21]:
# Features: (encoded user, encoded anime) index pairs. Target: the scaled rating.
x = rating_df[["user", "anime"]].to_numpy()
y = rating_df["rating"]

In [22]:
x

array([[ 1910,  9417],
       [ 1516,  1619],
       [ 1460,   832],
       ...,
       [ 3063,  1584],
       [  882,  4625],
       [ 1890, 11281]])

In [23]:
y

0          0.5
1          0.9
2          0.8
3          0.6
4          0.0
          ... 
2754091    0.0
2754092    0.0
2754093    0.0
2754094    0.0
2754095    0.7
Name: rating, Length: 2754096, dtype: float64

In [24]:
test_size = 1000  # number of rows held out at the end of the shuffled frame
train_size = len(rating_df) - test_size


In [25]:
# Positional split: the first train_size rows train the model, the remainder is held out.
xtrain, ytrain = x[:train_size], y[:train_size]
xtest, ytest = x[train_size:], y[train_size:]

In [26]:
len(xtrain)


2753096

In [27]:
len(xtest)

1000

In [28]:
type(xtrain)

numpy.ndarray

In [29]:
# The model takes two inputs, so each feature matrix is split into a list of
# column vectors: element 0 holds the encoded users, element 1 the encoded animes.
X_train_array = [xtrain[:, column] for column in (0, 1)]
X_test_array = [xtest[:, column] for column in (0, 1)]
X_train_array

[array([1910, 1516, 1460, ...,  586, 2393, 1920]),
 array([9417, 1619,  832, ...,  718, 8273, 5513])]

In [30]:
type(X_train_array) , type(X_train_array[0])

(list, numpy.ndarray)

# Model Architecture

In [39]:
def RecommenderNet():
    """Build and compile the user/anime embedding model.

    Two integer inputs ('user' and 'anime') are each looked up in a 128-dimensional
    embedding table sized from the notebook-level ``user_ids`` / ``anime_ids`` lists.
    The two embeddings are combined with a normalised dot product, flattened, passed
    through a single-unit dense layer, batch-normalised and squashed with a sigmoid.

    Returns:
        The compiled Keras ``Model`` (binary cross-entropy loss, Adam at 1e-3,
        MAE and MSE reported as metrics).
    """
    embedding_size = 128

    # User branch: one embedding row per encoded user index.
    user = Input(shape=[1], name='user')
    user_embedding = Embedding(
        input_dim=len(user_ids),
        output_dim=embedding_size,
        name='user_embedding',
    )(user)

    # Anime branch: one embedding row per encoded anime index.
    anime = Input(shape=[1], name='anime')
    anime_embedding = Embedding(
        input_dim=len(anime_ids),
        output_dim=embedding_size,
        name='anime_embedding',
    )(anime)

    # normalize=True L2-normalises both embeddings before the dot product,
    # so the layer yields their cosine similarity.
    similarity = Dot(name="Dot_Product", normalize=True, axes=2)([user_embedding, anime_embedding])

    # Collapse the extra axes left by the dot product before the dense layer.
    flattened = Flatten()(similarity)

    # Single unit: maps the similarity to one score per (user, anime) pair.
    score = Dense(1, kernel_initializer='he_normal')(flattened)
    score = BatchNormalization()(score)

    # The sigmoid keeps predictions in (0, 1), the same range as the scaled ratings.
    prediction = Activation("sigmoid")(score)

    model = Model(inputs=[user, anime], outputs=prediction)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['mae', 'mse'],
    )

    return model

In [40]:
model = RecommenderNet()  # build and compile the embedding model
model.summary()  # print the layer-by-layer summary

In [41]:
start_lr = 0.0001  # learning rate at epoch 0, the start of the warm-up ramp
min_lr = 0.0001  # floor that the decayed learning rate converges to
max_lr = 0.005  # peak learning rate reached after the warm-up
batch_size = 10000  # samples per gradient update

ramup_epochs = 5  # epochs spent ramping linearly from start_lr towards max_lr
sustain_epochs = 2  # epochs held at max_lr after the ramp
exponential_decay = 0.9  # per-epoch decay factor applied after the sustain phase


def lrfn(epoch):
    """Return the learning rate for a 0-based ``epoch`` (warm-up, hold, then decay).

    * ``epoch < ramup_epochs``: linear ramp starting at ``start_lr``.
    * next ``sustain_epochs`` epochs: constant ``max_lr``.
    * afterwards: exponential decay from ``max_lr`` towards ``min_lr``.

    The decay phase previously ignored ``min_lr`` and decayed towards 0, which let
    the rate fall below the configured minimum; it now converges to ``min_lr``.
    """
    if epoch < ramup_epochs:
        return start_lr + (max_lr - start_lr) / ramup_epochs * epoch
    if epoch < ramup_epochs + sustain_epochs:
        return max_lr
    decay_steps = epoch - ramup_epochs - sustain_epochs
    return (max_lr - min_lr) * exponential_decay ** decay_steps + min_lr

In [42]:
# Set the learning rate from lrfn(epoch) at the start of every epoch.
# lrfn is passed directly; wrapping it in a lambda added nothing.
lr_callback = LearningRateScheduler(lrfn, verbose=0)

checkpoint_filepath = "./weights.weights.h5"  # where the best weights are written

# Write the weights only when val_loss improves, so the file always holds the best epoch.
model_checkpoint = ModelCheckpoint(filepath=checkpoint_filepath, save_weights_only=True, monitor='val_loss',
                                   mode='min', save_best_only=True, verbose=1)

# Stop once val_loss has not improved for 3 consecutive epochs (patience=3)
# and restore the weights of the best epoch seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min', restore_best_weights=True)

In [43]:
# Sanity check before training: each input array must have the same length as its target.
print(f"Shape of X_train_array[0] (users): {X_train_array[0].shape}")
print(f"Shape of X_train_array[1] (animes): {X_train_array[1].shape}")
print(f"Shape of ytrain: {ytrain.shape}")
print(f"Shape of X_test_array[0] (users): {X_test_array[0].shape}")
print(f"Shape of X_test_array[1] (animes): {X_test_array[1].shape}")
print(f"Shape of ytest: {ytest.shape}")


Shape of X_train_array[0] (users): (2753096,)
Shape of X_train_array[1] (animes): (2753096,)
Shape of ytrain: (2753096,)
Shape of X_test_array[0] (users): (1000,)
Shape of X_test_array[1] (animes): (1000,)
Shape of ytest: (1000,)


In [44]:
# Callbacks handed to model.fit: checkpointing, learning-rate schedule, early stopping.
my_callbacks = [model_checkpoint,lr_callback,early_stopping]

In [45]:
# Train on the training slice. The held-out slice is passed as validation data, so the
# val_loss monitored by the checkpoint and early-stopping callbacks is computed on it.
history = model.fit(
    x=X_train_array,
    y=ytrain,
    batch_size=batch_size,
    epochs=20,  # upper bound; early stopping may end training sooner
    verbose=1,
    validation_data = (X_test_array,ytest),
    callbacks=my_callbacks
)

Epoch 1/20
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.7531 - mae: 0.3730 - mse: 0.1810
Epoch 1: val_loss improved from inf to 0.69262, saving model to ./weights.weights.h5
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 39ms/step - loss: 0.7531 - mae: 0.3730 - mse: 0.1810 - val_loss: 0.6926 - val_mae: 0.3570 - val_mse: 0.1520 - learning_rate: 1.0000e-04
Epoch 2/20
[1m275/276[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 102ms/step - loss: 0.6719 - mae: 0.3322 - mse: 0.1447
Epoch 2: val_loss improved from 0.69262 to 0.58213, saving model to ./weights.weights.h5
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 103ms/step - loss: 0.6715 - mae: 0.3320 - mse: 0.1446 - val_loss: 0.5821 - val_mae: 0.2704 - val_mse: 0.1008 - learning_rate: 0.0011
Epoch 3/20
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - loss: 0.5259 - mae: 0.2350 - mse: 0.0805
Epoch 3: val_loss improved from 

In [46]:
# Reload the weights saved by the ModelCheckpoint callback (the epoch with the lowest val_loss).
model.load_weights(checkpoint_filepath)

In [47]:
def extract_weights(name,model):
    """Return the weight matrix of layer ``name`` with every row scaled to unit L2 norm.

    Args:
        name: Name of the layer to read (looked up with ``model.get_layer``).
        model: Object exposing ``get_layer(name)``; the returned layer's first
            weight array is used.

    Returns:
        Array of the same shape as the layer's first weight array, each row divided
        by its own Euclidean norm.
    """
    matrix = model.get_layer(name).get_weights()[0]
    # Column vector of per-row norms so the division broadcasts across each row.
    row_norms = np.linalg.norm(matrix, axis=1).reshape((-1, 1))
    return matrix / row_norms

# Unit-length embedding rows for every anime and user; with unit-length rows the
# dot product of two rows is their cosine similarity.
anime_weights = extract_weights("anime_embedding",model)
user_weights = extract_weights("user_embedding",model)

In [73]:
user_weights.shape

(3101, 128)

In [48]:
# Anime metadata (names, score, genres, ...) read from the same raw-data directory as the ratings.
df = pd.read_csv(INPUT_DIR+"/anime.csv",low_memory=True)
df.head(2)


Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0


In [49]:
# Turn the literal string "Unknown" into NaN everywhere
# (presumably the dataset's marker for missing values - confirm against the data source).
df = df.replace("Unknown",np.nan)

In [50]:
def getAnimeName(anime_id, frame=None):
    """Return an anime's display name: its English title, else its original ``Name``.

    Args:
        anime_id: Value matched against the ``anime_id`` column.
        frame: Table with ``anime_id``, ``eng_version`` and ``Name`` columns.
            Defaults to the notebook-level ``df``.

    Returns:
        The English title when one is recorded, otherwise the ``Name`` value.
        ``np.nan`` when no row matches ``anime_id`` ("Error" is printed in that
        case, as before; previously the function then failed with
        UnboundLocalError at ``return``).
    """
    if frame is None:
        frame = df

    matches = frame[frame.anime_id == anime_id]
    if matches.empty:
        print("Error")
        return np.nan

    name = matches.eng_version.values[0]
    # pd.isna recognises every missing-value marker; `name is np.nan` only
    # matched the one np.nan singleton object.
    if pd.isna(name):
        name = matches.Name.values[0]
    return name

In [51]:
df["anime_id"] = df["MAL_ID"]
# Display name: the English title where one exists, otherwise the original Name.
# fillna does this in one vectorised pass; calling getAnimeName once per row re-scanned
# the whole table for every row (quadratic). The result is the same as long as each
# MAL_ID occurs once - TODO confirm MAL_ID is unique in anime.csv.
df["eng_version"] = df["English name"].fillna(df["Name"])

In [52]:
# Highest Score first; rows without a Score are placed last.
df = df.sort_values(
    by=["Score"],
    ascending=False,
    kind="quicksort",
    na_position="last",
)

In [53]:
# Keep only the metadata columns used by the recommendation helpers below.
df = df.loc[:, ["anime_id", "eng_version", "Score", "Genres", "Episodes", "Type", "Premiered", "Members"]]
df.head()

Unnamed: 0,anime_id,eng_version,Score,Genres,Episodes,Type,Premiered,Members
3971,5114,Fullmetal Alchemist:Brotherhood,9.19,"Action, Military, Adventure, Comedy, Drama, Ma...",64,TV,Spring 2009,2248456
15926,40028,Attack on Titan Final Season,9.17,"Action, Military, Mystery, Super Power, Drama,...",16,TV,Winter 2021,733260
5683,9253,Steins;Gate,9.11,"Thriller, Sci-Fi",24,TV,Spring 2011,1771162
14963,38524,Attack on Titan Season 3 Part 2,9.1,"Action, Drama, Fantasy, Military, Mystery, Sho...",10,TV,Spring 2019,1073626
9913,28977,Gintama Season 4,9.1,"Action, Comedy, Historical, Parody, Samurai, S...",51,TV,Spring 2015,404121


In [54]:
# From either an anime id or an anime name, get the matching row(s) of the anime frame

def getAnimeFrame(anime,df):
    """Select the rows of ``df`` that describe ``anime``.

    Args:
        anime: An integer id (Python ``int`` or NumPy integer such as ``np.int64``),
            matched against ``df.anime_id``, or a ``str`` title, matched against
            ``df.eng_version``.
        df: Table with ``anime_id`` and ``eng_version`` columns.

    Returns:
        The matching rows (possibly empty), or ``None`` when ``anime`` is neither
        an integer nor a string.
    """
    # Ids read back out of pandas/NumPy are np.integer, not int; accepting both
    # avoids silently returning None for them.
    if isinstance(anime, (int, np.integer)):
        return df[df.anime_id == anime]
    if isinstance(anime, str):
        return df[df.eng_version == anime]
    return None
    

In [55]:
getAnimeFrame(40028 , df)

Unnamed: 0,anime_id,eng_version,Score,Genres,Episodes,Type,Premiered,Members
15926,40028,Attack on Titan Final Season,9.17,"Action, Military, Mystery, Super Power, Drama,...",16,TV,Winter 2021,733260


In [56]:
getAnimeFrame("Steins;Gate",df)

Unnamed: 0,anime_id,eng_version,Score,Genres,Episodes,Type,Premiered,Members
5683,9253,Steins;Gate,9.11,"Thriller, Sci-Fi",24,TV,Spring 2011,1771162


In [57]:
# Columns to read from the synopsis file. "sypnopsis" is spelled this way in the
# CSV header itself (see the preview below), so the name must not be corrected here.
cols = ["MAL_ID","Name","Genres","sypnopsis"]
synopsis_df = pd.read_csv(INPUT_DIR+"/anime_with_synopsis.csv",usecols=cols)
synopsis_df.head(1)

Unnamed: 0,MAL_ID,Name,Genres,sypnopsis
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."


In [58]:
def getSynopsis(anime,synopsis_df):
    """Return the synopsis text for ``anime``.

    Args:
        anime: An integer id (Python ``int`` or NumPy integer), matched against
            ``synopsis_df.MAL_ID``, or a ``str``, matched against
            ``synopsis_df.Name``.
        synopsis_df: Table with ``MAL_ID``, ``Name`` and ``sypnopsis`` columns
            (the column name is spelled that way in the data).

    Returns:
        The synopsis of the first matching row, or ``None`` when ``anime`` has an
        unsupported type or no row matches (a missing row used to raise IndexError).
    """
    if isinstance(anime, (int, np.integer)):
        matches = synopsis_df[synopsis_df.MAL_ID == anime]
    elif isinstance(anime, str):
        matches = synopsis_df[synopsis_df.Name == anime]
    else:
        return None

    if matches.empty:
        return None
    return matches.sypnopsis.values[0]
    

In [59]:
getSynopsis(40028 , synopsis_df)

"Gabi Braun and Falco Grice have been training their entire lives to inherit one of the seven titans under Marley's control and aid their nation in eradicating the Eldians on Paradis. However, just as all seems well for the two cadets, their peace is suddenly shaken by the arrival of Eren Yeager and the remaining members of the Survey Corps. Having finally reached the Yeager family basement and learned about the dark history surrounding the titans, the Survey Corps has at long last found the answer they so desperately fought to uncover. With the truth now in their hands, the group set out for the world beyond the walls. In Shingeki no Kyojin: The Final Season , two utterly different worlds collide as each party pursues its own agenda in the long-awaited conclusion to Paradis' fight for freedom."

In [60]:
getSynopsis("Steins;Gate",synopsis_df)

'The self-proclaimed mad scientist Rintarou Okabe rents out a room in a rickety old building in Akihabara, where he indulges himself in his hobby of inventing prospective "future gadgets" with fellow lab members: Mayuri Shiina, his air-headed childhood friend, and Hashida Itaru, a perverted hacker nicknamed "Daru." The three pass the time by tinkering with their most promising contraption yet, a machine dubbed the "Phone Microwave," which performs the strange function of morphing bananas into piles of green gel. Though miraculous in itself, the phenomenon doesn\'t provide anything concrete in Okabe\'s search for a scientific breakthrough; that is, until the lab members are spurred into action by a string of mysterious happenings before stumbling upon an unexpected success—the Phone Microwave can send emails to the past, altering the flow of history. Adapted from the critically acclaimed visual novel by 5pb. and Nitroplus, Steins;Gate takes Okabe through the depths of scientific theory 

In [61]:
pd.set_option("max_colwidth",None)

In [62]:
getSynopsis("Steins;Gate",synopsis_df)

'The self-proclaimed mad scientist Rintarou Okabe rents out a room in a rickety old building in Akihabara, where he indulges himself in his hobby of inventing prospective "future gadgets" with fellow lab members: Mayuri Shiina, his air-headed childhood friend, and Hashida Itaru, a perverted hacker nicknamed "Daru." The three pass the time by tinkering with their most promising contraption yet, a machine dubbed the "Phone Microwave," which performs the strange function of morphing bananas into piles of green gel. Though miraculous in itself, the phenomenon doesn\'t provide anything concrete in Okabe\'s search for a scientific breakthrough; that is, until the lab members are spurred into action by a string of mysterious happenings before stumbling upon an unexpected success—the Phone Microwave can send emails to the past, altering the flow of history. Adapted from the critically acclaimed visual novel by 5pb. and Nitroplus, Steins;Gate takes Okabe through the depths of scientific theory 

In [65]:
df.head()

Unnamed: 0,anime_id,eng_version,Score,Genres,Episodes,Type,Premiered,Members
3971,5114,Fullmetal Alchemist:Brotherhood,9.19,"Action, Military, Adventure, Comedy, Drama, Magic, Fantasy, Shounen",64,TV,Spring 2009,2248456
15926,40028,Attack on Titan Final Season,9.17,"Action, Military, Mystery, Super Power, Drama, Fantasy, Shounen",16,TV,Winter 2021,733260
5683,9253,Steins;Gate,9.11,"Thriller, Sci-Fi",24,TV,Spring 2011,1771162
14963,38524,Attack on Titan Season 3 Part 2,9.1,"Action, Drama, Fantasy, Military, Mystery, Shounen, Super Power",10,TV,Spring 2019,1073626
9913,28977,Gintama Season 4,9.1,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",51,TV,Spring 2015,404121


In [68]:
def find_similar_animes(name, anime_weights, anime2anime_encoded, anime2anime_decoded, df, synopsis_df, n=10, return_dist=False, neg=False):
    """Rank anime by embedding similarity to a query anime.

    Args:
        name: The query anime, as an id or a title - anything ``getAnimeFrame`` accepts.
        anime_weights: 2-D array with one embedding row per encoded anime index.
            Similarity is the dot product of rows.
        anime2anime_encoded: Mapping from raw anime id to row index in ``anime_weights``.
        anime2anime_decoded: Mapping from row index back to raw anime id.
        df: Metadata table with ``anime_id``, ``eng_version`` and ``Genres`` columns.
        synopsis_df: Unused; kept so existing call sites stay valid.
        n: Number of anime to return.
        return_dist: When True, return ``(dists, closest)`` instead of a frame:
            all similarity scores plus the ``n + 1`` selected row indices (the
            query itself may be among them), exactly as before.
        neg: When True, select the least similar anime instead of the most similar.

    Returns:
        A DataFrame with ``name``, ``similarity`` and ``genre`` columns, sorted by
        descending similarity, holding at most ``n`` rows and never the query itself.

    Raises:
        ValueError: If the query cannot be found in ``df`` or has no encoded index.
    """
    # Resolve the query to its raw id, then to its row in the embedding matrix.
    query_frame = getAnimeFrame(name, df)
    if query_frame is None or query_frame.empty:
        raise ValueError(f"Anime not found: {name!r}")
    index = query_frame.anime_id.values[0]
    encoded_index = anime2anime_encoded.get(index)

    if encoded_index is None:
        raise ValueError(f"Encoded index not found for anime ID: {index}")

    # Similarity of every anime to the query, and row indices in ascending order of it.
    dists = np.dot(anime_weights, anime_weights[encoded_index])
    sorted_dists = np.argsort(dists)

    if return_dist:
        k = n + 1
        closest = sorted_dists[:k] if neg else sorted_dists[-k:]
        return dists, closest

    # Remove the query BEFORE slicing. Slicing n + 1 rows and dropping the query
    # afterwards returned n + 1 rows whenever the query was not in the slice
    # (always the case for neg=True).
    candidates = sorted_dists[sorted_dists != encoded_index]
    n = max(int(n), 0)
    if neg:
        closest = candidates[:n]
    else:
        closest = candidates[len(candidates) - min(n, len(candidates)):]

    SimilarityArr = []
    for close in closest:
        decoded_id = anime2anime_decoded.get(int(close))
        anime_frame = getAnimeFrame(decoded_id, df) if decoded_id is not None else None
        if anime_frame is None or anime_frame.empty:
            # No metadata for this embedding row: skip it rather than crash.
            continue

        SimilarityArr.append({
            "anime_id": decoded_id,
            "name": anime_frame.eng_version.values[0],
            "similarity": dists[close],
            "genre": anime_frame.Genres.values[0],
        })

    # Explicit columns keep the frame well-formed even when nothing was collected.
    Frame = pd.DataFrame(
        SimilarityArr, columns=["anime_id", "name", "similarity", "genre"]
    ).sort_values(by="similarity", ascending=False)
    return Frame[Frame.anime_id != index].drop(['anime_id'], axis=1)


In [69]:
find_similar_animes(
    "Steins;Gate",
    anime_weights,
    anime2anime_encoded,
    anime2anime_decoded,
    df,
    synopsis_df,
)

Unnamed: 0,name,similarity,genre
9,Code Geass:Lelouch of the Rebellion,0.656814,"Action, Military, Sci-Fi, Super Power, Drama, Mecha, School"
8,Fullmetal Alchemist:Brotherhood,0.636193,"Action, Military, Adventure, Comedy, Drama, Magic, Fantasy, Shounen"
7,Code Geass:Lelouch of the Rebellion R2,0.621698,"Action, Military, Sci-Fi, Super Power, Drama, Mecha"
6,Death Note,0.616148,"Mystery, Police, Psychological, Supernatural, Thriller, Shounen"
5,Attack on Titan,0.596269,"Action, Military, Mystery, Super Power, Drama, Fantasy, Shounen"
4,Angel Beats!,0.592301,"Action, Comedy, Drama, School, Supernatural"
3,Fate/Zero,0.587812,"Action, Supernatural, Magic, Fantasy"
2,Fate/Zero Season 2,0.57709,"Action, Supernatural, Magic, Fantasy"
1,Steins;Gate:Egoistic Poriomania,0.56668,"Sci-Fi, Comedy, Thriller"
0,anohana:The Flower We Saw That Day,0.560783,"Slice of Life, Supernatural, Drama"


User-based Recommendation

In [82]:
def find_similar_users(item_input , user_weights , user2user_encoded , user2user_decoded, n=10 , return_dist=False,neg=False):
    """Rank users by embedding similarity to a query user.

    Args:
        item_input: Raw id of the query user (Python ``int`` or NumPy integer).
        user_weights: 2-D array with one embedding row per encoded user index.
            Similarity is the dot product of rows.
        user2user_encoded: Mapping from raw user id to row index in ``user_weights``.
        user2user_decoded: Mapping from row index back to raw user id.
        n: Number of users to return.
        return_dist: When True, return ``(dists, closest)`` instead of a frame:
            all similarity scores plus the ``n + 1`` selected row indices (the
            query itself may be among them), exactly as before.
        neg: When True, select the least similar users instead of the most similar.

    Returns:
        A DataFrame with ``similar_users`` and ``similarity`` columns, sorted by
        descending similarity, holding at most ``n`` rows and never the query user.
        ``None`` (after printing a message) when ``item_input`` is not a known
        user id, which is what the previous catch-all error handler produced.
    """
    encoded_index = user2user_encoded.get(item_input)
    if encoded_index is None:
        # Explicit replacement for the old blanket `except Exception`: the one
        # expected failure is reported; genuine bugs are no longer swallowed.
        print("Error Occured", f"user id {item_input!r} not found in user2user_encoded")
        return None

    # Similarity of every user to the query, and row indices in ascending order of it.
    dists = np.dot(user_weights, user_weights[encoded_index])
    sorted_dists = np.argsort(dists)

    if return_dist:
        k = n + 1
        closest = sorted_dists[:k] if neg else sorted_dists[-k:]
        return dists, closest

    # Remove the query BEFORE slicing so exactly n neighbours are returned for
    # both neg=False and neg=True (the old n + 1 slice kept an extra row when
    # the query was not part of it).
    candidates = sorted_dists[sorted_dists != encoded_index]
    n = max(int(n), 0)
    if neg:
        closest = candidates[:n]
    else:
        closest = candidates[len(candidates) - min(n, len(candidates)):]

    # The old `isinstance(item_input, int)` guard is gone: it left the list empty
    # for NumPy integer ids, which then failed inside sort_values.
    SimilarityArr = [
        {"similar_users": user2user_decoded.get(int(close)), "similarity": dists[close]}
        for close in closest
    ]

    # Explicit columns keep the frame well-formed even when it is empty.
    similar_users = pd.DataFrame(
        SimilarityArr, columns=["similar_users", "similarity"]
    ).sort_values(by="similarity", ascending=False)
    return similar_users

        
        

In [87]:
print("user_weights shape:", user_weights.shape)
print("user_weights[encoded_index] shape:", user_weights[user2user_encoded[6]].shape)

user_weights shape: (3101, 128)
user_weights[encoded_index] shape: (128,)


In [88]:
find_similar_users(int(6),user_weights,user2user_encoded,user2user_decoded)

Unnamed: 0,similar_users,similarity
9,11464,0.297519
8,8758,0.276785
7,14430,0.273272
6,12691,0.270058
5,9961,0.268363
4,12047,0.26216
3,936,0.259198
2,5197,0.257674
1,5360,0.257068
0,7134,0.256277


In [98]:
def showWordCloud(all_genres):
    """Draw a word cloud in which each genre is sized by its frequency.

    Args:
        all_genres: Mapping of genre name to count, passed to
            ``WordCloud.generate_from_frequencies``.
    """
    cloud_builder = WordCloud(
        width=700,
        height=400,
        background_color='white',
        colormap='gnuplot',
    )
    genres_cloud = cloud_builder.generate_from_frequencies(all_genres)

    plt.figure(figsize=(10,8))
    plt.imshow(genres_cloud, interpolation="bilinear")
    plt.axis("off")  # the axes carry no meaning for a word cloud
    plt.show()

In [99]:
from collections import defaultdict
def getFavGenre(frame , plot=False):
    """Collect every genre listed in ``frame["Genres"]``.

    Args:
        frame: Table with a ``Genres`` column of comma-separated genre strings.
            Non-string entries (e.g. NaN) are skipped.
        plot: When True, also draw a word cloud of the genre counts via
            ``showWordCloud``.

    Returns:
        List of genre names, one entry per occurrence, in row order, with
        surrounding whitespace stripped. The list used to keep the leading space
        of every genre after the first (" Comedy"), unlike the stripped counts.
    """
    # A former `frame.dropna(inplace=False)` call was dropped: its result was
    # discarded, so it had no effect. Missing genres are handled by the str check.
    all_genres = defaultdict(int)
    genres_list = []

    for genres in frame["Genres"]:
        if not isinstance(genres, str):
            continue
        for genre in genres.split(','):
            genre = genre.strip()
            genres_list.append(genre)
            all_genres[genre] += 1

    if plot:
        showWordCloud(all_genres)

    return genres_list



In [100]:
# NOTE(review): getFavGenre is also defined in the previous cell; this later
# definition is the one in effect after Run All. One of the two cells can be removed.
def getFavGenre(frame , plot=False):
    """Collect every genre listed in ``frame["Genres"]``.

    Args:
        frame: Table with a ``Genres`` column of comma-separated genre strings.
            Non-string entries (e.g. NaN) are skipped.
        plot: When True, also draw a word cloud of the genre counts via
            ``showWordCloud``.

    Returns:
        List of genre names, one entry per occurrence, in row order, with
        surrounding whitespace stripped. The list used to keep the leading space
        of every genre after the first (" Comedy"), unlike the stripped counts.
    """
    # A former `frame.dropna(inplace=False)` call was dropped: its result was
    # discarded, so it had no effect. Missing genres are handled by the str check.
    all_genres = defaultdict(int)
    genres_list = []

    for genres in frame["Genres"]:
        if not isinstance(genres, str):
            continue
        for genre in genres.split(','):
            genre = genre.strip()
            genres_list.append(genre)
            all_genres[genre] += 1

    if plot:
        showWordCloud(all_genres)

    return genres_list



In [101]:
def get_user_recommendations(similar_users , user_pref ,df , synopsis_df, rating_df, n=10):
    """Recommend anime that similar users prefer and the target user has not listed.

    Args:
        similar_users: Frame with a ``similar_users`` column of raw user ids.
        user_pref: Frame with an ``eng_version`` column holding the target user's
            own titles; these are excluded from the candidates.
        df: Anime metadata, passed to ``get_user_preferences`` and ``getAnimeFrame``.
        synopsis_df: Synopsis table, passed to ``getSynopsis``.
        rating_df: Ratings table, passed to ``get_user_preferences``.
        n: Maximum number of recommendations.

    Returns:
        DataFrame with columns ``n`` (how many similar users have the title in
        their preferences), ``anime_name``, ``Genres`` and ``Synopsis``. The
        columns are present even when there is nothing to recommend.
    """
    result_columns = ["n", "anime_name", "Genres", "Synopsis"]
    own_titles = user_pref.eng_version.values

    # Pool the titles preferred by each similar user, minus the user's own titles.
    # NOTE(review): get_user_preferences is not defined in the visible part of this
    # notebook - confirm it exists before this cell runs on a fresh kernel.
    candidate_titles = []
    for user_id in similar_users.similar_users.values:
        pref_list = get_user_preferences(int(user_id) , rating_df, df)
        pref_list = pref_list[~pref_list.eng_version.isin(own_titles)]
        if not pref_list.empty:
            candidate_titles.extend(pref_list.eng_version.values)

    recommended_animes = []
    if candidate_titles:
        # Count how often each title occurs and keep the n most frequent.
        # value_counts() skips missing titles.
        title_counts = pd.Series(candidate_titles).value_counts().head(n)

        for anime_name, n_user_pref in title_counts.items():
            if not isinstance(anime_name, str):
                continue

            frame = getAnimeFrame(anime_name, df)
            if frame is None or frame.empty:
                # No metadata for this title: skip it rather than abort the whole list.
                continue

            anime_id = frame.anime_id.values[0]
            try:
                synopsis = getSynopsis(int(anime_id), synopsis_df)
            except IndexError:
                # The anime has no row in the synopsis table.
                synopsis = None

            recommended_animes.append({
                "n" : n_user_pref,
                "anime_name" : anime_name,
                "Genres" : frame.Genres.values[0],
                "Synopsis": synopsis
            })

    # Explicit columns: callers index result["anime_name"], which raised KeyError
    # on the column-less frame that an empty list used to produce.
    return pd.DataFrame(recommended_animes, columns=result_columns).head(n)
            



    

In [102]:
def hybrid_recommendation(user_id , user_weight=0.5, content_weight =0.5, n=10):
    """Combine user-based and item-based recommendations into one ranked list.

    First collects the titles recommended through similar users; then, for each
    of those titles, collects the anime whose embeddings are most similar to it.
    Every title scores ``user_weight`` for each appearance in the first list and
    ``content_weight`` for each appearance in the second, and the highest-scoring
    titles are returned.

    Reads the notebook-level ``user_weights``, ``anime_weights``, the encode and
    decode dictionaries, ``rating_df``, ``df`` and ``synopsis_df``.

    Args:
        user_id: Raw id of the user to recommend for.
        user_weight: Score added per appearance in the user-based list.
        content_weight: Score added per appearance in the item-similarity lists.
        n: Number of titles to return (default 10, the value previously hard-coded).

    Returns:
        List of up to ``n`` titles, best first. Empty when the user is unknown or
        nothing can be recommended (these cases used to raise).
    """
    ## User-based recommendation

    similar_users = find_similar_users(user_id, user_weights, user2user_encoded, user2user_decoded)
    if similar_users is None or similar_users.empty:
        # find_similar_users may report a failure by returning None.
        return []

    # NOTE(review): get_user_preferences is not defined in the visible part of this
    # notebook - confirm it exists before this cell runs on a fresh kernel.
    user_pref = get_user_preferences(user_id , rating_df, df)
    user_recommended_animes = get_user_recommendations(similar_users, user_pref, df, synopsis_df, rating_df)
    if user_recommended_animes.empty or "anime_name" not in user_recommended_animes.columns:
        return []

    user_recommended_anime_list = user_recommended_animes["anime_name"].tolist()
    print(user_recommended_anime_list)

    #### Content-based recommendation
    content_recommended_animes = []

    for anime in user_recommended_anime_list:
        try:
            similar_animes = find_similar_animes(anime, anime_weights, anime2anime_encoded, anime2anime_decoded, df, synopsis_df)
        except (ValueError, IndexError):
            # One title that cannot be resolved must not abort the whole run.
            similar_animes = None

        if similar_animes is not None and not similar_animes.empty:
            content_recommended_animes.extend(similar_animes["name"].tolist())
        else:
            print(f"No similar anime found {anime}")

    # Accumulate one weight per appearance in either list.
    combined_scores = {}

    for anime in user_recommended_anime_list:
        combined_scores[anime] = combined_scores.get(anime,0) + user_weight

    for anime in content_recommended_animes:
        combined_scores[anime] = combined_scores.get(anime,0) + content_weight

    sorted_animes = sorted(combined_scores.items() , key=lambda x:x[1] , reverse=True)

    return [anime for anime , score in sorted_animes[:n]]

