In [3]:
!pip install matplotlib wordcloud

Collecting matplotlib
  Downloading matplotlib-3.10.8-cp311-cp311-win_amd64.whl.metadata (52 kB)
Collecting wordcloud
  Downloading wordcloud-1.9.4-cp311-cp311-win_amd64.whl.metadata (3.5 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.3-cp311-cp311-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.61.0-cp311-cp311-win_amd64.whl.metadata (115 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.9-cp311-cp311-win_amd64.whl.metadata (6.4 kB)
Collecting pyparsing>=3 (from matplotlib)
  Downloading pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)
Downloading matplotlib-3.10.8-cp311-cp311-win_amd64.whl (8.1 MB)
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ----------- ---------------------------- 2.4/8.1 MB 11.2 MB/s eta 0:00:01
   -----------------

In [2]:
import tensorflow as tf

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation, BatchNormalization, Input, Embedding, Dense, Dot, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

from wordcloud import WordCloud
%matplotlib inline

In [3]:
import os

### READING ANIMELIST.CSV

In [5]:
INPUT_DIR=os.path.join("..","artifacts","raw")

In [6]:
rating_df=pd.read_csv(INPUT_DIR+"/animelist.csv",low_memory=True,usecols=['user_id','anime_id','rating'])

In [7]:
rating_df.head()

Unnamed: 0,user_id,anime_id,rating
0,0,67,9
1,0,6702,7
2,0,242,10
3,0,4898,0
4,0,21,10


In [8]:
len(rating_df)

5000000

### DATA PROCESSING

In [9]:
n_ratings=rating_df['user_id'].value_counts()
rating_df=rating_df[rating_df['user_id'].isin(n_ratings[n_ratings>=400].index)]

In [10]:
len(rating_df)

3246641

In [11]:
min_rating=min(rating_df['rating'])

In [12]:
max_rating=max(rating_df['rating'])

In [13]:
avg_rating=np.mean(rating_df['rating'])

In [14]:
rating_df["rating"]=rating_df["rating"].apply(lambda x: (x-min_rating)/(max_rating-min_rating)).values.astype(np.float64)

In [15]:
rating_df

Unnamed: 0,user_id,anime_id,rating
213,2,24833,0.0
214,2,235,1.0
215,2,36721,0.0
216,2,40956,0.0
217,2,31933,0.0
...,...,...,...
4999916,16507,8985,0.0
4999917,16507,5454,0.0
4999918,16507,15911,0.0
4999919,16507,878,0.0


In [16]:
rating_df.duplicated().sum()

np.int64(0)

In [17]:
rating_df.isna().sum()

user_id     0
anime_id    0
rating      0
dtype: int64

In [18]:
user_ids=rating_df["user_id"].unique().tolist()
user2user_encoded={x:i for i,x in enumerate(user_ids)}
user2user_decoded={i:x for i,x in enumerate(user_ids)}
rating_df["user"]=rating_df["user_id"].map(user2user_encoded)


In [19]:
n_users=len(user2user_encoded)

In [20]:
anime_ids=rating_df["anime_id"].unique().tolist()
anime2anime_encoded={x:i for i,x in enumerate(anime_ids)}
anime2anime_decoded={i:x for i,x in enumerate(anime_ids)}
rating_df["anime"]=rating_df["anime_id"].map(anime2anime_encoded)

In [21]:
n_anime=len(anime2anime_encoded)

In [22]:
rating_df.head()

Unnamed: 0,user_id,anime_id,rating,user,anime
213,2,24833,0.0,0,0
214,2,235,1.0,0,1
215,2,36721,0.0,0,2
216,2,40956,0.0,0,3
217,2,31933,0.0,0,4


In [23]:
 
rating_df=rating_df.sample(frac=1,random_state=42).reset_index(drop=True)
rating_df.head(5)
 

Unnamed: 0,user_id,anime_id,rating,user,anime
0,13338,30,0.7,3405,688
1,15967,15583,0.6,4060,1518
2,4967,39586,0.0,1213,4963
3,4055,8039,0.0,997,4097
4,6170,30205,0.0,1552,1690


In [24]:
X=rating_df[["user","anime"]].values
y=rating_df["rating"].values

In [25]:
test_size=1000
train_indices=rating_df.shape[0]-test_size


In [26]:
X_train,X_test,y_train,y_test=(
    X[:train_indices],
    X[train_indices:],
    y[:train_indices],
    y[train_indices:]
)

In [27]:
len(X_train),len(X_test)

(3245641, 1000)

In [28]:
type(X_train)

numpy.ndarray

In [29]:
X_train_array=[X_train[:,0],X_train[:,1]]
X_test_array=[X_test[:,0],X_test[:,1]]

In [31]:
X_train_array

[array([3405, 4060, 1213, ...,  560, 3815, 2122], shape=(3245641,)),
 array([ 688, 1518, 4963, ..., 1461, 1512, 1153], shape=(3245641,))]

### MODEL ARCHITECTURE

In [32]:
def RecommenderNet():
    embedding_size=128
    
    user=Input(name="user",shape=[1])
    user_embedding=Embedding(name="user_embedding",input_dim=n_users,output_dim=embedding_size)(user)
    
    anime=Input(name="anime",shape=[1])
    anime_embedding=Embedding(name="anime_embedding",input_dim=n_anime,output_dim=embedding_size)(anime)
    
    x=Dot(name="dot_product",normalize=True,axes=2)([user_embedding,anime_embedding])  ## Similarity Score
    x=Flatten()(x)
    
    x=Dense(1,kernel_initializer="he_normal")(x)
    x=BatchNormalization()(x)
    x=Activation("relu")(x)
    
    model=Model(inputs=[user,anime],outputs=x)
    model.compile(
        loss="binary_crossentropy",
        optimizer='Adam',
        metrics=["mae","mse"]
    )
    return model
    

In [33]:
model=RecommenderNet()
model.summary()

In [34]:
start_lr=0.00001
min_lr=0.00001
max_lr=0.00005
batch_size=10000

ramup_epochs=5
sustain_epochs=0
exp_decay=0.8

def lrfn(epoch):
    if epoch<ramup_epochs:
        return (max_lr-start_lr)/ramup_epochs*epoch +start_lr
    elif epoch<ramup_epochs+sustain_epochs:
        return max_lr
    else:
        return (max_lr-min_lr)*exp_decay**(epoch-ramup_epochs-sustain_epochs)+min_lr
        

In [35]:
lr_callback=LearningRateScheduler(lrfn,verbose=True)
checkpoint_filepath='./weights_new.weights.h5'

model_checkpoint_callback=ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True)
early_stopping=EarlyStopping(
    patience=3,
    monitor='val_loss',
    mode='min',
    restore_best_weights=True)

In [36]:
my_callbacks=[model_checkpoint_callback,lr_callback,early_stopping]

In [37]:
history=model.fit(
    x=X_train_array,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    validation_data=(X_test_array,y_test),
    verbose=1,
    callbacks=my_callbacks
)


Epoch 1: LearningRateScheduler setting learning rate to 1e-05.
Epoch 1/20
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 72ms/step - loss: 3.7955 - mae: 0.4000 - mse: 0.2637 - val_loss: 3.9906 - val_mae: 0.3976 - val_mse: 0.2897 - learning_rate: 1.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 1.8000000000000004e-05.
Epoch 2/20
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 80ms/step - loss: 3.7670 - mae: 0.3994 - mse: 0.2630 - val_loss: 3.8118 - val_mae: 0.3938 - val_mse: 0.2631 - learning_rate: 1.8000e-05

Epoch 3: LearningRateScheduler setting learning rate to 2.6000000000000002e-05.
Epoch 3/20
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 71ms/step - loss: 3.7386 - mae: 0.3988 - mse: 0.2620 - val_loss: 3.7774 - val_mae: 0.3949 - val_mse: 0.2613 - learning_rate: 2.6000e-05

Epoch 4: LearningRateScheduler setting learning rate to 3.4000000000000007e-05.
Epoch 4/20
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━

In [34]:
def extract_weights(name,model):
    weight_layer=model.get_layer(name)
    weights=weight_layer.get_weights()[0]
    weights=weights/np.linalg.norm(weights,axis=1).reshape(-1,1)
    return weights

In [35]:
anime_weights=extract_weights("anime_embedding",model)

In [36]:
anime_weights.shape

(17149, 128)

In [37]:
user_weights=extract_weights("user_embedding",model)

### READING ANIME.CSV

In [38]:
df=pd.read_csv(INPUT_DIR+"/anime.csv",low_memory=True)
df.head(5)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


In [8]:
df=df.replace("Unknown",np.nan)

In [39]:
df.columns

Index(['MAL_ID', 'Name', 'Score', 'Genres', 'English name', 'Japanese name',
       'Type', 'Episodes', 'Aired', 'Premiered', 'Producers', 'Licensors',
       'Studios', 'Source', 'Duration', 'Rating', 'Ranked', 'Popularity',
       'Members', 'Favorites', 'Watching', 'Completed', 'On-Hold', 'Dropped',
       'Plan to Watch', 'Score-10', 'Score-9', 'Score-8', 'Score-7', 'Score-6',
       'Score-5', 'Score-4', 'Score-3', 'Score-2', 'Score-1'],
      dtype='object')

In [40]:
df["anime_id"]=df["MAL_ID"]
df["eng_version"]=df["English name"]

In [41]:
def getAnimeName(anime_id):
    try:
        name=df[df.anime_id==anime_id].eng_version.values[0]
        if name is np.nan:
            name=df[df.anime_id==anime_id].Name.values[0]
    except:
        print("Error: Anime not found")
    return name

In [42]:
df["eng_version"]=df.anime_id.apply(getAnimeName)
df.head(5)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1,anime_id,eng_version
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0,1,Cowboy Bebop
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0,5,Cowboy Bebop:The Movie
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0,6,Trigun
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0,7,Witch Hunter Robin
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0,8,Beet the Vandel Buster


In [43]:
def getAnimeFrame(anime,df):
    if isinstance(anime,int):
        return df[df.anime_id==anime]
    if isinstance(anime,str):
        return df[df.eng_version==anime]
    return None

In [44]:
getAnimeFrame(40028,df)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1,anime_id,eng_version
15926,40028,Shingeki no Kyojin: The Final Season,9.17,"Action, Military, Mystery, Super Power, Drama,...",Attack on Titan Final Season,進撃の巨人 The Final Season,TV,16,"Dec 7, 2020 to ?",Winter 2021,...,26016.0,8793.0,2674.0,1336.0,588.0,382.0,514.0,11061.0,40028,Attack on Titan Final Season


### ANIME_WITH_SYNOPSIS.CSV

In [45]:
cols=["MAL_ID","Name","Genres","sypnopsis"]


In [14]:
INPUT_DIR

'..\\artifacts\\raw'

In [46]:
synopsis_df=pd.read_csv(INPUT_DIR+"/anime_with_synopsis.csv",low_memory=True,usecols=cols)

In [47]:
synopsis_df.head(5)

Unnamed: 0,MAL_ID,Name,Genres,sypnopsis
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."
2,6,Trigun,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...
4,8,Bouken Ou Beet,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...


In [71]:
def getSynopsis(anime,df):
    if isinstance(anime,int):
        return synopsis_df[synopsis_df.MAL_ID==anime].sypnopsis.values[0]
    if isinstance(anime,str):
        return synopsis_df[synopsis_df.Name==anime].sypnopsis.values[0]
    return None

In [17]:
getSynopsis(40028,synopsis_df)

"Gabi Braun and Falco Grice have been training their entire lives to inherit one of the seven titans under Marley's control and aid their nation in eradicating the Eldians on Paradis. However, just as all seems well for the two cadets, their peace is suddenly shaken by the arrival of Eren Yeager and the remaining members of the Survey Corps. Having finally reached the Yeager family basement and learned about the dark history surrounding the titans, the Survey Corps has at long last found the answer they so desperately fought to uncover. With the truth now in their hands, the group set out for the world beyond the walls. In Shingeki no Kyojin: The Final Season , two utterly different worlds collide as each party pursues its own agenda in the long-awaited conclusion to Paradis' fight for freedom."

### CONTENT BASED RECOMMENDAION

In [48]:
pd.set_option('display.max_colwidth', None)

In [147]:
def find_similar_animes(name,anime_weights,anime2anime_encoded,anime2anime_decoded,df,n=10,return_dist=False,neg=False):
    try:
        index=getAnimeFrame(name,df).anime_id.values[0]
        encoded_index=anime2anime_encoded[index]
        
        weights=anime_weights
        
        dists=np.dot(weights,weights[encoded_index])
        sorted_indices=np.argsort(dists)
        
        n=n+1
        if neg:
            nearest_indices=sorted_indices[:n]
        else:
            nearest_indices=sorted_indices[-n:]
           
        if return_dist:
            return dists,nearest_indices
        Similarity_list=[]
        for close in nearest_indices:
            decoded_id=anime2anime_decoded.get(close)
            anime_frame=getAnimeFrame(decoded_id,df)
            anime_name=anime_frame.eng_version.values[0]
            genre=anime_frame.Genres.values[0]
            similarity_score=dists[close]
            Similarity_list.append({
                "anime_id":decoded_id,
                "name":anime_name,
                "Genres":genre,
                "Similarity_Score":similarity_score,
            })
            
        Frame=pd.DataFrame(Similarity_list).sort_values(by="Similarity_Score",ascending=False).reset_index(drop=True)
        return Frame[Frame.anime_id!=index].drop(['anime_id'],axis=1)
    except:
        print("Error: Anime not found")



In [148]:
find_similar_animes(
    "Steins;Gate",
    anime_weights,
    anime2anime_encoded,
    anime2anime_decoded,
    df
)

Unnamed: 0,name,Genres,Similarity_Score
1,Barakamon,"Comedy, Slice of Life",0.37626
2,Unknown,Comedy,0.333639
3,Unknown,"Comedy, Drama, Seinen, Slice of Life, Supernatural",0.305977
4,Unknown,"Slice of Life, Music, School",0.291641
5,Unknown,"Music, Kids",0.286311
6,Dream,"Adventure, Music, Kids, Fantasy",0.282525
7,Shibuya Vernacular,Dementia,0.281282
8,Unknown,"Comedy, Slice of Life",0.279123
9,Unknown,"Action, Military, Sci-Fi, Space, Drama",0.278511
10,Galaxy Express 999:Eternal Traveller Emeraldas,"Sci-Fi, Space",0.273832


### USER BASED RECOMMENDATION

In [114]:
def find_similar_users(item_input,user_weights, user2user_encoded, user2user_decoded, n=10, return_dist=False,neg=False):
    try:
        index=item_input
        encoded_index=user2user_encoded.get(index)
        weights=user_weights
        dists=np.dot(weights,weights[encoded_index])
        sorted_dists=np.argsort(dists)
        
        n=n+1
        if neg:
            closest=sorted_dists[:n]
        else:
            closest=sorted_dists[-n:]
        if return_dist:
            return dists,closest
        SimilarityArr=[]
        
        for close in closest:
            similarity=dists[close]
            if isinstance(item_input,int):
                
                decoded_id=user2user_decoded.get(close)
                SimilarityArr.append({
                    "similarity_users":decoded_id,
                    "similarity":similarity
                })
        similar_users=pd.DataFrame(SimilarityArr).sort_values(by="similarity",ascending=False)
        similar_users=similar_users[similar_users.similarity_users!=item_input]
        return similar_users
    except:
        print("Error occurred")
            
            

In [132]:
find_similar_users(15597,user_weights,user2user_encoded,user2user_decoded)

Unnamed: 0,similarity_users,similarity
9,8947,0.33282
8,8168,0.330667
7,730,0.330295
6,4827,0.326668
5,11852,0.284596
4,10141,0.272315
3,936,0.269777
2,2493,0.265819
1,9180,0.264271
0,3954,0.262777


In [52]:
from collections import defaultdict

In [53]:
def showWordCloud(all_genres):
    genres_cloud=WordCloud(width=700,height=400,background_color='white',colormap='gnuplot').generate_from_frequencies(all_genres)
    plt.figure(figsize=(10,8))
    plt.imshow(genres_cloud,interpolation='bilinear')
    plt.axis('off')
    plt.show()
    

In [54]:
def getFavGenre(frame,plot=False):
    frame.dropna(inplace=False)
    all_genres=defaultdict(int)
    
    genres_list=[]
    for genres in frame["Genres"]:
        if isinstance(genres,str):
            for genre in genres.split(","):
                genres_list.append(genre)
                all_genres[genre.strip()]+=1
    if plot:
        showWordCloud(all_genres)
    return genres_list
            

In [55]:
x=getAnimeFrame(1,df)

In [56]:
rating_df.head(5)

Unnamed: 0,user_id,anime_id,rating,user,anime
0,13338,30,0.7,3405,688
1,15967,15583,0.6,4060,1518
2,4967,39586,0.0,1213,4963
3,4055,8039,0.0,997,4097
4,6170,30205,0.0,1552,1690


In [57]:
def get_user_prferences(user_id,rating_df,df,plot=False):
    animes_watched_by_user=rating_df[rating_df.user_id==user_id]
    user_rating_percentile=np.percentile(animes_watched_by_user.rating,75)
    animes_watched_by_user=animes_watched_by_user[animes_watched_by_user.rating>=user_rating_percentile]
    top_animes_by_user=(
        animes_watched_by_user.sort_values(by='rating',ascending=False).anime_id.values
    )
    anime_df_rows=df[df.anime_id.isin(top_animes_by_user)]
    anime_df_rows=anime_df_rows[['eng_version','Genres']]
    return anime_df_rows
    
    

In [58]:
get_user_prferences(11880,rating_df,df)

Unnamed: 0,eng_version,Genres
0,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space"
8,Unknown,"Action, Cars, Sports, Drama, Seinen"
16,Texhnolyze,"Action, Sci-Fi, Psychological, Drama"
18,Yakitate!! Japan,"Comedy, Shounen"
20,Neon Genesis Evangelion,"Action, Sci-Fi, Dementia, Psychological, Drama, Mecha"
...,...,...
11685,Mobile Suit Gundam:Iron-Blooded Orphans 2nd Season,"Action, Drama, Mecha, Sci-Fi, Space"
11914,My Hero Academia 2,"Action, Comedy, Super Power, School, Shounen"
12242,Gintama Season 5,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen"
12430,Blood Blockade Battlefront & Beyond,"Action, Comedy, Fantasy, Shounen, Super Power, Supernatural, Vampire"


In [80]:
similar_users=find_similar_users(11880,user_weights,user2user_encoded,user2user_decoded)
similar_users

Unnamed: 0,similarity_users,similarity
9,14295,0.32338
8,13187,0.283447
7,4157,0.279396
6,1710,0.276546
5,4059,0.275582
4,6902,0.2691
3,9660,0.248417
2,11319,0.248196
1,3268,0.244759
0,8727,0.237962


In [None]:
def get_user_recommendations(similar_users,user_pref,df,rating_df,synopsis_df,n=10):
    recommended_animes=[]
    anime_list=[]
    
    for user_id in similar_users.similarity_users.values:
        pref_list=get_user_prferences(int(user_id),rating_df,df)
        pref_list=pref_list[~pref_list.eng_version.isin(user_pref.eng_version.values)]
        
        if not pref_list.empty:
            anime_list.append(pref_list.eng_version.values)
    if anime_list:
        anime_list=pd.DataFrame(anime_list)        
        sorted_list=pd.DataFrame(pd.Series(anime_list.values.ravel()).value_counts()).head(n)
            
    for _,anime_name in enumerate(sorted_list.index):
         
        n_user_pref=sorted_list[sorted_list.index==anime_name].values[0][0]
        
        if isinstance(anime_name,str):
            frame=getAnimeFrame(anime_name,df)
            anime_id=frame.anime_id.values[0]
            genre=frame.Genres.values[0]
            synopsis=getSynopsis(int(anime_id),synopsis_df)
            
            
            recommended_animes.append({
                "n":n_user_pref,
                "anime_name":anime_name,
                "Genres":genre,
                "Synopsis":synopsis
            })
    return pd.DataFrame(recommended_animes).head(n)    
            

In [61]:
user_pref=get_user_prferences(11880,rating_df,df,plot=True)

In [98]:
rating_df

Unnamed: 0,user_id,anime_id,rating,user,anime
0,13338,30,0.7,3405,688
1,15967,15583,0.6,4060,1518
2,4967,39586,0.0,1213,4963
3,4055,8039,0.0,997,4097
4,6170,30205,0.0,1552,1690
...,...,...,...,...,...
3246636,8941,14075,1.0,2227,1126
3246637,12017,38101,0.7,3044,1597
3246638,11395,2782,0.0,2881,5028
3246639,13957,9969,0.0,3569,175


In [62]:
user_pref

Unnamed: 0,eng_version,Genres
0,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space"
8,Unknown,"Action, Cars, Sports, Drama, Seinen"
16,Texhnolyze,"Action, Sci-Fi, Psychological, Drama"
18,Yakitate!! Japan,"Comedy, Shounen"
20,Neon Genesis Evangelion,"Action, Sci-Fi, Dementia, Psychological, Drama, Mecha"
...,...,...
11685,Mobile Suit Gundam:Iron-Blooded Orphans 2nd Season,"Action, Drama, Mecha, Sci-Fi, Space"
11914,My Hero Academia 2,"Action, Comedy, Super Power, School, Shounen"
12242,Gintama Season 5,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen"
12430,Blood Blockade Battlefront & Beyond,"Action, Comedy, Fantasy, Shounen, Super Power, Supernatural, Vampire"


In [127]:
get_user_recommendations(similar_users,user_pref,df,rating_df,synopsis_df)

<class 'list'>


Unnamed: 0,n,anime_name,Genres,Synopsis
0,9,Attack on Titan,"Action, Military, Mystery, Super Power, Drama, Fantasy, Shounen","Centuries ago, mankind was slaughtered to near extinction by monstrous humanoid creatures called titans, forcing humans to hide in fear behind enormous concentric walls. What makes these giants truly terrifying is that their taste for human flesh is not born out of hunger but what appears to be out of pleasure. To ensure their survival, the remnants of humanity began living within defensive barriers, resulting in one hundred years without a single titan encounter. However, that fragile calm is soon shattered when a colossal titan manages to breach the supposedly impregnable outer wall, reigniting the fight for survival against the man-eating abominations. After witnessing a horrific personal loss at the hands of the invading creatures, Eren Yeager dedicates his life to their eradication by enlisting into the Survey Corps, an elite military unit that combats the merciless humanoids outside the protection of the walls. Based on Hajime Isayama's award-winning manga, Shingeki no Kyojin follows Eren, along with his adopted sister Mikasa Ackerman and his childhood friend Armin Arlert, as they join the brutal war against the titans and race to discover a way of defeating them before the last walls are breached."
1,8,Steins;Gate,"Thriller, Sci-Fi","The self-proclaimed mad scientist Rintarou Okabe rents out a room in a rickety old building in Akihabara, where he indulges himself in his hobby of inventing prospective ""future gadgets"" with fellow lab members: Mayuri Shiina, his air-headed childhood friend, and Hashida Itaru, a perverted hacker nicknamed ""Daru."" The three pass the time by tinkering with their most promising contraption yet, a machine dubbed the ""Phone Microwave,"" which performs the strange function of morphing bananas into piles of green gel. Though miraculous in itself, the phenomenon doesn't provide anything concrete in Okabe's search for a scientific breakthrough; that is, until the lab members are spurred into action by a string of mysterious happenings before stumbling upon an unexpected success—the Phone Microwave can send emails to the past, altering the flow of history. Adapted from the critically acclaimed visual novel by 5pb. and Nitroplus, Steins;Gate takes Okabe through the depths of scientific theory and practicality. Forced across the diverging threads of past and present, Okabe must shoulder the burdens that come with holding the key to the realm of time."
2,7,The Promised Neverland,"Sci-Fi, Mystery, Horror, Psychological, Thriller, Shounen","Surrounded by a forest and a gated entrance, the Grace Field House is inhabited by orphans happily living together as one big family, looked after by their ""Mama,"" Isabella. Although they are required to take tests daily, the children are free to spend their time as they see fit, usually playing outside, as long as they do not venture too far from the orphanage—a rule they are expected to follow no matter what. However, all good times must come to an end, as every few months, a child is adopted and sent to live with their new family, never to be heard from again. However, the three oldest siblings have their suspicions about what is actually happening at the orphanage, and they are about to discover the cruel fate that awaits the children living at Grace Field, including the twisted nature of their beloved Mama."
3,7,Mob Psycho 100,"Action, Slice of Life, Comedy, Supernatural","Eighth-grader Shigeo ""Mob"" Kageyama has tapped into his inner wellspring of psychic prowess at a young age. But the power quickly proves to be a liability when he realizes the potential danger in his skills. Choosing to suppress his power, Mob's only present use for his ability is to impress his longtime crush, Tsubomi, who soon grows bored of the same tricks. In order to effectuate control on his skills, Mob enlists himself under the wing of Arataka Reigen, a con artist claiming to be a psychic, who exploits Mob's powers for pocket change. Now, exorcising evil spirits on command has become a part of Mob's daily, monotonous life. However, the psychic energy he exerts is barely the tip of the iceberg; if his vast potential and unrestrained emotions run berserk, a cataclysmic event that would render him completely unrecognizable will be triggered. The progression toward Mob's explosion is rising and attempting to stop it is futile."
4,7,The Seven Deadly Sins,"Action, Adventure, Ecchi, Fantasy, Magic, Shounen, Supernatural","In a world similar to the European Middle Ages, the feared yet revered Holy Knights of Britannia use immensely powerful magic to protect the region of Britannia and its kingdoms. However, a small subset of the Knights supposedly betrayed their homeland and turned their blades against their comrades in an attempt to overthrow the ruler of Liones. They were defeated by the Holy Knights, but rumors continued to persist that these legendary knights, called the ""Seven Deadly Sins,"" were still alive. Ten years later, the Holy Knights themselves staged a coup d’état, and thus became the new, tyrannical rulers of the Kingdom of Liones. Based on the best-selling manga series of the same name, Nanatsu no Taizai follows the adventures of Elizabeth, the third princess of the Kingdom of Liones, and her search for the Seven Deadly Sins. With their help, she endeavors to not only take back her kingdom from the Holy Knights, but to also seek justice in an unjust world."
5,7,Mob Psycho 100 II,"Action, Slice of Life, Comedy, Supernatural","Shigeo ""Mob"" Kageyama is now maturing and understanding his role as a supernatural psychic that has the power to drastically affect the livelihood of others. He and his mentor Reigen Arataka continue to deal with supernatural requests from clients, whether it be exorcizing evil spirits or tackling urban legends that haunt the citizens. While the workflow remains the same, Mob isn't just blindly following Reigen around anymore. With all his experiences as a ridiculously strong psychic, Mob's supernatural adventures now have more weight to them. Things take on a serious and darker tone as the dangers Mob and Reigen face are much more tangible and unsettling than ever before."
6,7,Code Geass:Lelouch of the Rebellion,"Action, Military, Sci-Fi, Super Power, Drama, Mecha, School","In the year 2010, the Holy Empire of Britannia is establishing itself as a dominant military nation, starting with the conquest of Japan. Renamed to Area 11 after its swift defeat, Japan has seen significant resistance against these tyrants in an attempt to regain independence. Lelouch Lamperouge, a Britannian student, unfortunately finds himself caught in a crossfire between the Britannian and the Area 11 rebel armed forces. He is able to escape, however, thanks to the timely appearance of a mysterious girl named C.C., who bestows upon him Geass, the ""Power of Kings."" Realizing the vast potential of his newfound ""power of absolute obedience,"" Lelouch embarks upon a perilous journey as the masked vigilante known as Zero, leading a merciless onslaught against Britannia in order to get revenge once and for all."
7,7,"No Game, No Life","Game, Adventure, Comedy, Supernatural, Ecchi, Fantasy","No Game No Life is a surreal comedy that follows Sora and Shiro, shut-in NEET siblings and the online gamer duo behind the legendary username ""Blank."" They view the real world as just another lousy game; however, a strange e-mail challenging them to a chess match changes everything—the brother and sister are plunged into an otherworldly realm where they meet Tet, the God of Games. The mysterious god welcomes Sora and Shiro to Disboard, a world where all forms of conflict—from petty squabbles to the fate of whole countries—are settled not through war, but by way of high-stake games. This system works thanks to a fundamental rule wherein each party must wager something they deem to be of equal value to the other party's wager. In this strange land where the very idea of humanity is reduced to child's play, the indifferent genius gamer duo of Sora and Shiro have finally found a real reason to keep playing games: to unite the sixteen races of Disboard, defeat Tet, and become the gods of this new, gaming-is-everything world."
8,7,Code Geass:Lelouch of the Rebellion R2,"Action, Military, Sci-Fi, Super Power, Drama, Mecha","One year has passed since the Black Rebellion, a failed uprising against the Holy Britannian Empire led by the masked vigilante Zero, who is now missing. At a loss without their revolutionary leader, Area 11's resistance group—the Black Knights—find themselves too powerless to combat the brutality inflicted upon the Elevens by Britannia, which has increased significantly in order to crush any hope of a future revolt. Lelouch Lamperouge, having lost all memory of his double life, is living peacefully alongside his friends as a high school student at Ashford Academy. His former partner C.C., unable to accept this turn of events, takes it upon herself to remind him of his past purpose, hoping that the mastermind Zero will rise once again to finish what he started, in this thrilling conclusion to the series."
9,7,Death Note,"Mystery, Police, Psychological, Supernatural, Thriller, Shounen","shinigami, as a god of death, can kill any person—provided they see their victim's face and write their victim's name in a notebook called a Death Note. One day, Ryuk, bored by the shinigami lifestyle and interested in seeing how a human would use a Death Note, drops one into the human realm. High school student and prodigy Light Yagami stumbles upon the Death Note and—since he deplores the state of the world—tests the deadly notebook by writing a criminal's name in it. When the criminal dies immediately following his experiment with the Death Note, Light is greatly surprised and quickly recognizes how devastating the power that has fallen into his hands could be. With this divine capability, Light decides to extinguish all criminals in order to build a new world where crime does not exist and people worship him as a god. Police, however, quickly discover that a serial killer is targeting criminals and, consequently, try to apprehend the culprit. To do this, the Japanese investigators count on the assistance of the best detective in the world: a young and eccentric man known only by the name of L."


### Hybrid Recommender SYtem

In [149]:
def hybrid_recommendation(user_id , user_weight=0.5, content_weight =0.5):

    ## User Recommndation

    similar_users =find_similar_users(user_id,user_weights,user2user_encoded,user2user_decoded)
    user_pref = get_user_prferences(user_id , rating_df, df)
    user_recommended_animes =get_user_recommendations(similar_users,user_pref,df,rating_df,synopsis_df)
    

    user_recommended_anime_list = user_recommended_animes["anime_name"].tolist()

    #### Content recommendation
    content_recommended_animes = []

    for anime in user_recommended_anime_list:
        similar_animes = find_similar_animes(anime, anime_weights, anime2anime_encoded, anime2anime_decoded, df)

        if similar_animes is not None and not similar_animes.empty:
            content_recommended_animes.extend(similar_animes["name"].tolist())
        else:
            print(f"No similar anime found {anime}")
    
    combined_scores = {}

    for anime in user_recommended_anime_list:
        combined_scores[anime] = combined_scores.get(anime,0) + user_weight

    for anime in content_recommended_animes:
        combined_scores[anime] = combined_scores.get(anime,0) + content_weight  

    sorted_animes = sorted(combined_scores.items() , key=lambda x:x[1] , reverse=True)

    return [anime for anime , score in sorted_animes[:10]] 
    

In [150]:
hybrid_recommendation(11880)

<class 'list'>


['Unknown',
 'Attack on Titan',
 'A Silent Voice',
 'Mob Psycho 100',
 'Steins;Gate',
 'Your Name.',
 'Your Lie in April',
 'Re:ZERO -Starting Life in Another World-',
 'The Promised Neverland',
 'Aldnoah.Zero']

In [1]:
import joblib

In [2]:
from config.paths_config import *

In [4]:
USER_WEIGHTS_PATH


'artifacts/weights\\anime_weights.pkl'

In [5]:
X_TEST_ARRAY

'artifacts/processed\\X_test_array.pkl'

In [7]:
x_train_array=joblib.load(USER_WEIGHTS_PATH)

FileNotFoundError: [Errno 2] No such file or directory: 'artifacts/weights\\anime_weights.pkl'