In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation,BatchNormalization,Input,Embedding,Dot,Dense,Flatten
from tensorflow.keras.callbacks import ModelCheckpoint,LearningRateScheduler,TensorBoard,EarlyStopping

from wordcloud import WordCloud
%matplotlib inline



In [2]:
import os

In [3]:
INPUT_DIR = os.path.join("..","artifacts","raw")

In [4]:
rating_df = pd.read_csv(INPUT_DIR+"/animelist.csv" , low_memory=True,usecols=["user_id","anime_id","rating"])

In [5]:
rating_df.head()

Unnamed: 0,user_id,anime_id,rating
0,0,67,9
1,0,6702,7
2,0,242,10
3,0,4898,0
4,0,21,10


In [6]:
len(rating_df)

5000000

#### DATA PROCESSING

In [7]:
n_ratings = rating_df["user_id"].value_counts()
rating_df = rating_df[rating_df["user_id"].isin(n_ratings[n_ratings>=400].index)].copy()

In [8]:
len(rating_df)

3246641

In [9]:
min_rating =min(rating_df["rating"])
min_rating

0

In [10]:
max_rating =max(rating_df["rating"])
max_rating

10

In [11]:
avg_rating = np.mean(rating_df['rating'])
avg_rating

np.float64(4.122732695114736)

#### min max scaling 

In [None]:
rating_df['rating'] = rating_df['rating'].apply(lambda x : (x-min_rating)/(max_rating-min_rating)).values.astype(np.float64)

In [None]:
rating_df.duplicated().sum()

np.int64(0)

In [None]:
rating_df.isnull().sum()

user_id     0
anime_id    0
rating      0
dtype: int64

In [None]:
user_ids = rating_df["user_id"].unique().tolist()
user2user_encoded = {x : i for i , x in enumerate(user_ids)}
user2user_decoded = {i : x for i , x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)


In [None]:
n_users = len(user2user_encoded)

In [None]:
n_users

4203

In [None]:
anime_ids = rating_df["anime_id"].unique().tolist()
anime2anime_encoded = {x : i for i , x in enumerate(anime_ids)}
anime2anime_decoded = {i : x for i , x in enumerate(anime_ids)}
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)

In [None]:
n_anime = len(user2user_encoded)

In [None]:
n_anime

4203

In [None]:
rating_df = rating_df.sample(frac=1,random_state=43).reset_index(drop=True)

In [None]:
rating_df.head(2)

Unnamed: 0,user_id,anime_id,rating,user,anime
0,457,18153,0.9,120,1377
1,4903,20507,0.7,1195,1216


In [None]:
X = rating_df[["user","anime"]].values
y = rating_df["rating"]


In [None]:
test_size = 1000
train_indices = rating_df.shape[0] - test_size

In [None]:
X_train,X_test , y_train,y_test = (
    X[:train_indices],
    X[train_indices:],
    y[:train_indices],
    y[train_indices:]
    )

In [None]:
len(X_train)

3245641

In [None]:
type(X_train)

numpy.ndarray

In [None]:
X_train_array = [X_train[:,0],X_train[:,1]]
X_test_array = [X_test[:,0],X_test[:,1]]

In [None]:
type(X_test_array)

list

In [None]:
type(X_test_array[0])

numpy.ndarray

#### MODEL ARCHITECTURE


In [None]:
def RecommenderNet():
    embedding_size = 128
    user = Input(name='user',shape=[1])
    user_embedding = Embedding(name = 'user_embedding',input_dim=n_users,output_dim = embedding_size)(user)
    anime = Input(name='anime',shape=[1])
    anime_embedding = Embedding(name='anime_embedding',input_dim=n_users,output_dim= embedding_size)(anime)

    x = Dot(name='dot_product',normalize=True,axes=2)([user_embedding,anime_embedding])

    x = Flatten()(x)

    x = Dense(1,kernel_initializer='he_normal')(x)

    x= BatchNormalization()(x)
    x = Activation('sigmoid')(x)

    model = Model(inputs=[user,anime],outputs=x)
    model.compile(loss='binary_crossentropy',metrics=['mae','mse'],optimizer='Adam')
    return model




In [None]:
model = RecommenderNet()

In [None]:
model.summary()

In [None]:
start_lr = 0.00001
min_lr = 0.0001
max_lr = 0.00005
batch_size = 10000

ramup_epochs = 5
sustain_epochs = 0
exp_decay = 0.8

def lrfn(epoch):
    if epoch<ramup_epochs:
        return (max_lr-start_lr)/ramup_epochs*epoch + start_lr
    elif epoch<ramup_epochs+sustain_epochs:
        return max_lr
    else:
        return (max_lr-min_lr) * exp_decay ** (epoch-ramup_epochs-sustain_epochs)+min_lr

In [None]:
lr_callback = LearningRateScheduler(lambda epoch:lrfn(epoch) , verbose=0)
checkpoint_filepath = './weights.weights.h5'

model_checkpoint = ModelCheckpoint(filepath=checkpoint_filepath,save_weights_only=True,monitor="val_loss",mode="min",save_best_only=True)

early_stopping = EarlyStopping(patience=3,monitor="val_loss",mode="min",restore_best_weights=True)

In [None]:
my_callbacks = [model_checkpoint,lr_callback,early_stopping]

In [None]:
history = model.fit(
    x=X_train_array,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    verbose=1,
    validation_data = (X_test_array,y_test),
    callbacks=my_callbacks
)

Epoch 1/20


InvalidArgumentError: Graph execution error:

Detected at node functional_1/anime_embedding_1/GatherV2 defined at (most recent call last):
  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\runpy.py", line 196, in _run_module_as_main

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\runpy.py", line 86, in _run_code

  File "C:\Users\MONISH\AppData\Roaming\Python\Python310\site-packages\ipykernel_launcher.py", line 16, in <module>

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "C:\Users\MONISH\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelapp.py", line 677, in start

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\tornado\platform\asyncio.py", line 211, in start

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\asyncio\base_events.py", line 603, in run_forever

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\asyncio\base_events.py", line 1909, in _run_once

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\asyncio\events.py", line 80, in _run

  File "C:\Users\MONISH\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 461, in dispatch_queue

  File "C:\Users\MONISH\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 450, in process_one

  File "C:\Users\MONISH\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 357, in dispatch_shell

  File "C:\Users\MONISH\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 652, in execute_request

  File "C:\Users\MONISH\AppData\Roaming\Python\Python310\site-packages\ipykernel\ipkernel.py", line 353, in do_execute

  File "C:\Users\MONISH\AppData\Roaming\Python\Python310\site-packages\ipykernel\zmqshell.py", line 532, in run_cell

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\IPython\core\interactiveshell.py", line 3077, in run_cell

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\IPython\core\interactiveshell.py", line 3132, in _run_cell

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\IPython\core\interactiveshell.py", line 3336, in run_cell_async

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\IPython\core\interactiveshell.py", line 3519, in run_ast_nodes

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\IPython\core\interactiveshell.py", line 3579, in run_code

  File "C:\Users\MONISH\AppData\Local\Temp\ipykernel_26320\2898952208.py", line 1, in <module>

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 377, in fit

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 220, in function

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 133, in multi_step_on_iterator

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 114, in one_step_on_data

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 58, in train_step

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\layers\layer.py", line 941, in __call__

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\ops\operation.py", line 59, in __call__

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\models\functional.py", line 183, in call

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\ops\function.py", line 206, in _run_through_graph

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\models\functional.py", line 644, in call

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\layers\layer.py", line 941, in __call__

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\ops\operation.py", line 59, in __call__

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\layers\core\embedding.py", line 150, in call

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\ops\numpy.py", line 5795, in take

  File "c:\Users\MONISH\anaconda3\envs\anime_recommender\lib\site-packages\keras\src\backend\tensorflow\numpy.py", line 2340, in take

indices[9898,0] = 8633 is not in [0, 4203)
	 [[{{node functional_1/anime_embedding_1/GatherV2}}]] [Op:__inference_multi_step_on_iterator_1946]

In [None]:
model.load_weights(checkpoint_filepath)

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'weights.weights.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
metrics = ["loss", "mae", "mse"]

# Create subplots
fig, axes = plt.subplots(len(metrics), 1, figsize=(8, len(metrics) * 4))

for i, metric in enumerate(metrics):
    ax = axes[i]
    ax.plot(history.history[metric][0:-2], marker="o", label=f"train {metric}")
    ax.plot(history.history[f"val_{metric}"][0:-2], marker="o", label=f"test {metric}")
    ax.set_title(f"Model {metric.capitalize()}")
    ax.set_ylabel(metric.capitalize())
    ax.set_xlabel("Epoch")
    ax.legend(loc="upper left")
    ax.grid(True)

plt.tight_layout()
plt.show()

NameError: name 'history' is not defined