In [1]:
from hybrid_filtering_pytorch.ncf_torch import Neural_Collaborative_filtering
from hybrid_filtering_pytorch.content_filtering_torch import Content_Based_filtering
from hybrid_filtering_pytorch.hybrid_recommender_torch import Hybrid_recommendation_system
import numpy as np
import pandas as pd
import pickle

In [2]:
# Size of the subset used throughout the notebook: both the user count and
# the item count are capped at the same value.
num_user = num_item = 300

In [3]:
# Restore the fitted scalers saved as pickle files: one for the item
# features, one for the rating labels.
with open("scalers/item_scaler.pickle", "rb") as item_scaler_file :
    xm_scaler = pickle.load(item_scaler_file)
with open("scalers/label_scaler.pickle", "rb") as label_scaler_file :
    labelScaler = pickle.load(label_scaler_file)

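Note: the scalers above are restored with `pickle`. Unpickling can execute arbitrary code and is sensitive to library versions, so only load scaler files from a trusted source. See https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations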


In [4]:
# Read the anime catalogue, keep every column from the third one onwards as
# the feature matrix, and scale it with the item scaler.
anime_table = pd.read_csv("my_anime_data_cleaned/anime_data.csv")
raw_item_features = anime_table.iloc[:, 2:].to_numpy()
movie_feat = xm_scaler.transform(raw_item_features)

In [8]:
# Keep only the first num_item rows of the scaled item features.
# NOTE(review): this cell has execution count 8 but is placed before cells 5-7.
# Run top-to-bottom, movie_feat is truncated *before* decaying_average() indexes
# it with anime_index values; assuming those are row positions in the full
# catalogue, any index >= num_item would then raise IndexError. Confirm the
# intended cell order.
movie_feat = movie_feat[:num_item]

In [5]:
# Load the user table and keep only the first num_user rows.
user_feat = pd.read_csv("my_anime_data_cleaned/user_features.csv").head(num_user)

In [6]:
def retrieve_anime_index(anime_ids:str, index=False) -> list:
    """Map a pipe-separated string of anime ids to their ``anime_index`` values.

    Parameters
    ----------
    anime_ids : str
        Anime ids joined by ``"|"``, e.g. ``"1|20|300|"``. A trailing
        separator is optional; empty tokens are ignored.
    index : bool, optional
        Unused. Kept so callers that already pass it keep working.

    Returns
    -------
    list
        The ``anime_index`` values of the catalogue rows whose ``anime_id``
        appears in ``anime_ids``. The order follows the catalogue rows, not
        the order of the ids in the input string.

    Raises
    ------
    ValueError
        If a non-empty token cannot be parsed as an integer.
    """
    # Skip empty tokens rather than blindly dropping the last element, so a
    # string without a trailing "|" does not silently lose its final id.
    ids = [int(token) for token in anime_ids.split("|") if token.strip()]

    if isinstance(movie_feat, pd.DataFrame) :
        catalogue = movie_feat
    else :
        # movie_feat is not a DataFrame here, so the id -> index mapping is
        # read from disk; only the two columns actually used are parsed.
        catalogue = pd.read_csv(
            "my_anime_data_cleaned/anime_data.csv",
            usecols=["anime_id", "anime_index"],
        )

    # NOTE(review): decaying_average() weights entries by position; the list
    # returned here is in catalogue order, not watch order — confirm that is
    # the intended ordering.
    matches = catalogue.loc[catalogue["anime_id"].isin(ids), "anime_index"]

    return list(matches)


def decaying_average(anime_indexes:list, decay_rate:float=0.95) -> np.ndarray :
    """Average the feature rows of the given items with geometric decay.

    The row at position ``k`` of ``anime_indexes`` is multiplied by
    ``decay_rate ** k`` and the weighted rows are averaged with a plain mean
    (divided by the number of rows, not by the sum of the weights).

    Parameters
    ----------
    anime_indexes : list
        Row indexes into the global ``movie_feat`` array.
    decay_rate : float, optional
        Base of the geometric weights. Defaults to ``0.95``.

    Returns
    -------
    np.ndarray
        A 1-D vector with ``movie_feat.shape[-1]`` entries. An empty
        ``anime_indexes`` yields a zero vector instead of NaNs.
    """
    num_series = len(anime_indexes)

    # The mean of zero rows would be NaN (with a RuntimeWarning) and would
    # poison every downstream computation, so return a neutral vector.
    if num_series == 0 :
        return np.zeros(movie_feat.shape[-1])

    decay_factor = (decay_rate ** np.arange(num_series)).reshape(-1, 1)

    # Multiply out-of-place: an in-place "*=" would write into movie_feat if
    # the indexing ever returned a view, and fails for integer feature arrays.
    weighted_features = movie_feat[anime_indexes] * decay_factor

    return np.mean(weighted_features, axis=0)

In [7]:
# Resolve every user's "watched" string into a list of catalogue indexes.
user_anime_indexes = [
    retrieve_anime_index(user_feat.loc[row, "watched"])
    for row in range(len(user_feat))
]

# user input vectors
# One row per user, as wide as the item feature matrix; rows are filled with
# the decayed average of the items that user watched.
user_vectors = np.zeros((num_user, movie_feat.shape[-1]))
for row, watched_indexes in enumerate(user_anime_indexes) :
    user_vectors[row] = decaying_average(watched_indexes)
# From here on user_feat is the numeric matrix, no longer the loaded table.
user_feat = user_vectors

In [9]:
# Load the rating matrix and keep the first num_item rows and the num_user
# columns that follow the leading column.
rating_table = pd.read_csv("my_anime_data_cleaned/rating_matrix_v2.csv")
rating_block = rating_table.iloc[:num_item, 1:num_user+1].to_numpy()
# Transpose, then flatten into a single column.
ratings = rating_block.T.reshape(-1, 1)

# 1 where the raw rating is non-negative, 0 elsewhere. This must be computed
# before the ratings are rescaled.
mask = np.where(ratings >= 0, 1, 0)
ratings = labelScaler.transform(ratings)

In [16]:
# Number of columns in the user feature matrix; passed twice to the constructor below.
dimension = user_feat.shape[-1]
lr = 0.0005
# NOTE(review): the third positional argument (64) is presumably the size of
# the learned embedding and lr the learning rate — confirm against
# Content_Based_filtering's signature.
content_model = Content_Based_filtering(dimension, dimension, 64, lr)

Device set to : NVIDIA GeForce RTX 3070 Laptop GPU


In [19]:
# Short 2-epoch run of the content model. mask is 1 where the raw rating was
# >= 0 and 0 elsewhere.
content_model.train(user_feat, movie_feat, ratings, mask, verbose=True, epochs=2)

for epoch  0  the loss :  tensor(4.0636e-05, device='cuda:0', grad_fn=<SumBackward0>)


tensor(4.2869, device='cuda:0', grad_fn=<SumBackward0>)

In [18]:
# NOTE(review): in file order this runs right after train(), but its execution
# count (18) is lower than the training cell's (19). If load_model() restores
# saved weights, a top-to-bottom run would replace the weights just trained —
# confirm the intended order.
content_model.load_model()

content model loaded


In [20]:
# Build the Neural_Collaborative_filtering model for num_user users and num_item items.
# NOTE(review): learning_rate here (0.005) is 10x the 0.0005 used for the
# content and hybrid models — confirm this is deliberate.
model = Neural_Collaborative_filtering(num_user, num_item, x_dim=64, learning_rate=0.005)

Device set to : NVIDIA GeForce RTX 3070 Laptop GPU


In [15]:
# Show the first 10 values of row 0 of the model's private _user_params array.
# NOTE(review): execution count (15) predates the cell that constructs `model`
# (20), so the saved output may belong to an earlier instance.
model._user_params[0, :10]

array([ 0.2929801 ,  0.40066066, -0.1954696 ,  0.67804885,  0.67204106,
        0.30358598, -0.336078  , -1.3843056 , -1.0710447 ,  0.72316784],
      dtype=float32)

In [22]:
# Train for 200 epochs on the scaled ratings; with verbose=True the saved log
# shows the loss every 20 iterations.
model.train(ratings, mask, verbose=True, epochs=200)

for iter :  0  loss is :  tensor(1704.1024, device='cuda:0', grad_fn=<AddBackward0>)
for iter :  20  loss is :  tensor(727.1298, device='cuda:0', grad_fn=<AddBackward0>)
for iter :  40  loss is :  tensor(384.3828, device='cuda:0', grad_fn=<AddBackward0>)
for iter :  60  loss is :  tensor(233.4179, device='cuda:0', grad_fn=<AddBackward0>)
for iter :  80  loss is :  tensor(152.7849, device='cuda:0', grad_fn=<AddBackward0>)
for iter :  100  loss is :  tensor(103.6625, device='cuda:0', grad_fn=<AddBackward0>)
for iter :  120  loss is :  tensor(71.6121, device='cuda:0', grad_fn=<AddBackward0>)
for iter :  140  loss is :  tensor(49.3498, device='cuda:0', grad_fn=<AddBackward0>)
for iter :  160  loss is :  tensor(33.7711, device='cuda:0', grad_fn=<AddBackward0>)
for iter :  180  loss is :  tensor(22.9011, device='cuda:0', grad_fn=<AddBackward0>)


tensor(15.7938, device='cuda:0', grad_fn=<AddBackward0>)

In [19]:
# Same slice of _user_params as inspected earlier, shown again for comparison.
# NOTE(review): the saved output is ~1e-21 everywhere (effectively zero). Check
# whether regularisation or the update rule is collapsing these parameters.
# The execution count (19) is also lower than the training cell's (22).
model._user_params[0, :10]

array([ 2.8286340e-21,  3.8038054e-21,  3.9902954e-21, -2.6991354e-20,
        2.4610221e-21,  2.3697592e-21,  2.1929091e-21,  2.3917525e-15,
       -1.1771360e-21, -2.4719141e-20], dtype=float32)

In [21]:
# NOTE(review): in file order this load comes before save_model(), while the
# execution counts show save (20) ran before load (21). Run top-to-bottom, this
# could replace the freshly trained weights before they are saved — confirm.
model.load_model()

ncf loaded


In [20]:
# Persist the NCF model; the saved output shows it prints "NCF saved".
model.save_model()

NCF saved


In [13]:
# NOTE(review): dimension and lr are re-declared here with the same values as
# in the content-model cell; this cell's execution count (13) is lower than
# that cell's (16).
dimension = user_feat.shape[-1]
lr = 0.0005
# NOTE(review): l_d=64 is presumably the latent dimension shared by the
# sub-models — confirm against Hybrid_recommendation_system.
hybrid_model = Hybrid_recommendation_system(num_user, num_item, dimension, dimension, lr=lr, l_d=64)

Device set to : NVIDIA GeForce RTX 3070 Laptop GPU
Device set to : NVIDIA GeForce RTX 3070 Laptop GPU
Device set to : NVIDIA GeForce RTX 3070 Laptop GPU


In [16]:
# Index arrays 0..num_user-1 and 0..num_item-1, passed to train() as a pair.
indexes = (np.arange(num_user), np.arange(num_item))
# NOTE(review): expand=True presumably pairs every user index with every item
# index — confirm in Hybrid_recommendation_system.train.
hybrid_model.train(user_feat, movie_feat, indexes, ratings, mask, expand=True, epochs=2, verbose=True)

for epoch  0  the loss :  tensor(1.9581e-08, device='cuda:0', grad_fn=<SumBackward0>)


tensor(0.4357, device='cuda:0', grad_fn=<SumBackward0>)

In [15]:
# Per the saved output, this loads both the NCF and the content sub-models.
# NOTE(review): placed after train() in the file but has a lower execution
# count (15 vs 16); a top-to-bottom run may discard the weights just trained.
hybrid_model.load_model()

ncf loaded
content model loaded
all models loaded
