In [28]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("hernan4444/anime-recommendation-database-2020")



In [9]:
import pandas as pd
import numpy as np
import scipy.sparse

from tqdm.autonotebook import tqdm as notebook_tqdm

from rectools import Columns

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.compose import make_column_transformer, ColumnTransformer
from sklearn.pipeline import Pipeline


def sparsing(features, column):
    cv = CountVectorizer()
    features = features.join(pd.DataFrame(cv.fit_transform(features.pop('Genres').fillna('')).A, index=features.index, columns=cv.get_feature_names_out()))
    features = pd.get_dummies(features)
    features_frames = []
    for feature in features.columns[1:]:
        feature_frame = features.reindex(columns=[column, feature])
        feature_frame.columns = ["id", "value"]
        feature_frame["feature"] = feature
        features_frames.append(feature_frame)
    return pd.concat(features_frames)

from sklearn.preprocessing import LabelEncoder

anime = pd.read_csv(path + '/anime.csv')
ratings = pd.read_csv(path + '/rating_complete.csv')
anime = anime[['MAL_ID', 'Score', 'Genres', 'Type', 'Episodes']]

anime = sparsing(anime, 'MAL_ID')
ratings = ratings[ratings['rating']!=-1]
ratings.columns = [Columns.User, Columns.Item,  Columns.Weight]
ratings['datetime'] = ratings.groupby(Columns.User).cumcount()
ratings.drop_duplicates(subset=[Columns.User,  Columns.Item], keep='last', inplace=True)

In [29]:
from rectools.model_selection.last_n_split import LastNSplitter
from rectools.dataset.interactions import Interactions
from lightfm import *
from rectools.models.lightfm import LightFMWrapperModel
from rectools.dataset import Dataset
NUM_OF_SAMPELS = 80

splitter  = LastNSplitter(NUM_OF_SAMPELS, filter_cold_users=True, filter_cold_items=True, filter_already_seen=True)

train_ids, test_ids = [], []

for train_ids, test_ids, _ in splitter.split(Interactions(ratings)):
    train_ids, test_ids = train_ids, test_ids


train = ratings.iloc[train_ids, :]
test = ratings.iloc[test_ids, :]


dataset = Dataset.construct(
    interactions_df=train, 
)

model = LightFMWrapperModel(LightFM(loss='warp', no_components=10), epochs=100, num_threads=12, verbose=True)
model.fit(dataset)

# Make recommendations
recos = model.recommend(
    users=test.user_id.unique(),
    dataset=dataset,
    k=NUM_OF_SAMPELS,
    filter_viewed=True
)


Epoch: 100%|██████████| 100/100 [1:04:23<00:00, 38.64s/it]


{'hit@1': 0.6800423515823022,
 'hit@5': 0.9458145727944558,
 'hit@10': 0.9838026587382223,
 'hit@15': 0.9928879287295594,
 'hit@20': 0.9963530581906462,
 'hit@25': 0.9979358951049699,
 'hit@30': 0.9986471022320139,
 'hit@35': 0.9990909382586655,
 'hit@40': 0.9992994877169716,
 'hit@45': 0.99950268975327,
 'hit@50': 0.999641722725474,
 'hit@55': 0.9997112392115761,
 'hit@60': 0.9997540185876389,
 'hit@65': 0.9998181876517331,
 'hit@70': 0.9998395773397645,
 'hit@75': 0.9998556196057881,
 'hit@80': 0.9998663144498038,
 'ndcg@1': 0.6800423515823022,
 'ndcg@5': 0.6303624110402831,
 'ndcg@10': 0.5893689889781261,
 'ndcg@15': 0.5577354528002609,
 'ndcg@20': 0.5319391252470065,
 'ndcg@25': 0.5098978208378405,
 'ndcg@30': 0.4907992946045953,
 'ndcg@35': 0.4738298528284031,
 'ndcg@40': 0.45860981455213057,
 'ndcg@45': 0.4448076029674636,
 'ndcg@50': 0.43216594434238814,
 'ndcg@55': 0.4205501040364024,
 'ndcg@60': 0.40981773340526856,
 'ndcg@65': 0.3997933806875712,
 'ndcg@70': 0.390453272415976

In [32]:
from rectools.metrics import NDCG, MAP, HitRate, CoveredUsers
from rectools.metrics.scoring import calc_metrics

metrics = {}
for k in range (0, 11, 5):
    if k == 0: k = 1
    metrics = dict(metrics, **{
    f'ndcg@{k}': NDCG(k=k, log_base=3),
    f'MAP@{k}': MAP(k=k),
    f'hit@{k}': HitRate(k=k),
    f'novelty@{k}': NoveltyMetric(k=k),
    f'cover@{k}': CoveredUsers(k=k),
    })


calc_metrics(
    metrics,
    reco=recos,
    interactions=test,
    prev_interactions=train,
    catalog=train[Columns.Item].unique()
)

{'hit@1': 0.6800423515823022,
 'hit@5': 0.9458145727944558,
 'hit@10': 0.9838026587382223,
 'ndcg@1': 0.6800423515823022,
 'ndcg@5': 0.6303624110402831,
 'ndcg@10': 0.5893689889781261,
 'MAP@1': 0.008500533625334162,
 'MAP@5': 0.0331121876931132,
 'MAP@10': 0.05570239676991309,
 'novelty@1': 2.175297139185125,
 'novelty@5': 2.293256289548861,
 'novelty@10': 2.37917103079513,
 'cover@1': 1.0,
 'cover@5': 1.0,
 'cover@10': 1.0}

In [36]:
model.save('lightfm_model.h5')

26916268

In [30]:
! pip3 install tensorflow

Traceback (most recent call last):
  File "/Users/admin/Desktop/MAI/Third/.venv/bin/pip3", line 5, in <module>
    from pip._internal.cli.main import main
ModuleNotFoundError: No module named 'pip'


In [42]:
from keras.models import load_model
model2 = load_model('lightfm_model.h5')

ModuleNotFoundError: No module named 'tensorflow'