In [2]:
!pip install rectools

Collecting rectools
  Downloading rectools-0.4.2-py3-none-any.whl (102 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.5/102.5 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Collecting implicit<0.8.0,>=0.7.1 (from rectools)
  Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl (8.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.0.1 (from rectools)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, implicit, rectools
Successfully installed implicit-0.7.2 rectools-0.4.2 typeguard-2.13.3


In [6]:
!git clone https://github.com/Evgeneugene/Movie-Rec-System.git

Cloning into 'Movie-Rec-System'...
remote: Enumerating objects: 53, done.[K
remote: Counting objects: 100% (53/53), done.[K
remote: Compressing objects: 100% (37/37), done.[K
remote: Total 53 (delta 14), reused 53 (delta 14), pack-reused 0[K
Receiving objects: 100% (53/53), 5.25 MiB | 6.31 MiB/s, done.
Resolving deltas: 100% (14/14), done.


In [13]:
%cd notebooks

/content/Movie-Rec-System/notebooks


In [15]:
import pandas as pd
import pandas as pd
from rectools import Columns
from rectools.dataset import Dataset

data_interim_dir = '../data/interim/'
user_groups = ['u1', 'u2', 'u3', 'u4', 'u5']
data_splits = ['base', 'test']

datasets = {}

for user_group in user_groups:
    for split in data_splits:
        # Construct file paths
        interactions_path = f"{data_interim_dir}{user_group}.{split}.csv"
        user_features_path = f"{data_interim_dir}{user_group}.{split}_user_features.csv"
        item_features_path = f"{data_interim_dir}{user_group}.{split}_item_features.csv"

        # Read the data from CSV files
        interactions_df = pd.read_csv(interactions_path)
        user_features_df = pd.read_csv(user_features_path)
        item_features_df = pd.read_csv(item_features_path)

        dataset = Dataset.construct(
            interactions_df,
            user_features_df=user_features_df,
            cat_user_features=['gender', 'occupation'],  # If these were the categorical features
            item_features_df=item_features_df,
            make_dense_item_features=True  # If this is still applicable
        )

        # Store in the data dictionary
        if user_group not in datasets:
            datasets[user_group] = {}

        datasets[user_group][split] = (dataset, interactions_df)

In [36]:
from rectools.models import ImplicitALSWrapperModel, ImplicitItemKNNWrapperModel, LightFMWrapperModel, RandomModel, PureSVDModel, PopularModel
from rectools.metrics import NDCG, Accuracy, MAP
from rectools import Columns
from implicit.nearest_neighbours import TFIDFRecommender

k = 10
ndcg = NDCG(k=k, log_base=3)
acc = Accuracy(k=k)
mmap = MAP(k=k)

# Define a list of models to test
models = [
    ('Random', RandomModel()),
    ('ImplicitItemKNN', ImplicitItemKNNWrapperModel(model=TFIDFRecommender(K=10))),
    # ('LightFM', LightFMWrapperModel()),
    ('PureSVD', PureSVDModel()),
    ('Popular', PopularModel())
]

for ug in datasets.keys():
    base_ds = datasets[ug]['base'][0]
    base_df = datasets[ug]['base'][1]
    test_ds = datasets[ug]['test'][0]
    test_df = datasets[ug]['test'][1]

    for model_name, model in models:
        # Fit the model
        model.fit(base_ds)

        # Generate recommendations
        recos = model.recommend(
            users=test_df[Columns.User].unique(),
            dataset=base_ds,
            k=10,
            filter_viewed=True,
        )

        # Evaluate the model
        print(f'{model_name} on {ug} split')
        print('MAP: ', mmap.calc(reco=recos, interactions=test_df))
        print("Accuracy: ", acc.calc(reco=recos, interactions=test_df, catalog=base_df[Columns.Item]))
        print("NDCG: ", ndcg.calc(reco=recos, interactions=test_df))
        print()


Random on u1 split
MAP:  0.0022288911966132285
Accuracy:  0.9993376361655776
NDCG:  0.02919294326504939

ImplicitItemKNN on u1 split
MAP:  0.11095594568471655
Accuracy:  0.9994421023965141
NDCG:  0.47923027186404077

PureSVD on u1 split
MAP:  0.12490142060896098
Accuracy:  0.9994523420479304
NDCG:  0.528658583680511

Popular on u1 split
MAP:  0.0515884623370606
Accuracy:  0.9994066448801744
NDCG:  0.320243894441263

Random on u2 split
MAP:  0.001821836400290521
Accuracy:  0.9994977029096477
NDCG:  0.021181258836976743

ImplicitItemKNN on u2 split
MAP:  0.12252065805277586
Accuracy:  0.9995877488514548
NDCG:  0.41468383065947245

PureSVD on u2 split
MAP:  0.13517649746661312
Accuracy:  0.999595290964778
NDCG:  0.449230375655791

Popular on u2 split
MAP:  0.0565423486642172
Accuracy:  0.9995541730474733
NDCG:  0.26440298998043166

Random on u3 split
MAP:  0.001706077073947863
Accuracy:  0.9995909953970081
NDCG:  0.01478114803666619

ImplicitItemKNN on u3 split
MAP:  0.1161651980713301
Ac