In [9]:
import pandas as pd
import numpy as np
import torch

pd.options.mode.chained_assignment = None
np.random.seed(42)
torch.manual_seed(42)

%config InlineBackend.figure_format='retina'

In [10]:
#!g1.1
# Show which accelerator this run uses. Querying the device name raises when CUDA is
# missing, while the training cells further down fall back to the CPU, so report a
# plain 'cpu' label instead of failing on GPU-less machines.
gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'cpu'
gpu_name

'Tesla V100-SXM2-32GB'

## Metrics and Datasets

In [11]:
from sklearn.metrics import ndcg_score
from utils import precision_at_k, recall_at_k, ndcg_at_k
from datasets import InteractionMatrixDataset, GraphDataset

In [48]:
# Amazon Books interactions (the file name suggests a 20-core subset); only the
# user, item and rating columns are read.
amazon_columns = ['user_id', 'item_id', 'rating']
amazon = InteractionMatrixDataset('amazon_20_core.csv.gz', usecols=amazon_columns)

# NOTE(review): presumably test_ratio is the share of data held out for testing and
# observed_ratio the share of each test user's history revealed at inference time —
# confirm against datasets.py.
amazon.train_test_split(observed_ratio=0.1, test_ratio=0.3)
amazon.build_sparse_interaction_matrix()

Sparsity = 99.856%
Users: 35736
Items: 38121


In [13]:
# MovieLens-1M ratings: a header-less file with '::' separators, of which the first
# three columns are read. The multi-character separator needs the python parsing engine.
movielens_read_options = dict(usecols=[0, 1, 2], header=None, sep='::', engine='python')
mov = InteractionMatrixDataset('ml-1m/ratings.dat', **movielens_read_options)

# Same split settings as the Amazon Books dataset.
mov.train_test_split(observed_ratio=0.1, test_ratio=0.3)
mov.build_sparse_interaction_matrix()

Sparsity = 95.532%
Users: 6040
Items: 3706


# Top Popular

## Amazon Books

In [14]:
K = 20  # cutoff used for the top-K recommendations scored below

# Popularity score of an item = how many training interactions mention it.
# NOTE(review): assumes item ids are 0..n_items-1 and that every item occurs in the
# training frame, so position i of `vc` matches column i of the interaction matrices —
# confirm against datasets.py.
vc = amazon._df_train['item_id'].value_counts().sort_index().values

# Every test user receives the same popularity scores, one row per user.
all_ranks = torch.Tensor(np.tile(vc, (amazon.n_test_users, 1)))
all_ranks[amazon.observed_interactions.nonzero()] = 0  # exclude seen items

# Take the top K (was a hard-coded 20, leaving K unused) so the cutoff lives in one place.
ranks, recs = all_ranks.topk(K, dim=1)

# Ground truth: for each test user, the column indices of their held-out interactions.
y_true = np.array([amazon.future_interactions[i].nonzero()[1] for i in range(amazon.n_test_users)], dtype='object')

# Header corrected: this cell evaluates the Top Popular baseline, not ALS.
print('====== Top Popular ======')
print(f'Precision = {precision_at_k(recs, y_true).round(5)}')
print(f'Recall = {recall_at_k(recs, y_true).round(5)}')
print(f'NDCG = {ndcg_at_k(ranks.numpy(), recs, amazon.future_interactions).round(5)}')

Precision = 0.02621
Recall = 0.0123
NDCG = 0.12631


## MovieLens1M

In [15]:
K = 20  # cutoff used for the top-K recommendations

# Popularity score of an item = how many training interactions mention it.
# NOTE(review): assumes item ids are 0..n_items-1 and that every item occurs in the
# training frame, so position i of `vc` matches column i of the interaction matrices —
# confirm against datasets.py.
vc = mov._df_train['item_id'].value_counts().sort_index().values

# Every test user receives the same popularity scores, one row per user.
all_ranks = torch.Tensor(np.tile(vc, (mov.n_test_users, 1)))
all_ranks[mov.observed_interactions.nonzero()] = 0  # exclude seen items

# Take the top K (was a hard-coded 20, leaving K unused) so the cutoff lives in one place.
ranks, recs = all_ranks.topk(K, dim=1)

In [16]:
# Ground truth: for each test user, the column indices of their held-out interactions.
y_true = np.array([mov.future_interactions[i].nonzero()[1] for i in range(mov.n_test_users)], dtype='object')

# Header corrected: `recs` and `ranks` here come from the Top Popular baseline, not ALS.
print('====== Top Popular ======')
print(f'Precision = {precision_at_k(recs, y_true).round(5)}')
print(f'Recall = {recall_at_k(recs, y_true).round(5)}')
print(f'NDCG = {ndcg_at_k(ranks.numpy(), recs, mov.future_interactions).round(5)}')

Precision = 0.41206
Recall = 0.08408
NDCG = 0.72144


# ALS

In [17]:
from implicit.als import AlternatingLeastSquares



## Amazon Books

In [18]:
# ALS with 256 latent factors, trained for 5 iterations on the training interactions.
als = AlternatingLeastSquares(
    factors=256,
    regularization=0.001,
    iterations=5,
    calculate_training_loss=True,
    num_threads=2,
    random_state=42,
)
als.fit(amazon.train_interactions, show_progress=True)

# Test users are given the ids that directly follow the training users; their factors
# are fitted from the observed part of their history without refitting the item factors.
test_user_ids = np.arange(amazon.n_train_users, amazon.n_train_users + amazon.n_test_users)
als.partial_fit_users(userids=test_user_ids, user_items=amazon.observed_interactions)



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [21]:
K = 20  # cutoff used for the top-K recommendations scored below

# Ids under which the test users were added to the model (right after the training users).
test_user_ids = np.arange(amazon.n_train_users, amazon.n_train_users + amazon.n_test_users)

# recommend() returns an (item ids, scores) pair. Unpack it directly: wrapping the pair in
# np.array() stacked both arrays into one float array and turned the integer ids into floats.
recs, ranks = als.recommend(userid=test_user_ids,
                            filter_already_liked_items=True,
                            user_items=amazon.observed_interactions, N=K)

# Ground truth: for each test user, the column indices of their held-out interactions.
y_true = np.array([amazon.future_interactions[i].nonzero()[1] for i in range(amazon.n_test_users)], dtype='object')

print('====== ALS ======')
print(f'Precision = {precision_at_k(recs, y_true).round(5)}')
print(f'Recall = {recall_at_k(recs, y_true).round(5)}')
print(f'NDCG = {ndcg_at_k(ranks, recs, amazon.future_interactions).round(5)}')

Precision = 0.13513
Recall = 0.06321
NDCG = 0.42516


## MovieLens1M

In [22]:
# ALS with 32 latent factors, trained for 5 iterations on the training interactions.
als = AlternatingLeastSquares(
    factors=32,
    regularization=0.1,
    iterations=5,
    calculate_training_loss=True,
    num_threads=2,
    random_state=42,
)
als.fit(mov.train_interactions, show_progress=True)

# Test users are given the ids that directly follow the training users; their factors
# are fitted from the observed part of their history without refitting the item factors.
test_user_ids = np.arange(mov.n_train_users, mov.n_train_users + mov.n_test_users)
als.partial_fit_users(userids=test_user_ids, user_items=mov.observed_interactions)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [25]:
K = 20  # cutoff used for the top-K recommendations scored below

# Ids under which the test users were added to the model (right after the training users).
test_user_ids = np.arange(mov.n_train_users, mov.n_train_users + mov.n_test_users)

# recommend() returns an (item ids, scores) pair. Unpack it directly: wrapping the pair in
# np.array() stacked both arrays into one float array and turned the integer ids into floats.
recs, ranks = als.recommend(userid=test_user_ids,
                            filter_already_liked_items=True,
                            user_items=mov.observed_interactions, N=K)

# Ground truth: for each test user, the column indices of their held-out interactions.
y_true = np.array([mov.future_interactions[i].nonzero()[1] for i in range(mov.n_test_users)], dtype='object')

print('====== ALS ======')
print(f'Precision = {precision_at_k(recs, y_true).round(5)}')
print(f'Recall = {recall_at_k(recs, y_true).round(5)}')
print(f'NDCG = {ndcg_at_k(ranks, recs, mov.future_interactions).round(5)}')

Precision = 0.52862
Recall = 0.11839
NDCG = 0.80017


# GF-CF

In [26]:
from models import GF_CF

## Amazon Books

In [27]:
#!c1.8
%%time
K = 20

gf_cf = GF_CF(amazon.train_interactions)
gf_cf.fit(64)
ranks = gf_cf.predict(amazon.observed_interactions)
recs = gf_cf.recommend_top_k(amazon.observed_interactions, K)

y_true = np.array([amazon.future_interactions[i].nonzero()[1] for i in range(amazon.n_test_users)], dtype='object')
print('====== GF-CF ======')
print(f'Precision = {precision_at_k(recs, y_true).round(5)}')
print(f'Recall = {recall_at_k(recs, y_true).round(5)}')
print(f'NDCG = {ndcg_score(np.array(ranks), np.array(amazon.future_interactions.todense())).round(5)} \n')

Precision = 0.1542
Recall = 0.07702
NDCG = 0.6333 

CPU times: user 6min 37s, sys: 2min 26s, total: 9min 3s
Wall time: 8min 14s


## MovieLens1M

In [28]:
#!c1.8
%%time
K = 20

gf_cf = GF_CF(mov.train_interactions)
gf_cf.fit(64)
ranks = gf_cf.predict(mov.observed_interactions)
recs = gf_cf.recommend_top_k(mov.observed_interactions, K)

y_true = np.array([mov.future_interactions[i].nonzero()[1] for i in range(mov.n_test_users)], dtype='object')
print('====== GF-CF ======')
print(f'Precision = {precision_at_k(recs, y_true).round(5)}')
print(f'Recall = {recall_at_k(recs, y_true).round(5)}')
print(f'NDCG = {ndcg_score(np.array(ranks), np.array(mov.future_interactions.todense())).round(5)} \n')

Precision = 0.57339
Recall = 0.13319
NDCG = 0.76124 

CPU times: user 7.66 s, sys: 4.82 s, total: 12.5 s
Wall time: 2min 15s


# LGCN-E

In [29]:
from models import LGCN_E
from utils import train
# Turn off pandas' chained-assignment warning (same setting as in the first cell of the notebook).
pd.set_option('mode.chained_assignment', None)

## Amazon Books

In [50]:
# Reload Amazon Books as a GraphDataset. From this cell on, `amazon` refers to the graph
# version and no longer to the InteractionMatrixDataset used by the earlier models.
amazon_columns = ['user_id', 'item_id', 'rating']
amazon = GraphDataset('amazon_20_core.csv.gz', usecols=amazon_columns)

# NOTE(review): test_ratio is 0.25 here but 0.3 for the matrix datasets the other models
# were scored on, so the held-out sets may differ — confirm this is intended.
amazon.train_test_split(observed_ratio=0.1, test_ratio=0.25)
amazon.build_sparse_interaction_matrix()
amazon.build_interaction_graph()
amazon.train_val_split(val_ratio=0.1)

Sparsity = 99.856%
Users: 35736
Items: 38121


In [32]:
#!g1.1
# Train on the first GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Move every edge tensor of the dataset onto the chosen device.
pos_edges = amazon.pos_edges.to(device)
train_edges = amazon.train_edges.to(device)
val_edges = amazon.val_edges.to(device)
test_observed_edges = amazon.test_observed_edges.to(device)
test_future_edges = amazon.test_future_edges.to(device)

# Copy of the held-out edges with row 1 shifted down by the number of training users
# (presumably re-basing test-user ids to start at 0 — confirm); the original is untouched.
_tfe = test_future_edges.clone()
_tfe[1] -= amazon.n_train_users

config = {
    'lr': 2e-4,
    'emb_dim': 512,
    'device': device,
}
# wandb.init(project="course_work", entity="ilyaind", config=config, reinit=True)

# NOTE(review): `n_users` receives the number of *items* — presumably the model only
# learns item embeddings; confirm against models.py.
model = LGCN_E(n_users=amazon.n_items, emb_dim=config['emb_dim'], normalize=True)
model = model.to(device)
opt = torch.optim.Adam(model.parameters(), lr=config['lr'])

# The trailing 250 is the length of the training run (it matches the progress-bar maximum).
model, opt = train(model, opt, amazon, train_edges, device, 250)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=250.0), HTML(value='')))




In [33]:
#!g1.1
K = 20  # cutoff used for the top-K recommendations scored below

# Embeddings are computed on the training edges plus the observed edges of the test
# users; gradients are not needed for evaluation.
with torch.no_grad():
    _U, _E = model.get_embeddings(torch.cat([pos_edges, test_observed_edges], dim=1))

# Score matrix with one row per user (training users first, then test users).
new_scores = _E @ _U.T
new_scores[test_observed_edges[1], test_observed_edges[0]] = -1e5  # exclude seen items
new_scores = new_scores[amazon.n_train_users:]  # keep only the test users' rows
ranks, recs = new_scores.topk(K, dim=1)

# Ground truth: the unique held-out item ids of every test user.
y_true = amazon._test_future.groupby('user_id')['item_id'].unique().values
print('===== LGCN-E =====')
# Label typo fixed ("Precsion" -> "Precision").
print(f'Precision = {precision_at_k(recs.cpu(), y_true).round(5)}')
print(f'Recall = {recall_at_k(recs.cpu(), y_true).round(5)}')
print(f'NDCG = {ndcg_at_k(ranks.cpu().numpy(), recs.cpu(), amazon.future_interactions).round(5)}')

# wandb.run.summary["test_precision"] = precision_at_k(recs.cpu(), y_true)
# wandb.run.summary["test_recall"] = recall_at_k(recs.cpu(), y_true)

===== LGCN-E =====
Precsion = 0.14617
Recall = 0.07154
NDCG = 0.45866


## MovieLens1M

In [35]:
# Reload MovieLens-1M as a GraphDataset. From this cell on, `mov` refers to the graph
# version and no longer to the InteractionMatrixDataset used by the earlier models.
movielens_read_options = dict(usecols=[0, 1, 2], header=None, sep='::', engine='python')
mov = GraphDataset('ml-1m/ratings.dat', **movielens_read_options)

# NOTE(review): test_ratio is 0.25 here but 0.3 for the matrix datasets the other models
# were scored on, so the held-out sets may differ — confirm this is intended.
mov.train_test_split(observed_ratio=0.1, test_ratio=0.25)
mov.build_sparse_interaction_matrix()
mov.build_interaction_graph()
mov.train_val_split(val_ratio=0.1)

Sparsity = 95.532%
Users: 6040
Items: 3706


In [38]:
#!g1.1
# Train on the first GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Move every edge tensor of the dataset onto the chosen device.
pos_edges = mov.pos_edges.to(device)
train_edges = mov.train_edges.to(device)
val_edges = mov.val_edges.to(device)
test_observed_edges = mov.test_observed_edges.to(device)
test_future_edges = mov.test_future_edges.to(device)

# Copy of the held-out edges with row 1 shifted down by the number of training users
# (presumably re-basing test-user ids to start at 0 — confirm); the original is untouched.
_tfe = test_future_edges.clone()
_tfe[1] -= mov.n_train_users

config = {
    'lr': 4e-4,
    'emb_dim': 256,
    'device': device,
}
# wandb.init(project="course_work", entity="ilyaind", config=config, reinit=True)

# NOTE(review): `n_users` receives the number of *items* — presumably the model only
# learns item embeddings; confirm against models.py.
model = LGCN_E(n_users=mov.n_items, emb_dim=config['emb_dim'], normalize=True)
model = model.to(device)
opt = torch.optim.Adam(model.parameters(), lr=config['lr'])

# The trailing 600 is the training-length argument passed to train().
model, opt = train(model, opt, mov, train_edges, device, 600)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=250.0), HTML(value='')))




In [47]:
#!g1.1
K = 20  # cutoff used for the top-K recommendations scored below

# Embeddings are computed on the training edges plus the observed edges of the test
# users; gradients are not needed for evaluation.
with torch.no_grad():
    _U, _E = model.get_embeddings(torch.cat([pos_edges, test_observed_edges], dim=1))

# Score matrix with one row per user (training users first, then test users).
new_scores = _E @ _U.T
new_scores[test_observed_edges[1], test_observed_edges[0]] = -1e5  # exclude seen items
new_scores = new_scores[mov.n_train_users:]  # keep only the test users' rows
ranks, recs = new_scores.topk(K, dim=1)

# Ground truth: the unique held-out item ids of every test user.
y_true = mov._test_future.groupby('user_id')['item_id'].unique().values
print('===== LGCN-E =====')
# Label typo fixed ("Precsion" -> "Precision").
print(f'Precision = {precision_at_k(recs.cpu(), y_true).round(5)}')
print(f'Recall = {recall_at_k(recs.cpu(), y_true).round(5)}')
print(f'NDCG = {ndcg_at_k(ranks.cpu().numpy(), recs.cpu(), mov.future_interactions).round(5)}')

===== LGCN-E =====
Precsion = 0.53209
Recall = 0.11721
NDCG = 0.80396
