In [1]:
import implicit
import pandas as pd
import numpy as np
import scipy.sparse as sp

from scipy.sparse import random
from scipy import stats
from tqdm.notebook import tqdm
from sklearn.metrics import mean_squared_error
import scipy.sparse

from scipy.sparse.linalg import spsolve

from tqdm.notebook import tqdm
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import trange

In [2]:
ratings = pd.read_csv('ml-1m/ratings.dat', delimiter='::', header=None, 
        names=['user_id', 'movie_id', 'rating', 'timestamp'], 
        usecols=['user_id', 'movie_id', 'rating'], engine='python')

In [3]:
movie_info = pd.read_csv('ml-1m/movies.dat', delimiter='::', header=None, 
        names=['movie_id', 'name', 'category'], engine='python')

In [4]:
movie_info

Unnamed: 0,movie_id,name,category
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama


In [5]:
ratings = ratings.loc[(ratings['rating'] >= 2)]

In [6]:
ratings['user_id'] = ratings['user_id'] - 1
ratings['movie_id'] = ratings['movie_id'] - 1

In [7]:
movie_info['movie_id'] = movie_info['movie_id'] - 1

In [8]:
import os
import time
import argparse
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

In [9]:
ratings

Unnamed: 0,user_id,movie_id,rating
0,0,1192,5
1,0,660,3
2,0,913,3
3,0,3407,4
4,0,2354,5
...,...,...,...
1000203,6039,1089,3
1000205,6039,1093,5
1000206,6039,561,5
1000207,6039,1095,4


In [10]:
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import auc_score
from lightfm.cross_validation import random_train_test_split
import lightfm

In [11]:
warp_model = LightFM(no_components=32,
                    loss='warp',
                    learning_schedule='adagrad',
                    max_sampled=100,
                    user_alpha=1e-05,
                    item_alpha=1e-05)

In [11]:
users = ratings["user_id"]
movies = ratings["movie_id"]
user_item = sp.coo_matrix((np.ones_like(users), (users, movies)))
#user_item_csr = user_item.tocsr()

In [12]:
train, test = random_train_test_split(user_item)

In [145]:
warp_model.fit_partial(train, epochs=70)

<lightfm.lightfm.LightFM at 0x7f68877a5320>

In [148]:
get_similars_items = lambda item_id, sim : [movie_info[movie_info["movie_id"] == x]["name"].to_string() 
                                        for x in reversed(np.argsort(sim[item_id,:])[-10:])]

get_similars_items(0, cosine_similarity(warp_model.item_embeddings))

['0    Toy Story (1995)',
 '3045    Toy Story 2 (1999)',
 '584    Aladdin (1992)',
 '1526    Hercules (1997)',
 '591    Beauty and the Beast (1991)',
 '2692    Iron Giant, The (1999)',
 '1838    Mulan (1998)',
 '2225    Antz (1998)',
 '2618    Tarzan (1999)',
 '2252    Pleasantville (1998)']

In [185]:
ranks = warp_model.predict_rank(test, train)

In [186]:
from sklearn.metrics import ndcg_score, recall_score

In [187]:
ndcg_score(ranks.toarray(), test.toarray(), k=10)

0.5438799190145628

In [184]:
lightfm.evaluation.recall_at_k(warp_model, test, train, k=10).mean()

0.16696817368762287

In [188]:
recommendations = warp_model.predict(0, np.arange(train.shape[1]))

In [189]:
np.argsort(recommedations)[-10:]

array([1176, 1015, 1081,  584,  523,    0, 1250,  360,  591,  315])

In [190]:
get_user_history = lambda user_id, implicit_ratings : [movie_info[movie_info["movie_id"] == x]["name"].to_string() 
                                            for x in implicit_ratings[implicit_ratings["user_id"] == user_id]["movie_id"]]

In [191]:
get_user_history(0, ratings)

["1176    One Flew Over the Cuckoo's Nest (1975)",
 '3339    Erin Brockovich (2000)',
 "2286    Bug's Life, A (1998)",
 '1267    Ben-Hur (1959)',
 '2735    Christmas Story, A (1983)',
 '590    Snow White and the Seven Dwarfs (1937)',
 '907    Wizard of Oz, The (1939)',
 '591    Beauty and the Beast (1991)',
 '926    Gigi (1958)',
 '2329    Miracle on 34th Street (1947)',
 "2849    Ferris Bueller's Day Off (1986)",
 '1022    Sound of Music, The (1965)',
 '2722    Airplane! (1980)',
 '1949    Bambi (1942)',
 '3036    Awakenings (1990)',
 '2728    Big (1988)',
 '1250    Back to the Future (1985)',
 "523    Schindler's List (1993)",
 '47    Pocahontas (1995)',
 '1081    E.T. the Extra-Terrestrial (1982)',
 '1672    Titanic (1997)',
 '1506    Ponette (1996)',
 '2225    Antz (1998)',
 '3117    Girl, Interrupted (1999)',
 '1526    Hercules (1997)',
 '584    Aladdin (1992)',
 '1838    Mulan (1998)',
 '773    Hunchback of Notre Dame, The (1996)',
 '1768    Last Days of Disco, The (1998)',
 '100

In [192]:
recommedations = warp_model.predict(0, movie_info["movie_id"].values)

In [193]:
ids = np.argsort(recommedations)[-(10 + 1):]

In [194]:
movie_info[movie_info['movie_id'].isin(ids)]

Unnamed: 0,movie_id,name,category
0,0,Toy Story (1995),Animation|Children's|Comedy
313,315,Stargate (1994),Action|Adventure|Sci-Fi
357,360,It Could Happen to You (1994),Drama|Romance
520,523,Rudy (1993),Drama
581,584,"Brady Bunch Movie, The (1995)",Comedy
588,591,Batman (1989),Action|Adventure|Crime|Drama
1003,1015,"Shaggy Dog, The (1959)",Children's|Comedy
1066,1081,"Candidate, The (1972)",Drama
1161,1176,Enchanted April (1991),Drama
1231,1250,8 1/2 (1963),Drama


Предсказания что посмотреть в принципе норм, Бетмен, детские комедии. Но попадают и откровенно лишние результаты, например последний кажется совсем не подходит детям.

NCF

In [11]:
user_item_np = user_item.toarray()

In [13]:
import itertools

In [169]:
class NCFData(data.Dataset):
    def __init__(self, data, ng_num=0, whole_data=None, is_training=True):
        super().__init__()
        self.data = data
        self.ng_num = ng_num
        self.whole_data = whole_data
        self.init_positive_data()
        self.is_training = is_training
        self.inited = False
        if self.is_training:
            self.neg_resample()
        else:
            self.init_testing_data()
            
        self.inited = True
    
    def init_positive_data(self):
        self.positives = [np.nonzero(self.data[idx])[0] for idx in range(self.data.shape[0])]
        self.users = list(itertools.chain.from_iterable([[idx]*len(self.positives[idx]) for idx in range(self.data.shape[0])]))
        self.items = list(itertools.chain.from_iterable([[item for item in self.positives[idx]] for idx in range(self.data.shape[0])]))
        self.labels = list(np.ones(len(self.users)))
        self.orig_users_len = len(self.users)
        
    def fill_testing_items(self, user_id):
        items = []
        nonzero_indices = np.nonzero(self.data[user_id])[0]
        if len(nonzero_indices) == 0:
            return []
        items.append(nonzero_indices[0])
        zero_indices = np.where(self.whole_data[user_id] == 0)[0]
        items.extend(list(np.random.choice(zero_indices, size=self.ng_num, replace=False)))
        return items
    
    
    def init_testing_data(self):
        valid_user_indices = []
        for idx in range(self.data.shape[0]):
            if len(np.nonzero(self.data[idx])[0]) > 0:
                valid_user_indices.append(idx)
        self.users = list(itertools.chain.from_iterable([[idx]*(self.ng_num+1) for idx in valid_user_indices]))
        self.items = list(itertools.chain.from_iterable(self.fill_testing_items(idx) for idx in valid_user_indices)) 
        self.labels = list(np.zeros(len(self.users)))
        
        
    def neg_resample(self):
        if self.inited:
            self.users = self.users[:self.orig_users_len]
            self.items = self.items[:self.orig_users_len]
            self.labels = self.labels[:self.orig_users_len]
        for idx in range(self.data.shape[0]):
            self.users.extend([idx]*self.ng_num)
            zero_indices = np.where(self.data[idx] == 0)[0]
            self.items.extend(list(np.random.choice(zero_indices, size=self.ng_num, replace=False)))
            self.labels.extend(list(np.zeros(self.ng_num)))
    
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.labels[idx]
    
    def __len__(self):
        return len(self.labels)

In [170]:
train_dataset = NCFData(
        train.toarray(), 400)

In [171]:
test_dataset = NCFData(
        test.toarray(), 99, is_training=False, whole_data=user_item.toarray())
train_loader = data.DataLoader(train_dataset,
        batch_size=8192, shuffle=True, num_workers=4)
test_loader = data.DataLoader(test_dataset,
        batch_size=100, shuffle=False, num_workers=0)

In [172]:
import torch
import torch.nn as nn
import torch.nn.functional as F 


class NCF(nn.Module):
    def __init__(self, user_num, item_num, factor_num, num_layers,
                    dropout):
        super().__init__()   
        self.dropout = dropout

        self.embed_user_GMF = nn.Embedding(user_num, factor_num)
        self.embed_item_GMF = nn.Embedding(item_num, factor_num)
        self.embed_user_MLP = nn.Embedding(
                user_num, factor_num * (2 ** (num_layers - 1)))
        self.embed_item_MLP = nn.Embedding(
                item_num, factor_num * (2 ** (num_layers - 1)))

        MLP_modules = []
        for i in range(num_layers):
            input_size = factor_num * (2 ** (num_layers - i))
            MLP_modules.append(nn.Dropout(p= self.dropout))
            MLP_modules.append(nn.Linear(input_size, input_size//2))
            MLP_modules.append(nn.ReLU())
        self.MLP_layers = nn.Sequential(*MLP_modules)

        predict_size = factor_num * 2
        self.predict_layer = nn.Linear(predict_size, 1)

    def forward(self, user, item):
        embed_user_GMF = self.embed_user_GMF(user)
        embed_item_GMF = self.embed_item_GMF(item)
        output_GMF = embed_user_GMF * embed_item_GMF
        embed_user_MLP = self.embed_user_MLP(user)
        embed_item_MLP = self.embed_item_MLP(item)
        interaction = torch.cat((embed_user_MLP, embed_item_MLP), -1)
        output_MLP = self.MLP_layers(interaction)

        concat = torch.cat((output_GMF, output_MLP), -1)

        prediction = self.predict_layer(concat)
        return prediction.view(-1)


In [173]:
model = NCF(user_item.shape[0], user_item.shape[1], 128, 4, 0.)

In [174]:
model.cuda()
loss_function = nn.BCEWithLogitsLoss()

In [175]:
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [176]:
import numpy as np

def hit(gt_item, pred_items):
    if gt_item in pred_items:
        return 1
    return 0


def ndcg(gt_item, pred_items):
    if gt_item in pred_items:
        index = pred_items.index(gt_item)
        return np.reciprocal(np.log2(index+2))
    return 0


def metrics(model, test_loader, top_k):
    HR, NDCG = [], []

    for user, item, label in test_loader:
        user = user.cuda()
        item = item.cuda()
        with torch.no_grad():
            predictions = model(user, item)
        _, indices = torch.topk(predictions, top_k)
        recommends = torch.take(
                item, indices).cpu().numpy().tolist()

        gt_item = item[0].item()
        HR.append(hit(gt_item, recommends))
        NDCG.append(ndcg(gt_item, recommends))
    return np.mean(HR), np.mean(NDCG)


In [177]:
from tqdm.notebook import tqdm
for epoch in range(7):
    model.train()
    for user, item, label in tqdm(train_loader):
        user = user.cuda()
        item = item.cuda()
        label = label.float().cuda()

        model.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction, label)
        loss.backward()
        optimizer.step()
    print(loss.item())
    model.eval()
    HR, NDCG = metrics(model, test_loader, 10)
    train_loader.dataset.neg_resample()
    print("HR: {:.3f}\tNDCG: {:.3f}".format(np.mean(HR), np.mean(NDCG)))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.3258693218231201
HR: 0.681	NDCG: 0.441


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.3039441704750061
HR: 0.681	NDCG: 0.446


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.3211066424846649
HR: 0.724	NDCG: 0.470


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.29095372557640076
HR: 0.752	NDCG: 0.477


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.2614993155002594
HR: 0.770	NDCG: 0.504


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.2178661972284317
HR: 0.775	NDCG: 0.517


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.22636303305625916
HR: 0.786	NDCG: 0.528


In [108]:
movie_info[movie_info['movie_id'] == 49]

Unnamed: 0,movie_id,name,category
49,49,"Usual Suspects, The (1995)",Crime|Thriller


In [178]:
get_similars_items = lambda item_id, sim : [movie_info[movie_info["movie_id"] == x]["name"].to_string() 
                                        for x in reversed(np.argsort(sim[item_id,:])[-15:])]

get_similars_items(0, cosine_similarity(model.embed_item_GMF.weight.detach().cpu().numpy()))

['0    Toy Story (1995)',
 '3045    Toy Story 2 (1999)',
 '1595    Full Monty, The (1997)',
 '360    Lion King, The (1994)',
 "2497    Doug's 1st Movie (1999)",
 '614    Two Much (1996)',
 '3144    Batman: Mask of the Phantasm (1993)',
 '2692    Iron Giant, The (1999)',
 '591    Beauty and the Beast (1991)',
 '1050    Aladdin and the King of Thieves (1996)',
 '1642    Anastasia (1997)',
 '414    Being Human (1993)',
 '2225    Antz (1998)',
 '2023    Return of Jafar, The (1993)',
 '1760    Big One, The (1997)']

In [193]:
it = iter(test_loader)

In [196]:
next(it)

[tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2]),
 tensor([ 647, 2418, 3651, 3103, 3653, 3225, 3693, 3144, 3738, 3685, 3422, 1620,
         1137, 2326, 2968, 3719,  817, 2757,  440, 3342, 2388, 3400, 2228, 2245,
         1410, 2544, 1549, 3807, 1630, 3934, 3826, 2934,  760, 3121,  311,  412,
          751,  156, 2928,  370,  413, 2514, 3567,  838,  539, 3818, 3035,  456,
         2173, 1025, 3002,  661,  704, 1008,  839, 1700,  362,  222, 3748,  102,
         1146, 1167, 1957,  898, 1487, 2862, 3740, 3856, 3274, 1918, 2443, 2289,
         2219, 3416,  668, 2479, 1420, 3125, 3114,  533, 1996, 3238,  302, 3468,
          674, 2683,  806, 3849,  842, 2622, 3537, 3515, 2744, 2743, 1308,  340,
     

In [197]:
user, item, label = next(it)

In [198]:
user = user.cuda()
item = item.cuda()

with torch.no_grad():
    predictions = model(user, item)

In [199]:
_, indices = torch.topk(predictions, 10)

recommends = torch.take(
        item, indices).cpu().numpy().tolist()

In [200]:
movie_info[movie_info['movie_id'].isin(recommends)]

Unnamed: 0,movie_id,name,category
471,474,In the Name of the Father (1993),Drama
847,857,"Godfather, The (1972)",Action|Crime|Drama
1183,1200,"Good, The Bad and The Ugly, The (1966)",Action|Western
2209,2277,Ronin (1998),Action|Crime|Thriller
2334,2402,First Blood (1982),Action
2631,2699,"South Park: Bigger, Longer and Uncut (1999)",Animation|Comedy
3410,3478,Ladyhawke (1985),Adventure|Fantasy|Romance
3509,3577,Gladiator (2000),Action|Drama
3569,3637,Moonraker (1979),Action|Romance|Sci-Fi
3694,3762,F/X (1986),Action|Crime|Thriller


Можно было учить еще и перебирать больше параметров, <font size="0.5">но у меня горел дедлайн (да, даже продленный, тайм-менеджмент это не мое)</font>

# Attention

In [120]:
from torch.nn import MultiheadAttention

In [121]:
import torch
import torch.nn as nn
import torch.nn.functional as F 


class Attention(nn.Module):
    def __init__(self, user_num, item_num, factor_num, num_layers,
                    dropout):
        super().__init__()   
        self.dropout = dropout

        self.embed_user_GMF = nn.Embedding(user_num, factor_num)
        self.embed_item_GMF = nn.Embedding(item_num, factor_num)

        MLP_modules = []
        
        self.mh = MultiheadAttention(factor_num*4, num_heads=4, kdim=factor_num, vdim=factor_num)

 
        self.mlp= nn.Linear(512,256)

        self.predict_layer = nn.Linear(256, 1)

    def forward(self, user, item):
        embed_user_GMF = self.embed_user_GMF(user)
        embed_item_GMF = self.embed_item_GMF(item)
        output_GMF = embed_user_GMF * embed_item_GMF
        output_GMF = output_GMF.unsqueeze(0)
        output_mh = self.mh(output_GMF.repeat(1,1,4), output_GMF, output_GMF)[0]
        output_MLP = F.relu(self.mlp(output_mh))

        prediction = self.predict_layer(output_MLP)
        return prediction.view(-1)


In [122]:
model = Attention(user_item.shape[0], user_item.shape[1], 128, 4, 0.)

In [123]:
model.cuda()
loss_function = nn.BCEWithLogitsLoss()

In [124]:
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [125]:
import numpy as np

def hit(gt_item, pred_items):
    if gt_item in pred_items:
        return 1
    return 0


def ndcg(gt_item, pred_items):
    if gt_item in pred_items:
        index = pred_items.index(gt_item)
        return np.reciprocal(np.log2(index+2))
    return 0


def metrics(model, test_loader, top_k):
    HR, NDCG = [], []

    for user, item, label in test_loader:
        user = user.cuda()
        item = item.cuda()
        with torch.no_grad():
            predictions = model(user, item)
        _, indices = torch.topk(predictions, top_k)
        recommends = torch.take(
                item, indices).cpu().numpy().tolist()

        gt_item = item[0].item()
        HR.append(hit(gt_item, recommends))
        NDCG.append(ndcg(gt_item, recommends))
    return np.mean(HR), np.mean(NDCG)


In [126]:
from tqdm.notebook import tqdm
for epoch in range(5):
    model.train()
    for user, item, label in tqdm(train_loader):
        user = user.cuda()
        item = item.cuda()
        label = label.float().cuda()

        model.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction, label)
        loss.backward()
        optimizer.step()
    print(loss.item())
    model.eval()
    HR, NDCG = metrics(model, test_loader, 10)
    train_loader.dataset.neg_resample()
    print("HR: {:.3f}\tNDCG: {:.3f}".format(np.mean(HR), np.mean(NDCG)))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.4276605248451233
HR: 0.533	NDCG: 0.289


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.3467268645763397
HR: 0.640	NDCG: 0.385


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.3348104953765869
HR: 0.642	NDCG: 0.401


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.3202703595161438
HR: 0.650	NDCG: 0.403


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=388.0), HTML(value='')))


0.33680275082588196
HR: 0.647	NDCG: 0.407


In [135]:
get_similars_items = lambda item_id, sim : [movie_info[movie_info["movie_id"] == x]["name"].to_string() 
                                        for x in reversed(np.argsort(sim[item_id,:])[-15:])]

get_similars_items(25, cosine_similarity(model.embed_item_GMF.weight.detach().cpu().numpy()))

['25    Othello (1995)',
 '1311    Audrey Rose (1977)',
 '1014    Robin Hood: Prince of Thieves (1991)',
 '2341    Rocky III (1982)',
 '1798    Tarzan and the Lost City (1998)',
 '319    Swimming with Sharks (1995)',
 '1972    Condorman (1981)',
 '609    Jane Eyre (1996)',
 '3271    Bride of the Monster (1956)',
 '268    Losing Isaiah (1995)',
 '1514    Late Bloomers (1996)',
 '2912    Brother, Can You Spare a Dime? (1975)',
 '3000    Effect of Gamma Rays on Man-in-the-Moon Marigo...',
 '314    Santa Clause, The (1994)',
 '1585    Locusts, The (1997)']

In [142]:
it = iter(test_loader)

In [146]:
next(it)

[tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3]),
 tensor([ 259, 1444, 1588, 2078, 3837, 1958, 2236, 1436, 2209, 2674, 3160, 3458,
         3666, 2354,  528, 3673, 2773, 2074, 1348, 1583, 3227, 3936, 2597, 3216,
         1346,  258, 2730, 2075,  616,  424, 1410, 3252,  235,  626, 1090, 2361,
         1488,   22, 1074,  979,  813, 1292, 2024, 1435,  414, 1404, 1196, 1390,
         3369, 2270, 3450, 2877, 3373,  734, 2968, 3717, 3537, 2505,  742, 3830,
         2447, 1534, 1705, 3166, 2593, 3454, 2227, 2170, 1771, 2118, 3092,  622,
         3275, 3518, 3781,  252,  393, 2305, 1397, 3870,  664, 3836, 1595, 2449,
         3355, 2278, 1015, 1368, 1978,  247, 2398, 1246, 3339, 1010, 3778,  208,
     

In [147]:
user, item, label = next(it)

In [148]:
user = user.cuda()
item = item.cuda()

with torch.no_grad():
    predictions = model(user, item)

In [149]:
_, indices = torch.topk(predictions, 10)

recommends = torch.take(
        item, indices).cpu().numpy().tolist()

In [150]:
movie_info[movie_info['movie_id'].isin(recommends)]

Unnamed: 0,movie_id,name,category
534,537,Six Degrees of Separation (1993),Drama
893,904,It Happened One Night (1934),Comedy
1190,1207,Apocalypse Now (1979),Drama|War
1194,1211,"Third Man, The (1949)",Mystery|Thriller
1201,1218,Psycho (1960),Horror|Thriller
1239,1258,Stand by Me (1986),Adventure|Comedy|Drama
1265,1284,Heathers (1989),Comedy
1893,1961,Driving Miss Daisy (1989),Drama
2297,2365,King Kong (1933),Action|Adventure|Horror
3141,3209,Fast Times at Ridgemont High (1982),Comedy


Рекомендации получились не очень. Хотя метрики норм. Возможно тут надо немного по-другому делать предсказания