# In this notebook the training process is defined and executed

In [None]:
import os
import time
from pathlib import Path
from typing import Callable, Any

import numpy as np
import pandas as pd
import scipy
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.sparse import csr_matrix
from torch.utils.data import DataLoader
export_dir = os.getcwd()
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.offline import plot
import random
import math
import heapq
from scipy.special import expit  # Sigmoid function
import itertools
from IPython.display import Latex, display
import pickle
import warnings

# Silence FutureWarning, RuntimeWarning and UserWarning messages for the whole session
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

# Display options: show every DataFrame column and print tensors without scientific notation
# pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
torch.set_printoptions(sci_mode=False)

# When 1, the training functions in this notebook leave out the users listed in test_subset_users
test_flag = 1

In [None]:
pip install ipynb

In [None]:
from ipynb.fs.defs.utils import *
from ipynb.fs.defs.data_processing import *
from ipynb.fs.defs.models import *

## SAE Architecture for MF Latent Concepts

Load the MF embeddings for users and items (the input to the SAE)

In [None]:
# Load the MF embedding and bias tables from CSV (paths are relative to export_dir)
df_item_emb_mf = pd.read_csv(Path(export_dir,'res_csv/ML1M/mf/items_embeddings_mf_model.csv'))
df_user_emb_mf = pd.read_csv(Path(export_dir,'res_csv/ML1M/mf/users_embeddings_mf_model.csv'))
df_b_i_emb_mf = pd.read_csv(Path(export_dir,'res_csv/ML1M/mf/b_i_mf_model.csv'))
df_b_u_emb_mf = pd.read_csv(Path(export_dir,'res_csv/ML1M/mf/b_u_mf_model.csv'))


# CONVERT TO TENSORS (float32, one tensor row per DataFrame row)
dataset_items_mf = torch.tensor(df_item_emb_mf.values, dtype=torch.float32)
dataset_users_mf = torch.tensor(df_user_emb_mf.values, dtype=torch.float32)
dataset_bu_mf = torch.tensor(df_b_u_emb_mf.values, dtype=torch.float32)
dataset_bi_mf = torch.tensor(df_b_i_emb_mf.values, dtype=torch.float32)


# Start from all users; a later cell reassigns this name to the training users only
interaction_embeddings = dataset_users_mf

## Test set sampling:

In [None]:
# Uniform sampling without replacement: hold out floor(20%) of the user rows and
# floor(20%) of the item rows, identified by their DataFrame index labels.
# NOTE(review): no random seed is set in this notebook, so the split differs between runs.
test_subset_users = random.sample(list(df_user_emb_mf.index), k=math.floor(df_user_emb_mf.shape[0]*0.2))
test_subset_items = random.sample(list(df_item_emb_mf.index), k=math.floor(df_item_emb_mf.shape[0]*0.2))

# Number of held-out users
test_users_num = len(test_subset_users)

In [None]:
# Users that were not sampled into the test set, in index order.
# A set makes each membership test O(1) instead of scanning the whole test list.
_held_out_users = set(test_subset_users)
train_subset_users = [i for i in df_user_emb_mf.index if i not in _held_out_users]
# The user embedding tensor loaded above is named dataset_users_mf
# (no `dataset_users` is defined in this notebook).
interaction_embeddings = dataset_users_mf[train_subset_users]

In [None]:
def autoencoder_loss(
    user: torch.Tensor,
    user_data: torch.Tensor,
    item: torch.Tensor,
    item_data: torch.Tensor,
    user_recons: torch.Tensor,
    item_recons: torch.Tensor,
    latent_activations_item: torch.Tensor,
    latent_activations_user: torch.Tensor,
    epochs_progress: float,
    l1_weight: float=0.01,
    kl_weight: float=0,
    mse_weight = 0.1,
    sparsity_target=0.05,
    prediction_level_weight: float=0.0
    )  -> torch.Tensor:
    """Combine the SAE training objectives for one user batch and one item batch.

    The total is a weighted sum of:
      * a prediction-level term: MSE between the user-by-item score matrix
        built from the original embeddings and the one built from the
        reconstructions (both include the same bias terms, read from the
        module-level ``dataset_bi_mf`` / ``dataset_bu_mf``),
      * an L2 reconstruction term for users plus items,
      * an L1 sparsity term on the latent activations,
      * a KL sparsity term, only when ``kl_weight`` is non-zero.

    Args:
        user: Row indices into ``dataset_bu_mf`` (index tensor or list).
        user_data: Original user embeddings, numpy array or tensor.
        item: Row indices into ``dataset_bi_mf`` (index tensor or list).
        item_data: Original item embeddings, numpy array or tensor.
        user_recons: Reconstructed user embeddings.
        item_recons: Reconstructed item embeddings.
        latent_activations_item: Latent activations of the item batch.
        latent_activations_user: Latent activations of the user batch.
        epochs_progress: Not used by the current loss; kept for callers.
        l1_weight: Weight of the L1 sparsity term.
        kl_weight: Weight of the KL sparsity term; 0 disables it.
        mse_weight: Weight of the L2 reconstruction term.
        sparsity_target: Target passed on to ``kl_divergence_loss``.
        prediction_level_weight: Weight of the prediction-level term.

    Returns:
        The scalar loss tensor.
    """
    # Accept numpy arrays as well as tensors, and compare in the dtype of the
    # reconstructions so the MSE terms never mix float32 with float64.
    user_data = torch.as_tensor(user_data, dtype=user_recons.dtype, device=user_recons.device)
    item_data = torch.as_tensor(item_data, dtype=item_recons.dtype, device=item_recons.device)

    # prediction level loss term: bias matrices are expanded to (len(user), len(item))
    b_i_mat = dataset_bi_mf[item, :].T.repeat(len(user), 1)
    b_u_mat = dataset_bu_mf[user, :].repeat(1, len(item))
    inner_orig = b_i_mat + b_u_mat + torch.matmul(user_data, item_data.T)
    inner_recons = b_i_mat + b_u_mat + torch.matmul(user_recons, item_recons.T)
    prediction_level_loss = F.mse_loss(inner_recons, inner_orig)

    # L2 reconstruction loss term
    l2_reconstruction_loss = F.mse_loss(user_recons, user_data) + F.mse_loss(item_recons, item_data)

    # L1 sparsity loss term (mean absolute activation)
    l1_sparsity_loss = (
        F.l1_loss(latent_activations_item, torch.zeros_like(latent_activations_item))
        + F.l1_loss(latent_activations_user, torch.zeros_like(latent_activations_user))
    )

    total_loss = (
        prediction_level_weight * prediction_level_loss
        + mse_weight * l2_reconstruction_loss
        + l1_weight * l1_sparsity_loss
    )

    # The KL term is only evaluated when it contributes, so a disabled term
    # can never leak a NaN into the total.
    if kl_weight != 0:
        kl_loss = (
            kl_divergence_loss(latent_activations_user, sparsity_target)
            + kl_divergence_loss(latent_activations_item, sparsity_target)
        )
        total_loss = total_loss + kl_weight * kl_loss

    return total_loss

In [None]:
def train_autoencoder(dataset_users, dataset_items, test_data, input_dim, latent_dim,datset_hist=None, num_epochs=18, batch_size=256, learning_rate=1e-3):
    """Train the MF sparse autoencoder on freshly shuffled user and item batches.

    Every step encodes one user batch and one item batch with the same
    autoencoder and minimises ``autoencoder_loss`` on the pair.

    Args:
        dataset_users: 2-D float tensor of user embeddings, one row per user.
        dataset_items: 2-D float tensor of item embeddings, one row per item.
        test_data: Not used by this variant; kept for call compatibility.
        input_dim: Width the batches are padded/truncated to before encoding.
        latent_dim: Size of the autoencoder's latent layer.
        datset_hist: Not used by this variant; kept for call compatibility.
        num_epochs: Number of passes over the user batches.
        batch_size: Rows per batch; incomplete trailing batches are dropped.
        learning_rate: Adam learning rate.

    Returns:
        The trained autoencoder; the last batch loss of every epoch is
        appended to ``autoencoder.loss``.

    Raises:
        ValueError: If either dataset has fewer than ``batch_size`` rows, so
            that no full batch can be formed.
    """
    # To use a TopK activation instead, pass activation=TopK(k=8).
    autoencoder = Autoencoder(latent_dim, input_dim, activation=nn.ReLU(), tied=True, normalize = True)
    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=learning_rate)

    def _indexed_loader(embeddings):
        # Prepend each row's position as column 0 so it survives shuffling.
        # Positions are derived from the tensor itself rather than from a
        # notebook-level DataFrame.
        row_ids = torch.arange(
            embeddings.shape[0], dtype=embeddings.dtype, device=embeddings.device
        ).unsqueeze(1)
        return DataLoader(
            torch.cat((row_ids, embeddings), dim=1),
            batch_size=batch_size, shuffle=True, drop_last=True,
        )

    dataloader_users = _indexed_loader(dataset_users)
    dataloader_items = _indexed_loader(dataset_items)
    if len(dataloader_users) == 0 or len(dataloader_items) == 0:
        raise ValueError("dataset_users and dataset_items must each hold at least batch_size rows")

    for epoch in range(num_epochs):
        # One epoch is one pass over the user batches; item batches are
        # repeated by itertools.cycle when there are fewer of them.
        for data_users, data_items in zip(dataloader_users, itertools.cycle(dataloader_items)):

            # data_users: shape [batch_size, 1 + embedding width]
            user_idx = data_users[:, 0].long()
            input_tensor_users = pad_or_truncate_tensor(normalize_matrix(data_users[:, 1:]), input_dim)
            _, latents_usrs, user_recons = autoencoder(input_tensor_users)

            # data_items: shape [batch_size, 1 + embedding width]
            item_idx = data_items[:, 0].long()
            input_tensor_items = pad_or_truncate_tensor(normalize_matrix(data_items[:, 1:]), input_dim)
            _, latents_items, item_recons = autoencoder(input_tensor_items)

            epochs_progress = epoch/num_epochs
            loss = autoencoder_loss(user_idx, input_tensor_users,item_idx, input_tensor_items,
                    user_recons, item_recons,latents_items,latents_usrs,epochs_progress,mse_weight=0.1,
                                    l1_weight=0,kl_weight =0.01,prediction_level_weight = 1)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Only the loss of the epoch's last batch is recorded and reported.
        (autoencoder.loss).append(loss.item())
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

    return autoencoder


Train with our saved training dataset:

In [None]:
def train_autoencoder(dataset_users, dataset_items, test_data, test_subset_users, datset_hist=None, input_dim=None, latent_dim=22, num_epochs=18, batch_size=256, learning_rate=1e-3):
    """Train the MF sparse autoencoder on the saved (pre-built) batches.

    ``datset_hist[epoch][batch]`` is a sequence of tuples whose first element
    is a user row and whose second element is an item row. Those rows index
    ``dataset_users`` / ``dataset_items`` directly, so both tensors must be
    the full embedding matrices. When the module-level ``test_flag`` is 1,
    users listed in ``test_subset_users`` are dropped from every batch.

    Args:
        dataset_users: 2-D float tensor of user embeddings.
        dataset_items: 2-D float tensor of item embeddings.
        test_data: Not used; kept for call compatibility.
        test_subset_users: Users to exclude from training when ``test_flag`` is 1.
        datset_hist: Saved batches; defaults to the notebook-level
            ``dataset_MF_SAE``, looked up when the function is called.
            Must hold at least ``num_epochs`` epochs.
        input_dim: Width the batches are padded/truncated to; defaults to the
            width of ``dataset_users``.
        latent_dim: Size of the autoencoder's latent layer.
        num_epochs: Number of saved epochs to replay.
        batch_size: Not used; batch sizes come from ``datset_hist``.
        learning_rate: Adam learning rate.

    Returns:
        The trained autoencoder; the last batch loss of every epoch is
        appended to ``autoencoder.loss``.
    """
    if datset_hist is None:
        datset_hist = dataset_MF_SAE
    if input_dim is None:
        input_dim = dataset_users.shape[1]

    # To use a TopK activation instead, pass activation=TopK(k=8).
    autoencoder = Autoencoder(latent_dim, input_dim, activation=nn.ReLU(), tied=True, normalize = True)
    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=learning_rate)

    # Built once: membership tests against a set are O(1) per sample.
    held_out_users = set(test_subset_users) if test_flag == 1 else set()

    for epoch in range(num_epochs):
        loss = None
        epoch_batches = datset_hist[epoch]
        for batch in range(len(epoch_batches)):
            batch_samples = epoch_batches[batch]

            user_idx = [t[0] for t in batch_samples if t[0] not in held_out_users]
            item_idx = [t[1] for t in batch_samples]
            if not user_idx or not item_idx:
                # Nothing left to train on; an empty batch would yield a NaN loss.
                continue

            input_tensor_users = pad_or_truncate_tensor(normalize_matrix(dataset_users[user_idx, :]), input_dim)
            _, latents_usrs, user_recons = autoencoder(input_tensor_users)

            input_tensor_items = pad_or_truncate_tensor(normalize_matrix(dataset_items[item_idx, :]), input_dim)
            _, latents_items, item_recons = autoencoder(input_tensor_items)

            epochs_progress = epoch/num_epochs
            # autoencoder_loss names this weight prediction_level_weight.
            loss = autoencoder_loss(user_idx, input_tensor_users,item_idx, input_tensor_items,
                    user_recons, item_recons,latents_items,latents_usrs,epochs_progress,mse_weight=0.1,
                                    l1_weight=0,kl_weight =0.01,prediction_level_weight = 2)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if loss is None:
            # Every batch of this epoch was empty, so there is no loss to record.
            continue

        # Only the loss of the epoch's last trained batch is recorded and reported.
        (autoencoder.loss).append(loss.item())
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

    return autoencoder


## Start Training

In [None]:
test_flag=1
# The saved batches address users and items by their row in the full embedding
# matrices, and the trainer itself drops the test users, so the full tensors
# loaded above (dataset_users_mf / dataset_items_mf) are passed here.
# Argument order: users, items, test_data, test_subset_users, datset_hist.
autoencoder = train_autoencoder(
    dataset_users_mf,
    dataset_items_mf,
    dataset_users_mf[test_subset_users],
    test_subset_users,
    dataset_MF_SAE,
    input_dim=dataset_users_mf.shape[1],
    latent_dim=22,
)

## Save Model

In [None]:
# Placeholder: replace with the file name (without extension) to save the MF SAE under
model_name = 'your_SAE_model_name'

In [None]:
# torch.save(autoencoder, Path(export_dir,f'models/ML1M/{model_name}.pth'))

-

## SAE Architecture for NCF Latent Concepts

load NCF recommender

In [None]:
# Initialize the model.
model = NeuralCollaborativeFiltering(num_users=6039, num_items=3706,
                                      embedding_dim=20, hidden_layers=[64, 32, 16])

In [None]:
# model = torch.load(Path(export_dir,'models/ML1M/NCF_recommender.pth'), weights_only=False)

# # pos_idx_ex_use = model.pos_idx_ex_use
# # pos_idx_ex_hidden = {(row): [item for item in pos_ex[row] if item not in pos_idx_ex_use[row]] for row in ratings_matrix.index}

# model.eval()

SAE training:

In [None]:
# Freeze the recommender: none of its parameters receive gradients, so training
# the SAE below cannot change the recommender's weights.
for param in model.parameters():
    param.requires_grad = False

def train_sparse_autoencoder(model, sae_model, dataload, user_embed, item_embed, dataset_hist=None, epochs=18, lr=0.001, device='cpu', *, kl_weight, l2_weight, l1_weight=0, prediction_level_weight=1):
    """Train a sparse autoencoder on NCF embeddings against a fixed recommender.

    For every saved batch the user and item embeddings are reconstructed by
    ``sae_model`` and the loss combines: L2 reconstruction error, binary
    cross-entropy between the recommender's output on reconstructed vs.
    original embeddings, an L1 penalty and a KL penalty on the encodings.

    When the module-level ``test_flag`` is 1, samples of users listed in the
    module-level ``test_subset_users`` are dropped and only samples whose
    rating (third tuple element) equals 1 are kept.

    Args:
        model: Fixed recommender exposing ``fc_layers``; put in eval mode.
        sae_model: Autoencoder returning ``(reconstruction, encoding)`` and
            carrying a ``loss`` list.
        dataload: Not used; kept for call compatibility.
        user_embed: User embedding matrix, indexed by user row.
        item_embed: Item embedding matrix, indexed by item row.
        dataset_hist: Saved batches, ``dataset_hist[epoch][batch]`` being a
            sequence of ``(user, item, rating, ...)`` tuples. Defaults to the
            notebook-level ``dataset_NCF_SAE``, looked up at call time.
        epochs: Number of saved epochs to replay.
        lr: Adam learning rate.
        device: Device the models and index tensors are moved to.
        kl_weight: Weight of the KL sparsity term (required, keyword-only).
        l2_weight: Weight of the L2 reconstruction term (required, keyword-only).
        l1_weight: Weight of the L1 sparsity term.
        prediction_level_weight: Weight of the prediction-level term.

    Returns:
        None. The sample-weighted mean loss of every epoch is appended to
        ``sae_model.loss`` (NaN for an epoch without any training sample).
    """
    if dataset_hist is None:
        # Resolved at call time so that defining this function does not
        # require the saved batches to be loaded already.
        dataset_hist = dataset_NCF_SAE

    model.to(device)
    sae_model.to(device)
    optimizer = torch.optim.Adam(sae_model.parameters(), lr=lr)
    mse_loss = nn.MSELoss()
    ce_loss = nn.BCELoss()

    # Set the fixed model to evaluation mode
    model.eval()

    # Built once: membership tests against a set are O(1) per sample.
    held_out_users = set(test_subset_users) if test_flag == 1 else None

    for epoch in range(epochs):
        total_loss = 0.0
        samples_num = 0
        epoch_batches = dataset_hist[epoch]

        for batch in range(len(epoch_batches)):
            samples = epoch_batches[batch]
            if held_out_users is not None:
                samples = [t for t in samples if t[0] not in held_out_users and t[2] == 1]
            if len(samples) == 0:
                # An empty batch would produce a NaN loss and poison the epoch average.
                continue

            user = torch.tensor([t[0] for t in samples], dtype=torch.long).to(device)
            item = torch.tensor([t[1] for t in samples], dtype=torch.long).to(device)

            # ---------------------------
            # (a) Forward pass through fixed model with original embeddings.
            user_emb = user_embed[user]
            item_emb = item_embed[item]
            with torch.no_grad():
                # This output is only a target; squeeze(-1) keeps the batch
                # axis even when a single sample is left in the batch.
                y = model.fc_layers(torch.cat([user_emb, item_emb], dim=-1)).squeeze(-1)

            # ---------------------------
            # (b) Forward pass through SAE for each embedding individually.
            user_rec, user_encoded = sae_model(user_emb)
            item_rec, item_encoded = sae_model(item_emb)

            # Reconstruction loss for each embedding.
            l2_reconstruction_loss = mse_loss(user_rec, user_emb) + mse_loss(item_rec, item_emb)

            # Sparsity penalty (mean absolute value of the encoded activations)
            l1_sparsity_loss = torch.mean(torch.abs(user_encoded)) + torch.mean(torch.abs(item_encoded))

            kl_loss = kl_divergence_loss(user_encoded) + kl_divergence_loss(item_encoded)

            # ---------------------------
            # (c) Pass reconstructed embeddings through fixed model.
            y_hat = model.fc_layers(torch.cat([user_rec, item_rec], dim=-1)).squeeze(-1)

            # Difference between original output and reconstructed output.
            prediction_level_loss = ce_loss(y_hat, y)

            # Total loss:
            loss = l2_weight*l2_reconstruction_loss + prediction_level_weight * prediction_level_loss +  l1_weight * l1_sparsity_loss + kl_weight * kl_loss

            optimizer.zero_grad()
            loss.backward()  # Gradients flow through the fixed network into the SAE.
            optimizer.step()

            samples_num += len(samples)
            total_loss += loss.item() * len(samples)

        avg_loss = total_loss / samples_num if samples_num else float('nan')
        (sae_model.loss).append(avg_loss)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

# =============================================================================


In [None]:
# Train the NCF sparse autoencoder with test users held out
test_flag=1
sae_model = SparseAutoencoderNCF(input_dim=20, hidden_dim=22, topk=7, tie_weights=True)
# NOTE(review): user_embeddings / item_embeddings are assigned in the last cell of this
# notebook, and batch_dataset / dataset_NCF_SAE are not assigned anywhere in it —
# confirm they exist before running this cell.
train_sparse_autoencoder(model, sae_model, batch_dataset, user_embeddings, item_embeddings,dataset_NCF_SAE, epochs=18, lr=0.001, device='cpu',kl_weight=0.07,l2_weight=2, l1_weight=0, prediction_level_weight=10)
sae_model.eval()

In [None]:
# Placeholder: replace with the file name (without extension) to save the NCF SAE under
model_name = 'your_SAE_model_name'

In [None]:
# torch.save(sae_model, Path(export_dir,f'models/ML1M/{model_name}.pth'))

-

## MF recommender

In [None]:
def train(mf_recommender, batch_size=256):
    """
    Trains the model using mini-batch SGD.

    At the start of every epoch the negative examples are re-sampled, a
    one-hot table (1 = positive, 0 = sampled negative, -1 = unused) is rebuilt
    and stored in ``mf_recommender.df_1hot``, and the labelled cells are
    shuffled and passed to ``mf_recommender.sgd_batch`` in chunks of
    ``batch_size``.

    Reads the notebook-level ``norm_prob_neg_use_exp`` (per-user sampling
    probabilities for the popularity-based negatives).

    Args:
        mf_recommender: Object providing ``R``, ``iterations``, ``num_users``,
            ``num_items``, ``pos_ex_num``, ``neg_ex_use``, ``pos_idx_ex_use``,
            ``sgd_batch`` and ``rmse``.
        batch_size: Number of samples per SGD mini-batch.
    """

    for epoch in range(mf_recommender.iterations):
        start_time = time.time()

        # Re-sample negatives. Both strategies are drawn; the assignment to
        # neg_idx_ex_use below selects which one is trained on.
        neg_ex_unif = {
            row: random.sample(mf_recommender.R.columns[mf_recommender.R.iloc[row] == 0].tolist(), mf_recommender.pos_ex_num[row])
            for row in mf_recommender.R.index if mf_recommender.pos_ex_num[row] < mf_recommender.R.shape[1] / 2
        }
        neg_ex_popularity = {
            row: list(np.random.choice(mf_recommender.neg_ex_use[row],
              size=len(mf_recommender.pos_idx_ex_use[row]), replace=False, p=norm_prob_neg_use_exp[row]))
            for row in mf_recommender.R.index
        }
        neg_idx_ex_use = neg_ex_popularity

        # Build one-hot DataFrame with entries: 1 for positive, 0 for negatives, -1 otherwise.
        # The negatives come from this epoch's sample; previously the freshly
        # drawn dict was ignored and the object's stored negatives were used.
        mf_recommender.df_1hot = pd.DataFrame(-1, index=mf_recommender.R.index, columns=mf_recommender.R.columns)
        for row in range(mf_recommender.num_users):
            mf_recommender.df_1hot.loc[row, neg_idx_ex_use[row]] = 0
            mf_recommender.df_1hot.loc[row, mf_recommender.pos_idx_ex_use[row]] = 1

        # Create list of training samples: (user index, item index, rating, item id).
        # The labelled cells are located on the underlying array in one pass
        # instead of one DataFrame lookup per user/item pair.
        one_hot = mf_recommender.df_1hot.to_numpy()[:mf_recommender.num_users, :mf_recommender.num_items]
        item_labels = mf_recommender.df_1hot.columns
        user_pos, item_pos = np.nonzero(one_hot != -1)
        mf_recommender.samples = [
            (int(i), int(j), one_hot[i, j], item_labels[j])
            for i, j in zip(user_pos, item_pos)
        ]

        # Shuffle the samples for mini-batch creation
        random.shuffle(mf_recommender.samples)

        # Process samples in mini-batches
        for start in range(0, len(mf_recommender.samples), batch_size):
            batch_samples = mf_recommender.samples[start:start+batch_size]
            mf_recommender.sgd_batch(batch_samples)

        rmse_val = mf_recommender.rmse()
        print(f"Epoch {epoch+1}; RMSE: {rmse_val:.4f}; epoch time: {time.time()-start_time:.2f}s")

-

In [None]:
# Build the MF recommender (K=20 latent factors, 10 iterations) and time its training.
# NOTE(review): this calls the object's own train() method, not the module-level
# train(mf_recommender, ...) defined in the cell above — confirm which one is intended.
mf_recommender = MatrixFactorization(ratings_matrix, pos_idx_ex_use,neg_idx_ex_use,neg_ex_hidden, neg_ex, pos_ex_num, K=20, alpha=0.05, beta=0.01, iterations=10, pop_flag = 1)

start_time = time.time()
mf_recommender.train(batch_size=256)
# Wall-clock duration of the whole training run
print("--- %s seconds ---" % (time.time() - start_time))


In [None]:
# Placeholder: replace with the name used in the exported CSV file names below
model_name = 'your_recommender_name'

In [None]:
# Optionally pickle the whole recommender object:
# with open(Path(export_dir,f'models/MF_model_{model_name}'), 'wb') as file:
#     pickle.dump(mf_recommender, file)


# Item and user bias terms, labelled with the item/user labels of the ratings matrix
df_b_i= pd.DataFrame(mf_recommender.b_i)
df_b_u= pd.DataFrame(mf_recommender.b_u)
df_b_i.index = ratings_matrix.columns
df_b_u.index = ratings_matrix.index


# index=False: the labels assigned above are not written, so rows are identified by position
df_b_i.to_csv(Path(export_dir,f'res_csv/b_i_{model_name}.csv'), index=False)
# df_b_i.to_csv(Path(export_dir,f'res_csv/b_i_{model_name}_Windex.csv'))

df_b_u.to_csv(Path(export_dir,f'res_csv/b_u_{model_name}.csv'), index=False)
# df_b_u.to_csv(Path(export_dir,f'res_csv/b_u_{model_name}_Windex.csv'))

# Hidden positive examples: one CSV row per dictionary key
pos_idx_ex_hidden_df= pd.DataFrame.from_dict(mf_recommender.pos_idx_ex_hidden, orient='index')
pos_idx_ex_hidden_df.to_csv(Path(export_dir,f'res_csv/test_items_{model_name}.csv'), index = False)

# Hidden negative examples: one CSV row per dictionary key
neg_idx_ex_hidden_df= pd.DataFrame.from_dict(mf_recommender.neg_ex_hidden, orient='index')
neg_idx_ex_hidden_df.to_csv(Path(export_dir,f'res_csv/neg_test_items_{model_name}.csv'), index = False)


# mf_recommender.P is written as the user embeddings
df_P= pd.DataFrame(mf_recommender.P)
df_P.to_csv(Path(export_dir,f'res_csv/users_embeddings_{model_name}.csv'), index = False)


# mf_recommender.Q is written as the item embeddings
df_Q= pd.DataFrame(mf_recommender.Q)
df_Q.to_csv(Path(export_dir,f'res_csv/items_embeddings_{model_name}.csv'), index = False)


-

-

## NCF recommender

In [None]:
# Training loop for the model.
def train_model(model, ratings, epochs=30, dataset_hist=None, lr=0.05, device='cpu'):
    """Train the NCF recommender, re-sampling negative examples every epoch.

    Each epoch draws fresh negatives, builds a one-hot table over ``ratings``
    (1 = positive, 0 = sampled negative, -1 = unused) and trains on the
    labelled cells in shuffled batches of 256 with binary cross-entropy.

    Reads the notebook-level ``pos_ex_num``, ``neg_ex_use`` and
    ``norm_prob_neg_use_exp``, and ``model.pos_ex_num`` /
    ``model.pos_idx_ex_use``.

    Args:
        model: Recommender called as ``model(user, item)``.
        ratings: User-by-item ratings DataFrame.
        epochs: Number of training epochs.
        dataset_hist: Not used by this variant; kept for call compatibility.
        lr: Adam learning rate.
        device: Device the model and batches are moved to.
    """
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()

    model.train()

    for epoch in range(epochs): # - number of epochs
        epoch_loss = 0.0
        start_time1 = time.time()

        # Both negative-sampling strategies are drawn; the assignment to
        # neg_idx_ex_use below selects which one is trained on.
        neg_ex_unif = {(row): random.sample(ratings.columns
            [ratings.iloc[row] == 0].tolist(),pos_ex_num[row]) for row in
            ratings.index if model.pos_ex_num[row]<ratings.shape[1]/2}

        # negatives distributed wrt popularity:
        neg_ex_popularity = {(row): list(np.random.choice(neg_ex_use[row],
                size=len(model.pos_idx_ex_use[row]),replace=False, p=norm_prob_neg_use_exp[row]))
                    for row in ratings.index}
        # change wrt unif/pop:
        neg_idx_ex_use = neg_ex_popularity

        # The negatives come from this epoch's sample; previously the freshly
        # drawn dict was ignored in favour of model.neg_idx_ex_use.
        df_1hot = pd.DataFrame(-1, index=ratings.index, columns=ratings.columns)
        for row in range(ratings.shape[0]):
            df_1hot.loc[row, neg_idx_ex_use[row]] = 0
            df_1hot.loc[row, model.pos_idx_ex_use[row]] = 1

        # (user index, item index, rating, item id) for every labelled cell,
        # located on the underlying array in one pass instead of one
        # DataFrame lookup per user/item pair.
        one_hot = df_1hot.to_numpy()
        item_labels = df_1hot.columns
        user_pos, item_pos = np.nonzero(one_hot != -1)
        interactions = [
            (int(i), int(j), one_hot[i, j], item_labels[j])
            for i, j in zip(user_pos, item_pos)
        ]

        dataset = InteractionDataset(interactions)
        dataloader = DataLoader(dataset, batch_size=256, shuffle=True)

        for user, item, rating, item_real_num in dataloader:
            user, item, rating = user.to(device), item.to(device), rating.to(device)

            optimizer.zero_grad()
            prediction = model(user, item)
            loss = criterion(prediction, rating)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a per-sample mean.
            epoch_loss += loss.item() * user.size(0)

        avg_loss = epoch_loss / len(dataloader.dataset)  # number of samples
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, time: {time.time() - start_time1}")




use our saved training dataset:

In [None]:
# Training loop for the model.
def train_model(model, ratings, epochs=30, dataset_hist=None, lr=0.05, device='cpu'):
    """Train the NCF recommender on the saved (pre-built) batches.

    ``dataset_hist[epoch][batch]`` is a sequence of tuples whose first three
    elements are user index, item index and rating. Every saved epoch is
    replayed once, in order.

    Args:
        model: Recommender called as ``model(user, item)``.
        ratings: Not used by this variant; kept for call compatibility.
        epochs: Not used; the number of epochs is ``len(dataset_hist)``.
        dataset_hist: Saved batches; defaults to the notebook-level
            ``dataset_NCF``, looked up when the function is called.
        lr: Adam learning rate.
        device: Device the model and batches are moved to.
    """
    if dataset_hist is None:
        # Resolved at call time so that defining this function does not
        # require the saved batches to be loaded already.
        dataset_hist = dataset_NCF

    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()

    model.train()

    num_epochs = len(dataset_hist)
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        samples_num = 0
        start_time1 = time.time()

        epoch_batches = dataset_hist[epoch]
        for batch in range(len(epoch_batches)):
            samples = epoch_batches[batch]
            user = torch.tensor([t[0] for t in samples], dtype=torch.long).to(device)
            item = torch.tensor([t[1] for t in samples], dtype=torch.long).to(device)
            rating = torch.tensor([t[2] for t in samples], dtype=torch.float).to(device)

            optimizer.zero_grad()
            prediction = model(user, item)
            loss = criterion(prediction, rating)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch figure is a per-sample mean.
            epoch_loss += loss.item() * user.size(0)
            samples_num += user.size(0)

        # The sum above is weighted by samples, so divide by the number of
        # samples (not by the number of batches).
        avg_loss = epoch_loss / samples_num if samples_num else float('nan')
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, time: {time.time() - start_time1}")




trainings:

In [None]:
# Initialize the model.
model = NeuralCollaborativeFiltering(num_users=6039, num_items=3706,
                                      embedding_dim=20, hidden_layers=[64, 32, 16])
# Train the model. The saved batches are passed by keyword: a positional
# argument may not follow a keyword argument.
train_model(model, ratings_matrix, epochs=30, dataset_hist=dataset_NCF, lr=0.05, device='cpu')


# Switch to evaluation mode once training is done
model.eval()

In [None]:
# Placeholder: replace with the name used in the exported file names below
model_name = 'your NCF recommender name'

In [None]:
# torch.save(model, Path(export_dir,f'models/ML1M/{model_name}.pth'))

In [None]:
# Detached copies of the learned embedding tables
user_embeddings = model.user_embedding.weight.detach().clone()  # Tensor of shape (num_users, embedding_dim)
item_embeddings = model.item_embedding.weight.detach().clone()  # Tensor of shape (num_items, embedding_dim)


# Hidden positive examples: one CSV row per dictionary key
pos_idx_ex_hidden_df= pd.DataFrame.from_dict(model.pos_idx_ex_hidden, orient='index')
pos_idx_ex_hidden_df.to_csv(Path(export_dir,f'res_csv/test_items_{model_name}.csv'), index = False)

# Hidden negative examples: one CSV row per dictionary key
neg_idx_ex_hidden_df= pd.DataFrame.from_dict(model.neg_ex_hidden, orient='index')
neg_idx_ex_hidden_df.to_csv(Path(export_dir,f'res_csv/neg_test_items_{model_name}.csv'), index = False)


# torch.Tensor has no to_csv(); write through a DataFrame instead. The tensors
# themselves are kept unchanged for the cells that consume them.
pd.DataFrame(user_embeddings.cpu().numpy()).to_csv(Path(export_dir,f'res_csv/NCF_{model_name}_user_embeddings.csv'), index = False)
pd.DataFrame(item_embeddings.cpu().numpy()).to_csv(Path(export_dir,f'res_csv/NCF_{model_name}_item_embeddings.csv'), index = False)
