# In this notebook the training process is defined and executed

In [None]:
from typing import Callable, Any

import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import os
import numpy as np
import scipy
from scipy.sparse import csr_matrix
from pathlib import Path
from torch.utils.data import DataLoader
export_dir = os.getcwd()
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.offline import plot
import random
import math
import heapq
from scipy.special import expit  # Sigmoid function
import itertools
from IPython.display import Latex, display
import pickle
import warnings
from torch.utils.data import DataLoader
import itertools


# Silence the noisy warning categories for the whole notebook session.
# The categories are registered in the same order as before.
for _ignored_category in (FutureWarning, RuntimeWarning, UserWarning):
    warnings.simplefilter(action='ignore', category=_ignored_category)
del _ignored_category

# Display settings: print tensors without scientific notation and show every
# DataFrame column (row truncation is left at the pandas default).
# pd.set_option('display.max_rows', None)
torch.set_printoptions(sci_mode=False)
pd.set_option('display.max_columns', None)

# Notebook-level flag; this file only ever assigns it
# (presumably read by the imported helper modules - TODO confirm).
test_flag = 1

In [None]:
pip install ipynb

In [None]:
from ipynb.fs.defs.utils import *
from ipynb.fs.defs.data_processing import *
from ipynb.fs.defs.models import *

## SAE MF

Load the MF embeddings for users and items (the input to the SAE)

In [None]:
# Read the saved MF factors and biases (one CSV per table) and expose each
# table both as a DataFrame and as a float32 tensor.
def _read_exported_csv(relative_csv):
    """Load a CSV located under ``export_dir`` into a DataFrame."""
    return pd.read_csv(Path(export_dir, relative_csv))


def _frame_to_float32(frame):
    """Convert the values of a DataFrame into a float32 tensor."""
    return torch.tensor(frame.values, dtype=torch.float32)


df_item_emb_mf = _read_exported_csv('res_csv/lastFM/mf/items_embeddings_mf_model.csv')
df_user_emb_mf = _read_exported_csv('res_csv/lastFM/mf/users_embeddings_mf_model.csv')
df_b_i_emb_mf = _read_exported_csv('res_csv/lastFM/mf/b_i_mf_model.csv')
df_b_u_emb_mf = _read_exported_csv('res_csv/lastFM/mf/b_u_mf_model.csv')

# Tensor views of the four tables.
dataset_users_mf = _frame_to_float32(df_user_emb_mf)
dataset_items_mf = _frame_to_float32(df_item_emb_mf)
dataset_bi_mf = _frame_to_float32(df_b_i_emb_mf)
dataset_bu_mf = _frame_to_float32(df_b_u_emb_mf)

# Until a train/test split is made, the SAE input is the full user table.
interaction_embeddings = dataset_users_mf

## Test set sampling:

In [None]:
# Uniform sampling: hold out 20% of the users and 20% of the items.
# NOTE(review): `random` is not seeded before this cell in the visible code,
# so the split presumably differs between runs - confirm that is intended.
_user_pool = list(df_user_emb_mf.index)
_n_test_users = math.floor(df_user_emb_mf.shape[0] * 0.2)
test_subset_users = random.sample(_user_pool, k=_n_test_users)

_item_pool = list(df_item_emb_mf.index)
_n_test_items = math.floor(df_item_emb_mf.shape[0] * 0.2)
test_subset_items = random.sample(_item_pool, k=_n_test_items)

test_users_num = len(test_subset_users)

In [None]:
# Users that were not drawn into the test subset form the SAE training set.
# A set makes every membership check O(1) instead of scanning the test list.
_held_out_users = set(test_subset_users)
train_subset_users = [i for i in df_user_emb_mf.index if i not in _held_out_users]

# The MF user tensor of this section is `dataset_users_mf`; the name
# `dataset_users` is only assigned later, in the NCF section.
interaction_embeddings = dataset_users_mf[train_subset_users]

In [None]:
def autoencoder_loss(
    user: torch.Tensor,
    user_data: torch.Tensor,
    item: torch.Tensor,
    item_data: torch.Tensor,
    user_recons: torch.Tensor,
    item_recons: torch.Tensor,
    latent_activations_user: torch.Tensor,
    latent_activations_item: torch.Tensor,
    epochs_progress: float,
    l1_weight: float = 1.2,
    kl_weight: float = 0.7,
    mse_weight: float = 2.6,
    inner_product_weight: float = 1.0,
    sparsity_target: float = 0.1,
) -> "tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]":
    """Combine reconstruction, sparsity and inner-product losses for the SAE.

    Args:
        user: Accepted for interface compatibility; not used in the computation.
        user_data: Original user embeddings of the batch.
        item: Accepted for interface compatibility; not used in the computation.
        item_data: Original item embeddings of the batch.
        user_recons: Reconstructed user embeddings.
        item_recons: Reconstructed item embeddings.
        latent_activations_user: Latent activations produced for the users.
        latent_activations_item: Latent activations produced for the items.
        epochs_progress: Accepted for interface compatibility; not used.
        l1_weight: Weight of the L1 penalty on the latent activations.
        kl_weight: Weight of the KL sparsity penalty; ``0`` removes the KL
            term from the total.
        mse_weight: Weight of the embedding reconstruction error.
        inner_product_weight: Weight of the MSE between the original and the
            reconstructed user-item inner products.
        sparsity_target: Target value forwarded to ``kl_divergence_loss``.

    Returns:
        A 6-tuple ``(total_loss, reconstruction_loss, weighted_mse,
        weighted_l1, weighted_kl, weighted_inner)``. The first two entries
        stay attached to the autograd graph; the four weighted components are
        detached and meant for logging.
    """
    # Prediction-aware term: the user x item score matrix should survive
    # the reconstruction. Computed once and reused below.
    inner_orig = user_data @ item_data.T
    inner_recons = torch.matmul(user_recons, item_recons.T)
    inner_term = inner_product_weight * F.mse_loss(inner_recons, inner_orig)

    # L1 sparsity penalty (mean absolute latent activation).
    sparsity_loss_item = F.l1_loss(latent_activations_item, torch.zeros_like(latent_activations_item))
    sparsity_loss_user = F.l1_loss(latent_activations_user, torch.zeros_like(latent_activations_user))
    l1_term = l1_weight * (sparsity_loss_item + sparsity_loss_user)

    # KL-divergence sparsity penalty.
    kl_loss_user = kl_divergence_loss(latent_activations_user, sparsity_target)
    kl_loss_item = kl_divergence_loss(latent_activations_item, sparsity_target)
    kl_term = kl_weight * (kl_loss_item + kl_loss_user)

    reconstruction_loss = F.mse_loss(user_recons, user_data) + F.mse_loss(item_recons, item_data)
    mse_term = mse_weight * reconstruction_loss

    if kl_weight == 0:
        # The KL term is left out entirely rather than multiplied by zero
        # (presumably so a non-finite KL value cannot reach the total).
        total_loss = mse_term + l1_term + inner_term
    else:
        total_loss = mse_term + kl_term + l1_term + inner_term

    return (
        total_loss,
        reconstruction_loss,
        mse_term.detach(),
        l1_term.detach(),
        kl_term.detach(),
        inner_term.detach(),
    )

In [None]:
def train_autoencoder(dataset_users, dataset_items, test_data, input_dim, latent_dim, num_epochs=30, batch_size=256, learning_rate=1e-3, mse_weight=8, l1_weight=0.3, kl_weight =0.003,  inner_product_weight = 1):
    """Train one shared sparse autoencoder on user and item embeddings.

    Every optimisation step feeds one user batch and one item batch through
    the same ``Autoencoder`` and minimises ``autoencoder_loss``. An epoch is
    one pass over the user loader; item batches are cycled alongside it.

    Args:
        dataset_users: 2-D tensor with one row per training user.
        dataset_items: 2-D tensor with one row per item.
        test_data: Accepted for interface compatibility; not used here.
        input_dim: Input width given to ``Autoencoder`` and to
            ``pad_or_truncate_tensor``.
        latent_dim: Latent width given to ``Autoencoder``.
        num_epochs: Number of passes over the user loader.
        batch_size: Batch size of both the user and the item loader.
        learning_rate: Adam learning rate.
        mse_weight: Forwarded to ``autoencoder_loss``.
        l1_weight: Forwarded to ``autoencoder_loss``.
        kl_weight: Forwarded to ``autoencoder_loss``.
        inner_product_weight: Forwarded to ``autoencoder_loss``.

    Returns:
        The trained ``Autoencoder``. ``activation_rate[epoch]`` holds the
        per-batch fraction of item latents above ``1e-6`` and ``loss`` gains
        the loss of the last batch of every epoch.

    Raises:
        ValueError: If there are fewer users or items than ``batch_size``,
            in which case ``drop_last=True`` would leave no batch to train on.
    """
    # Seed every RNG before the model is built so that the weight
    # initialisation is reproducible as well, not only the shuffling.
    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    autoencoder = Autoencoder(latent_dim, input_dim, activation=nn.ReLU(), tied=True, normalize=True)
    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=learning_rate)

    g = torch.Generator().manual_seed(42)
    g_items = torch.Generator().manual_seed(42)

    # Prepend the row index as column 0 so that every batch carries its ids.
    # dataset_users_wind shape: [num_users, 1 + embedding width]
    user_indices = torch.arange(dataset_users.shape[0]).unsqueeze(1)
    dataset_users_wind = torch.cat((user_indices, dataset_users), dim=1)
    dataloader_users = DataLoader(dataset_users_wind, batch_size=batch_size, shuffle=True, drop_last=True, generator=g)

    # Item ids are derived from the tensor itself, so the function no longer
    # relies on a notebook-level DataFrame having the same number of rows.
    item_indices = torch.arange(dataset_items.shape[0]).unsqueeze(1)
    dataset_items_wind = torch.cat((item_indices, dataset_items), dim=1)
    dataloader_items = DataLoader(dataset_items_wind, batch_size=batch_size, shuffle=True, drop_last=True, generator=g_items)

    if len(dataloader_users) == 0 or len(dataloader_items) == 0:
        raise ValueError(
            "batch_size exceeds the number of users or items; "
            "with drop_last=True no batch would be produced"
        )

    for epoch in range(num_epochs):
        temp_activation_rates = []
        print(f'epoch {epoch}')
        if epoch != 0:
            # From the second epoch on the item loader is rebuilt without the
            # dedicated generator, so its shuffling draws from the global RNG.
            dataloader_items = DataLoader(dataset_items_wind, batch_size=batch_size, shuffle=True, drop_last=True)

        for data_users, data_items in zip(dataloader_users, itertools.cycle(dataloader_items)):
            # --------------------------
            # 1) Process user batch
            # --------------------------
            user_idx = data_users[:, 0].long()       # user indices
            input_tensor_users = data_users[:, 1:]   # the actual features
            # The helpers come from the project modules; the original notes
            # say they return numpy.ndarray - TODO confirm.
            normalized_users = normalize_matrix(input_tensor_users)
            input_tensor_users = pad_or_truncate_tensor(normalized_users, input_dim)

            latents_pre_act_usrs, latents_usrs, user_recons = autoencoder(input_tensor_users)

            # --------------------------
            # 2) Process item batch
            # --------------------------
            item_idx = data_items[:, 0].long()
            input_tensor_items = data_items[:, 1:]
            normalized_items = normalize_matrix(input_tensor_items)
            input_tensor_items = pad_or_truncate_tensor(normalized_items, input_dim)

            latents_pre_act_items, latents_items, item_recons = autoencoder(input_tensor_items)

            # Reconstruction targets as float32 tensors.
            input_tensor_users = torch.tensor(input_tensor_users, dtype=torch.float32)
            input_tensor_items = torch.tensor(input_tensor_items, dtype=torch.float32)

            # Sparsity probe: share of item latents that are active.
            activation_mask = (latents_items > 1e-6).float()
            temp_activation_rates.append(activation_mask.mean().item())

            epochs_progress = epoch / num_epochs

            # Only the total loss drives the update; the remaining returned
            # components are detached diagnostics.
            loss, *_ = autoencoder_loss(
                user_idx, input_tensor_users, item_idx, input_tensor_items,
                user_recons, item_recons, latents_usrs, latents_items, epochs_progress,
                mse_weight=mse_weight, l1_weight=l1_weight, kl_weight=kl_weight,
                inner_product_weight=inner_product_weight,
            )

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        autoencoder.activation_rate[epoch] = temp_activation_rates
        # The recorded value is the loss of the epoch's last batch.
        (autoencoder.loss).append(loss.item())
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

    return autoencoder


## Start Training

In [None]:
test_flag = 1
# This section trains on the MF embeddings loaded above, so the `_mf` tensors
# are passed; `dataset_users` / `dataset_items` are only assigned later, in
# the NCF section.
autoencoder = train_autoencoder(
    interaction_embeddings,
    dataset_items_mf,
    dataset_users_mf[test_subset_users],
    input_dim=dataset_users_mf.shape[1],
    latent_dim=70,
)

## Save Model

In [None]:
# Placeholder file stem for the MF-SAE checkpoint; it is interpolated into the
# (commented-out) save path in the next cell. Replace it before saving.
model_name = 'your_SAE_model_name'

In [None]:
# torch.save(autoencoder, Path(export_dir,f'models/lastFM/{model_name}.pth'))

-

## SAE NCF

load NCF recommender

In [None]:
# Build an NCF recommender with freshly initialised weights.
model = NeuralCollaborativeFiltering(
    num_users=USERS_lastFM,
    num_items=ITEMS_lastFM,
    embedding_dim=100,
    hidden_layers=[64, 32, 16],
)

load existing model of NCF recommender

In [None]:
# Restore the previously trained NCF recommender from disk.
# NOTE: weights_only=False unpickles arbitrary Python objects, so only load
# checkpoints that come from a trusted source.
_ncf_checkpoint_path = Path(export_dir, 'models/lastFM/NCF_recommender.pth')
model = torch.load(_ncf_checkpoint_path, weights_only=False)

# pos_idx_ex_use = model.pos_idx_ex_use
# pos_idx_ex_hidden = {(row): [item for item in pos_ex[row] if item not in pos_idx_ex_use[row]] for row in ratings_matrix.index}

# Switch to inference mode (kept as the cell's last expression).
model.eval()

In [None]:
# Snapshot the learned embedding tables as independent tensors that are cut
# off from autograd: (num_users, embedding_dim) and (num_items, embedding_dim).
user_embeddings = model.user_embedding.weight.clone().detach()
item_embeddings = model.item_embedding.weight.clone().detach()

In [None]:
# Load the exported NCF embedding tables (users first, then items).
# NOTE(review): these paths start with 'rec_csv' while every other cell in
# this notebook uses 'res_csv' - confirm the directory name.
_ncf_users_csv = Path(export_dir, 'rec_csv/lastFM/ncf/users_embeddings_ncf_.csv')
_ncf_items_csv = Path(export_dir, 'rec_csv/lastFM/ncf/items_embeddings_ncf_.csv')
df_user_embeddings = pd.read_csv(_ncf_users_csv)
df_item_embeddings = pd.read_csv(_ncf_items_csv)

# Float32 tensors handed to the SAE training below; these replace the
# embeddings taken from the loaded model in the previous cell.
user_embeddings = torch.tensor(df_user_embeddings.values, dtype=torch.float32)
item_embeddings = torch.tensor(df_item_embeddings.values, dtype=torch.float32)

# Copy of the item table indexed by the columns of the interaction matrix.
df_item_emb1 = df_item_embeddings.copy()
df_item_emb1.index = user_artist_matrix.columns

# Separate tensors built from the same values under the generic dataset names.
dataset_users = torch.tensor(df_user_embeddings.values, dtype=torch.float32)
dataset_items = torch.tensor(df_item_embeddings.values, dtype=torch.float32)

SAE training:

In [None]:
# =============================================================================
# 4. SAE Training Loop
def train_sparse_autoencoder(model, sae_model, user_embed, item_embed, batch_size = 256, epochs=30, lr=0.001, device='cpu',mse_weight=2.6, sparsity_weight=1.2674255898937214e-05,kl_weight=0.7 , output_loss_weight = 1):
    """Train a sparse autoencoder on NCF embeddings against a fixed recommender.

    User and item embeddings are encoded and reconstructed by ``sae_model``.
    The loss combines the embedding reconstruction error, the gap between the
    recommender's output on original and on reconstructed embeddings, an L1
    penalty and a KL sparsity penalty. Only ``sae_model`` is optimised.

    Reads the notebook-level ``user_artist_matrix_tensor`` and calls
    ``kl_divergence_loss`` from the project modules.

    Args:
        model: Recommender exposing ``fc_layers``; put in eval mode and never
            stepped by the optimiser.
        sae_model: Autoencoder returning ``(reconstruction, encoded)``. It must
            expose ``test_subset_users_ind`` (users excluded from training)
            and a ``loss`` list that receives one average per epoch.
        user_embed: 2-D tensor of user embeddings, indexed by user id.
        item_embed: 2-D tensor of item embeddings, indexed by item id.
        batch_size: Batch size of both the user and the item loader.
        epochs: Number of passes over the training users.
        lr: Adam learning rate.
        device: Device for the two modules and for every batch.
        mse_weight: Weight of the embedding reconstruction loss.
        sparsity_weight: Weight of the L1 penalty on the encoded activations.
        kl_weight: Weight of the KL sparsity penalty.
        output_loss_weight: Weight of the prediction-aware output loss.

    Raises:
        ValueError: If fewer training users or items than ``batch_size`` are
            available, so that ``drop_last=True`` would yield no batch.
    """
    model.to(device)
    sae_model.to(device)
    # `torch.optim` is reachable through the `torch` import of this file.
    optimizer = torch.optim.Adam(sae_model.parameters(), lr=lr)
    mse_loss = nn.MSELoss()
    ce_loss = nn.BCEWithLogitsLoss()  # built once instead of once per batch

    # Set the fixed model to evaluation mode
    model.eval()

    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # for full determinism (might slow down training):
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    g = torch.Generator().manual_seed(42)
    g_items = torch.Generator().manual_seed(42)

    # Training users are all users except the held-out ones. Comparing plain
    # ints against a set avoids one tensor comparison per (user, held-out) pair.
    held_out_users = {int(idx) for idx in sae_model.test_subset_users_ind}
    train_user_ids = torch.tensor(
        [u for u in range(user_embed.shape[0]) if u not in held_out_users],
        dtype=torch.long,
    )
    if train_user_ids.numel() < batch_size or item_embed.shape[0] < batch_size:
        raise ValueError(
            "batch_size exceeds the number of training users or items; "
            "with drop_last=True no batch would be produced"
        )

    dataloader_users = DataLoader(train_user_ids, batch_size=batch_size, shuffle=True, drop_last=True, generator=g)

    # - Create DataLoader for the item data
    dataloader_items = DataLoader(range(item_embed.shape[0]), batch_size=batch_size, shuffle=True, drop_last=True, generator=g_items)

    for epoch in range(epochs):
        total_loss = 0.0
        samples_num = 0

        # One pass over the users; item batches are cycled alongside.
        for user, item in zip(dataloader_users, itertools.cycle(dataloader_items)):
            user = user.long().squeeze()
            item = item.long().squeeze()
            real_ratings = user_artist_matrix_tensor[user, item].float().to(device)

            # Original embeddings of the batch, on the same device as the modules.
            user_emb = user_embed[user].to(device)
            item_emb = item_embed[item].to(device)

            # Output of the fixed model on the original embeddings. It is a
            # constant target for the SAE, so no graph is recorded for it.
            with torch.no_grad():
                x_full = torch.cat([user_emb, item_emb], dim=-1)
                y = model.fc_layers(x_full).squeeze(-1)   # (batch,)

            # Forward pass through the SAE for each embedding individually.
            user_rec, user_encoded = sae_model(user_emb)
            item_rec, item_encoded = sae_model(item_emb)

            # Reconstruction loss for each embedding.
            loss_reconstruction = mse_loss(user_rec, user_emb) + mse_loss(item_rec, item_emb)

            # Sparsity penalty (mean absolute encoded activation).
            sparsity_loss = torch.mean(torch.abs(user_encoded)) + torch.mean(torch.abs(item_encoded))

            kl_loss = kl_divergence_loss(user_encoded) + kl_divergence_loss(item_encoded)

            # Pass the reconstructed embeddings through the fixed model.
            x_hat = torch.cat([user_rec, item_rec], dim=-1)
            y_hat = model.fc_layers(x_hat).squeeze(-1)    # (batch,)

            loss_output = mse_loss(y_hat, y)
            # Loss against the real interactions; currently weighted by zero.
            recons_NCF_loss = ce_loss(y_hat, real_ratings)

            # Total loss: reconstruction + prediction-aware loss + sparsity penalties.
            loss = mse_weight*loss_reconstruction + output_loss_weight*loss_output + sparsity_weight * sparsity_loss + kl_weight*kl_loss + 0*recons_NCF_loss
            optimizer.zero_grad()
            loss.backward()  # Gradients flow through the fixed network into the SAE.
            optimizer.step()

            samples_num += len(real_ratings)
            total_loss += loss.item() * len(real_ratings)

        avg_loss = total_loss / samples_num
        (sae_model.loss).append(avg_loss)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")


In [None]:
test_flag = 1

# Hyper-parameters of this NCF-SAE training run.
_sae_ncf_training_options = dict(
    epochs=30,
    lr=0.001,
    device='cpu',
    mse_weight=2.6,
    sparsity_weight=1.2674255898937214e-05,
    kl_weight=0.7,
    output_loss_weight=10,
)

# Build the SAE, train it against the loaded recommender, then switch it to
# inference mode (kept as the cell's last expression).
sae_model = SparseAutoencoderNCF(input_dim=100, hidden_dim=70, topk=7, tie_weights=True)
train_sparse_autoencoder(model, sae_model, user_embeddings, item_embeddings, **_sae_ncf_training_options)
sae_model.eval()

In [None]:
# Placeholder file stem for the NCF-SAE checkpoint; it is interpolated into the
# (commented-out) save path in the next cell. Replace it before saving.
model_name = 'your_SAE_model_name'

In [None]:
# torch.save(sae_model, Path(export_dir,f'models/lastFM/{model_name}.pth'))

## SAE Matryoshka

In [None]:
def train_matryoshka(
    sae: MatryoshkaAutoencoder,
    user_loader: DataLoader,
    item_loader: DataLoader,
    num_epochs: int = 30,
    lr: float = 1e-3,
    mse_weight: float = 8.0,
    l1_weight: float = 0.3,
    kl_weight: float = 0.003,
    inner_weight: float = 5.0,
    sparsity_target: float = 0.1,
    device: str = 'cuda'
):
    """Train a Matryoshka sparse autoencoder on user and item embeddings.

    The two loaders are iterated in lockstep, so an epoch ends as soon as the
    shorter one is exhausted. ``sae`` is updated in place; nothing is returned.

    Args:
        sae: Autoencoder whose forward pass returns ``(_, z, recons)``, where
            ``recons`` is a sequence of nested reconstructions and its last
            entry is the full reconstruction.
        user_loader: Loader yielding batches of user embeddings.
        item_loader: Loader yielding batches of item embeddings.
        num_epochs: Number of epochs.
        lr: Adam learning rate.
        mse_weight: Weight of the summed nested reconstruction losses.
        l1_weight: Weight of the L1 penalty on the latent vectors.
        kl_weight: Weight of the KL sparsity penalty.
        inner_weight: Weight of the inner-product preservation loss.
        sparsity_target: Target value forwarded to ``kl_divergence_loss``.
        device: Accepted for interface compatibility; tensors and the model
            are left on whatever device they already live on.
    """
    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # for full determinism (might slow down training):
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    optimizer = torch.optim.Adam(sae.parameters(), lr=lr)

    for epoch in range(1, num_epochs + 1):
        epoch_loss = 0.0
        samples_seen = 0

        # Iterate in parallel over user and item batches
        for u, i in zip(user_loader, item_loader):
            # Forward pass through the autoencoder
            _, z_u, recons_u = sae(u)
            _, z_i, recons_i = sae(i)

            # 1) Nested reconstruction loss (sum over all prefix levels)
            recon_u_loss = sum(F.mse_loss(r, u) for r in recons_u)
            recon_i_loss = sum(F.mse_loss(r, i) for r in recons_i)
            recon_term = mse_weight * (recon_u_loss + recon_i_loss)

            # 2) Sparsity penalties on the full latent vectors z
            l1_term = l1_weight * (z_u.abs().mean() + z_i.abs().mean())
            kl_term = kl_weight * (
                kl_divergence_loss(z_u, sparsity_target) +
                kl_divergence_loss(z_i, sparsity_target)
            )

            # 3) Inner-product loss on the highest-level reconstructions
            full_u_hat = recons_u[-1]
            full_i_hat = recons_i[-1]
            inner_orig = u @ i.T
            inner_recons = full_u_hat @ full_i_hat.T
            inner_term = inner_weight * F.mse_loss(inner_recons, inner_orig)

            # Total loss
            loss = recon_term + l1_term + kl_term + inner_term

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item() * u.size(0)
            samples_seen += u.size(0)

        # Average over the samples that were actually processed: zip() stops at
        # the shorter loader and loaders may drop their last batch, so dividing
        # by the full dataset size would under-report the loss.
        avg_loss = epoch_loss / max(samples_seen, 1)
        print(f"Epoch {epoch}/{num_epochs} — avg_loss: {avg_loss:.4f}")



-

## MF recommender

In [None]:
def train_gpu(self, batch_size=8, save_batches=False, batches_save_path=None):
    """Train a matrix-factorisation recommender with tensor mini-batches.

    Written as a free function that receives the recommender as ``self``.
    Each epoch samples as many negatives per user as that user has positives,
    builds one shuffled list of ``(user, item, label)`` triples, runs
    ``self.sgd_batch_tensor`` on every mini-batch and stores the epoch RMSE in
    ``self.rmse_train``.

    Besides ``self`` the function reads the notebook-level names
    ``pos_idx_ex_use_``, ``norm_prob_neg_use_exp_`` and ``export_dir``.

    Args:
        self: Recommender providing ``P``, ``Q``, ``b_u``, ``b_i``,
            ``iterations``, ``num_users``, ``neg_ex_use``, ``pos_idx_ex_use``,
            ``rmse_train``, ``sgd_batch_tensor`` and ``rmse_tensor``.
        batch_size: Number of triples per SGD step.
        save_batches: If true, keep every mini-batch in ``self.batch_history``.
        batches_save_path: File name under ``dataset/lastFM/`` to which the
            batch history is pickled; only used together with ``save_batches``.
    """
    # Imported locally because this file never imports `time` itself.
    import time

    def _float32_copy(values):
        """Return a fresh float32 tensor; tensors are cloned, not re-wrapped."""
        if torch.is_tensor(values):
            return values.detach().clone().to(torch.float32)
        return torch.tensor(values, dtype=torch.float32)

    self.batch_history = []

    # Factors and biases are kept as float32 tensors for the whole run.
    self.P = _float32_copy(self.P)
    self.Q = _float32_copy(self.Q)
    self.b_u = _float32_copy(self.b_u)
    self.b_i = _float32_copy(self.b_i)

    for epoch in range(self.iterations):
        start_time = time.time()

        # Negative sampling: one negative per positive, drawn without
        # replacement using the per-user probabilities.
        neg_idx_ex_use__ = {
            row: torch.tensor(
                np.random.choice(self.neg_ex_use[row],
                                 size=len(self.pos_idx_ex_use[row]),
                                 replace=False,
                                 p=norm_prob_neg_use_exp_[row]),
                dtype=torch.long
            ) for row in range(self.num_users)
        }

        # Build one flat tensor each for users, items and labels.
        # NOTE(review): positives come from the global `pos_idx_ex_use_`
        # while the sample size above uses `self.pos_idx_ex_use` -
        # presumably the same data; confirm.
        user_ids, item_ids, labels = [], [], []
        for user in range(self.num_users):
            positives = pos_idx_ex_use_[user]
            negatives = neg_idx_ex_use__[user]

            user_ids.append(torch.full((len(positives) + len(negatives),), user, dtype=torch.long))
            item_ids.append(torch.cat([positives, negatives]))
            labels.append(torch.cat([torch.ones(len(positives)), torch.zeros(len(negatives))]))

        user_ids = torch.cat(user_ids)
        item_ids = torch.cat(item_ids)
        labels = torch.cat(labels)

        # Shuffle all samples with one shared permutation.
        indices = torch.randperm(len(user_ids))
        user_ids = user_ids[indices]
        item_ids = item_ids[indices]
        labels = labels[indices]

        epoch_batches = [] if save_batches else None

        # Training in mini-batches
        for start in range(0, len(user_ids), batch_size):
            end = start + batch_size
            u_batch = user_ids[start:end].long()
            i_batch = item_ids[start:end].long()
            r_batch = labels[start:end]

            self.sgd_batch_tensor(u_batch, i_batch, r_batch)

            if save_batches:
                epoch_batches.append((u_batch.cpu(), i_batch.cpu(), r_batch.cpu()))

        if save_batches:
            self.batch_history.append(epoch_batches)

        rmse_val = self.rmse_tensor(user_ids, item_ids, labels)
        self.rmse_train[epoch] = rmse_val
        print(f"Epoch {epoch+1}; RMSE: {rmse_val:.4f}; epoch time: {time.time()-start_time:.2f}s")

    if save_batches and batches_save_path:
        with open(Path(export_dir, f'dataset/lastFM/{batches_save_path}'), 'wb') as f:
            pickle.dump(self.batch_history, f)


-

train new model:

In [None]:
# Build the MF recommender from the interaction data and the sampled
# positive / negative example sets.
mf_recommender = MatrixFactorization(
    user_artist_matrix_tensor,
    user_artist_matrix,
    pos_idx_ex_use_,
    neg_idx_ex_use_,
    neg_ex_hidden_,
    neg_ex_,
    pos_ex_num_,
    K=100,
    alpha=0.05,
    beta=0.01,
    iterations=30,
)

# Train it and report the wall-clock duration of the whole run.
start_time = time.time()
train_gpu(mf_recommender, batch_size=8)
_elapsed_seconds = time.time() - start_time
print("--- %s seconds ---" % (_elapsed_seconds))

In [None]:
# Placeholder name of the MF recommender; it is interpolated into the CSV file
# names written by the export cell below. Replace it before exporting.
model_name = 'your_recommender_name'

save the model

In [None]:
# with open(Path(export_dir,'models/lastFM', f'{model_name}.pkl'), 'wb') as file:
#     pickle.dump(mf_recommender, file)


# df_Q= pd.DataFrame(mf_recommender.Q)
# df_Q.index = user_artist_matrix.columns

# df_Q.to_csv(Path(export_dir,f'res_csv/lastFM/items_embeddings_mf_model_{model_name}_Windex.csv'))
# df_Q.to_csv(Path(export_dir,f'res_csv/lastFM/items_embeddings_mf_model_{model_name}.csv'), index=False)


# df_P=  pd.DataFrame(mf_recommender.P)
# df_P.index = user_artist_matrix.index
# df_P.to_csv(Path(export_dir,f'res_csv/lastFM/users_embeddings_mf_model_{model_name}.csv'), index=False)
# df_P.to_csv(Path(export_dir,f'res_csv/lastFM/users_embeddings_mf_model_{model_name}_Windex.csv'))

# df_b_i= pd.DataFrame(mf_recommender.b_i) 
# df_b_u= pd.DataFrame(mf_recommender.b_u) 

# df_b_i.index = user_artist_matrix.columns
# df_b_u.index = user_artist_matrix.index

# df_b_i.to_csv(Path(export_dir,f'res_csv/lastFM/b_i_mf_model_{model_name}.csv'), index=False)
# df_b_i.to_csv(Path(export_dir,f'res_csv/lastFM/b_i_mf_model_{model_name}_Windex.csv'))

# df_b_u.to_csv(Path(export_dir,f'res_csv/lastFM/b_u_mf_model_{model_name}.csv'), index=False)
# df_b_u.to_csv(Path(export_dir,f'res_csv/lastFM/b_u_mf_model_{model_name}_Windex.csv'))


In [None]:
# with open(Path(export_dir,f'models/lastFM/MF_model_{model_name}'), 'wb') as file:
#     pickle.dump(mf_recommender, file)


def _to_numpy(values):
    """Return tensors as NumPy arrays; anything else is passed through.

    ``train_gpu`` leaves the factors and biases as torch tensors. Converting
    them first makes sure the CSV files contain plain numbers (pandas may
    otherwise keep per-cell tensor objects).
    """
    if torch.is_tensor(values):
        return values.detach().cpu().numpy()
    return values


# Item and user biases.
df_b_i = pd.DataFrame(_to_numpy(mf_recommender.b_i))
df_b_u = pd.DataFrame(_to_numpy(mf_recommender.b_u))
df_b_i.index = user_artist_matrix.columns
df_b_u.index = user_artist_matrix.index

df_b_i.to_csv(Path(export_dir, f'res_csv/lastFM/b_i_{model_name}.csv'), index=False)
# df_b_i.to_csv(Path(export_dir,f'res_csv/b_i_{model_name}_Windex.csv'))

df_b_u.to_csv(Path(export_dir, f'res_csv/lastFM/b_u_{model_name}.csv'), index=False)
# df_b_u.to_csv(Path(export_dir,f'res_csv/b_u_{model_name}_Windex.csv'))

# Held-out positive and negative examples.
pos_idx_ex_hidden_df = pd.DataFrame.from_dict(mf_recommender.pos_idx_ex_hidden, orient='index')
pos_idx_ex_hidden_df.to_csv(Path(export_dir, f'res_csv/lastFM/test_items_{model_name}.csv'), index=False)

neg_idx_ex_hidden_df = pd.DataFrame.from_dict(mf_recommender.neg_ex_hidden, orient='index')
neg_idx_ex_hidden_df.to_csv(Path(export_dir, f'res_csv/lastFM/neg_test_items_{model_name}.csv'), index=False)

# User and item factor matrices. Both go to res_csv/lastFM/, like every other
# export of this cell (the user path previously repeated the 'lastFM' folder).
df_P = pd.DataFrame(_to_numpy(mf_recommender.P))
df_P.to_csv(Path(export_dir, f'res_csv/lastFM/users_embeddings_{model_name}.csv'), index=False)

df_Q = pd.DataFrame(_to_numpy(mf_recommender.Q))
df_Q.to_csv(Path(export_dir, f'res_csv/lastFM/items_embeddings_{model_name}.csv'), index=False)


-

-

## NCF recommender

In [None]:
# Training loop for the model.
# NOTE(review): the loop below runs when this cell is executed, before
# `train_model` is defined. It disables gradients for every parameter of
# whatever `model` is bound at that moment (presumably the NCF recommender
# loaded earlier - confirm). A model frozen this way cannot be trained until
# its parameters are made trainable again.
for param in model.parameters():
    param.requires_grad = False

def train_model(model, ratings, epochs=30, lr=0.001, device='cpu', batch_size=None):
    """Train an NCF recommender with binary cross-entropy on sampled examples.

    Each epoch samples as many negatives per user as that user has positives,
    builds one shuffled list of ``(user, item, label)`` triples and optimises
    ``model`` on mini-batches of it. The model is updated in place; nothing
    is returned.

    Besides its arguments the function reads the notebook-level names
    ``user_artist_matrix``, ``pos_idx_ex_use_`` and ``norm_prob_neg_use_exp_``.

    Args:
        model: Recommender called as ``model(user, item)`` that also provides
            ``num_users``, ``neg_ex_use`` and ``pos_idx_ex_use``.
        ratings: Accepted for interface compatibility; not used here.
        epochs: Number of epochs.
        lr: Adam learning rate.
        device: Device for the model and for every mini-batch.
        batch_size: Samples per optimisation step. ``None`` falls back to a
            notebook-level ``batch_size`` when one exists (the name the
            function used to read implicitly), otherwise to 256.

    Raises:
        ValueError: If no parameter of ``model`` requires gradients.
    """
    # Imported locally because this file never imports `time` itself.
    import time

    if batch_size is None:
        batch_size = globals().get('batch_size', 256)

    model.to(device)

    # Fail early with a clear message instead of an autograd error raised
    # from the first backward() call.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    if not trainable_params:
        raise ValueError(
            "train_model received a model whose parameters are all frozen; "
            "set requires_grad=True on the parameters that should be trained"
        )
    # `torch.optim` is reachable through the `torch` import of this file.
    optimizer = torch.optim.Adam(trainable_params, lr=lr)
    criterion = nn.BCEWithLogitsLoss()

    model.train()
    for epoch in range(epochs):
        epoch_loss = 0.0
        start_time1 = time.time()

        # Negative sampling: labels are translated to column positions of the
        # interaction matrix, then one negative per positive is drawn.
        neg_idx_ex_use__ = {
            row: torch.tensor(
                np.random.choice(user_artist_matrix.columns.get_indexer(model.neg_ex_use[row]),
                                 size=len(model.pos_idx_ex_use[row]),
                                 replace=False,
                                 p=norm_prob_neg_use_exp_[row]),
                dtype=torch.long
            ) for row in range(model.num_users)
        }

        # Build one flat tensor each for users, items and labels.
        user_ids, item_ids, labels = [], [], []
        for user in range(model.num_users):
            n_pos = len(pos_idx_ex_use_[user])
            negatives = neg_idx_ex_use__[user]
            # Kept as integers end to end; a float tensor is no longer used
            # as an intermediate container for the indices.
            positives = torch.as_tensor(
                user_artist_matrix.columns.get_indexer(pos_idx_ex_use_[user]),
                dtype=torch.long,
            )

            user_ids.append(torch.full((n_pos + len(negatives),), user, dtype=torch.long))
            item_ids.append(torch.cat([positives, negatives]))
            labels.append(torch.cat([
                torch.ones(n_pos, dtype=torch.float32),
                torch.zeros(len(negatives), dtype=torch.float32)
            ]))

        user_ids = torch.cat(user_ids)
        item_ids = torch.cat(item_ids)
        labels = torch.cat(labels)

        # Shuffle all samples with one shared permutation.
        indices = torch.randperm(len(user_ids))
        user_ids = user_ids[indices]
        item_ids = item_ids[indices]
        labels = labels[indices]

        # Training in mini-batches
        for start in range(0, len(user_ids), batch_size):
            end = start + batch_size
            user = user_ids[start:end].long().to(device)
            item = item_ids[start:end].long().to(device)
            rating = labels[start:end].to(device)

            optimizer.zero_grad()
            prediction = model(user, item)
            prediction = prediction.view_as(rating)
            loss = criterion(prediction, rating)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * user.size(0)

        # epoch_loss is weighted by batch size, so the matching denominator is
        # the number of samples, not the number of batches.
        avg_loss = epoch_loss / max(len(user_ids), 1)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, time: {time.time() - start_time1}")




trainings:

In [None]:
# Initialize the model.
model = NeuralCollaborativeFiltering(num_users=USERS_lastFM, num_items=ITEMS_lastFM,
                                      embedding_dim=100, hidden_layers=[64, 32, 16])

# Train the model. Its parameters must stay trainable here: with every
# parameter frozen the loss has no grad_fn and backward() cannot run.
train_model(model, user_artist_matrix_tensor)

# Freeze the trained recommender so that later stages treat it as fixed.
for param in model.parameters():
    param.requires_grad = False

model.eval()

In [None]:
# Placeholder name of the NCF recommender; it is interpolated into the
# (commented-out) save paths in the cells below. Replace it before saving.
model_name = 'your NCF recommender name'

save the model

In [None]:
# torch.save(model, Path(export_dir,f'models/lastFM/{model_name}.pth'))

In [None]:
# user_embeddings = model.user_embedding.weight.detach().clone()  # Tensor of shape (num_users, embedding_dim)
# item_embeddings = model.item_embedding.weight.detach().clone()  # Tensor of shape (num_items, embedding_dim)


# pos_idx_ex_hidden_df= pd.DataFrame.from_dict(model.pos_idx_ex_hidden, orient='index')
# pos_idx_ex_hidden_df.to_csv(Path(export_dir,f'res_csv/lastFM/test_items_{model_name}.csv'), index = False)

# neg_idx_ex_hidden_df= pd.DataFrame.from_dict(model.neg_ex_hidden, orient='index')
# neg_idx_ex_hidden_df.to_csv(Path(export_dir,f'res_csv/lastFM/neg_test_items_{model_name}.csv'), index = False)


# user_embeddings.to_csv(Path(export_dir,f'res_csv/lastFM/NCF_{model_name}_user_embeddings.csv'), index = False)
# item_embeddings.to_csv(Path(export_dir,f'res_csv/lastFM/NCF_{model_name}_item_embeddings.csv'), index = False)
