In [1]:
# Import modules
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix
from aroma import learnBPR

In [2]:
# Load the tab-separated ratings file (no header row), name its four columns,
# and discard the timestamp, which nothing below uses.
data = (
    pd.read_csv('ml-100k/u.data', sep='\t', header=None)
    .set_axis(['user id', 'movie id', 'rating', 'timestamp'], axis=1)
    .drop(columns=['timestamp'])
)

# Small slice (first 100 ratings) used later to build the preference triples.
sample_data = data.iloc[:100]

In [3]:
# Users as rows, movies as columns, ratings as cell values.
# (user, movie) pairs with no rating come out as NaN and are replaced by 0.
user_item_matrix = pd.pivot_table(
    data,
    values='rating',
    index='user id',
    columns='movie id',
).fillna(0)

In [4]:
# Truncated SVD of the ratings matrix, keeping k=20 singular triplets.
user_item_matrix_sparse = csr_matrix(user_item_matrix)
U, sigma, Vt = svds(user_item_matrix_sparse, k=20)

# Low-rank reconstruction of the user-item matrix: U . diag(sigma) . Vt
predicted_ratings = U @ np.diag(sigma) @ Vt  # type: ignore

# Item biases: column-wise mean of (observed - reconstructed). The mean runs
# over every cell of the matrix, including the zero-filled unrated ones.
# The reduction yields a 2-D, single-row result, hence the trailing [0].
item_biases = (user_item_matrix_sparse - predicted_ratings).mean(axis=0).tolist()[0]

In [5]:
# Load the pipe-separated, latin-1 encoded movie file (no header row), name
# its 24 columns, drop the descriptive text/URL columns and index by movie id.
# What remains are the columns 'unknown' through 'Western'.
items = (
    pd.read_csv('ml-100k/u.item', sep="|", header=None, encoding='latin-1')
    .set_axis(
        [
            'movie id', 'movie title', 'release date', 'video release date',
            'IMDb URL', 'unknown', 'Action', 'Adventure', 'Animation',
            'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
            'Film_Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci_Fi',
            'Thriller', 'War', 'Western',
        ],
        axis=1,
    )
    .drop(columns=['movie title', 'release date', 'video release date', 'IMDb URL'])
    .set_index('movie id')
)

In [6]:
def build_preference_triples(ratings):
    """Build (user, preferred movie, less-preferred movie) triples.

    For every pair of rows in ``ratings`` that share a 'user id' but have
    different 'movie id' values, a triple ``(user, i, j)`` is emitted when the
    rating of movie ``i`` is strictly greater than the rating of movie ``j``.
    Ties produce no triple.

    Rows are bucketed by user in a single pass, so only rows of the same user
    are ever compared, instead of comparing every row against every other row
    through nested ``iterrows()`` loops. The triples come out in the same
    order the nested loops would produce: by row ``i`` in frame order, then by
    row ``j`` in frame order.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Must contain the columns 'user id', 'movie id' and 'rating'.

    Returns
    -------
    list of tuple of int
        ``(user id, preferred movie id, less-preferred movie id)`` triples.
    """
    rows = list(
        ratings[['user id', 'movie id', 'rating']].itertuples(index=False, name=None)
    )

    # One bucket of (movie, rating) pairs per user, in original row order.
    by_user = {}
    for user, movie, rating in rows:
        by_user.setdefault(user, []).append((movie, rating))

    return [
        (int(user), int(movie), int(other_movie))
        for user, movie, rating in rows
        for other_movie, other_rating in by_user[user]
        if movie != other_movie and rating > other_rating
    ]


# Pairwise preferences derived from the 100-row sample.
A = build_preference_triples(sample_data)

In [7]:
# learnBPR is imported from the project-local `aroma` module; its contract is
# not visible in this notebook.
# NOTE(review): `A` holds raw 'user id' / 'movie id' values, while the two
# matrices are passed as plain positional arrays - confirm learnBPR expects
# ids rather than 0-based row/column positions.
# NOTE(review): presumably the columns of user_item_matrix and the rows of
# items must refer to the same movies in the same order - verify.
user_latent_vectors = learnBPR(
    user_item_matrix.to_numpy(),
    A,
    items.to_numpy(),
    item_biases,
)