## Sparse User-Item Matrix

### Load Datasets

In [1]:
import pandas as pd

# Read both slimmed CSV exports into DataFrames. The paths are relative to
# the notebook's working directory.
# `reviews` supplies the user_id / parent_asin / rating columns used below.
reviews = pd.read_csv(filepath_or_buffer='../datasets/slimmed/reviews.csv')
# NOTE(review): `items` is not referenced by the cells visible here --
# presumably item metadata consumed further down; confirm before removing.
items = pd.read_csv(filepath_or_buffer='../datasets/slimmed/items.csv')

### Creating Sparse Matrix

Build a sparse user-item matrix: each row is a user (`user_id`), each column is an item (`parent_asin`), and each stored element is the rating that user gave that item.

In [2]:
from scipy.sparse import coo_matrix

# Map user_id and item_id to index-based values.
# Indices are assigned in order of first appearance in `reviews`, so they are
# dense: 0..n_users-1 and 0..n_items-1.
user_map = {u: i for i, u in enumerate(reviews['user_id'].unique())}
item_map = {i: j for j, i in enumerate(reviews['parent_asin'].unique())}

# Row-aligned with `reviews`: one entry per review.
user_idx = reviews['user_id'].map(user_map)
item_idx = reviews['parent_asin'].map(item_map)
ratings = reviews['rating'].astype('float')

# scipy sums entries that share the same (row, col) when a COO matrix is
# converted to CSR, so a user who reviewed the same item more than once would
# end up with a summed rating (e.g. 5 + 4 = 9). Collapse repeated
# (user, item) pairs to their mean rating first so every stored value stays
# on the original rating scale. With no repeated pairs this is a no-op.
pair_ratings = ratings.groupby([user_idx, item_idx], sort=False).mean()
pair_rows = pair_ratings.index.get_level_values(0).to_numpy()
pair_cols = pair_ratings.index.get_level_values(1).to_numpy()

# Create a sparse matrix (users x items). The shape is stated explicitly from
# the id maps rather than inferred from the largest index present.
sparse_matrix = coo_matrix(
    (pair_ratings.to_numpy(), (pair_rows, pair_cols)),
    shape=(len(user_map), len(item_map)),
)

# Convert to CSR format for efficiency in operations
sparse_matrix_csr = sparse_matrix.tocsr()

Reverse mappings are also needed to translate matrix row and column indices back to the original user and item ids.

In [3]:
# Invert the id -> index lookups so a matrix row/column index can be turned
# back into the original id. Iterating a dict yields its keys in the same
# order as .values(), so zip pairs each index with its own id.
reverse_user_map = dict(zip(user_map.values(), user_map))
reverse_item_map = dict(zip(item_map.values(), item_map))

Create a binary version of the sparse matrix, in which every non-zero rating is replaced by 1.

In [5]:
# Ratings are 0s or 1s: any non-zero rating becomes 1.
ratings_binary = ratings.astype(bool).astype(int)

# Create a binary sparse matrix (users x items), with the shape stated
# explicitly from the id maps.
sparse_matrix_binary = coo_matrix(
    (ratings_binary, (user_idx, item_idx)),
    shape=(len(user_map), len(item_map)),
)

# Repeated (user, item) pairs are summed by scipy, which would store 2, 3, ...
# for users who reviewed the same item more than once. Merge the duplicates
# explicitly and clamp the stored values back to 1 so the matrix is binary.
# Note: sum_duplicates() reorders the COO entries, so they are no longer in
# the same order as the rows of `reviews`.
sparse_matrix_binary.sum_duplicates()
sparse_matrix_binary.data = sparse_matrix_binary.data.clip(max=1)

# Convert to CSR format for efficiency in operations
sparse_matrix_csr_binary = sparse_matrix_binary.tocsr()