<a href="https://colab.research.google.com/github/HMy2912/LTSSUD-RecommenderSys-ColabFiltering/blob/main/Group_7_Seminar_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Parallel Collaborative Filtering Recommender System
**Author**: Đằng Hoàn Mỹ (19127216)  
**Course**: CSC14116 - Applied Parallel Programming  
**Framework**: Python 3.10, NumPy, pandas, scikit-learn, SciPy for V1; Numba for V2; CUDA for V3/V4.  
This notebook implements V1 (sequential Python) for user-user NBCF on MovieLens 100K, targeting MAE < 1.5, Precision@10 ~4%, and a baseline for 100× speedup.

# Library Installation

In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import scipy.sparse as sp
import time
print("Libraries installed successfully!")

Libraries installed successfully!


# Dataset Setup

## MoviesLen 100K

In [3]:
data_url = 'https://files.grouplens.org/datasets/movielens/ml-100k/u.data'
item_url = 'https://files.grouplens.org/datasets/movielens/ml-100k/u.item'
ratings = pd.read_csv(data_url, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])
movies = pd.read_csv(item_url, sep='|', encoding='latin-1', usecols=[0, 1], names=['item_id', 'title'])
print("MovieLens 100K loaded: ratings shape =", ratings.shape, ", movies shape =", movies.shape)

MovieLens 100K loaded: ratings shape = (100000, 4) , movies shape = (1682, 2)


In [4]:
ratings.to_csv('/content/drive/MyDrive/2025/HK3/LTSSUD/Data/ml-100k_ratings.csv', index=False)
movies.to_csv('/content/drive/MyDrive/2025/HK3/LTSSUD/Data/ml-100k_movies.csv', index=False)

# Create User-Item Matrix

In [5]:
# Constants for MovieLens 100K
n_users = 943
n_items = 1682

# Create user-item matrix (0-based indexing)
R = sp.csr_matrix((ratings['rating'], (ratings['user_id'] - 1, ratings['item_id'] - 1)), shape=(n_users, n_items))

# Verify shape
print("User-item matrix shape:", R.shape)  # Should be (943, 1682)

# Save to Drive for reuse
np.save('/content/drive/MyDrive/2025/HK3/LTSSUD/Data/R_sparse.npy', R)

User-item matrix shape: (943, 1682)


# Normalize User-Item Matrix

In [6]:
def normalize_matrix(R):
    # Compute user means for non-zero ratings
    user_means = np.array(R.mean(axis=1)).flatten()  # Mean of non-zero ratings per user
    R_norm = R.copy()  # Preserve original matrix

    # Subtract mean from non-zero ratings using row indices
    row_indices, col_indices = R_norm.nonzero()
    R_norm.data = R_norm.data - user_means[row_indices]

    return R_norm, user_means

# Normalize
R_norm, user_means = normalize_matrix(R)
print("Normalized matrix non-zero count:", R_norm.nnz)  # Should be ~100K
print("User means shape:", user_means.shape)  # Should be (943,)

# Save normalized matrix and means
np.save('/content/drive/MyDrive/2025/HK3/LTSSUD/Data/R_norm_sparse.npy', R_norm)
np.save('/content/drive/MyDrive/2025/HK3/LTSSUD/Data/user_means.npy', user_means)

Normalized matrix non-zero count: 100000
User means shape: (943,)


# Compute Cosine Similarity

In [7]:
# Compute cosine similarity (V1: Sequential)
start = time.time()
similarity_matrix = cosine_similarity(R_norm)
end = time.time()
print(f"Cosine similarity (V1) time: {end - start} s")
print("Similarity matrix shape:", similarity_matrix.shape)  # Should be (943, 943)

# Save similarity matrix
np.save('/content/drive/MyDrive/2025/HK3/LTSSUD/Data/similarity_matrix_v1.npy', similarity_matrix)

Cosine similarity (V1) time: 0.09528207778930664 s
Similarity matrix shape: (943, 943)


# Numba and CUDA implementations for cosine similarity

In [8]:
from numba import jit, prange

@jit(nopython=True, parallel=True)
def cosine_similarity_numba(R_data, R_indices, R_indptr, n_users, result):
    for i in prange(n_users):
        for j in range(i + 1, n_users):
            dot = 0.0
            norm_i = 0.0
            norm_j = 0.0
            # Get non-zero indices for user i and j
            start_i, end_i = R_indptr[i], R_indptr[i + 1]
            start_j, end_j = R_indptr[j], R_indptr[j + 1]
            idx_i = R_indices[start_i:end_i]
            idx_j = R_indices[start_j:end_j]
            ratings_i = R_data[start_i:end_i]
            ratings_j = R_data[start_j:end_j]
            # Compute intersection of rated items
            k, l = 0, 0
            while k < len(idx_i) and l < len(idx_j):
                if idx_i[k] == idx_j[l]:
                    dot += ratings_i[k] * ratings_j[l]
                    norm_i += ratings_i[k] ** 2
                    norm_j += ratings_j[l] ** 2
                    k += 1
                    l += 1
                elif idx_i[k] < idx_j[l]:
                    norm_i += ratings_i[k] ** 2
                    k += 1
                else:
                    norm_j += ratings_j[l] ** 2
                    l += 1
            # Remaining items
            while k < len(idx_i):
                norm_i += ratings_i[k] ** 2
                k += 1
            while l < len(idx_j):
                norm_j += ratings_j[l] ** 2
                l += 1
            # Compute cosine
            if norm_i * norm_j > 0:
                result[i, j] = dot / (np.sqrt(norm_i) * np.sqrt(norm_j))
                result[j, i] = result[i, j]

In [9]:
# Run Numba version
start = time.time()
similarity_matrix_v2 = np.zeros((n_users, n_users), dtype=np.float32)
cosine_similarity_numba(R_norm.data, R_norm.indices, R_norm.indptr, n_users, similarity_matrix_v2)
end = time.time()
print(f"Cosine similarity (V2 Numba) time: {end - start} s")
np.save('/content/drive/MyDrive/2025/HK3/LTSSUD/Data/similarity_matrix_v2.npy', similarity_matrix_v2)

Cosine similarity (V2 Numba) time: 1.7122488021850586 s


In [10]:
import math

In [13]:
!pip install --upgrade numba



In [14]:
from numba import cuda
@cuda.jit
def cosine_similarity_cuda(R_data, R_indices, R_indptr, n_users, result):
    i, j = cuda.grid(2)
    if i < n_users and j < n_users and i < j:
        dot = 0.0
        norm_i = 0.0
        norm_j = 0.0
        start_i, end_i = R_indptr[i], R_indptr[i + 1]
        start_j, end_j = R_indptr[j], R_indptr[j + 1]
        idx_i = R_indices[start_i:end_i]
        idx_j = R_indices[start_j:end_j]
        ratings_i = R_data[start_i:end_i]
        ratings_j = R_data[start_j:end_j]
        k, l = 0, 0
        while k < len(idx_i) and l < len(idx_j):
            if idx_i[k] == idx_j[l]:
                dot += ratings_i[k] * ratings_j[l]
                norm_i += ratings_i[k] * ratings_i[k]
                norm_j += ratings_j[l] * ratings_j[l]
                k += 1
                l += 1
            elif idx_i[k] < idx_j[l]:
                norm_i += ratings_i[k] * ratings_i[k]
                k += 1
            else:
                norm_j += ratings_j[l] * ratings_j[l]
                l += 1
        while k < len(idx_i):
            norm_i += ratings_i[k] * ratings_i[k]
            k += 1
        while l < len(idx_j):
            norm_j += ratings_j[l] * ratings_j[l]
            l += 1
        if norm_i * norm_j > 0:
            result[i, j] = dot / (math.sqrt(norm_i) * math.sqrt(norm_j))
            result[j, i] = result[i, j]

# Run CUDA version
start = time.time()
R_data = cuda.to_device(R_norm.data)
R_indices = cuda.to_device(R_norm.indices)
R_indptr = cuda.to_device(R_norm.indptr)
similarity_matrix_v3 = cuda.device_array((n_users, n_users), dtype=np.float32)
threadsperblock = (16, 16)  # Smaller for sparse access
blockspergrid_x = (n_users + threadsperblock[0] - 1) // threadsperblock[0]
blockspergrid_y = (n_users + threadsperblock[1] - 1) // threadsperblock[1]
cosine_similarity_cuda[blockspergrid_x, blockspergrid_y, threadsperblock](R_data, R_indices, R_indptr, n_users, similarity_matrix_v3)
similarity_matrix_v3_host = similarity_matrix_v3.copy_to_host()
end = time.time()
print(f"Cosine similarity (V3 CUDA) time: {end - start} s")
np.save('/content/drive/MyDrive/2025/HK3/LTSSUD/Data/similarity_matrix_v3.npy', similarity_matrix_v3_host)

ERROR:numba.cuda.cudadrv.driver:Call to cuLinkAddData results in CUDA_ERROR_UNSUPPORTED_PTX_VERSION


LinkerError: [222] Call to cuLinkAddData results in CUDA_ERROR_UNSUPPORTED_PTX_VERSION
ptxas application ptx input, line 9; fatal   : Unsupported .version 8.5; current version is '8.4'

After installing the updated Numba, you can try running the CUDA code again. If the issue persists, it might indicate a more fundamental incompatibility with the specific Colab runtime environment and the desired CUDA version. In such cases, exploring different Colab runtime types or potentially using a different environment might be necessary.