In [1]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from scipy import sparse
from precision import calculate_precision
import torch



In [2]:

def getInitialMatrix(csv_file):
    '''
    Build dense user-item matrices from a single CSV file of interactions.

    pandas' default header handling is applied to the first row of the file, after
    which the four columns are renamed to user_id, movie_id, rating, timestamp.
    No shape is passed to the sparse constructor, so the matrix dimensions are
    inferred from the ids present in the file.
    :param csv_file: Path to the CSV file containing all data.
    :return: (rating_matrix, adj_matrix) - the dense rating array (rows are users,
             columns are movies) and a boolean array that is True wherever the
             rating is greater than 0.5
    '''
    # read_csv consumes the whole file eagerly, so the handle can be released right away
    with open(csv_file, 'r') as handle:
        frame = pd.read_csv(handle)
    frame.columns = ['user_id', 'movie_id', 'rating', 'timestamp']

    user_ids = frame['user_id'].values
    movie_ids = frame['movie_id'].values
    ratings = frame['rating'].values

    # Scatter the ratings into a (user, movie) grid and densify it
    rating_matrix = sparse.csr_matrix((ratings, (user_ids, movie_ids))).toarray()

    # Boolean mask marking the entries that carry a rating
    adj_matrix = rating_matrix > 0.5
    return rating_matrix, adj_matrix

def get_test_matrix(csv_file, sha):
    '''
    Build dense rating and indicator matrices of a fixed shape from a CSV file.

    :param csv_file: CSV source handed directly to pandas.read_csv; its four columns
                     are renamed to user_id, movie_id, rating, timestamp
    :param sha: (rows, columns) shape forced onto the resulting matrices
    :return: (rating_matrix, adj_matrix) - the dense rating array and a float64
             array holding 1.0 wherever the rating is greater than 0.5, else 0.0
    '''
    frame = pd.read_csv(csv_file)
    frame.columns = ['user_id', 'movie_id', 'rating', 'timestamp']

    print("Processing the data to create 'A' matrix...")
    # Rows are users, columns are movies, values are ratings
    coordinates = (frame['user_id'].values, frame['movie_id'].values)
    rating_matrix = sparse.csr_matrix((frame['rating'].values, coordinates), shape=sha).toarray()

    # Threshold to consider a rating, stored as floats rather than booleans
    adj_matrix = (rating_matrix > 0.5).astype(np.float64, copy=False)
    return rating_matrix, adj_matrix

def compute_xu(Y, Cu, p_u, reg_lambda):
    """
    Calculate xu for a single user in the ALS algorithm.

    Solves the regularized normal equations
        (Y^T C_u Y + λI) x_u = Y^T C_u p(u)
    with a linear solve instead of forming the explicit inverse.

    Args:
        Y (torch.Tensor): Item latent factor matrix of size (m, k).
        Cu (torch.Tensor): Confidence diagonal matrix for user u of size (m, m).
        p_u (torch.Tensor): Interaction vector for user u of size (m,).
        reg_lambda (float): Regularization parameter.

    Returns:
        torch.Tensor: Latent factors for user u (size k).
    """
    # Unpacking also enforces that Y is a 2D tensor (m x k)
    _, k = Y.shape

    # Y^T C_u appears on both sides of the system, so compute it once
    YTCu = Y.T @ Cu

    # Build λI in Y's own dtype: a default float32 identity would round
    # reg_lambda to single precision before it is added to the Gram matrix.
    reg_term = reg_lambda * torch.eye(k, device=Y.device, dtype=Y.dtype)

    # Left-hand side (Y^T C_u Y + λI) and right-hand side Y^T C_u p(u)
    lhs = YTCu @ Y + reg_term
    rhs = YTCu @ p_u

    # Solving the system directly is cheaper and more accurate than inv(lhs) @ rhs
    return torch.linalg.solve(lhs, rhs)

def compute_yi(X, Ci, p_i, reg_lambda):
    """
    Calculate yi for a single item in the ALS algorithm.

    Solves the regularized normal equations
        (X^T C_i X + λI) y_i = X^T C_i p(i)
    with a linear solve instead of forming the explicit inverse.

    Args:
        X (torch.Tensor): User latent factor matrix of size (n, k).
        Ci (torch.Tensor): Confidence diagonal matrix for item i of size (n, n).
        p_i (torch.Tensor): Interaction vector for item i of size (n,).
        reg_lambda (float): Regularization parameter.

    Returns:
        torch.Tensor: Latent factors for item i (size k).
    """
    # Unpacking also enforces that X is a 2D tensor (n x k)
    _, k = X.shape

    # X^T C_i appears on both sides of the system, so compute it once
    XTCi = X.T @ Ci

    # Build λI in X's own dtype: a default float32 identity would round
    # reg_lambda to single precision before it is added to the Gram matrix.
    reg_term = reg_lambda * torch.eye(k, device=X.device, dtype=X.dtype)

    # Left-hand side (X^T C_i X + λI) and right-hand side X^T C_i p(i)
    lhs = XTCi @ X + reg_term
    rhs = XTCi @ p_i

    # Solving the system directly is cheaper and more accurate than inv(lhs) @ rhs
    return torch.linalg.solve(lhs, rhs)

def runALS(A, R, n_factors, n_iterations, lambda_, device, test_adj_matrix, k):
    '''
    Runs Alternating Least Squares algorithm in order to calculate matrix.

    Every iteration re-solves each user's factor row with the item factors held
    fixed, then each item's factor column with the user factors held fixed, using
    the entries of R as per-entry weights. After each iteration the masked MSE and
    the precision reported by calculate_precision are recorded. Whenever the
    precision beats the best value seen so far, the current factors are written
    to "Users.pt" and "Items.pt" in the working directory.

    :param A: User-Item Matrix with ratings
    :param R: User-Item Matrix with 1 if there is a rating or 0 if not
    :param n_factors: How many factors each of user and item matrix will consider
    :param n_iterations: How many times to run algorithm
    :param lambda_: Regularization parameter
    :param device: torch device (e.g. "cpu" or "cuda:0") the matrices are moved to
    :param test_adj_matrix: forwarded unchanged to calculate_precision together with
                            the predicted matrix Users @ Items
    :param k: forwarded to calculate_precision as its top-k argument
    :return: (precision_list, MSE_List) with one entry per iteration; precisions are
             Python numbers, MSE values are 0-dim CPU tensors
    '''
    print("Initiating")
    n, m = A.shape
    A = torch.tensor(A).to(device).type(torch.float64)
    R = torch.tensor(R).to(device).type(torch.float64)
    Users = 5 * torch.randn(n, n_factors).to(device).type(torch.float64)  # User matrix initialization
    Items = 5 * torch.randn(n_factors, m).to(device).type(torch.float64)  # Item matrix initialization

    def get_error(A, Users, Items, R):
        # Calculates the MSE of nonzero elements
        return torch.sum((R * (A - torch.matmul(Users, Items))) ** 2) / torch.sum(R)

    def precision_train(Users, Items, test_item_adj, topk):
        # Calculates the precision of the model
        return calculate_precision(torch.matmul(Users, Items), test_item_adj, topk)

    MSE_List = []
    precision_list = []
    best_precision = float('-inf')

    print("Starting Iterations")
    for iteration in range(n_iterations):
        # Update user factors.
        # Entries whose weight is zero add nothing to Y^T C Y or Y^T C p, so each
        # solve is restricted to the non-zero-weight items. This gives the same
        # system as the full-size one without building a dense (m x m) diagonal
        # matrix for every user.
        for i, Ri in enumerate(R):
            rated = Ri.nonzero().squeeze(1)
            Users[i] = compute_xu(Items.T[rated], torch.diag(Ri[rated]), A[i, rated], lambda_)

        print(f"Error after solving for User Matrix at iteration {iteration + 1}: {get_error(A, Users, Items, R)}")

        # Update item factors, again using only the users with non-zero weight
        for j, Rj in enumerate(R.T):
            raters = Rj.nonzero().squeeze(1)
            Items[:, j] = compute_yi(Users[raters], torch.diag(Rj[raters]), A[raters, j], lambda_)

        # The factors do not change after this point in the iteration, so the
        # error is computed once and reused for both the log line and the history.
        MSE = get_error(A, Users, Items, R)
        print(f"Error after solving for Item Matrix at iteration {iteration + 1}: {MSE}")

        # Record MSE and precision
        MSE_List.append(MSE.to('cpu'))
        precision = precision_train(Users, Items, test_adj_matrix, k).to('cpu').item()
        precision_list.append(precision)
        if best_precision < precision:
            # Checkpoint the best-scoring factors seen so far
            best_precision = precision
            torch.save(Users, "Users.pt")
            torch.save(Items, "Items.pt")
        print(f"{iteration + 1}th iteration is complete. MSE: {MSE}, precision = {precision}")

    return precision_list, MSE_List

In [3]:
def origin_matrix_shape_finder(file_name):
    """
    Find the matrix shape needed to index every interaction listed in a CSV file.

    Each non-blank line is split on commas and every field is converted to int,
    so the file must contain integer fields only (no header row).

    Args:
        file_name: Path to the CSV file. Column 0 and column 1 are presumably the
            user ids and item ids used by the other loaders - verify against the data.

    Returns:
        tuple: (largest value in column 0 + 1, largest value in column 1 + 1).

    Raises:
        ValueError: If the file holds no data rows, or a field is not an integer.
    """
    # Read the file the caller asked for, and close it even if reading fails
    with open(file_name, "r") as file:
        # Blank lines (e.g. a trailing newline at end of file) carry no interaction
        rows = [line.strip("\n").split(",") for line in file if line.strip()]

    if not rows:
        raise ValueError(f"No interactions found in {file_name!r}")

    interactions = np.array(rows).astype(int)

    # Ids are zero-based indices, so the required size is the largest id plus one
    return (np.amax(interactions[:, 0]) + 1, np.amax(interactions[:, 1]) + 1)


In [4]:
# Path of the full interaction file; it is passed to origin_matrix_shape_finder below.
summation_file = "./ml-1m.csv"

In [5]:
# Shape derived from the full interaction file.
# NOTE(review): original_matrix_shape is not referenced again in the visible cells; the
# test matrix below is sized from the training matrix instead - confirm this is intended,
# since a test id larger than any training id would not fit in R.shape.
original_matrix_shape = origin_matrix_shape_finder(summation_file)
A, R = getInitialMatrix("./train.csv")  # dense training ratings and the rated-entry mask
# Force the test matrices to the training shape so both splits share the same indexing
test_rating_matrix, test_adj_matrix = get_test_matrix("./test.csv", R.shape)
# Move the test mask to the first CUDA device as float64 (this cell requires a GPU)
test_adj_matrix = torch.tensor(test_adj_matrix).to("cuda:0").type(torch.float64)

Processing the data to create 'A' matrix...


In [None]:
# Hyperparameters for the ALS run
topk = 40  # passed to runALS as k, the cutoff forwarded to calculate_precision
factor_dim = 200  # number of latent factors per user / item
n_iterations = 30  # number of ALS iterations
reg_lambda = 500  # regularization parameter

# Train on the first CUDA device; the returned (precision_list, MSE_List) is not assigned to a name here
runALS(A, R, factor_dim, n_iterations, reg_lambda, "cuda:0", test_adj_matrix, topk)

Initiating
Starting Iterations
Error after solving for User Matrix at iteration 1: 4.0262843466681435
