In [24]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve

In [31]:
class Matrix_Factorizer:
    """Load a user-item ratings matrix and prepare it for ALS factorization.

    Attributes set by the methods below:
        seed: seed used for the train/test split and factor initialisation.
        matrix_asFrame: ratings as a DataFrame indexed by "UserID".
        ratings_matrix: ratings as a scipy sparse matrix.
        num_users, num_items: shape of ``ratings_matrix``.
        training_set, test_set: sparse matrices produced by
            ``create_training_data`` or loaded by ``read_train_test``.
        indices: unique row indices that had at least one rating masked.
    """

    def __init__(self, size, seed):
        """Store the seed and, unless ``size`` is "", load the ratings data.

        Args:
            size: suffix of the CSV to load (``df_ratings_matrix_{size}.csv``);
                pass "" to skip loading.
            seed: seed for all random draws made by this object.
        """
        self.seed = seed
        if size != "":
            self.read_matrices(size)

    def read_matrices(self, size = "medium"):
        """Load the ratings matrix as a DataFrame and as a sparse matrix.

        The DataFrame is read from ``df_ratings_matrix_{size}.csv``. The sparse
        matrix is read from ``ratings_matrix.npz`` when that file can be
        loaded; otherwise it is built from the DataFrame and written to that
        file. If the CSV is not available, run Pre-Processing first.

        Args:
            size: suffix selecting which CSV to read.
        """
        self.matrix_asFrame = pd.read_csv(f"df_ratings_matrix_{size}.csv")
        self.matrix_asFrame = self.matrix_asFrame.set_index("Unnamed: 0")
        self.matrix_asFrame.index.name = "UserID"

        # NOTE(review): the cache file name does not depend on `size`, so a
        # cache written for another size would be reused here — confirm.
        try:
            self.ratings_matrix = sparse.load_npz("ratings_matrix.npz")
        except (OSError, ValueError):
            # Cache missing or unreadable: rebuild it from the DataFrame.
            self.ratings_matrix = sparse.csr_matrix(self.matrix_asFrame.values)
            sparse.save_npz("ratings_matrix.npz", self.ratings_matrix, compressed=True)

        self.num_users, self.num_items = self.ratings_matrix.shape

    def read_train_test(self):
        """Load a previously saved train/test split from the working directory.

        Reads ``training_data.npz`` and ``test_data.npz`` and exposes them as
        ``training_set`` / ``test_set`` (the names the other methods use), with
        ``training_data`` / ``test_data`` kept as aliases.
        """
        self.training_set = sparse.load_npz("training_data.npz")
        self.test_set = sparse.load_npz("test_data.npz")
        self.training_data = self.training_set
        self.test_data = self.test_set

    def create_training_data(self, percent_test_set = 0.15):
        """Mask a random share of the observed ratings to form a training set.

        Sets ``training_set`` (a copy of ``ratings_matrix`` with the sampled
        entries removed), ``test_set`` (a copy of ``ratings_matrix`` with every
        non-zero entry set to 1) and ``indices`` (unique row indices of the
        masked entries). The sample is reproducible for a given ``seed``.

        Args:
            percent_test_set: fraction (0..1) of non-zero entries to mask.

        Raises:
            ValueError: if ``percent_test_set`` is outside [0, 1].
        """
        if not 0 <= percent_test_set <= 1:
            raise ValueError("percent_test_set must be between 0 and 1")

        training_set = self.ratings_matrix.copy()
        positive_indices = training_set.nonzero()
        positive_values = list(zip(positive_indices[0], positive_indices[1]))

        # Binarise a copy: every non-zero rating becomes 1.
        test_set = self.ratings_matrix.copy()
        test_set.eliminate_zeros()
        test_set.data[:] = 1

        # A dedicated generator keeps the split reproducible without
        # reseeding the global `random` state.
        rng = random.Random(self.seed)
        n_samples = int(percent_test_set*len(positive_values))
        samples = rng.sample(positive_values, n_samples)
        user_indices = [index[0] for index in samples]
        item_indices = [index[1] for index in samples]
        if samples:
            training_set[user_indices, item_indices] = 0
            training_set.eliminate_zeros()

        self.training_set = training_set
        self.test_set = test_set
        self.indices = list(set(user_indices))

    def alternating_least_squares(self, k, alpha, v_lambda, n):
        """Set up alternating least squares on ``training_set`` (unfinished).

        Currently this only initialises the factor matrices and, for every
        iteration and user, builds the confidence and binary preference
        vectors; the per-user / per-item solve is not implemented yet, so
        nothing is stored and the method returns None.

        Args:
            k: number of latent factors.
            alpha: scale applied to the ratings to obtain confidence values.
            v_lambda: regularization weight.
            n: number of iterations.
        """
        training_set = self.training_set

        confidence = alpha * training_set
        n_user, n_items = confidence.shape

        # One generator for both factor matrices; seeding two generators
        # identically would start X and Y from the same random numbers.
        rng = np.random.RandomState(self.seed)
        X = sparse.csr_matrix(rng.normal(size = (n_user, k)))
        Y = sparse.csr_matrix(rng.normal(size = (n_items, k)))
        X_eye = sparse.eye(n_user)
        Y_eye = sparse.eye(n_items)
        # The regularization term
        lambda_eye = v_lambda * sparse.eye(k)

        for iteration in range(n):
            Y_tY = Y.T.dot(Y)
            X_tX = X.T.dot(X)

            for user in range(n_user):
                confidence_sample = confidence[user, :].toarray()
                preference = confidence_sample.copy()
                preference[preference != 0] = 1
                # TODO: solve for X[user] (and add the matching item loop);
                # X_eye, Y_eye, lambda_eye, Y_tY and X_tX are prepared for it.

In [32]:
# Build the factorizer from the "small" ratings CSV with a fixed seed.
Factorizer = Matrix_Factorizer(size="small", seed=941)
# NOTE(review): the call below is disabled; no `set_factorizer` method is
# defined on Matrix_Factorizer in this notebook — confirm before re-enabling.
#Factorizer.set_factorizer(k = 30, alpha = 40, inlambda = 0.08, n = 50)

In [34]:
# Display the array of stored values of the sparse ratings matrix.
Factorizer.ratings_matrix.data

array([16, 12,  1, ...,  1,  1,  1], dtype=int64)

6328