# Recommender System

In [None]:
# Read data.txt into data_ui as [user_id, [item_id, ...]] pairs.
# Each non-empty line holds whitespace-separated integers: the first one is
# the user id, every following one is an item id of that user.

data_ui = []
with open("./data.txt") as f:
    for line in f:
        # split() without arguments discards the trailing newline and any
        # repeated/trailing spaces, so the last item id is kept whether or
        # not the line ends with a space (the former slice dropped the final
        # token unconditionally).
        tokens = line.split()
        if not tokens:
            # Tolerate blank lines instead of failing on int("").
            continue
        data_ui.append([int(tokens[0]), [int(token) for token in tokens[1:]]])

In [None]:
import numpy as np

# Every distinct item id that occurs in at least one user's item list.
set_item = set().union(*(items for _, items in data_ui))

# Interaction matrix: one row per entry of data_ui and len(set_item) + 1
# columns; a cell is 1 when the user has the item and 0 otherwise.
R = np.zeros(shape=(len(data_ui), 1 + len(set_item)))
for user_id, item_ids in data_ui:
    for item_id in item_ids:
        R[user_id][item_id] = 1

In [None]:
import random

random.seed(42)

# Train on a copy: masking held-out interactions must not also erase them
# from the full matrix R.
R_train = R.copy()

# 40% of the users contribute interactions to the test set.
user_test_sample = random.sample(range(0, len(data_ui)), int(len(data_ui)*0.4))

# user_id -> item ids hidden from training, so that evaluation can score
# against exactly the interactions that were masked.
test_items = {}

for i in user_test_sample:
    user_id, item_ids = data_ui[i]

    # Hold out 20% of this user's items (possibly none for short profiles).
    item_test_sample = random.sample(item_ids, int(len(item_ids)*0.2))
    test_items[user_id] = item_test_sample

    # Mask only the sampled items, addressing the row by user id, which is
    # how the rows of R were filled.
    for item_id in item_test_sample:
        R_train[user_id, item_id] = 0

In [None]:
class MatrixFactorization:
    """Matrix factorization for binary (implicit) feedback, trained with SGD.

    Every user and every item is represented by a latent vector. The raw score
    of a (user, item) pair is the dot product of the two vectors; during
    training the score is passed through a sigmoid and pushed towards the
    observed entry of the rating matrix.
    """

    def __init__(self, num_items, num_users, num_factors, learning_rate, regularization_rate, num_iterations):
        """
        Initialize the Matrix Factorization model.

        Args:
            num_items (int): Number of distinct items.
            num_users (int): Number of users.
            num_factors (int): Number of latent factors.
            learning_rate (float): Learning rate for gradient descent.
            regularization_rate (float): Regularization rate for L2 regularization.
            num_iterations (int): Number of iterations for training.
        """
        self.num_items = num_items
        self.num_users = num_users
        self.num_factors = num_factors
        self.learning_rate = learning_rate
        self.regularization_rate = regularization_rate
        self.num_iterations = num_iterations

        # Initialize Q and P matrices with random values
        # Start your code

        # Initialize user and item feature matrices.
        # The item matrix has num_items + 1 rows so that item index num_items
        # is addressable as well (presumably because item ids may start at 1;
        # verify against the data).
        self.user_features = np.random.normal(scale=1/self.num_factors, size=(self.num_users, self.num_factors))
        self.item_features = np.random.normal(scale=1/self.num_factors, size=(self.num_items+1, self.num_factors))

        # End your code

    def sigmoid(self, x):
        """
        Compute the sigmoid function.

        Args:
            x (float): Input value.

        Returns:
            float: Sigmoid value.
        """
        # 1 / (1 + exp(-x)) written as exp(-log(1 + exp(-x))): logaddexp never
        # evaluates exp(-x) directly, so large negative x cannot overflow.
        return np.exp(-np.logaddexp(0, -x))

    def update_parameters(self, R):
        """
        Update the parameters Q and P using Stochastic Gradient Descent.

        For every observed entry (R[u, i] > 0) one gradient-ascent step is
        taken on the L2-regularized log-likelihood

            R[u, i] * log(s) + (1 - R[u, i]) * log(1 - s) - reg * (|p_u|^2 + |q_i|^2)

        with s = sigmoid(p_u . q_i). Its gradient with respect to p_u is
        (R[u, i] - s) * q_i - 2 * reg * p_u, and symmetrically for q_i.

        Args:
            R (ndarray): Rating matrix (users x items), expected to hold 0/1
                entries.
        """
        # Start your code

        R = np.asarray(R)
        # Observed (user, item) pairs in row-major order. All columns that
        # have an item vector are considered, including index num_items.
        observed = np.argwhere(
            R[:self.num_users, :self.item_features.shape[0]] > 0
        ).tolist()

        # Stochastic Gradient Descent (SGD) training loop
        for epoch in range(self.num_iterations):
            print(f"Epoch {epoch+1} started")
            for u, i in observed:
                # Snapshot the user vector so the item step uses the same
                # (pre-update) value the error was computed with.
                user_vec = self.user_features[u, :].copy()
                item_vec = self.item_features[i, :]
                error = R[u, i] - self.sigmoid(self.predict_rating(i, u))
                # The regularization term is subtracted: it shrinks weights.
                self.user_features[u, :] += self.learning_rate * (
                    error * item_vec - 2 * self.regularization_rate * user_vec
                )
                self.item_features[i, :] += self.learning_rate * (
                    error * user_vec - 2 * self.regularization_rate * item_vec
                )

        print("Train completed")
        # End your code

    def train(self, R):
        """
        Train the Matrix Factorization model.

        Args:
            R (ndarray): Rating matrix.
        """
        self.update_parameters(R)

    def predict_rating(self, i, u):
        """
        Predict the rating for item i and user u.

        Args:
            i (int): Item index.
            u (int): User index.

        Returns:
            float: Raw score, i.e. the dot product of the item and user
                feature vectors (no sigmoid applied).
        """
        # Start your code

        return np.dot(self.item_features[i, :], self.user_features[u, :])

        # End your code

    def evaluate(self, users_list, groundTruth_list, topk=10):
        """
        Evaluate the trained model on a set of users.

        Args:
            users_list (list): Users indexes list.
            groundTruth_list (dict or list): Maps a user index to the items in
                that user's test set.
            topk (int): threshold for top item selection

        Returns:
            float: sum(Intersection between topk predicted items and user profile in test set / user profile size in test set) / len(users_list).
                Users with an empty test profile contribute 0, and 0.0 is
                returned when users_list is empty.
        """
        # Start your code
        if len(users_list) == 0:
            return 0.0

        acc = 0
        for user_index in users_list:
            items = groundTruth_list[user_index]
            if len(items) == 0:
                # Nothing to hit for this user; avoid dividing by zero.
                continue
            predict = np.dot(self.user_features[user_index, :], self.item_features.T)
            # Item indexes ordered from highest to lowest score.
            index_sorted_predict = np.argsort(predict)[::-1]
            topk_items = index_sorted_predict[:topk].tolist()
            same = set(topk_items).intersection(items)
            acc += (len(same) / len(items))
        return acc / len(users_list)

        # End your code

In [None]:
# Problem size, taken from the parsed interaction data.
num_users, num_items = len(data_ui), len(set_item)

# Model hyper-parameters.
num_factors = 500
num_iterations = 50
learning_rate = regularization_rate = 0.1

# rating matrix
R = None

In [None]:
learning_rate_list = [0.0001, 0.001, 0.01, 0.1, 0.2, 0.02, 0.015, 0.000001, 1.000001, 0.0000001]

# The test set does not depend on the learning rate, so build it once.
# Users are identified by user id (the row index used in the rating matrix),
# and a user's ground truth is limited to the interactions that are absent
# from R_train; items the model was trained on would otherwise inflate the
# score. Users without any held-out item are left out of the evaluation.
# (The name `groudTruths` is kept as-is in case other cells refer to it.)
user_indexes = []
groudTruths = {}

for i in user_test_sample :
    (user_id, item_ids) = data_ui[i]
    held_out = [item_id for item_id in item_ids if R_train[user_id, item_id] == 0]
    if held_out:
        user_indexes.append(user_id)
        groudTruths[user_id] = held_out

for learning_rate in learning_rate_list:
    model = MatrixFactorization(num_items, num_users, num_factors, learning_rate, regularization_rate, num_iterations)

    model.train(R_train)

    print(f"Learning_rate = {learning_rate}")
    # Test prediction for item 0 and user 0
    item_index = 0
    user_index = 0
    prediction = model.predict_rating(item_index, user_index)
    print(f"Predicted rating for item {item_index} and user {user_index}: {prediction}")

    # Evaluate model for users in test set; report NaN when no user has any
    # held-out item, since there is nothing to measure in that case.
    if user_indexes:
        result = model.evaluate(user_indexes, groudTruths)
    else:
        result = float("nan")
    print(f"Accuracy for model: {result}")
    print("********************************************")