In [7]:
import pandas as pd
from scipy.sparse import csr_matrix
import numpy as np
from sklearn.model_selection import train_test_split

# Load the raw interaction table. The columns used below are ID_CLIENT,
# ID_PRODUCT and NUMBER_ORDERS.
# NOTE(review): the absolute Windows path only resolves on the author's machine;
# consider a path relative to a configurable data directory.
data = pd.read_csv(r'C:\Users\Stefano\Desktop\Tesi\recomm_retail_b2b.csv')

# Fixed seed so the train/test partition (and every number derived from it)
# is the same after a kernel restart.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# The matrix is built from the training rows only, but its shape is taken from
# the FULL table: otherwise a client or product that lands only in the test
# split can have an ID beyond the inferred bounds and indexing the matrix with
# it fails later.
# Assumes ID_CLIENT / ID_PRODUCT are non-negative integer codes (they are used
# directly as row / column indices) -- TODO confirm.
user_item_matrix = csr_matrix(
    (train_data['NUMBER_ORDERS'], (train_data['ID_CLIENT'], train_data['ID_PRODUCT'])),
    shape=(int(data['ID_CLIENT'].max()) + 1, int(data['ID_PRODUCT'].max()) + 1),
)


In [8]:
class FISM:
    """Factored Item Similarity Model trained on (positive, sampled negative) pairs.

    Every item has two latent vectors (``item_factors_P`` and ``item_factors_Q``)
    and a bias. A user's score for item ``i`` is the bias of ``i`` plus the dot
    product between ``Q[i]`` and the normalised sum of ``P`` over the user's
    *other* items. Training pushes the score of each known item above the score
    of sampled unknown items by a margin of 1 (squared pairwise error), in the
    spirit of the FISMauc procedure of Kabbur, Ning and Karypis (2013).

    Parameters
    ----------
    num_factors : int
        Dimensionality of the latent item vectors.
    learning_rate : float
        SGD step size.
    regularization : float
        L2 penalty applied to biases and to both factor matrices.
    alpha : float
        Exponent of the neighbourhood-size normalisation ``n ** -alpha``.
    iterations : int
        Number of passes over all users.
    rho : int
        Maximum number of negative items sampled per user and pass.
    random_state : int or None
        Seed for a private ``np.random.RandomState``. With ``None`` (default)
        the global ``np.random`` state is used, as before.
    """

    def __init__(self, num_factors=64, learning_rate=0.01, regularization=0.1, alpha=0.5, iterations=50, rho=10,
                 random_state=None):

        self.num_factors = num_factors
        self.learning_rate = learning_rate
        self.regularization = regularization
        self.alpha = alpha
        self.iterations = iterations
        self.rho = rho
        self.random_state = random_state

    def _rng(self):
        """Return the random source: global ``np.random`` unless a seed was given."""
        seed = getattr(self, 'random_state', None)
        if seed is None:
            return np.random
        return np.random.RandomState(seed)

    def fit(self, user_item_matrix):
        """Learn item factors and biases from a sparse user x item matrix.

        Every stored entry of a row counts as a positive interaction; the stored
        values themselves are not used. Users with fewer than two items are
        skipped because, once the target item is removed, they have no context
        left to predict from.

        Prints one progress line per completed pass.
        """
        # Row slicing below is only meaningful on CSR; tocsr() is a no-op for CSR input.
        matrix = user_item_matrix.tocsr()
        num_users, num_items = matrix.shape
        rng = self._rng()

        init_scale = 1. / self.num_factors
        self.item_factors_P = rng.normal(scale=init_scale, size=(num_items, self.num_factors))
        self.item_factors_Q = rng.normal(scale=init_scale, size=(num_items, self.num_factors))
        self.item_biases = np.zeros(num_items)

        # Short aliases; these are the same arrays, updated in place.
        P, Q, b = self.item_factors_P, self.item_factors_Q, self.item_biases
        lr, reg = self.learning_rate, self.regularization
        all_items = np.arange(num_items)
        indptr, indices = matrix.indptr, matrix.indices

        for iteration in range(self.iterations):
            for u in range(num_users):
                items_u = indices[indptr[u]:indptr[u + 1]]
                n_u = len(items_u)
                if n_u < 2:
                    # No other item to build the context from.
                    continue

                neg_pool = np.setdiff1d(all_items, items_u)
                n_neg = min(self.rho, len(neg_pool))
                if n_neg <= 0:
                    continue
                negatives = rng.choice(neg_pool, size=n_neg, replace=False)

                norm = (n_u - 1) ** -self.alpha

                for i in items_u:
                    # The context must NOT contain the target item itself,
                    # exactly as predict_single() scores it.
                    others = items_u[items_u != i]
                    t = norm * P[others].sum(axis=0)
                    grad_acc = np.zeros(self.num_factors)

                    for j in negatives:
                        # Both scores are recomputed for every negative because
                        # b[i] and Q[i] were just updated by the previous one.
                        r_ui = b[i] + np.dot(t, Q[i])
                        r_uj = b[j] + np.dot(t, Q[j])
                        error = 1 - (r_ui - r_uj)

                        # Gradient step on 0.5*error**2 + 0.5*reg*||param||**2.
                        # For the negative item the error term flips sign but the
                        # L2 term does not, hence "+ reg" inside the subtraction
                        # (with "- reg" the penalty would inflate the parameter).
                        b[i] += lr * (error - reg * b[i])
                        b[j] -= lr * (error + reg * b[j])
                        Q[i] += lr * (error * t - reg * Q[i])
                        Q[j] -= lr * (error * t + reg * Q[j])

                        grad_acc += error * (Q[i] - Q[j])

                    # P is updated once per positive item, after all negatives,
                    # so that `t` stays consistent with P during the inner loop.
                    P[others] += lr * (norm * grad_acc / n_neg - reg * P[others])

            print(f"Iteration {iteration + 1}/{self.iterations} completed")

    def predict_single(self, u, i, items_u):
        """Score item ``i`` given the item indices ``items_u`` of a user.

        ``i`` itself is removed from ``items_u`` before aggregating. Returns 0
        when no other item remains. ``u`` is accepted for interface
        compatibility and is not used.
        """
        items_u = np.asarray(items_u)
        others = items_u[items_u != i]
        if len(others) == 0:
            return 0
        p_u = np.sum(self.item_factors_P[others], axis=0)
        q_i = self.item_factors_Q[i]
        n_u_alpha = len(others) ** self.alpha
        score = self.item_biases[i] + np.dot(p_u, q_i) / n_u_alpha
        return score

    def _score_all_items(self, items_u):
        """Vectorised equivalent of predict_single() for every item at once."""
        scores = np.zeros(self.item_factors_Q.shape[0])
        n_u = len(items_u)
        if n_u == 0:
            return scores

        p_sum = self.item_factors_P[items_u].sum(axis=0)
        # Items outside the user's history: the full history is the context.
        scores = self.item_biases + self.item_factors_Q @ p_sum / n_u ** self.alpha

        # Items inside the history: drop the item's own P vector from the context.
        if n_u == 1:
            scores[items_u] = 0.0
        else:
            p_excl = p_sum - self.item_factors_P[items_u]
            q_own = self.item_factors_Q[items_u]
            scores[items_u] = (self.item_biases[items_u]
                               + np.sum(p_excl * q_own, axis=1) / (n_u - 1) ** self.alpha)
        return scores

    def recommend(self, user_id, user_item_matrix, N=10):
        """Return the indices of the ``N`` highest-scoring items for ``user_id``.

        All items are ranked, including those already present in the user's row
        of ``user_item_matrix``; the result is ordered best first.
        """
        matrix = user_item_matrix.tocsr()
        items_u = matrix[user_id].indices
        scores = self._score_all_items(items_u)[:matrix.shape[1]]
        top_items = np.argsort(scores)[::-1][:N]
        return top_items


In [9]:
def evaluate_model(model, user_item_matrix, test_data):
    """Order-weighted mean percentile rank of the held-out items, in percent.

    For every client in ``test_data`` the model ranks all items; a held-out
    product at the top of that ranking contributes 0, and a product missing
    from the ranking contributes 1. Contributions are weighted by
    ``NUMBER_ORDERS`` and averaged, so lower values are better.

    Parameters
    ----------
    model : object exposing ``recommend(user_id, user_item_matrix, N)``.
    user_item_matrix : matrix with a ``shape`` of (users, items), passed through
        to ``model.recommend``.
    test_data : DataFrame with columns ``ID_CLIENT``, ``ID_PRODUCT`` and
        ``NUMBER_ORDERS``.

    Returns
    -------
    float
        The weighted average rank multiplied by 100, or ``nan`` when no test
        row could be evaluated.

    Notes
    -----
    Clients whose ID is not a valid row of ``user_item_matrix`` are skipped:
    the model cannot rank for them and indexing the matrix would fail.
    """
    num_users, num_items = user_item_matrix.shape
    total_rank = 0.0
    total_orders = 0.0

    # One pass over the frame instead of a boolean filter per client.
    # sort=False keeps clients in order of first appearance.
    for user_id, user_rows in test_data.groupby('ID_CLIENT', sort=False):
        if not 0 <= user_id < num_users:
            continue

        recommended_items = model.recommend(user_id, user_item_matrix, N=num_items)
        n_ranked = len(recommended_items)
        if n_ranked == 0:
            continue
        item_ranks = {item: rank for rank, item in enumerate(recommended_items)}

        products = user_rows['ID_PRODUCT'].to_numpy()
        order_counts = user_rows['NUMBER_ORDERS'].to_numpy()
        for product_id, orders in zip(products, order_counts):
            # Products absent from the ranking get the worst possible rank.
            rank_ui = item_ranks.get(product_id, n_ranked) / n_ranked
            total_rank += rank_ui * orders
            total_orders += orders

    if total_orders == 0:
        return float('nan')

    average_rank = total_rank / total_orders
    return average_rank * 100

In [16]:
# Training is stochastic (random factor initialisation and negative sampling
# both draw from NumPy's global generator), so seed it to make this cell give
# the same model after a kernel restart.
np.random.seed(42)

model = FISM(num_factors=100, learning_rate=0.01, regularization=0.1, alpha=0.5, iterations=20, rho=10)
model.fit(user_item_matrix)

Iteration 1/20 completed
Iteration 2/20 completed
Iteration 3/20 completed
Iteration 4/20 completed
Iteration 5/20 completed
Iteration 6/20 completed
Iteration 7/20 completed
Iteration 8/20 completed
Iteration 9/20 completed
Iteration 10/20 completed
Iteration 11/20 completed
Iteration 12/20 completed
Iteration 13/20 completed
Iteration 14/20 completed
Iteration 15/20 completed
Iteration 16/20 completed
Iteration 17/20 completed
Iteration 18/20 completed
Iteration 19/20 completed
Iteration 20/20 completed


In [18]:
# Score the trained model against the held-out interactions and report the
# result with two decimals.
average_rank = evaluate_model(
    model=model,
    user_item_matrix=user_item_matrix,
    test_data=test_data,
)
report_template = "Average percentile rank: {:.2f}%"
print(report_template.format(average_rank))