Commit 88d2e17

Added PCRL model, MAE, RMSE. Modified PMF model. Restructured models folder and modified the rest of the library accordingly
Aghiles SALAH committed Oct 12, 2018
1 parent a05ef2d commit 88d2e17
Showing 64 changed files with 11,841 additions and 4,984 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -56,17 +56,19 @@ mat_office = office['mat']

#Instantiate a PMF recommender model.
#Please refer to the documentation for details on parameter settings.
-rec_pmf = Pmf(k=10, max_iter=10, learning_rate=0.001, lamda=0.001, init_params={'U':None,'V':None})
+rec_pmf = Pmf(k=10, max_iter=100, learning_rate=0.001, lamda=0.001, init_params={'U':None,'V':None})

#Instantiate an evaluation strategy.
es_split = Split(data = mat_office, prop_test=0.2, prop_validation=0.0, good_rating=4)

#Instantiate evaluation metrics.
rec = metrics.Recall(m=20)
pre = metrics.Precision(m=20)
+mae = metrics.Mae()
+rmse = metrics.Rmse()

#Instantiate and then run an experiment.
-res_pmf = Experiment(es_split, [rec_pmf], metrics=[pre,rec])
+res_pmf = Experiment(es_split, [rec_pmf], metrics=[mae,rmse,pre,rec])
res_pmf.run_()

#Get average results.
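The metric objects used above expose `name` and `type` attributes, which the evaluation strategy dispatches on (see cornac/evaluation_strategies/split.py below). A minimal sanity-check sketch, assuming the imports used earlier in this README:

```python
from cornac import metrics

mae, rmse = metrics.Mae(), metrics.Rmse()
print(mae.name, mae.type)    # MAE prediction
print(rmse.name, rmse.type)  # RMSE prediction
```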
20 changes: 14 additions & 6 deletions cornac/evaluation_strategies/split.py
@@ -67,8 +67,9 @@ def __init__(self, data,prop_test=0.2,prop_validation=0.0,good_rating = 1., data
        self.index_train = index_train
        self.index_validation = index_validation
        self.index_test = index_test
-        #this is an internal attribute, useful to check whether the data are already split or not
-        self.split_ran = False
+        #Additional attributes
+        self.split_ran = False #check whether the data is already split or not
+        self.rank_met = False #check whether any ranking metric is requested (to skip ranking computations otherwise)


    def train_test_split_(self):
@@ -140,10 +141,17 @@ def run_(self):

    #This function is callable from the Experiment class so as to run an experiment
    def run_exp(self, model, metrics):
+        #check whether we have at least one ranking metric
+        for mt in metrics:
+            if mt.type == 'ranking':
+                self.rank_met = True
+                break

        if not self.split_ran:
            self.run_()

        model.fit(self.data_train)
        print("Starting evaluation")
        res = sp.csc_matrix((self.data_test.shape[0],len(metrics)+1)) #this matrix will contain the evaluation results for each user
@@ -157,16 +165,16 @@ def run_exp(self, model, metrics):
            else:
                pred_u = model.predict(index_user=u)
            pred_u[which_(self.data_train[u,:].todense().A1,">",0)] = 0. #remove known ratings #.A1 allows to flatten a dense matrix
-            rec_list_u = (-pred_u).argsort() #ordering the items (in decreasing order) according to the predictions
+            if self.rank_met:
+                rec_list_u = (-pred_u).argsort() #ordering the items (in decreasing order) according to the predictions

            #computing the different metrics
            idx = 0
            for mt in metrics:
                if mt.type == 'ranking':
                    res[u,idx] = mt.compute(data_test = self.data_test_bin[u,:].todense().A1, reclist=rec_list_u)
                else:
-                    #res[u,idx] = mt.compute(data_test = self.data_test_bin[u,:].todense().A1, prediction=pred_u)
-                    print("Only ranking type metrics are implemented so far!")
+                    res[u,idx] = mt.compute(data_test = self.data_test[u,:].todense().A1, prediction=pred_u)
                idx = idx + 1
            res[u,len(metrics)] = 1 # This column indicates whether a user has been processed
            nb_processed_users += 1
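To make the control flow above concrete, here is a minimal standalone sketch (plain NumPy on toy data, not Cornac's API) of what the new rank_met flag changes: the ranked list is only built when a ranking metric is present, while prediction metrics compare raw scores against the held-out ratings.

```python
import numpy as np

test_ratings = np.array([0., 4., 0., 2., 5.])      # one user's test row, 0 = unrated
predictions  = np.array([3.1, 3.5, 1.2, 2.4, 4.6])

rank_met = False                           # no ranking metric requested this run
if rank_met:
    rec_list = (-predictions).argsort()    # items in decreasing order of score

rated = np.flatnonzero(test_ratings > 0)   # same role as which_(..., '>', 0)
errors = test_ratings[rated] - predictions[rated]
mae  = np.abs(errors).mean()               # ~0.433
rmse = np.sqrt((errors ** 2).mean())       # ~0.436
```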
18 changes: 11 additions & 7 deletions cornac/metrics/__init__.py
@@ -1,16 +1,20 @@

-from .metric import Ndcg
-from .metric import Ncrr
-from .metric import Mrr
-from .metric import Precision
-from .metric import Recall
-from .metric import Fmeasure
+from .recom_metrics import Ndcg
+from .recom_metrics import Ncrr
+from .recom_metrics import Mrr
+from .recom_metrics import Precision
+from .recom_metrics import Recall
+from .recom_metrics import Fmeasure

+from .pred_metrics import Mae
+from .pred_metrics import Rmse


__all__ = ['Ndcg',
           'Ncrr',
           'Mrr',
           'Precision',
           'Recall',
-           'Fmeasure']
+           'Fmeasure',
+           'Mae',
+           'Rmse']
56 changes: 56 additions & 0 deletions cornac/metrics/pred_metrics.py
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-

"""
@author: Aghiles Salah
"""
import numpy as np
from ..utils.util_functions import which_



class Mae:
    """Mean Absolute Error.
    Parameters
    ----------
    name: string, value: 'MAE'
        Name of the measure.
    type: string, value: 'prediction'
        Type of the metric, e.g., "ranking", "prediction".
    """

    def __init__(self):
        self.name = 'MAE'
        self.type = 'prediction'

    #Compute MAE for a single user
    def compute(self, data_test, prediction):
        index_rated = which_(data_test, '>', 0.)
        mae_u = np.sum(np.abs(data_test[index_rated] - prediction[index_rated])) / len(index_rated)

        return mae_u


class Rmse:
    """Root Mean Squared Error.
    Parameters
    ----------
    name: string, value: 'RMSE'
        Name of the measure.
    type: string, value: 'prediction'
        Type of the metric, e.g., "ranking", "prediction".
    """

    def __init__(self):
        self.name = 'RMSE'
        self.type = 'prediction'

    #Compute RMSE for a single user
    def compute(self, data_test, prediction):
        index_rated = which_(data_test, '>', 0.)
        mse_u = np.sum((data_test[index_rated] - prediction[index_rated])**2) / len(index_rated)

        return np.sqrt(mse_u)
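A quick usage sketch for the two new metrics on toy arrays (zeros in data_test mark unrated items, so only the three rated positions contribute):

```python
import numpy as np
from cornac.metrics import Mae, Rmse

data_test  = np.array([0., 4., 0., 2., 5.])      # one user's test ratings
prediction = np.array([3.1, 3.5, 1.2, 2.4, 4.6])

print(Mae().compute(data_test, prediction))    # ~0.433
print(Rmse().compute(data_test, prediction))   # ~0.436
```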
14 changes: 7 additions & 7 deletions cornac/metrics/metric.py → cornac/metrics/recom_metrics.py
@@ -131,7 +131,7 @@ def __init__(self, m = 20, name=None):
        self.tp_fp = None


-    #Evaluate TopMlist for a single user: Precision@M, Recall@M, F-meansure@M (F2)
+    #Evaluate TopMlist for a single user: Precision@M, Recall@M, F-measure@M (F1)
    def measures_at_m(self,data_test,reclist):

        data_test_bin = np.full(len(data_test), 0)
@@ -200,22 +200,22 @@ def compute(self,data_test,reclist):


class Fmeasure(MeasureAtM):
-    """F-meansure@M.
+    """F-measure@M.
    Parameters
    ----------
    m: int, optional, default: 20
        The number of items in the top@m list.
-    name: string, value: 'F2@m'
+    name: string, value: 'F1@m'
        Name of the measure.
    type: string, value: 'ranking'
        Type of the metric, e.g., "ranking".
    """

    def __init__(self, m = 20):
-        MeasureAtM.__init__(self,m = m, name="F2@"+str(m))
+        MeasureAtM.__init__(self,m = m, name="F1@"+str(m))


    #Compute F-measure@M for a single user
@@ -225,9 +225,9 @@ def compute(self,data_test,reclist):
        prec = self.tp/self.tp_fp
        rec = self.tp/self.tp_fn
        if (prec+rec):
-            f2 = 2*(prec*rec)/(prec+rec)
+            f1 = 2*(prec*rec)/(prec+rec)
        else:
-            f2 = 0
-        return f2
+            f1 = 0
+        return f1
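As a worked instance of the renamed metric: with a top-20 list containing 5 of a user's 8 relevant items, prec = 5/20 = 0.25, rec = 5/8 = 0.625, and f1 = 2*(0.25*0.625)/(0.25+0.625) ≈ 0.357 — the harmonic mean computed above. The rename is apt, since the formula is F1, whereas F2 conventionally weights recall higher than precision.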


7 changes: 5 additions & 2 deletions cornac/models/__init__.py
@@ -1,2 +1,5 @@
-from .cf import *
-from .context_cf import *
+from .bpr import *
+from .c2pf import *
+from .hpf import *
+from .pcrl import *
+from .pmf import *
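After this restructuring (one folder per model), the model classes should remain importable from the package root. A sketch, assuming each submodule re-exports its model class the way bpr/__init__.py does below:

```python
from cornac.models import Bpr, Pmf
```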
5 changes: 5 additions & 0 deletions cornac/models/bpr/__init__.py
@@ -0,0 +1,5 @@

from .recom_bpr import Bpr


__all__ = ['Bpr']
File renamed without changes.
48 changes: 36 additions & 12 deletions cornac/models/cf/recom_bpr.py → cornac/models/bpr/recom_bpr.py
@@ -66,23 +66,47 @@ def __init__(self, k=5, max_iter=100, learning_rate = 0.001, lamda = 0.01,batch_

    # fit the recommender model to the training data
    def fit(self, X):
-        #change the data to original user Id item Id and rating format
-        X = X.tocoo()
-        data = np.ndarray(shape=(len(X.data), 3), dtype=float)
-        data[:, 0] = X.row
-        data[:, 1] = X.col
-        data[:, 2] = X.data
-
-        print('Learning...')
-        res = bpr(X, data, k=self.k, n_epochs=self.max_iter,lamda = self.lamda, learning_rate= self.learning_rate, batch_size = self.batch_size, init_params=self.init_params)
-        self.U = res['U']
-        self.V = res['V']
-        print('Learning completed')
+        """Fit the model to observations.
+        Parameters
+        ----------
+        X: scipy sparse matrix, required
+            the user-item preference matrix (training data), in a scipy sparse format
+            (e.g., csc_matrix).
+        """
+        if self.trainable:
+            #change the data to original user Id item Id and rating format
+            X = X.tocoo()
+            data = np.ndarray(shape=(len(X.data), 3), dtype=float)
+            data[:, 0] = X.row
+            data[:, 1] = X.col
+            data[:, 2] = X.data
+
+            print('Learning...')
+            res = bpr(X, data, k=self.k, n_epochs=self.max_iter, lamda=self.lamda, learning_rate=self.learning_rate, batch_size=self.batch_size, init_params=self.init_params)
+            self.U = res['U']
+            self.V = res['V']
+            print('Learning completed')
+        else:
+            print('%s is trained already (trainable = False)' % (self.name))

+    #get prediction for a single user (predictions for one user at a time for efficiency purposes)
+    #predictions are not stored for the same efficiency reasons

    def predict(self, index_user):
+        """Predict the scores (ratings) of a user for all items.
+        Parameters
+        ----------
+        index_user: int, required
+            The index of the user for whom to perform predictions.
+        Returns
+        -------
+        Numpy 1d array
+            Array containing the predicted values for all items
+        """

        user_pred = self.U[index_user, :].dot(self.V.T)
        # transform user_pred to a flattened array; other output formats may be considered later
        user_pred = np.array(user_pred, dtype='float64').flatten()
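A minimal end-to-end sketch of the relocated Bpr model, using a toy random matrix rather than a Cornac dataset, and assuming the constructor signature shown in the hunk header above (its tail is truncated here) with trainable left at its default:

```python
import numpy as np
import scipy.sparse as sp
from cornac.models import Bpr

# Toy preference matrix: 50 users x 30 items, ~10% of entries observed.
X = sp.random(50, 30, density=0.1, format='csc', random_state=np.random.RandomState(0))

rec_bpr = Bpr(k=5, max_iter=100, learning_rate=0.001, lamda=0.01)
rec_bpr.fit(X)

scores = rec_bpr.predict(index_user=0)  # 1d array, one score per item
top10 = (-scores).argsort()[:10]        # top-10 recommendation list, as in run_exp
```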
File renamed without changes.
File renamed without changes.
File renamed without changes.
