Commit 88d2e17

Added PCRL model, MAE, RMSE. Modified PMF model. Restructured models folder and modified the rest of the library accordingly
Aghiles SALAH committed Oct 12, 2018
1 parent a05ef2d commit 88d2e17
Showing 64 changed files with 11,841 additions and 4,984 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -56,17 +56,19 @@ mat_office = office['mat']

#Instantiate a PMF recommender model.
#Please refer to the documentation for details on parameter settings.
-rec_pmf = Pmf(k=10, max_iter=10, learning_rate=0.001, lamda=0.001, init_params={'U':None,'V':None})
+rec_pmf = Pmf(k=10, max_iter=100, learning_rate=0.001, lamda=0.001, init_params={'U':None,'V':None})

#Instantiate an evaluation strategy.
es_split = Split(data = mat_office, prop_test=0.2, prop_validation=0.0, good_rating=4)

#Instantiate evaluation metrics.
rec = metrics.Recall(m=20)
pre = metrics.Precision(m=20)
+mae = metrics.Mae()
+rmse = metrics.Rmse()

#Instantiate and then run an experiment.
-res_pmf = Experiment(es_split, [rec_pmf], metrics=[pre,rec])
+res_pmf = Experiment(es_split, [rec_pmf], metrics=[mae,rmse,pre,rec])
res_pmf.run_()

#Get average results.
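The metric objects used above expose `name` and `type` attributes, which the evaluation strategy dispatches on (see cornac/evaluation_strategies/split.py below). A minimal sanity-check sketch, assuming the imports used earlier in this README:

```python
from cornac import metrics

mae, rmse = metrics.Mae(), metrics.Rmse()
print(mae.name, mae.type)    # MAE prediction
print(rmse.name, rmse.type)  # RMSE prediction
```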
20 changes: 14 additions & 6 deletions cornac/evaluation_strategies/split.py
@@ -67,8 +67,9 @@ def __init__(self, data,prop_test=0.2,prop_validation=0.0,good_rating = 1., data
        self.index_train = index_train
        self.index_validation = index_validation
        self.index_test = index_test
-        #this is an internal attribute, useful to check whether the data are already split or not
-        self.split_ran = False
+        #Additional attributes
+        self.split_ran = False #check whether the data is already split or not
+        self.rank_met = False #check whether any ranking metric is requested (to skip ranking computations otherwise)


    def train_test_split_(self):
@@ -140,10 +141,17 @@ def run_(self):

    #This function is callable from the Experiment class so as to run an experiment
    def run_exp(self, model, metrics):
+        #check whether we have at least one ranking metric
+        for mt in metrics:
+            if mt.type == 'ranking':
+                self.rank_met = True
+                break

        if not self.split_ran:
            self.run_()

        model.fit(self.data_train)
        print("Starting evaluation")
        res = sp.csc_matrix((self.data_test.shape[0],len(metrics)+1)) #this matrix will contain the evaluation results for each user
@@ -157,16 +165,16 @@ def run_exp(self, model, metrics):
            else:
                pred_u = model.predict(index_user=u)
            pred_u[which_(self.data_train[u,:].todense().A1,">",0)] = 0. #remove known ratings #.A1 allows to flatten a dense matrix
-            rec_list_u = (-pred_u).argsort() #ordering the items (in decreasing order) according to the predictions
+            if self.rank_met:
+                rec_list_u = (-pred_u).argsort() #ordering the items (in decreasing order) according to the predictions

            #computing the different metrics
            idx = 0
            for mt in metrics:
                if mt.type == 'ranking':
                    res[u,idx] = mt.compute(data_test = self.data_test_bin[u,:].todense().A1, reclist=rec_list_u)
                else:
-                    #res[u,idx] = mt.compute(data_test = self.data_test_bin[u,:].todense().A1, prediction=pred_u)
-                    print("Only ranking type metrics are implemented so far!")
+                    res[u,idx] = mt.compute(data_test = self.data_test[u,:].todense().A1, prediction=pred_u)
                idx = idx + 1
            res[u,len(metrics)] = 1 # This column indicates whether a user has been processed
            nb_processed_users += 1
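To make the control flow above concrete, here is a minimal standalone sketch (plain NumPy on toy data, not Cornac's API) of what the new rank_met flag changes: the ranked list is only built when a ranking metric is present, while prediction metrics compare raw scores against the held-out ratings.

```python
import numpy as np

test_ratings = np.array([0., 4., 0., 2., 5.])      # one user's test row, 0 = unrated
predictions  = np.array([3.1, 3.5, 1.2, 2.4, 4.6])

rank_met = False                           # no ranking metric requested this run
if rank_met:
    rec_list = (-predictions).argsort()    # items in decreasing order of score

rated = np.flatnonzero(test_ratings > 0)   # same role as which_(..., '>', 0)
errors = test_ratings[rated] - predictions[rated]
mae  = np.abs(errors).mean()               # ~0.433
rmse = np.sqrt((errors ** 2).mean())       # ~0.436
```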
18 changes: 11 additions & 7 deletions cornac/metrics/__init__.py
@@ -1,16 +1,20 @@

-from .metric import Ndcg
-from .metric import Ncrr
-from .metric import Mrr
-from .metric import Precision
-from .metric import Recall
-from .metric import Fmeasure
+from .recom_metrics import Ndcg
+from .recom_metrics import Ncrr
+from .recom_metrics import Mrr
+from .recom_metrics import Precision
+from .recom_metrics import Recall
+from .recom_metrics import Fmeasure

+from .pred_metrics import Mae
+from .pred_metrics import Rmse


__all__ = ['Ndcg',
           'Ncrr',
           'Mrr',
           'Precision',
           'Recall',
-           'Fmeasure']
+           'Fmeasure',
+           'Mae',
+           'Rmse']
56 changes: 56 additions & 0 deletions cornac/metrics/pred_metrics.py
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-

"""
@author: Aghiles Salah
"""
import numpy as np
from ..utils.util_functions import which_



class Mae:
    """Mean Absolute Error.
    Parameters
    ----------
    name: string, value: 'MAE'
        Name of the measure.
    type: string, value: 'prediction'
        Type of the metric, e.g., "ranking", "prediction".
    """

    def __init__(self):
        self.name = 'MAE'
        self.type = 'prediction'

    #Compute MAE for a single user
    def compute(self, data_test, prediction):
        index_rated = which_(data_test, '>', 0.)
        mae_u = np.sum(np.abs(data_test[index_rated] - prediction[index_rated])) / len(index_rated)

        return mae_u


class Rmse:
    """Root Mean Squared Error.
    Parameters
    ----------
    name: string, value: 'RMSE'
        Name of the measure.
    type: string, value: 'prediction'
        Type of the metric, e.g., "ranking", "prediction".
    """

    def __init__(self):
        self.name = 'RMSE'
        self.type = 'prediction'

    #Compute RMSE for a single user
    def compute(self, data_test, prediction):
        index_rated = which_(data_test, '>', 0.)
        mse_u = np.sum((data_test[index_rated] - prediction[index_rated])**2) / len(index_rated)

        return np.sqrt(mse_u)
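A quick usage sketch for the two new metrics on toy arrays (zeros in data_test mark unrated items, so only the three rated positions contribute):

```python
import numpy as np
from cornac.metrics import Mae, Rmse

data_test  = np.array([0., 4., 0., 2., 5.])      # one user's test ratings
prediction = np.array([3.1, 3.5, 1.2, 2.4, 4.6])

print(Mae().compute(data_test, prediction))    # ~0.433
print(Rmse().compute(data_test, prediction))   # ~0.436
```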
14 changes: 7 additions & 7 deletions cornac/metrics/metric.py → cornac/metrics/recom_metrics.py
@@ -131,7 +131,7 @@ def __init__(self, m = 20, name=None):
        self.tp_fp = None


-    #Evaluate TopMlist for a single user: Precision@M, Recall@M, F-meansure@M (F2)
+    #Evaluate TopMlist for a single user: Precision@M, Recall@M, F-measure@M (F1)
    def measures_at_m(self,data_test,reclist):

        data_test_bin = np.full(len(data_test), 0)
@@ -200,22 +200,22 @@ def compute(self,data_test,reclist):


class Fmeasure(MeasureAtM):
-    """F-meansure@M.
+    """F-measure@M.
    Parameters
    ----------
    m: int, optional, default: 20
        The number of items in the top@m list.
-    name: string, value: 'F2@m'
+    name: string, value: 'F1@m'
        Name of the measure.
    type: string, value: 'ranking'
        Type of the metric, e.g., "ranking".
    """

    def __init__(self, m = 20):
-        MeasureAtM.__init__(self,m = m, name="F2@"+str(m))
+        MeasureAtM.__init__(self,m = m, name="F1@"+str(m))


    #Compute F-measure@M for a single user
@@ -225,9 +225,9 @@ def compute(self,data_test,reclist):
        prec = self.tp/self.tp_fp
        rec = self.tp/self.tp_fn
        if (prec+rec):
-            f2 = 2*(prec*rec)/(prec+rec)
+            f1 = 2*(prec*rec)/(prec+rec)
        else:
-            f2 = 0
-        return f2
+            f1 = 0
+        return f1
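As a worked instance of the renamed metric: with a top-20 list containing 5 of a user's 8 relevant items, prec = 5/20 = 0.25, rec = 5/8 = 0.625, and f1 = 2*(0.25*0.625)/(0.25+0.625) ≈ 0.357 — the harmonic mean computed above. The rename is apt, since the formula is F1, whereas F2 conventionally weights recall higher than precision.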


7 changes: 5 additions & 2 deletions cornac/models/__init__.py
@@ -1,2 +1,5 @@
-from .cf import *
-from .context_cf import *
+from .bpr import *
+from .c2pf import *
+from .hpf import *
+from .pcrl import *
+from .pmf import *
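After this restructuring (one folder per model), the model classes should remain importable from the package root. A sketch, assuming each submodule re-exports its model class the way bpr/__init__.py does below:

```python
from cornac.models import Bpr, Pmf
```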
5 changes: 5 additions & 0 deletions cornac/models/bpr/__init__.py
@@ -0,0 +1,5 @@

from .recom_bpr import Bpr


__all__ = ['Bpr']
File renamed without changes.
48 changes: 36 additions & 12 deletions cornac/models/cf/recom_bpr.py → cornac/models/bpr/recom_bpr.py
@@ -66,23 +66,47 @@ def __init__(self, k=5, max_iter=100, learning_rate = 0.001, lamda = 0.01,batch_

    # fit the recommender model to the training data
    def fit(self, X):
-        #change the data to original user Id item Id and rating format
-        X = X.tocoo()
-        data = np.ndarray(shape=(len(X.data), 3), dtype=float)
-        data[:, 0] = X.row
-        data[:, 1] = X.col
-        data[:, 2] = X.data
-
-        print('Learning...')
-        res = bpr(X, data, k=self.k, n_epochs=self.max_iter,lamda = self.lamda, learning_rate= self.learning_rate, batch_size = self.batch_size, init_params=self.init_params)
-        self.U = res['U']
-        self.V = res['V']
-        print('Learning completed')
+        """Fit the model to observations.
+        Parameters
+        ----------
+        X: scipy sparse matrix, required
+            the user-item preference matrix (training data), in a scipy sparse format
+            (e.g., csc_matrix).
+        """
+        if self.trainable:
+            #change the data to original user Id item Id and rating format
+            X = X.tocoo()
+            data = np.ndarray(shape=(len(X.data), 3), dtype=float)
+            data[:, 0] = X.row
+            data[:, 1] = X.col
+            data[:, 2] = X.data
+
+            print('Learning...')
+            res = bpr(X, data, k=self.k, n_epochs=self.max_iter, lamda=self.lamda, learning_rate=self.learning_rate, batch_size=self.batch_size, init_params=self.init_params)
+            self.U = res['U']
+            self.V = res['V']
+            print('Learning completed')
+        else:
+            print('%s is trained already (trainable = False)' % (self.name))

+    #get prediction for a single user (predictions for one user at a time for efficiency purposes)
+    #predictions are not stored for the same efficiency reasons

    def predict(self, index_user):
+        """Predict the scores (ratings) of a user for all items.
+        Parameters
+        ----------
+        index_user: int, required
+            The index of the user for whom to perform predictions.
+        Returns
+        -------
+        Numpy 1d array
+            Array containing the predicted values for all items
+        """

        user_pred = self.U[index_user, :].dot(self.V.T)
        # transform user_pred to a flattened array; other output formats may be considered later
        user_pred = np.array(user_pred, dtype='float64').flatten()
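A minimal end-to-end sketch of the relocated Bpr model, using a toy random matrix rather than a Cornac dataset, and assuming the constructor signature shown in the hunk header above (its tail is truncated here) with trainable left at its default:

```python
import numpy as np
import scipy.sparse as sp
from cornac.models import Bpr

# Toy preference matrix: 50 users x 30 items, ~10% of entries observed.
X = sp.random(50, 30, density=0.1, format='csc', random_state=np.random.RandomState(0))

rec_bpr = Bpr(k=5, max_iter=100, learning_rate=0.001, lamda=0.01)
rec_bpr.fit(X)

scores = rec_bpr.predict(index_user=0)  # 1d array, one score per item
top10 = (-scores).argsort()[:10]        # top-10 recommendation list, as in run_exp
```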
File renamed without changes.
File renamed without changes.
File renamed without changes.
