In [1]:
import pandas as pd
import numpy as np
import random
import math
import os
import matplotlib.pyplot as plt
import time
from sklearn.preprocessing import MinMaxScaler
import mars
from mars import MARS    #Proposed method module, it contains all functions and methods explained in the paper
import metrics    #Proposed metrics module, it contains all metric functions explained in the paper

# Seed NumPy's global random generator with a fixed value.
np.random.seed(1994)
# Dataset sub-folder; it is concatenated verbatim onto the data root, hence the leading backslash.
DS = '\\MovieLens'



In [2]:
# Data directory: the "Data" root immediately followed by the dataset suffix stored in DS.
outdir = f"Data{DS}"

In [3]:
# Load the price/profit table of the items; the first CSV column becomes the index
# and the first row supplies the column names.
# The file name is a raw string: in a normal literal "\i" is an invalid escape
# sequence, which Python reports as a DeprecationWarning / SyntaxWarning.
# The resulting path is unchanged.
df_price = pd.read_csv(outdir + r"\item_profit.csv", engine='python', index_col=0, header=0)
# Keep df_price as loaded and hand a separate copy to the rest of the notebook.
price_df = df_price.copy()

In [4]:
# Load the MovieLens-1M ratings table; the first row of the CSV holds the column names.
rating_df = pd.read_csv(
    outdir + r'\ratings.csv',
    header=0,
)

In [5]:
# MovieLens-1M data preprocessing
# Drop the timestamp column (returns a new frame instead of mutating in place).
rating_df = rating_df.drop(columns='timestamp')
# Matrix dimensions: number of distinct users and distinct movies.
users = rating_df['user_id'].nunique()
items = rating_df['movie_id'].nunique()
# Replace the raw ids by category codes so they can be used directly as
# row / column positions of the rating matrix.
rating_df['user_id'] = rating_df['user_id'].astype('category').cat.codes
rating_df['movie_id'] = rating_df['movie_id'].astype('category').cat.codes

# Creation of the sparse rating matrix of size (users x items); cells without a rating stay 0.
# From here on rating_df is a NumPy array with one row per rating; columns 0, 1, 2
# are used as user code, movie code and rating value.
rating_df = rating_df.to_numpy()
rating_matrix = np.zeros((users, items), dtype='int8')
# Fill every rated cell with a single indexed assignment instead of a Python
# loop over all rating rows.
rating_matrix[rating_df[:, 0], rating_df[:, 1]] = rating_df[:, 2]
print(rating_matrix.shape)

(6040, 3706)


In [6]:
# Load the predicted rating matrix (R^*) from a header-less CSV and keep it as a NumPy array.
nR = pd.read_csv(outdir + r'\MF_matrix.csv', header=None).to_numpy()
print(nR.shape)

(6040, 3706)


In [7]:
# Compare the observed ratings (R) with the predicted matrix (R^*) and report the metrics.
print('Accuracy R vs R^*')

# Number of rating rows divided by the number of cells in the (users x items) matrix.
rated_fraction = rating_df.shape[0]/(users*items)
print('Sparcity = ', rated_fraction)

# Error metrics provided by the project's metrics module.
mae_full = metrics.rating_mae(rating_matrix, nR)
print('MAE = ', mae_full)

rmse_full = metrics.rating_rmse(rating_matrix, nR)
print('RMSE = ', rmse_full)

Accuracy R vs R^*
Sparcity =  0.044683625622312845
MAE =  0.6963651619429324
RMSE =  0.8887821070861819


In [8]:
# Hyperparameters shared by all the methods evaluated below.
th = 3.5             # High rating threshold
tr = 4.5             # Ranking threshold
N = 20               # Size of the TopN recommendation lists
acceptance = 0.02    # Tolerance for the optimization process (passed as `tol`)
max_iter = 20        # Passed as `iterations` to the weights optimization
max_iter2 = 20       # Passed as `s_iterations` to the weights optimization
lr = 4               # Passed as `lr` to the weights optimization

In [9]:
# Empty results table: one column per reported metric, one row added per evaluated method.
score_df = pd.DataFrame(
    columns=[
        'Accuracy', 'Impact', 'NDCG', 'RMSE', 'MAE',
        'Presicion', 'Avg. Profit', 'Items',
    ],
)
# Row labels (method names), collected in the same order as the rows of score_df.
sc_ind = list()

In [10]:
# Standard method: recommend straight from the predicted matrix nR.
sc_ind.append('Standard')

rec_list_usersnR = mars.topN_rec(N,nR)    #TopN recommendations in the standard method

# Obtain metrics for the standard model. The standard lists are compared with
# themselves, so this row acts as the reference for the other methods.
uniquenR = np.unique(rec_list_usersnR)
profitnR, pful_nR = metrics.get_average_profit(rec_list_usersnR, price_df['profit'])
rmse1, mae1 = metrics.rmse_mae(nR,rec_list_usersnR)
acc = metrics.accuracy_in_recommendations(rec_list_usersnR,rec_list_usersnR)

# Save scores in the results table.
# DataFrame.append was removed in pandas 2.0, so the row is built as a one-row
# frame and concatenated; an empty table is simply replaced by the first row.
score_row = pd.DataFrame([{'Accuracy':acc,
                           'Impact':1-acc,
                           'NDCG':metrics.get_NDCG(nR,rec_list_usersnR,rec_list_usersnR)[0],
                           'RMSE':rmse1,
                           'MAE':mae1,
                           'Presicion':metrics.presicion_in_recommendations(rating_matrix, rec_list_usersnR,th)[0],
                           'Avg. Profit':profitnR,
                           'Items': len(uniquenR)}], columns=score_df.columns)
score_df = score_row if score_df.empty else pd.concat([score_df, score_row], ignore_index=True)

In [11]:
# Method proposed by Jannach & Adomavicius (2017) (Baseline)
sc_ind.append('Baseline')

# Get baseline model: each item gets the value 1/(profit+1), which is handed to
# mars.baseline together with the predicted matrix and both thresholds.
var = 'profit'
profit = price_df[var]
# NOTE(review): profit[i] is a label lookup on the price table's index; this
# presumably relies on the index being 0..n-1 - confirm against item_profit.csv.
rank_x = np.array([1/(profit[i]+1) for i in range(len(profit))])
jan_rat_mat = mars.baseline(nR, rank_x, th,tr)

# TopN recommendations in baseline model (the matrix is negated before ranking)
rec_list_jan = mars.topN_rec(N,-jan_rat_mat)

# Obtain metrics comparing the standard lists with the baseline lists
rmse1, mae1 = metrics.rmse_mae(nR,rec_list_jan)
profitJAN, pful_JAN = metrics.get_average_profit(rec_list_jan, price_df['profit'])
unique = np.unique(rec_list_jan)
acc = metrics.accuracy_in_recommendations(rec_list_usersnR,rec_list_jan)

# Save scores in the results table.
# DataFrame.append was removed in pandas 2.0, so the row is built as a one-row
# frame and concatenated; an empty table is simply replaced by the first row.
score_row = pd.DataFrame([{'Accuracy': acc,
                           'Impact':1-acc,
                           'NDCG':metrics.get_NDCG(nR,rec_list_usersnR,rec_list_jan)[0],
                           'RMSE':rmse1,
                           'MAE':mae1,
                           'Presicion':metrics.presicion_in_recommendations(rating_matrix, rec_list_jan,th)[0],
                           'Avg. Profit':profitJAN,
                           'Items': len(unique)}], columns=score_df.columns)
score_df = score_row if score_df.empty else pd.concat([score_df, score_row], ignore_index=True)

In [12]:
# MARS(0) model: fixed weights, no optimization.
# W holds one [0, 1] weight pair per row of nR.
W = np.tile([0,1],(nR.shape[0],1))

# Profit column of the price table as an (n_items, 1) array.
profit_vec=price_df[['profit']].to_numpy()

# Create MARS model based on nR
mars_model = MARS(nR)
# Define and transform the attributes (only the profit vector, no tensor)
mars_model.to_rank(vectors=profit_vec, tensor=None)

# Get the MARS matrix for the fixed weights W
mars_rat_mat = mars_model.get_mars(tr,W)

# Recommend TopN items to all the users (the matrix is negated before ranking)
rec_list_users = mars.topN_rec(N,-mars_rat_mat)    #TopN recommendations in MARS method

# Obtain metrics comparing standard vs MARS
profitMARS, pful_MARS = metrics.get_average_profit(rec_list_users, price_df['profit'])
unique2 = np.unique(rec_list_users)
acc2 = metrics.accuracy_in_recommendations(rec_list_usersnR,rec_list_users)
rmse2, mae2 = metrics.rmse_mae(nR,rec_list_users)

# Save scores in the results table.
# DataFrame.append was removed in pandas 2.0, so the row is built as a one-row
# frame and concatenated; an empty table is simply replaced by the first row.
score_row = pd.DataFrame([{'Accuracy': acc2,
                           'Impact':1-acc2,
                           'NDCG':metrics.get_NDCG(nR,rec_list_usersnR,rec_list_users)[0],
                           'RMSE':rmse2,
                           'MAE':mae2,
                           'Presicion':metrics.presicion_in_recommendations(rating_matrix, rec_list_users,th)[0],
                           'Avg. Profit':profitMARS,
                           'Items': len(unique2)}], columns=score_df.columns)
score_df = score_row if score_df.empty else pd.concat([score_df, score_row], ignore_index=True)

# Label the row with the ratio of the two column sums of the model's weights.
w_sum = mars_model.weights.sum(axis=0)
sc_ind.append('MARS('+str(round(w_sum[0]/w_sum[1],4))+')')

In [13]:
# Proposed method (MARS): one optimized model per target impact level.
for impact in [0,0.1,0.25,0.5,0.75,1,2]:
    # Create MARS model based on nR
    mars_model = MARS(nR)
    # Define and transform the attributes (only the profit vector, no tensor)
    mars_model.to_rank(vectors=profit_vec, tensor=None)


    # Training process to obtain the weights to get the desired impact or lower
    print('Starting weights optimization process...')
    mars_model.weights_optimization(impact=impact,tol=acceptance,W=2,tr=tr,N=N,
                                    iterations=max_iter,s_iterations=max_iter2,lr=lr,uf=0.97,auf=1.5)
    print('... finishing optimization process.')

    # Get MARS model with the optimal weights
    mars_rat_mat = mars_model.get_mars(tr,mars_model.weights)

    # Recommend TopN items to all the users.
    # NOTE(review): the standard list does not depend on `impact`; it looks
    # loop-invariant, but it is left here because the helpers are opaque - confirm before hoisting.
    rec_list_usersnR = mars.topN_rec(N,nR)    #TopN recommendations in the standard method
    rec_list_users = mars.topN_rec(N,-mars_rat_mat)    #TopN recommendations in MARS method

    # Obtain metrics comparing standard vs MARS
    profitMARS, pful_MARS = metrics.get_average_profit(rec_list_users, price_df['profit'])
    unique2 = np.unique(rec_list_users)
    acc2 = metrics.accuracy_in_recommendations(rec_list_usersnR,rec_list_users)
    rmse2, mae2 = metrics.rmse_mae(nR,rec_list_users)

    # Save scores in the results table.
    # DataFrame.append was removed in pandas 2.0, so the row is built as a
    # one-row frame and concatenated; an empty table is replaced by the first row.
    score_row = pd.DataFrame([{'Accuracy': acc2,
                               'Impact':1-acc2,
                               'NDCG':metrics.get_NDCG(nR,rec_list_usersnR,rec_list_users)[0],
                               'RMSE':rmse2,
                               'MAE':mae2,
                               'Presicion':metrics.presicion_in_recommendations(rating_matrix, rec_list_users,th)[0],
                               'Avg. Profit':profitMARS,
                               'Items': len(unique2)}], columns=score_df.columns)
    score_df = score_row if score_df.empty else pd.concat([score_df, score_row], ignore_index=True)

    # Label the row with the ratio of the two column sums of the optimized weights.
    w_sum = mars_model.weights.sum(axis=0)
    sc_ind.append('MARS('+str(round(w_sum[0]/w_sum[1],4))+')')
    

Starting weights optimization process...
Cost=  0.6642301324503311 Impact=  0.6642301324503248
Cost=  0.604908940397351 Impact=  0.604908940397347
Cost=  0.5273261589403974 Impact=  0.5273261589403961
Cost=  0.47410596026490065 Impact=  0.4741059602649013
Cost=  0.44513245033112586 Impact=  0.445132450331126
Cost=  0.429908940397351 Impact=  0.4299089403973491
Cost=  0.42103476821192054 Impact=  0.42103476821191926
Cost=  0.4151241721854305 Impact=  0.41512417218543
Cost=  0.41158112582781453 Impact=  0.41158112582781464
Cost=  0.40902317880794703 Impact=  0.4090231788079481
Cost=  0.4071854304635762 Impact=  0.40718543046357775
Cost=  0.4056788079470199 Impact=  0.4056788079470214
Cost=  0.4044205298013245 Impact=  0.4044205298013258
Cost=  0.403203642384106 Impact=  0.4032036423841074
Cost=  0.4022847682119205 Impact=  0.40228476821192216
Stopped at iteration: 15
... finishing optimization process.
Starting weights optimization process...
Cost=  0.6037582781456954 Impact=  0.66531456

In [14]:
# Label every row of the results table with the method name collected in sc_ind
# (same order as the rows were added), then display the table.
score_df.index = pd.Index(sc_ind)
score_df

Unnamed: 0,Accuracy,Impact,NDCG,RMSE,MAE,Presicion,Avg. Profit,Items
Standard,1.0,0.0,1.0,0.0,0.0,0.903953,40.31255,2189.0
Baseline,0.305836,0.694164,0.884719,224.511339,188.383568,0.887083,64.982464,1839.0
MARS(0.0),0.305836,0.694164,0.884719,224.511339,188.383568,0.887083,64.982464,1839.0
MARS(6.6967),1.0,0.0,0.998521,1.063661,0.751142,0.903953,40.31255,2189.0
MARS(4.074),0.966051,0.033949,0.996941,1.909485,1.45707,0.905196,41.702238,2206.0
MARS(3.5671),0.908642,0.091358,0.992509,3.450464,2.657724,0.905937,43.908788,2231.0
MARS(2.5901),0.773262,0.226738,0.980309,10.174874,7.53976,0.905675,48.446116,2273.0
MARS(1.4969),0.590877,0.409123,0.957813,29.675901,23.026631,0.899862,54.265137,2230.0
MARS(0.2974),0.308411,0.691589,0.904803,183.724907,151.202483,0.887704,64.811844,1835.0
MARS(0.0045),0.306192,0.693808,0.885762,223.333531,187.178609,0.886845,64.982464,1837.0
