In [1]:
import pandas as pd
import numpy as np
import random
import math
import os
import matplotlib.pyplot as plt
import time
from sklearn.preprocessing import MinMaxScaler
import mars
from mars import MARS    #Proposed method module, it contains all functions and methods explained in the paper
import metrics    #Proposed metrics module, it contains all metric functions explained in the paper

# Dataset sub-directory; downstream cells build the data path from this value.
DS = r'\AmazonVG'

# One seed for every random number generator available in the session, so that
# Restart Kernel -> Run All gives the same numbers each time.
SEED = 1994
np.random.seed(SEED)
random.seed(SEED)    # the stdlib generator is imported above and was previously left unseeded



In [2]:
# Get the directory where the data is.
# os.path.join picks the separator of the running OS. DS may start with a
# path separator, which is stripped first: otherwise the join would treat it
# as a new root and drop the 'Data' prefix.
outdir = os.path.join('Data', str(DS).lstrip('\\/'))

In [3]:
#Get Amazon Video Games ratings (the first row of the file is the header)
#The path is joined with os.path.join instead of a hard-coded backslash
rating_df = pd.read_csv(os.path.join(outdir, 'ratings.csv'), header=0)

In [4]:
# Get the prices/profit of each item in the data set.
# The first CSV column becomes the index and the first row is the header.
# The file name is joined with os.path.join: the former non-raw literal
# started with a backslash followed by 'i', which is an invalid escape sequence.
df_price = pd.read_csv(os.path.join(outdir, 'item_profit.csv'),
                       engine='python', index_col=0, header=0)
# Work on a copy so the frame exactly as loaded stays available in df_price.
price_df = df_price.copy()

In [5]:
#Amazon Video Games data preprocessing
#Columns are renamed positionally: rating, user, item (items are called 'movie_id' in this notebook)
rating_df.columns = ['rating', 'user_id', 'movie_id']

#Rating-matrix dimensions: distinct users and distinct rated items
users, items = (rating_df[col].nunique() for col in ('user_id', 'movie_id'))

#Keep only the price/profit rows whose index label is a rated item.
#This has to happen before the raw ids are replaced by integer codes below.
rated_item_ids = rating_df['movie_id'].unique()
price_df = price_df[price_df.index.isin(rated_item_ids)]

#Replace the raw ids by integer category codes
#NOTE(review): later cells use price_df by position, so the code order presumably
#has to match the row order of price_df - confirm against item_profit.csv
for id_col in ('user_id', 'movie_id'):
    rating_df[id_col] = rating_df[id_col].astype('category').cat.codes

In [6]:
#Build the rating matrix of size (users x items); cells without a rating stay 0
rating_df = rating_df.to_numpy()    #from here on rating_df is a plain array of (rating, user code, item code) rows
rating_matrix = np.zeros((users, items), dtype='int8')
for score, user_idx, item_idx in rating_df:
    #Rows are written in file order, so a later duplicate (user, item) pair overwrites an earlier one
    rating_matrix[user_idx, item_idx] = score
rating_matrix.shape

(8369, 5643)

In [7]:
#Get predicted matrix (R^*); the file has no header row
#The path is joined with os.path.join instead of a hard-coded backslash
nR = pd.read_csv(os.path.join(outdir, 'MF_matrix.csv'), header=None)
nR = nR.to_numpy()
nR.shape

(8369, 5643)

In [8]:
#Compare actual vs predicted (R vs R^*) and get metrics
print('Accuracy R vs R^*')

#Number of rating rows divided by the number of cells of the (users x items) matrix
filled_share = rating_df.shape[0] / (users * items)
print('Sparcity = ', filled_share)

#Error metrics from the project's metrics module, each printed as soon as it is computed
mae_full = metrics.rating_mae(rating_matrix, nR)
print('MAE = ', mae_full)
rmse_full = metrics.rating_rmse(rating_matrix, nR)
print('RMSE = ', rmse_full)

Accuracy R vs R^*


In [9]:
#Set hyperparameters
th, tr = 3.5, 4.5            #High rating threshold, Ranking threshold
N = 20                       #TopN items
acceptance = 0.02            #Tolerance for the optimization process
max_iter = max_iter2 = 20    #Passed to weights_optimization as iterations / s_iterations
lr = 4                       #Passed to weights_optimization as lr

In [10]:
#Empty score table: one row per evaluated method is added by the following cells
score_columns = ['Accuracy', 'Impact', 'NDCG', 'RMSE', 'MAE', 'Presicion', 'Avg. Profit', 'Items']
score_df = pd.DataFrame(columns=score_columns)
sc_ind = list()    #Row labels (name of each method), in the order the rows are added

In [11]:
#Standard method
sc_ind.append('Standard')

rec_list_usersnR = mars.topN_rec(N,nR)    #TopN recommendations in the standard method

#Obtain metrics for standard model
uniquenR = np.unique(rec_list_usersnR)    #Distinct items recommended across all users
profitnR, pful_nR = metrics.get_average_profit(rec_list_usersnR, price_df['profit'])
rmse1, mae1 = metrics.rmse_mae(nR,rec_list_usersnR)
#The standard list is compared with itself: it is the reference the other methods are scored against
acc = metrics.accuracy_in_recommendations(rec_list_usersnR,rec_list_usersnR)

#Save scores in Dataframe
#DataFrame.append was removed in pandas 2.0; concatenating a one-row frame gives the same table
standard_row = {'Accuracy':acc,
                'Impact':1-acc,
                'NDCG':metrics.get_NDCG(nR,rec_list_usersnR,rec_list_usersnR)[0],
                'RMSE':rmse1,
                'MAE':mae1,
                'Presicion':metrics.presicion_in_recommendations(rating_matrix, rec_list_usersnR,th)[0],
                'Avg. Profit':profitnR,
                'Items': len(uniquenR)}
score_df = pd.concat([score_df, pd.DataFrame([standard_row])], ignore_index=True)


In [12]:
#Method proposed by Jannach & Adomavicius (2017) (Baseline)
sc_ind.append('Baseline')

#Get baseline model
var = 'profit'
profit = price_df[var]
#Inverse-profit value per item, computed by position.
#profit is indexed by item id, so the former profit[i] with an integer i was either a
#label lookup or a deprecated positional fallback; .to_numpy() is always positional and
#matches how the profit vector is built for the MARS model.
rank_x = 1 / (profit.to_numpy() + 1)
jan_rat_mat = mars.baseline(nR, rank_x, th,tr)

#TopN recommendations in baseline model (the matrix is negated before ranking)
rec_list_jan = mars.topN_rec(N,-jan_rat_mat)

#Obtain metrics comparing baseline vs MARS
rmse1, mae1 = metrics.rmse_mae(nR,rec_list_jan)
profitJAN, pful_JAN = metrics.get_average_profit(rec_list_jan, price_df['profit'])
unique = np.unique(rec_list_jan)    #Distinct items recommended across all users
acc = metrics.accuracy_in_recommendations(rec_list_usersnR,rec_list_jan)

#Save scores in Dataframe
#DataFrame.append was removed in pandas 2.0; concatenating a one-row frame gives the same table
baseline_row = {'Accuracy': acc,
                'Impact':1-acc,
                'NDCG':metrics.get_NDCG(nR,rec_list_usersnR,rec_list_jan)[0],
                'RMSE':rmse1,
                'MAE':mae1,
                'Presicion':metrics.presicion_in_recommendations(rating_matrix, rec_list_jan,th)[0],
                'Avg. Profit':profitJAN,
                'Items': len(unique)}
score_df = pd.concat([score_df, pd.DataFrame([baseline_row])], ignore_index=True)

In [13]:
#MARS with fixed weights: every user (row of nR) gets the weight pair [0, 1]
#NOTE(review): the second weight presumably applies to the profit attribute - confirm in mars.MARS
W = np.tile([0,1],(nR.shape[0],1))

#Profit as a single-column array, in the row order of price_df
profit_vec = price_df[['profit']].to_numpy()

mars_model = MARS(nR)
mars_model.to_rank(vectors=profit_vec, tensor=None)
mars_rat_mat = mars_model.get_mars(tr,W)

#TopN recommendations for the fixed-weight model (the matrix is negated before ranking)
rec_list_users = mars.topN_rec(N,-mars_rat_mat)

#Obtain metrics comparing standard vs fixed-weight MARS
#NOTE(review): uniquenR, profitnR and pful_nR were first set by the Standard cell and are
#overwritten here; the names are kept so code relying on them keeps working
uniquenR = np.unique(rec_list_users)
profitnR, pful_nR = metrics.get_average_profit(rec_list_users, price_df['profit'])
rmse1, mae1 = metrics.rmse_mae(nR,rec_list_users)
acc = metrics.accuracy_in_recommendations(rec_list_usersnR,rec_list_users)

#Save scores in Dataframe
#DataFrame.append was removed in pandas 2.0; concatenating a one-row frame gives the same table
fixed_weight_row = {'Accuracy':acc,
                    'Impact':1-acc,
                    'NDCG':metrics.get_NDCG(nR,rec_list_usersnR,rec_list_users)[0],
                    'RMSE':rmse1,
                    'MAE':mae1,
                    'Presicion':metrics.presicion_in_recommendations(rating_matrix, rec_list_users,th)[0],
                    'Avg. Profit':profitnR,
                    'Items': len(uniquenR)}
score_df = pd.concat([score_df, pd.DataFrame([fixed_weight_row])], ignore_index=True)

#Row label: ratio between the column sums of the model weights
w_sum = mars_model.weights.sum(axis=0)
sc_ind.append('MARS('+str(round(w_sum[0]/w_sum[1],4))+')')

In [14]:
#Proposed method (MARS)
#One model is trained and scored per target impact value
for impact in [0,0.1,0.25,0.5,0.75,1,2]:
    #Create MARS model based on nR
    mars_model = MARS(nR)
    #Define and Transform the attributes
    mars_model.to_rank(vectors=profit_vec, tensor=None)


    #Training process to obtain the weights to get the desired impact or lower
    print('Starting weights optimization process...')
    mars_model.weights_optimization(impact=impact,tol=acceptance,W=2,tr=tr,N=N,
                                    iterations=max_iter,s_iterations=max_iter2,lr=lr,uf=0.97,auf=1.5)
    print('... finishing optimization process.')

    #Get MARS model with the optimal weights
    mars_rat_mat = mars_model.get_mars(tr,mars_model.weights)

    #Recommend TopN items to all the users
    rec_list_users = mars.topN_rec(N,-mars_rat_mat)    #TopN recommendations in MARS method

    #Obtain metrics comparing standard vs MARS
    profitMARS, pful_MARS = metrics.get_average_profit(rec_list_users, price_df['profit'])
    unique2 = np.unique(rec_list_users)    #Distinct items recommended across all users
    acc2 = metrics.accuracy_in_recommendations(rec_list_usersnR,rec_list_users)
    rmse2, mae2 = metrics.rmse_mae(nR,rec_list_users)

    #Save scores in Dataframe
    #DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame replaces it.
    #The row is added in every iteration (not once after the loop) so the scores already
    #computed are kept if a later optimization run is interrupted.
    mars_row = {'Accuracy': acc2,
                'Impact':1-acc2,
                'NDCG':metrics.get_NDCG(nR,rec_list_usersnR,rec_list_users)[0],
                'RMSE':rmse2,
                'MAE':mae2,
                'Presicion':metrics.presicion_in_recommendations(rating_matrix, rec_list_users,th)[0],
                'Avg. Profit':profitMARS,
                'Items': len(unique2)}
    score_df = pd.concat([score_df, pd.DataFrame([mars_row])], ignore_index=True)

    #Row label: ratio between the column sums of the optimized weights
    w_sum = mars_model.weights.sum(axis=0)
    sc_ind.append('MARS('+str(round(w_sum[0]/w_sum[1],4))+')')

Starting weights optimization process...
Cost=  0.8404707850400286 Impact=  0.8404707850400187
Cost=  0.7951248655753375 Impact=  0.7951248655753216
Cost=  0.5961345441510335 Impact=  0.5961345441510333
Cost=  0.21289879316525273 Impact=  0.2128987931652522
Cost=  0.1104074560879436 Impact=  0.11040745608794397
Cost=  0.07591110048990322 Impact=  0.07591110048990576
Cost=  0.057707013980164916 Impact=  0.05770701398017095
Cost=  0.046857450113514186 Impact=  0.046857450113519154
Cost=  0.03956267176484648 Impact=  0.03956267176485043
Cost=  0.034084119966543217 Impact=  0.03408411996654628
Cost=  0.029854223921615512 Impact=  0.02985422392161788
Cost=  0.026418926992472242 Impact=  0.026418926992474008
Cost=  0.02334209582984827 Impact=  0.023342095829849398
Cost=  0.02074321902258336 Impact=  0.020743219022583945
Cost=  0.018472935834627814 Impact=  0.018472935834627897
Stopped at iteration: 15
... finishing optimization process.
Starting weights optimization process...
Cost=  0.76472

In [15]:
#Label every score row with the method name collected in sc_ind, then display the table
method_labels = pd.Index(sc_ind)
score_df.index = method_labels
score_df

Unnamed: 0,Accuracy,Impact,NDCG,RMSE,MAE,Presicion,Avg. Profit,Items
Standard,1.0,0.0,1.0,0.0,0.0,0.894737,225.550883,313.0
Baseline,0.146362,0.853638,0.789851,1464.897841,1265.546517,0.892562,1456.906005,473.0
MARS(0.0),0.146362,0.853638,0.789851,1464.897841,1265.546517,0.892562,1456.906005,473.0
MARS(11.4108),1.0,0.0,0.999777,1.034879,0.689616,0.894737,225.550883,313.0
MARS(7.5259),0.912851,0.087149,0.998098,4.619079,3.04488,0.898734,269.739373,325.0
MARS(7.0263),0.824131,0.175869,0.9937,11.871826,6.959201,0.926829,329.802737,338.0
MARS(5.9813),0.644563,0.355437,0.97616,47.150154,26.883606,0.913462,489.698693,371.0
MARS(4.4852),0.41735,0.58265,0.936376,189.805854,113.565755,0.93913,782.697094,416.0
MARS(0.7156),0.147389,0.852611,0.795781,1420.718107,1215.541678,0.890756,1453.716559,468.0
MARS(0.0288),0.146583,0.853417,0.790652,1462.957749,1263.215599,0.892562,1456.905573,471.0
