In [1]:
import pandas as pd
import numpy as np
import random
import math
import os
import matplotlib.pyplot as plt
import time
from sklearn.preprocessing import MinMaxScaler
import mars
from mars import MARS    #Proposed method module, it contains all functions and methods explained in the paper
import metrics    #Proposed metrics module, it contains all metric functions explained in the paper

# Dataset sub-directory; it is appended verbatim to the 'Data' root when the input
# path is built, hence the leading Windows-style backslash separator.
DS = r'\AmazonVG'

# Seed every random generator this notebook imports (NumPy and the stdlib `random`)
# from one constant, so a Restart & Run All reproduces the same numbers.
SEED = 1994
np.random.seed(SEED)
random.seed(SEED)



In [2]:
# Folder holding every input file of the selected dataset: the 'Data' root
# followed by the dataset suffix stored in DS.
outdir = ''.join((r'Data', str(DS)))

In [3]:
# Load the Amazon Video Games ratings; the first CSV row is used as the header.
ratings_path = outdir + r'\ratings.csv'
rating_df = pd.read_csv(ratings_path, header=0)

In [4]:
# Load the price/profit table of the items; the first CSV column becomes the index
# and the first row the header.
# The path fragment is a raw string because '\A' is not a valid escape sequence:
# a non-raw literal yields the same text but emits an invalid-escape warning on
# recent Python versions.
df_price = pd.read_csv(outdir + r'\AVG2_item_profit.csv', engine='python', index_col=0, header=0)

# Work on a copy so the table exactly as loaded stays available in df_price.
price_df = df_price.copy()

In [5]:
# Amazon Video Games data preprocessing
rating_df.columns = ['rating', 'user_id', 'movie_id']

# Matrix dimensions: number of distinct users and distinct items in the ratings
users = rating_df['user_id'].nunique()
items = rating_df['movie_id'].nunique()

# Keep only the price/profit rows of items that were rated. This has to run
# before the ids are replaced below, because price_df is matched on the raw ids.
rated_item_ids = rating_df['movie_id'].unique()
price_df = price_df[price_df.index.isin(rated_item_ids)]

# Replace the raw user and item ids with integer category codes.
# NOTE(review): price_df is filtered but not re-ordered here; if later cells address
# items by code, its row order must match the category order - confirm.
for id_col in ('user_id', 'movie_id'):
    rating_df[id_col] = rating_df[id_col].astype('category').cat.codes

In [6]:
# Build the rating matrix R of size (users x items); cells with no rating stay 0
# and stored values are cast to int8.
# np.asarray accepts both the DataFrame and the ndarray left behind by a previous
# execution, so re-running this cell does not fail on a missing `.to_numpy`.
rating_df = np.asarray(rating_df)
rating_matrix = np.zeros((users, items), dtype='int8')

# Each row is (rating, user code, item code); when a (user, item) pair occurs more
# than once, the later row overwrites the earlier one.
for score, user_idx, item_idx in rating_df:
    rating_matrix[user_idx, item_idx] = score
rating_matrix.shape

(8369, 5643)

In [7]:
# Load the predicted rating matrix (R^*); the CSV has no header row, so every
# line is data. The frame is converted to a NumPy array right away.
predicted_path = outdir + r'\MF_matrix.csv'
nR = pd.read_csv(predicted_path, header=None).to_numpy()
nR.shape

(8369, 5643)

In [8]:
# Compare the observed ratings (R) with the predicted ones (R^*) and print the metrics
print('Accuracy R vs R^*')

# Number of rating rows divided by the number of cells of the (users x items) matrix
n_ratings = rating_df.shape[0]
n_cells = users * items
print('Sparcity = ', n_ratings / n_cells)

mae_value = metrics.rating_mae(rating_matrix, nR)
print('MAE = ', mae_value)

rmse_value = metrics.rating_rmse(rating_matrix, nR)
print('RMSE = ', rmse_value)

Accuracy R vs R^*


In [9]:
# Hyperparameters shared by the evaluation cells
th = 3.5                    # High-rating threshold (handed to the precision metric)
tr = 4.5                    # Ranking threshold (handed to get_mars / weights_optimization)
N = 20                      # Length of each top-N recommendation list
acceptance = 0.02           # Tolerance of the optimization process (its `tol` argument)
max_iter = max_iter2 = 20   # Passed as `iterations` and `s_iterations` to the optimizer
lr = 4                      # Passed as `lr` to the optimizer

In [10]:
# Empty results table: one row is added per evaluated method, one column per metric.
score_columns = ['Accuracy', 'Impact', 'NDCG', 'RMSE', 'MAE', 'Presicion', 'Avg. Profit', 'Items']
score_df = pd.DataFrame(columns=score_columns)

# Row labels (name of each method), filled in the same order as the rows of score_df.
sc_ind = list()

In [11]:
# Standard method: baseline recommendations taken directly from the predicted matrix nR
sc_ind.append('Standard')

rec_list_usersnR = mars.topN_rec(N, nR)    # Top-N recommendations in the standard method

# Obtain metrics for the standard model
uniquenR = np.unique(rec_list_usersnR)     # Distinct values across all recommendation lists
profitnR, pful_nR = metrics.get_average_profit(rec_list_usersnR, price_df['profit'])
rmse1, mae1 = metrics.rmse_mae(nR, rec_list_usersnR)
# The baseline list is compared with itself; later cells compare their lists against it.
acc = metrics.accuracy_in_recommendations(rec_list_usersnR, rec_list_usersnR)

# Save the scores as a new row. DataFrame.append was removed in pandas 2.0, so the
# row is wrapped in a one-row frame and added with pd.concat, which works on old
# and new pandas versions alike.
standard_scores = {'Accuracy': acc,
                   'Impact': 1 - acc,
                   'NDCG': metrics.get_NDCG(nR, rec_list_usersnR, rec_list_usersnR)[0],
                   'RMSE': rmse1,
                   'MAE': mae1,
                   'Presicion': metrics.presicion_in_recommendations(rating_matrix, rec_list_usersnR, th)[0],
                   'Avg. Profit': profitnR,
                   'Items': len(uniquenR)}
score_df = pd.concat([score_df, pd.DataFrame([standard_scores])], ignore_index=True)


In [12]:
# MARS(0) model: instead of optimized weights, every row of nR gets the fixed
# weight vector [0, 1, 1].
W = np.tile([0, 1, 1], (nR.shape[0], 1))

mars_model = MARS(nR)
mars_model.to_rank(vectors=price_df[['profit', 'price']].to_numpy(), tensor=None)
mars_rat_mat = mars_model.get_mars(tr, W)

# NOTE(review): the matrix is negated before the top-N selection, presumably because
# lower MARS values mean a better rank - confirm against mars.topN_rec.
rec_list_users = mars.topN_rec(N, -mars_rat_mat)

# These metric names are shared with the standard-method cell and are overwritten here;
# only rec_list_usersnR (the baseline list) is kept for the comparison.
uniquenR = np.unique(rec_list_users)
profitnR, pful_nR = metrics.get_average_profit(rec_list_users, price_df['profit'])
pricenR, pful_nR = metrics.get_average_profit(rec_list_users, price_df['price'])
rmse1, mae1 = metrics.rmse_mae(nR, rec_list_users)
acc = metrics.accuracy_in_recommendations(rec_list_usersnR, rec_list_users)

# Save the scores as a new row. DataFrame.append was removed in pandas 2.0, so the
# row is added with pd.concat; a column missing from score_df ('Avg. Price') is
# appended after the existing ones.
mars0_scores = {'Accuracy': acc,
                'Impact': 1 - acc,
                'NDCG': metrics.get_NDCG(nR, rec_list_usersnR, rec_list_users)[0],
                'RMSE': rmse1,
                'MAE': mae1,
                'Presicion': metrics.presicion_in_recommendations(rating_matrix, rec_list_users, th)[0],
                'Avg. Profit': profitnR,
                'Avg. Price': pricenR,
                'Items': len(uniquenR)}
score_df = pd.concat([score_df, pd.DataFrame([mars0_scores])], ignore_index=True)

# Row label: the column-wise mean of the model weights, rounded to two decimals.
w_sum = mars_model.weights.mean(axis=0)
sc_ind.append('MARS(' + ','.join(str(round(w_sum[i], 2)) for i in range(3)) + ')')

In [13]:
# Proposed method (MARS): train one model per target impact and record its metrics
profit_vec = price_df[['profit', 'price']].to_numpy()
for impact in [0, 0.1, 0.25, 0.5, 0.75, 1, 2]:
    # Create the MARS model based on nR
    mars_model = MARS(nR)
    # Define and transform the attributes
    mars_model.to_rank(vectors=profit_vec, tensor=None)

    # Training process to obtain the weights that reach the desired impact or lower
    # NOTE(review): W=3 presumably is the number of weights per user - confirm in mars.
    print('Starting weights optimization process...')
    mars_model.weights_optimization(impact=impact, tol=acceptance, W=3, tr=tr, N=N,
                                    iterations=max_iter, s_iterations=max_iter2, lr=lr, uf=0.97, auf=1.5)
    print('... finishing optimization process.')

    # Get the MARS matrix with the optimized weights
    mars_rat_mat = mars_model.get_mars(tr, mars_model.weights)

    # Recommend the top-N items to all users (the matrix is negated before the selection)
    rec_list_users = mars.topN_rec(N, -mars_rat_mat)    # Top-N recommendations in the MARS method

    # Obtain metrics comparing standard vs MARS
    profitMARS, pful_MARS = metrics.get_average_profit(rec_list_users, price_df['profit'])
    priceMARS, pful_MARS = metrics.get_average_profit(rec_list_users, price_df['price'])
    unique2 = np.unique(rec_list_users)
    acc2 = metrics.accuracy_in_recommendations(rec_list_usersnR, rec_list_users)
    rmse2, mae2 = metrics.rmse_mae(nR, rec_list_users)

    # Save the scores as a new row. DataFrame.append was removed in pandas 2.0, so the
    # row is added with pd.concat. It is added inside the loop on purpose: results of
    # finished iterations survive if a later optimization is interrupted.
    mars_scores = {'Accuracy': acc2,
                   'Impact': 1 - acc2,
                   'NDCG': metrics.get_NDCG(nR, rec_list_usersnR, rec_list_users)[0],
                   'RMSE': rmse2,
                   'MAE': mae2,
                   'Presicion': metrics.presicion_in_recommendations(rating_matrix, rec_list_users, th)[0],
                   'Avg. Profit': profitMARS,
                   'Avg. Price': priceMARS,
                   'Items': len(unique2)}
    score_df = pd.concat([score_df, pd.DataFrame([mars_scores])], ignore_index=True)

    # Row label: the column-wise mean of the optimized weights, rounded to two decimals.
    w_sum = mars_model.weights.mean(axis=0)
    sc_ind.append('MARS(' + ','.join(str(round(w_sum[i], 2)) for i in range(3)) + ')')

Starting weights optimization process...
Cost=  0.8528139562671765 Impact=  0.8528139562671687
Cost=  0.8442287011590394 Impact=  0.8442287011590273
Cost=  0.7470127852790058 Impact=  0.7470127852789893
Cost=  0.3115366232524794 Impact=  0.3115366232524803
Cost=  0.1316883737603059 Impact=  0.13168837376030731
Cost=  0.08538057115545467 Impact=  0.0853805711554567
Cost=  0.06335284980284384 Impact=  0.06335284980284933
Cost=  0.05038236348428726 Impact=  0.05038236348429269
Cost=  0.0420002389771777 Impact=  0.042000238977181976
Cost=  0.036438045166686604 Impact=  0.03643804516669007
Cost=  0.0317481180547258 Impact=  0.0317481180547285
Cost=  0.02803799737125107 Impact=  0.02803799737125312
Cost=  0.02469829131317961 Impact=  0.02469829131318102
Cost=  0.02203369578205283 Impact=  0.02203369578205369
Cost=  0.019602102999163595 Impact=  0.019602102999163932
Stopped at iteration: 15
... finishing optimization process.
Starting weights optimization process...
Cost=  0.7753435296929144 

In [14]:
# Label every score row with the name of its method, then display the finished table
score_df.index = pd.Index(sc_ind)
score_df

Unnamed: 0,Accuracy,Impact,NDCG,RMSE,MAE,Presicion,Avg. Profit,Items,Avg. Price
Standard,1.0,0.0,1.0,0.0,0.0,0.894737,219.97613,313.0,
"MARS(0.0,1.0,1.0)",0.146206,0.853794,0.791656,1493.649638,1278.081569,0.912,1601.668294,480.0,9337.622488
"MARS(0.96,0.08,0.07)",1.0,0.0,0.999775,1.032491,0.687501,0.894737,219.97613,313.0,1263.662477
"MARS(0.89,0.12,0.12)",0.912893,0.087107,0.998089,4.743222,3.069578,0.901235,263.572988,324.0,1563.355945
"MARS(0.89,0.12,0.12)",0.817368,0.182632,0.993382,12.986395,7.460557,0.920455,328.463695,337.0,2001.131477
"MARS(0.87,0.14,0.14)",0.698202,0.301798,0.981328,39.762938,22.115504,0.933962,444.14226,375.0,2722.923201
"MARS(0.84,0.17,0.17)",0.423981,0.576019,0.933066,231.469304,134.06879,0.942149,844.109338,421.0,5093.112037
"MARS(0.57,0.73,0.74)",0.14635,0.85365,0.793458,1452.623113,1238.623462,0.91129,1594.30014,482.0,9344.1607
"MARS(0.03,0.99,0.99)",0.146218,0.853782,0.791681,1493.397126,1277.760031,0.912,1601.705927,480.0,9337.328308
