In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from matplotlib.ticker import StrMethodFormatter
import numpy as np
import pareto
import math

In [None]:
# Name of the dataset under analysis: it is spliced into the paths of the result
# CSVs that are read and of the figures that are saved by the cells below.
dataset = 'ml1m' # 'ft', 'ml100k', 'ml1m'

## Jenetics search

In [None]:
# Read the semicolon-separated 'recklessness-no' Jenetics search results of the
# selected dataset and show the resulting frame.
jenetics_no_recklessness_df = pd.read_csv(
    f'./results/{dataset}-jenetic-search-recklessness-no.csv',
    sep=';',
)
jenetics_no_recklessness_df

In [None]:
# Read the semicolon-separated 'recklessness-yes' Jenetics search results of the
# selected dataset and show the resulting frame.
jenetics_recklessness_df = pd.read_csv(
    f'./results/{dataset}-jenetic-search-recklessness-yes.csv',
    sep=';',
)
jenetics_recklessness_df

In [None]:
# process results with recklessness
# Keep the rows of generation 150 (presumably the final generation - confirm
# against the search configuration) that have a positive cumulative coverage.
# The explicit .copy() turns the boolean-mask selection into an independent
# frame, so adding the 'is_pareto_front' column to it further down does not
# trigger pandas' SettingWithCopyWarning.
recklessness_last = jenetics_recklessness_df[
    (jenetics_recklessness_df['generation'] == 150)
    & (jenetics_recklessness_df['cumulativeCoverage'] > 0)
].copy()

def is_pareto_front_r(row):
    """Tell whether ``row`` is on the Pareto front of ``recklessness_last``.

    ``row`` is dominated when at least one row of the module-level
    ``recklessness_last`` frame has both a strictly greater ``cumulativeMAE``
    and a strictly greater ``cumulativeCoverage``; ties do not dominate.

    Args:
        row: Series (or any mapping) exposing the ``cumulativeMAE`` and
            ``cumulativeCoverage`` keys.

    Returns:
        bool: ``False`` if some row dominates ``row``, ``True`` otherwise.
    """
    higher_mae = recklessness_last['cumulativeMAE'] > row['cumulativeMAE']
    higher_coverage = recklessness_last['cumulativeCoverage'] > row['cumulativeCoverage']
    # A dominating row must beat the candidate on both objectives at once.
    return not (higher_mae & higher_coverage).any()

# Flag every row of recklessness_last that no other row of that frame dominates.
recklessness_last['is_pareto_front'] = recklessness_last.apply(is_pareto_front_r, axis=1)

# process results without recklessness
# Keep the rows of generation 150 with a positive cumulative coverage. The
# explicit .copy() makes the selection an independent frame, so the
# 'is_pareto_front' column added to it afterwards does not trigger pandas'
# SettingWithCopyWarning.
no_recklessness_last = jenetics_no_recklessness_df[
    (jenetics_no_recklessness_df['generation'] == 150)
    & (jenetics_no_recklessness_df['cumulativeCoverage'] > 0)
].copy()

def is_pareto_front_nr(row):
    """Tell whether ``row`` is on the Pareto front of ``no_recklessness_last``.

    The candidate is rejected as soon as the module-level
    ``no_recklessness_last`` frame holds a row that is strictly greater on
    both ``cumulativeMAE`` and ``cumulativeCoverage``; equal values do not count.

    Args:
        row: Series (or any mapping) exposing the ``cumulativeMAE`` and
            ``cumulativeCoverage`` keys.

    Returns:
        bool: ``True`` when no row dominates ``row``, ``False`` otherwise.
    """
    mae, coverage = row['cumulativeMAE'], row['cumulativeCoverage']
    dominators = no_recklessness_last[
        (no_recklessness_last['cumulativeMAE'] > mae)
        & (no_recklessness_last['cumulativeCoverage'] > coverage)
    ]
    return len(dominators) == 0

# Flag every row of no_recklessness_last that no other row of that frame dominates.
no_recklessness_last['is_pareto_front'] = no_recklessness_last.apply(is_pareto_front_nr, axis=1)

# plot results
plt.figure(figsize=(6, 6))

# Two-decimal tick labels on both axes; each axis gets its own formatter instance.
pareto_axes = plt.gca()
for pareto_axis in (pareto_axes.xaxis, pareto_axes.yaxis):
    pareto_axis.set_major_formatter(StrMethodFormatter('{x:,.2f}'))

# Every candidate as a hollow marker: blue with recklessness, orange without.
for run_df, run_color, run_label in (
    (recklessness_last, 'blue', 'yes'),
    (no_recklessness_last, 'orange', 'no'),
):
    plt.scatter(
        run_df['cumulativeCoverage'],
        run_df['cumulativeMAE'],
        facecolors='none',
        edgecolors=run_color,
        label=run_label,
    )

# Pareto-front rows sorted by coverage so the connecting line runs left to right.
recklessness_pareto = recklessness_last[recklessness_last['is_pareto_front']].sort_values('cumulativeCoverage')
no_recklessness_pareto = no_recklessness_last[no_recklessness_last['is_pareto_front']].sort_values('cumulativeCoverage')

for front_df, front_color in (
    (recklessness_pareto, 'blue'),
    (no_recklessness_pareto, 'orange'),
):
    plt.plot(front_df['cumulativeCoverage'], front_df['cumulativeMAE'], color=front_color)

plt.xlabel('coverage', fontsize=16)
plt.ylabel('1-MAE', fontsize=16)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

plt.legend(title='recklessness', title_fontsize=12, fontsize=12)

# Written under figures/, named after the selected dataset.
plt.savefig(f'figures/{dataset}-pareto.png', dpi=300, bbox_inches='tight')


In [None]:
# Scatter of every row of the recklessness run (all generations), shaded by
# its 'recklessness' value on the 'Blues' colour map.
plt.figure(figsize=(7, 6))

# Two-decimal tick labels on both axes; each axis gets its own formatter instance.
value_axes = plt.gca()
for value_axis in (value_axes.xaxis, value_axes.yaxis):
    value_axis.set_major_formatter(StrMethodFormatter('{x:,.2f}'))

plt.scatter(
    jenetics_recklessness_df['cumulativeCoverage'],
    jenetics_recklessness_df['cumulativeMAE'],
    c=jenetics_recklessness_df['recklessness'],
    cmap='Blues',
)

plt.xlabel('coverage', fontsize=16)
plt.ylabel('1-MAE', fontsize=16)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

# Colour bar for the scatter drawn above.
cbar = plt.colorbar()
cbar.set_label('recklessness', fontsize=15)
cbar.ax.tick_params(labelsize=14)

plt.savefig(f'figures/{dataset}-recklessness-value.png', dpi=300, bbox_inches='tight')


## Test split error

In [None]:
# Read the semicolon-separated test-split error results of the selected dataset,
# discard every row that contains a missing value, and show the frame.
df = pd.read_csv(f'results/{dataset}-test-split-error.csv', sep=';').dropna()
df

In [None]:
def is_pareto_front(row):
    """Tell whether ``row`` is on the Pareto front of its own group in ``df``.

    The group is made of the rows of the module-level ``df`` that share the
    ``recklessness`` and ``reliability`` values of ``row``. ``row`` is
    dominated when a group member is strictly greater on both ``1-mae`` and
    ``coverage``; ties do not dominate.

    Args:
        row: Series exposing ``recklessness`` and ``reliability`` as
            attributes and ``1-mae`` and ``coverage`` as keys.

    Returns:
        bool: ``False`` if a group member dominates ``row``, ``True`` otherwise.
    """
    in_same_group = (df['recklessness'] == row.recklessness) & (df['reliability'] == row.reliability)
    group = df[in_same_group]

    dominates = (group['1-mae'] > row['1-mae']) & (group['coverage'] > row['coverage'])
    return not dominates.any()

# Add a boolean column holding, for every row, the verdict of is_pareto_front
# (evaluated within the row's own recklessness/reliability group), then show df.
df['is_pareto_front'] = df.apply(is_pareto_front, axis = 1)
df

In [None]:
import matplotlib.pyplot as plt

# Reliability thresholds to compare; one subplot is drawn per threshold.
rel_values = [0.30, 0.40, 0.50]

# squeeze=False makes plt.subplots return a 2-D array of Axes even for a single
# column, so the per-threshold indexing below also works when rel_values holds
# only one entry (the default would hand back a bare Axes object instead).
fig, axs = plt.subplots(1, len(rel_values), figsize=(6*len(rel_values), 6), squeeze=False)
axs = axs[0]  # the only row: one Axes per threshold
fig.tight_layout(h_pad=0, w_pad=4)

for i, rel_value in enumerate(rel_values):
    ax = axs[i]

    ax.set_xlabel('coverage', fontsize=15)
    ax.set_ylabel('1-MAE', fontsize=15)

    ax.set_title("threshold=" + str(rel_value), fontsize=20)

    ax.tick_params(axis='both', which='major', labelsize=14)

    # Results with recklessness: every point in blue, Pareto-front points joined by a line.
    rel_df = df[(df['reliability'] == rel_value) & (df['recklessness'] == 'yes')]
    ax.scatter(rel_df['coverage'], rel_df['1-mae'], facecolors='none', edgecolors='blue', label='yes')

    rel_pareto_front = rel_df[rel_df['is_pareto_front']].sort_values('coverage', ascending=False)
    ax.plot(rel_pareto_front['coverage'], rel_pareto_front['1-mae'], color='blue')

    # Results without recklessness: same drawing in orange.
    not_rel_df = df[(df['reliability'] == rel_value) & (df['recklessness'] == 'no')]
    ax.scatter(not_rel_df['coverage'], not_rel_df['1-mae'], facecolors='none', edgecolors='orange', label='no')

    not_rel_pareto_front = not_rel_df[not_rel_df['is_pareto_front']].sort_values('coverage', ascending=False)
    ax.plot(not_rel_pareto_front['coverage'], not_rel_pareto_front['1-mae'], color='orange')

    ax.legend(title='recklessness', title_fontsize=12, fontsize=12)

plt.savefig('figures/' + dataset + '-test-error.png', dpi=300, bbox_inches='tight')

In [None]:
import seaborn as sns
from pygmo import *

# Reliability thresholds for which the hypervolume indicator is computed.
reliabilities = [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60, 0.65, 0.70]

# One record per (threshold, recklessness) pair. The records are gathered in a
# list and turned into a frame once: DataFrame.append no longer exists in
# pandas 2.x, and growing a frame row by row copies it on every iteration.
hv_records = []

for reliability in reliabilities:
    for recklessness in ['yes', 'no']:

        # Pareto-front rows of this threshold / recklessness combination.
        aux_df = df[(df['reliability'] == reliability) & (df['recklessness'] == recklessness) & (df['is_pareto_front'])]

        hv = hypervolume(aux_df[['coverage', '1-mae']])

        hv_records.append({
            'reliability': reliability,
            'recklessness': recklessness,
            # Stored value is 1 minus the hypervolume w.r.t. the reference point [1, 1].
            'hv': 1.0 - hv.compute([1,1])
        })

hv_df = pd.DataFrame(hv_records, columns=['reliability', 'recklessness', 'hv'])


plt.figure(figsize=(7,6))

plt.gca().xaxis.set_major_formatter(StrMethodFormatter('{x:,.2f}'))
plt.gca().yaxis.set_major_formatter(StrMethodFormatter('{x:,.2f}'))

# One line per recklessness setting: stored HV value against the threshold.
for hv_label in ('yes', 'no'):
    hv_series = hv_df[hv_df['recklessness'] == hv_label]
    plt.plot(hv_series['reliability'], hv_series['hv'], label=hv_label)

plt.xlabel('threshold', fontsize=16)
plt.ylabel('HV', fontsize=16)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

plt.legend(title='recklessness', title_fontsize=12, fontsize=12)

plt.savefig('figures/' + dataset + '-hv.png', dpi=300, bbox_inches='tight')