In [6]:
import pandas as pd
import seaborn as sns
import scipy

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

import pprint
# Pretty-printer instance (4-space indent) kept available for ad-hoc inspection.
pp = pprint.PrettyPrinter(indent=4)

# Report the versions of the main libraries in use, one line each.
print(
    "Using pandas %s version" % pd.__version__,
    "Using seaborn %s version" % sns.__version__,
    "Using scipy %s version" % scipy.__version__,
    sep="\n",
)


Using pandas 1.2.0 version
Using seaborn 0.11.1 version
Using scipy 1.6.0 version


In [12]:
# Split strategy names. The plotting cells iterate over this list, using each
# name as the results sub-folder to read from and as the figure title.
splits = [
    'GroupShuffleSplit',
    'KFold',
    'ShuffleSplit',
    'StratifiedKFold',
    'StratifiedShuffleSplit',
    'TimeSeriesSplit',
]

In [24]:
#fig1
# For every split strategy draw two figures:
#   1. a bar chart of each recommender's P@10 on the 'Full' and 'Test' columns;
#   2. a slope chart showing how each recommender's rank changes between the two.

# One colour per recommender, shared by both charts (zip() stops at 8 entries).
recommender_colors = ('red', 'blue', 'green', 'magenta', 'black', 'purple', 'grey', 'orange')

for split in splits:
    print(split)

    # Tab-separated results table; column names are taken from row index 4.
    fig1 = pd.read_table(
        r'C:\Projects\RecSys2020\results\\' + split + r'\figure1.txt',
        sep='\t',
        header=4,
    )
    f1 = np.array(fig1)

    # Re-shape by hand: one column per recommender, rows 'Full' and 'Test'.
    # Everything after the first cell of a table row is treated as a score.
    df = pd.DataFrame(index=['Full', 'Test'])
    for name, table_row in zip(fig1.Recommender, f1):
        df[name] = table_row[1:]

    # --- bar chart -------------------------------------------------------
    # df.plot opens a new figure and returns its Axes, so the explicit Axes
    # calls below address the same objects plt.gca()/plt.gcf() would.
    bar_ax = df.plot(kind='bar', color=recommender_colors, legend=None)
    bar_ax.set_ylabel('P@10')
    bar_ax.xaxis.set_tick_params(rotation=0)
    bar_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', title='Recommender', title_fontsize='xx-large')
    bar_ax.set_title(split)
    bar_ax.figure.tight_layout()

    # --- ranking chart ---------------------------------------------------
    # Rank 1 is the best (highest) score in each column.
    full_ranks = fig1.Full.rank(ascending=False)
    test_ranks = fig1.Test.rank(ascending=False)
    data = [
        [name, full_rank, test_rank]
        for name, full_rank, test_rank in zip(fig1.Recommender, full_ranks, test_ranks)
    ]

    # One two-point line per recommender: x=1 is 'Full', x=2 is 'Test'.
    artists = [
        plt.Line2D(xdata=[1, 2], ydata=[row[1:]], lw=1, color=color, marker='o')
        for row, color in zip(data, recommender_colors)
    ]

    fig, ax = plt.subplots()
    for artist in artists:
        ax.add_artist(artist)

    # add_artist does not autoscale, so the view window is set explicitly.
    ax.set_ybound([0.8, 8.2])
    ax.set_xbound([0.94, 2.06])
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    ax.invert_yaxis()  # rank 1 at the top
    ax.set_xticks([1, 2])
    ax.set_xticklabels(['Full', 'Test'])

    ax.set_ylabel('System ranking')
    ax.legend(artists, fig1.Recommender, bbox_to_anchor=(1.05, 1), loc='upper left', title='Recommender', title_fontsize='xx-large')
    ax.set_title(split)

    fig.tight_layout()
plt.show()




GroupShuffleSplit
KFold
ShuffleSplit
StratifiedKFold
StratifiedShuffleSplit
TimeSeriesSplit


In [26]:
#fig3

def rank(data):
    """Rank the recommenders against each other within every row of a table.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'Target size' column; every other column holds one
        recommender's score for that row. The frame is not modified.

    Returns
    -------
    list of list
        One entry per recommender column, in column order, shaped as
        ``[name, rank_in_row_0, rank_in_row_1, ...]``. Rank 1.0 is the highest
        score in the row; tied scores share the average of their ranks
        (pandas' default ranking method).
    """
    # Rank row by row, keeping the table's own row order. The previous
    # implementation grouped by 'Target size', which iterates in sorted key
    # order and kept only the first row of each group, so the ranks could end
    # up in a different order (or be fewer) than the rows that the plotting
    # helpers use for their tick labels.
    scores = data.drop(columns='Target size')
    row_ranks = scores.rank(axis=1, ascending=False)

    # Transpose so that each entry lists one recommender's rank across rows.
    per_recommender = np.array(row_ranks).T
    return [[name] + ranks.tolist() for name, ranks in zip(row_ranks.columns, per_recommender)]

def plot_system_rankings(data, ax, xlabel='|N_u|', ylabel='', xticklabels=None):
    """Draw one rank-trajectory line per recommender on ``ax``.

    Parameters
    ----------
    data : list of list
        Rows shaped ``[name, rank_0, rank_1, ...]``; everything after the first
        element of a row is plotted against positions 0, 1, 2, ...
    ax : matplotlib Axes
        Axes to draw on.
    xlabel, ylabel : str
        Axis labels.
    xticklabels : sequence, optional
        Labels for the x positions. When omitted, the labels are read from the
        notebook-level ``ndcg['Target size']`` table, which is what this
        function always did before the parameter existed.

    Returns
    -------
    list
        The ``Line2D`` artists that were added, in row order (usable as legend
        handles).
    """
    colors = ('red', 'blue', 'green', 'magenta', 'black', 'purple', 'grey', 'orange')

    # Positions are derived from the rows being plotted rather than from the
    # notebook-global `ndcg_rank`, so the ticks always match the lines drawn.
    positions = list(range(len(data[0]) - 1)) if len(data) else []

    # zip() stops at the shorter input, so at most len(colors) rows are drawn.
    artists = [
        plt.Line2D(xdata=positions, ydata=row[1:], lw=1, color=color, marker='o')
        for row, color in zip(data, colors)
    ]
    for artist in artists:
        ax.add_artist(artist)

    # add_artist does not autoscale, so the view window is set explicitly.
    # NOTE(review): these fixed bounds look sized for 8 systems and 13 x
    # positions -- confirm if the tables change.
    ax.set_ybound([0.8, 9])
    ax.set_xbound([-0.2, 13.06])

    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    if xticklabels is None:
        # Legacy fallback: relies on a global `ndcg` table having been loaded.
        xticklabels = ndcg['Target size'].array
    ax.set_xticks(positions)
    ax.set_xticklabels(xticklabels)
    ax.set_yticks(list(range(1, 9)))
    ax.invert_yaxis()  # rank 1 at the top
    ax.invert_xaxis()

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    return artists

def plot_metrics(data, ax, xlabel='|N_u|', ylabel='System ranking'):
    """Draw one line per recommender showing its metric value in each row.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a 'Target size' column. Every column after the first is
        plotted as one line, row index on the x axis.
    ax : matplotlib Axes
        Axes to draw on.
    xlabel, ylabel : str
        Axis labels.

    Returns
    -------
    list
        The ``Line2D`` artists that were added, in column order.
    """
    palette = ('red', 'blue', 'green', 'magenta', 'black', 'purple', 'grey', 'orange')

    artists = []
    # zip() stops at the shorter input, so at most len(palette) columns are drawn.
    for column, shade in zip(data.columns[1:], palette):
        values = data[column]
        positions = list(range(len(values)))
        line = plt.Line2D(xdata=positions, ydata=[values], lw=1, color=shade, marker='o')
        ax.add_artist(line)
        artists.append(line)

    # add_artist does not autoscale: leave 10% headroom above the largest
    # per-column maximum (the first column is skipped).
    peak = np.array(data.max()[1:]).max()
    ax.set_ybound([0, peak + peak*0.1])
    ax.set_xbound([-0.1, 13.06])

    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    target_sizes = data['Target size']
    ax.set_xticks(list(range(len(target_sizes))))
    ax.set_xticklabels(target_sizes.array)
    ax.invert_xaxis()

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    return artists


In [34]:
# Figure 3: for every split strategy draw a 3x2 grid. Each row is one metric
# (Precision@10, Recall@10, nDCG@10); the left panel shows the metric values and
# the right panel the resulting system rankings.
for split in splits:
    print(split)

    # One file holds three stacked tables of 13 rows each; `header` is the row
    # index from which each table's column names are read.
    figure3_path = r'C:\Projects\RecSys2020\results\\' + split + r'\figure3.txt'
    ndcg = pd.read_table(figure3_path, sep='\t', header=4, nrows=13)
    precision = pd.read_table(figure3_path, sep='\t', header=19, nrows=13)
    recall = pd.read_table(figure3_path, sep='\t', header=34, nrows=13)

    # `ndcg` and `ndcg_rank` must keep these names: plot_system_rankings reads
    # them as notebook globals for its tick positions and labels.
    ndcg_rank = rank(ndcg)
    precision_rank = rank(precision)
    recall_rank = rank(recall)

    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 8))

    panels = (
        ('Precision@10', precision, precision_rank),
        ('Recall@10', recall, recall_rank),
        ('nDCG@10', ndcg, ndcg_rank),
    )
    for (metric_ax, ranking_ax), (label, scores, ranking) in zip(axes, panels):
        # Enlarge the metric label first; plot_metrics then sets the same text
        # again without touching the font size chosen here.
        metric_ax.set_ylabel(label, rotation=90, size='large')
        plot_metrics(scores, metric_ax, ylabel=label)
        # After the last row, `artists` holds the nDCG ranking lines, which
        # serve as the legend handles below.
        artists = plot_system_rankings(ranking, ranking_ax)

    plt.legend(artists, ndcg.columns[1:], bbox_to_anchor=(1.05, 1), loc='upper left', title='Recommender', title_fontsize='x-large')
    fig.suptitle(split)
    fig.tight_layout()
plt.show()

GroupShuffleSplit
KFold
ShuffleSplit
StratifiedKFold
StratifiedShuffleSplit
TimeSeriesSplit
