In [4]:
import pandas as pd
import seaborn as sns
import scipy
import itertools

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import os
import pprint
pp = pprint.PrettyPrinter(indent=4)

print("Using pandas %s version" % pd.__version__)
print("Using seaborn %s version"% sns.__version__)
print("Using scipy %s version" % scipy.__version__)


Using pandas 1.2.0 version
Using seaborn 0.11.1 version
Using scipy 1.6.0 version


In [5]:
splits = [#'GroupShuffleSplit',
'ShuffleSplit',
'StratifiedShuffleSplit',
'StratifiedKFold',
'KFold',
'TimeSeriesSplit']

data_dir = r'C:\Projects\RecSys2020\results\\'


In [6]:
fig, ax = plt.subplots(len(splits), 2)
    
colors = ('red','blue','green','magenta', 'black', 'purple', 'grey', 'orange')
for i,split in enumerate(splits):
    print(split)
    ax1 = ax[i,0]
    ax2 = ax[i,1]
    ax1.set_title(split)
    ax2.set_title(split)

    fig1 = pd.read_table(r'C:\Projects\RecSys2020\results\\' +split+ r'\figure1.txt',sep='\t',header=4)
    f1 = np.array(fig1)
    df = pd.DataFrame(index=['Full', 'Test'])
    for i,r in enumerate(fig1.Recommender):
        df[r] = f1[i][1:]

    ax00 = df.plot(kind='bar', color=colors,legend=None, ax = ax1)

    ax00.set_ylabel('P@10')
    plt.gca().xaxis.set_tick_params(rotation=0)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', title='Recommender', title_fontsize='xx-large')
    
    data = [list(a) for a in zip(fig1.Recommender, fig1.Full.rank(ascending=False),fig1.Test.rank(ascending=False))]

    artists = []
    for row, color in zip(data, colors):
        artists.append(plt.Line2D(xdata=[1,2], 
                                  ydata=[row[1:]], lw=1, color=color, marker='o'))


    for artist in artists:
        a = ax2.add_artist(artist)

    ax2.set_ybound([0.8,8.2])
    ax2.set_xbound([0.94,2.06])
    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.spines['bottom'].set_visible(False)
    ax2.spines['left'].set_visible(False)
    ax2.invert_yaxis()
    ax2.set_xticks([1,2])
    ax2.set_xticklabels(['Full', 'Test'])

    ax2.set_ylabel('System ranking')
    plt.legend(artists, fig1.Recommender, bbox_to_anchor=(1.05, 1), loc='upper left', title='Recommender', title_fontsize='xx-large')
    fig.set_size_inches(7, 6)
    fig.tight_layout()
    plt.gcf().tight_layout()


    
fig.set_size_inches(10,12)
plt.gcf().tight_layout()

png = os.path.join(data_dir, 'figure1.cross-validation.png')
fig.savefig(png, format='png', dpi=300)
    
#plt.show()

ShuffleSplit


No handles with labels found to put in legend.


StratifiedShuffleSplit


No handles with labels found to put in legend.


StratifiedKFold


No handles with labels found to put in legend.


KFold


No handles with labels found to put in legend.


TimeSeriesSplit


No handles with labels found to put in legend.


In [36]:
#fig3

def rank(data):
    artists = []
    colors = ('red','blue','green','magenta', 'black', 'purple', 'grey', 'orange')
    data.set_index('Target size') 
    for i,j in data.groupby(['Target size'], axis=0):
        del j['Target size']
        rank = j.rank(axis=1,ascending=False)
        artist = np.array(rank)[0]
        artists.append(artist)

    rank = []
    for i,t in enumerate(data.keys()[1:]):
        rank.append([t] + np.array(artists)[:,i].tolist())
    return rank

def plot_system_rankings(data, ax, xlabel='|N_u|', ylabel=''):
    for row, color in zip(data, ('red','blue','green','magenta', 'black', 'purple', 'grey', 'orange')):
        x = list(range(len(data[0])-1))
        artist = plt.Line2D(xdata=x, ydata=[row[1:]], lw=1, color=color, marker='o')
        ax.add_artist(artist)

    ax.set_ybound([0.8,9])
    ax.set_xbound([-0.2,13.06])

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.set_xticks(list(range(len(data[0])-1)))
    ax.set_xticklabels(list(itertools.chain(precision['Target size'].array[:-1] , ['Full'])))
    #plt.xticks(list(range(len(ndcg_rank[0])-1)), ndcg['Target size'].array)
    ax.set_yticks(list(range(1,9)))
    ax.invert_yaxis()
    ax.invert_xaxis()
    
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    return artists

In [103]:
def plot_metrics(data, ax, xlabel='|N_u|', ylabel='System ranking', twinx=None):
    #if twinx != '':
    #    print(twinx)
    #    del data[twinx]
    #    #ax222 = ax.twinx()
    #    #ax222.add_artist(artist)
    
    for column_title, color in zip(data.columns[1:], ('red','blue','green','magenta', 'black', 'purple', 'grey', 'orange')):
        x = list(range(len(data[column_title])))
        artist = plt.Line2D(xdata=x, ydata=[data[column_title]], lw=1, color=color, marker='o')
        ax.add_artist(artist)
        
    #ax2 = ax.twinx()  # instantiate a second axes that shares the same x-axis
    #color = 'tab:blue'
    #ax2.set_ylabel('sin', color=color)  # we already handled the x-label with ax1
    #ax2.plot(t, data2, color=color)
    #ax2.tick_params(axis='y', labelcolor=color)

    max = np.array((data.max()[1:])).max()
    ax.set_ybound([0, max + max*0.1+.00001])
    ax.set_xbound([-0.1,13.06])

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.set_xticks(list(range(len(data['Target size']))))
    ax.set_xticklabels(list(itertools.chain(data['Target size'].array[:-1] , ['Full'])))
    ax.invert_xaxis()
    
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    
    if twinx is not None and twinx.max() > 0:
        label='Sum of p-values'
        ax2 = ax.twinx()
        ax2.plot(list(range(len(twinx))),
            twinx,
            label=label,
            lw=1,
            color='magenta',
            fillstyle='none',
            markeredgecolor='black',
            marker='s')
        ax2.set_ylabel(label)
        max = twinx.max()
        ax2.set_ylim(0, max+0.1*max+0.01)

        ax2.spines['top'].set_visible(False)
        ax2.spines['right'].set_visible(False)
        ax2.spines['bottom'].set_visible(False)
        ax2.spines['left'].set_visible(False)
        ax2.legend(loc=0)
    
    return artists

In [9]:
for split in splits:
    print(split)
    ndcg = pd.read_table(r'C:\Projects\RecSys2020\results\\' +split+ r'\figure3.txt',sep='\t',header=4,nrows=13)
    precision = pd.read_table(r'C:\Projects\RecSys2020\results\\' +split+ r'\figure3.txt',sep='\t',header=19,nrows=13)
    recall = pd.read_table(r'C:\Projects\RecSys2020\results\\' +split+ r'\figure3.txt',sep='\t',header=34,nrows=13)

    ndcg_rank = rank(ndcg)
    precision_rank = rank(precision)
    recall_rank = rank(recall)

    cols = ['Column {}'.format(col) for col in range(1, 2)]
    rows = ['Row {}'.format(row) for row in ['Precision@10', 'Recall@10', 'nDCG@10']]

    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(14, 8))

    #for ax, col in zip(axes[0], cols):
    #    ax.set_title(col)
    #    #ax.set_xlabel('|Nu|')

    for ax, row in zip(axes[:,0], rows):
        ax.set_ylabel(row, rotation=90, size='large')

    plot_metrics(precision, axes[0][0], ylabel='Precision@10')
    plot_metrics(recall, axes[1][0], ylabel='Recall@10')
    artists = plot_metrics(ndcg, axes[2][0], ylabel='nDCG@10')

    plot_system_rankings(precision_rank, axes[0][1])
    plot_system_rankings(recall_rank, axes[1][1])
    artists = plot_system_rankings(ndcg_rank, axes[2][1]) 

    plt.legend(artists, ndcg.columns[1:], bbox_to_anchor=(1.05, 1), loc='upper left', title='Recommender', title_fontsize='x-large')
    fig.suptitle(split)
    fig.tight_layout()
    png = os.path.join(data_dir, 'figure3.' + split +'.png')
    fig.savefig(png, format='png', dpi=300)
    #dpi=1200
    plt.gcf().tight_layout()
#plt.show()

ShuffleSplit
StratifiedShuffleSplit
StratifiedKFold
KFold
TimeSeriesSplit


In [35]:
precision['Sum of p-values']

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
Name: Sum of p-values, dtype: int64

In [104]:
for split in splits:
    print(split)
    ndcg = pd.read_table(r'C:\Projects\RecSys2020\results\\' +split+ r'\figure4.txt.ml1m.txt',sep='\t',header=4,nrows=13)
    precision = pd.read_table(r'C:\Projects\RecSys2020\results\\' +split+ r'\figure4.txt.ml1m.txt',sep='\t',header=19,nrows=13)
    recall = pd.read_table(r'C:\Projects\RecSys2020\results\\' +split+ r'\figure4.txt.ml1m.txt',sep='\t',header=34,nrows=13)

    cols = ['Column {}'.format(col) for col in range(1, 2)]
    rows = ['Row {}'.format(row) for row in ['Precision@10', 'Recall@10', 'nDCG@10']]

    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(14, 8))

    #for ax, col in zip(axes[0], cols):
    #    ax.set_title(col)
    #    #ax.set_xlabel('|Nu|')

    for ax, row in zip(axes, rows):
        ax.set_ylabel(row, rotation=90, size='large')

    twinx='Sum of p-values'
    plot_metrics(precision.iloc[:,:-1], axes[0], ylabel='Precision@10', twinx=precision[twinx])
    
    plot_metrics(recall.iloc[:,:-1], axes[1], ylabel='Recall@10', twinx=recall[twinx])
    artists = plot_metrics(ndcg.iloc[:,:-1], axes[2], ylabel='nDCG@10', twinx=ndcg[twinx])

   
    plt.legend(artists, ndcg.columns[1:], bbox_to_anchor=(1.05, 1), loc='upper left', title='Recommender', title_fontsize='x-large')
    fig.suptitle(split)
    fig.tight_layout()
    png = os.path.join(data_dir, 'figure4.txt.ml1m.' + split +'.png')
    plt.gcf().tight_layout()
    fig.savefig(png, format='png', dpi=300)
    #dpi=1200
#plt.show()
plt.close('all')

ShuffleSplit
StratifiedShuffleSplit
StratifiedKFold
KFold
TimeSeriesSplit


In [79]:
twinx='Sum of p-values'
#max = np.array((precision[twinx].max()[1:])).max()
precision[twinx].max()

89.54485863610144

In [75]:
precision.iloc[:,:-1]

Unnamed: 0,Target size,Expected intersection ratio in top n,Ratio of ties,Ratio of ties at zero
0,0,0.755365,0.711731,0.047174
1,1,0.740828,0.689903,0.047187
2,2,0.724712,0.666934,0.047196
3,5,0.665961,0.596682,0.047294
4,10,0.518498,0.480311,0.047739
5,20,0.328134,0.383908,0.051421
6,50,0.162692,0.312036,0.065829
7,100,0.090152,0.288077,0.085564
8,200,0.0481,0.285885,0.113931
9,500,0.020156,0.31043,0.170257


In [88]:
plt.close('all')