In [1]:
import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats as stats
import seaborn as sns

pp = pprint.PrettyPrinter(indent=4)

print("Using pandas %s version" % pd.__version__)
print("Using seaborn %s version" % sns.__version__)
print("Using scipy %s version" % scipy.__version__)

Using pandas 1.2.0 version
Using seaborn 0.11.1 version
Using scipy 1.6.0 version


In [16]:
ndcg = pd.read_table(
    r"C:\Projects\RecSys2020\results\figure3.txt", sep="\t", header=4, nrows=13
)
precision = pd.read_table(
    r"C:\Projects\RecSys2020\results\figure3.txt", sep="\t", header=19, nrows=13
)
recall = pd.read_table(
    r"C:\Projects\RecSys2020\results\figure3.txt", sep="\t", header=34, nrows=13
)

In [17]:
# print(ndcg)
ndcg.head()

# print(precision)
precision.head()

# print(recall)
recall.head()

Unnamed: 0,1,0.7848709438664028,0.811072900454497,0.8028735224503573,0.7799999058337843,0.8005441220011715,0.7930742129440296,0.7452840017718366,0.6408381573274978
0,2,0.781922820727004,0.8023033621410844,0.7998954032182548,0.769758856461682,0.7814871766589947,0.771274733729588,0.7382710640617874,0.6118826377742608
1,5,0.772160524933437,0.7897981803122325,0.7914644258554876,0.74165393715674,0.7337897084828633,0.715913380005833,0.7194685880787896,0.5327169114394773
2,10,0.7548635454930462,0.7650809913379222,0.7781583822463828,0.7047366464484879,0.6687091100323038,0.6394155537901458,0.6912316939477936,0.4163976013467345
3,20,0.7361920983120515,0.7246068743045091,0.7556716100424434,0.6682109287873848,0.5692020533372218,0.5309663204086958,0.6450942169720567,0.3035835103137908
4,50,0.6909346984735689,0.6329110839153907,0.7065961644463636,0.6090594535197674,0.4107672657504233,0.3617097937335133,0.5591900268690599,0.1759348238528961


In [4]:
def rank(data):
    artists = []
    colors = ("red", "blue", "green", "magenta", "black", "purple", "grey", "orange")
    data.set_index("Target size")
    for i, j in data.groupby(["Target size"], axis=0):
        del j["Target size"]
        rank = j.rank(axis=1, ascending=False)
        artist = np.array(rank)[0]
        artists.append(artist)

    rank = []
    for i, t in enumerate(data.keys()[1:]):
        rank.append([t] + np.array(artists)[:, i].tolist())
    return rank


ndcg_rank = rank(ndcg)
precision_rank = rank(precision)
recall_rank = rank(recall)

KeyError: "None of ['Target size'] are in the columns"

In [5]:
def plot_system_rankings(data, ax, xlabel="|N_u|", ylabel=""):
    artists = []
    for row, color in zip(
        data, ("red", "blue", "green", "magenta", "black", "purple", "grey", "orange")
    ):
        x = list(range(len(data[0]) - 1))
        artists.append(
            plt.Line2D(xdata=x, ydata=[row[1:]], lw=1, color=color, marker="o")
        )

    for artist in artists:
        ax.add_artist(artist)

    ax.set_ybound([0.8, 9])
    ax.set_xbound([-0.2, 13.06])

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.set_xticks(list(range(len(ndcg_rank[0]) - 1)))
    ax.set_xticklabels(ndcg["Target size"].array)
    # plt.xticks(list(range(len(ndcg_rank[0])-1)), ndcg['Target size'].array)
    ax.set_yticks(list(range(1, 9)))
    ax.invert_yaxis()
    ax.invert_xaxis()

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    return artists


##fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex='row', constrained_layout=True)
# fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, nrows=1, constrained_layout=False)

##combine_figure(precision_rank, ax2)
##combine_figure(recall_rank, ax3)
##artists = combine_figure(ndcg_rank, ax1)
##
###plt.gca().invert_yaxis()
##
##
##plt.legend(artists, np.array(ndcg_rank)[:,0], bbox_to_anchor=(1.05, 1), loc='upper left', title='Recommender', title_fontsize='x-large')
##plt.ylabel('System ranking')
##plt.xlabel('|N_u|')
##
###fig.tight_layout(h_pad=5, w_pad=5)
##
###fig.set_constrained_layout_pads(w_pad=2/72, h_pad=2/72, hspace=0.2, wspace=0.2)
##plt.show()

In [6]:
def plot_metrics(data, ax, xlabel="|N_u|", ylabel="System ranking"):
    artists = []
    for rec, color in zip(
        data.columns[1:],
        ("red", "blue", "green", "magenta", "black", "purple", "grey", "orange"),
    ):
        x = list(range(len(data[rec])))
        artists.append(
            plt.Line2D(xdata=x, ydata=[data[rec]], lw=1, color=color, marker="o")
        )

    for artist in artists:
        ax.add_artist(artist)

    max = np.array((data.max()[1:])).max()
    ax.set_ybound([0, max + max * 0.1])
    ax.set_xbound([-0.1, 13.06])

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.set_xticks(list(range(len(data["Target size"]))))
    ax.set_xticklabels(data["Target size"].array)
    ax.invert_xaxis()

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    return artists


##fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex='row', constrained_layout=True)
# fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, nrows=1, constrained_layout=False)

##plot_system_ranking(precision, ax2, 'Precision@10')
##plot_system_ranking(recall, ax3, 'Recall@10')
##artists = plot_system_ranking(ndcg, ax1, 'nDCG@10')
##
##plt.legend(artists, ndcg.columns[1:], bbox_to_anchor=(1.05, 1), loc='upper left', title='Recommender', title_fontsize='x-large')
##plt.xlabel('|N_u|')
##
##plt.show()

In [15]:
cols = ["Column {}".format(col) for col in range(1, 2)]
rows = ["Row {}".format(row) for row in ["Precision@10", "Recall@10", "nDCG@10"]]

fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 8))

# for ax, col in zip(axes[0], cols):
#    ax.set_title(col)
#    #ax.set_xlabel('|Nu|')

for ax, row in zip(axes[:, 0], rows):
    ax.set_ylabel(row, rotation=90, size="large")

plot_metrics(precision, axes[0][0], ylabel="Precision@10")
plot_metrics(recall, axes[1][0], ylabel="Recall@10")
artists = plot_metrics(ndcg, axes[2][0], ylabel="nDCG@10")

plot_system_rankings(precision_rank, axes[0][1])
plot_system_rankings(recall_rank, axes[1][1])
artists = plot_system_rankings(ndcg_rank, axes[2][1])

plt.legend(
    artists,
    ndcg.columns[1:],
    bbox_to_anchor=(1.05, 1),
    loc="upper left",
    title="Recommender",
    title_fontsize="x-large",
)
fig.tight_layout()
plt.show()

ValueError: zero-size array to reduction operation maximum which has no identity

In [14]:
from matplotlib.transforms import offset_copy

cols = ["Column {}".format(col) for col in range(1, 2)]
rows = ["Row {}".format(row) for row in ["Precision@10", "Recall@10", "nDCG@10"]]

fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 8))
plt.setp(axes.flat, xlabel="X-label", ylabel="Y-label")

pad = 5  # in points

for ax, col in zip(axes[0], cols):
    ax.annotate(
        col,
        xy=(0.5, 1),
        xytext=(0, pad),
        xycoords="axes fraction",
        textcoords="offset points",
        size="large",
        ha="center",
        va="baseline",
    )

for ax, row in zip(axes[:, 0], rows):
    ax.annotate(
        row,
        xy=(0, 0.5),
        xytext=(-ax.yaxis.labelpad - pad, 0),
        xycoords=ax.yaxis.label,
        textcoords="offset points",
        size="large",
        ha="right",
        va="center",
    )

fig.tight_layout()
# tight_layout doesn't take these labels into account. We'll need
# to make some room. These numbers are are manually tweaked.
# You could automatically calculate them, but it's a pain.
fig.subplots_adjust(left=0.15, top=0.95)

plt.show()

In [11]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

sns.set_theme(style="ticks")

# Create a dataset with many short random walks
rs = np.random.RandomState(4)
pos = rs.randint(-1, 2, (6, 13)).cumsum(axis=1)
pos -= pos[:, 0, np.newaxis]
step = np.tile(range(13), 6)
walk = np.repeat(range(6), 13)
df = pd.DataFrame(np.c_[pos.flat, step, walk], columns=["position", "step", "walk"])

# Initialize a grid of plots with an Axes for each walk
grid = sns.FacetGrid(
    df, col="walk", hue="walk", palette="tab20c", col_wrap=2, height=1.5
)

# Draw a horizontal line to show the starting point
grid.map(plt.axhline, y=0, ls=":", c=".5")

# Draw a line plot to show the trajectory of each random walk
grid.map(plt.plot, "step", "position", marker="o")

# Adjust the tick positions and labels
grid.set(xticks=np.arange(5), yticks=[-3, 3], xlim=(-0.5, 4.5), ylim=(-3.5, 3.5))

# Adjust the arrangement of the plots
grid.fig.tight_layout(w_pad=1)
plt.show()

In [12]:
print(np.array((recall.max()[1:])).max())

ValueError: zero-size array to reduction operation maximum which has no identity

In [None]:
datasets = ["ml1m", "ml100k"]
splits = ["", "-male", "-female"]

for dataset in datasets:
    fig, axes = plt.subplots(4, 6, sharex="row")
    i = 0
    for split in splits:

        file = os.path.join(
            r"C:\Projects\RecSys2020\results", "figure101." + dataset + split + ".txt"
        )
        print(file)
        lines_with_metrics = [
            (i, linecache.getline(file, i + 1))
            for i, line in enumerate(open(file))
            if line.endswith("@10\n")
        ]
        print(lines_with_metrics)

        params = dict(filepath_or_buffer=file, sep="\t", nrows=9)
        axes[0, 0 + i].set_title(dataset + split)

        for j, metric in enumerate(lines_with_metrics):
            metric_name = metric[1]
            metric_line = metric[0]
            fig1 = pd.read_table(header=metric_line - j, **params)
            pp.pprint(metric_name, metric_line, fig1)
            fig111(data=fig1, title=metric_name, ax1=axes[j, 0 + i], ax2=axes[j, 1 + i])

        # plt.suptitle(dataset + split)
        fig.set_size_inches(22, 12)
        fig.tight_layout()
        i = i + 2
plt.show()