In [1]:
import itertools
import linecache
import os
import pprint
from itertools import chain

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats as stats
import seaborn as sns

# Pretty-printer configured with a 4-space indent.
pp = pprint.PrettyPrinter(indent=4)

# Echo the versions of the main libraries so the cell output records them.
for banner, library in (
    ("Using pandas %s version", pd),
    ("Using seaborn %s version", sns),
    ("Using scipy %s version", scipy),
):
    print(banner % library.__version__)

Using pandas 1.2.0 version
Using seaborn 0.11.1 version
Using scipy 1.6.0 version


In [5]:
# Hex colours, one named constant per recommender variant.
imfFull = "#548235"
imfTest = "#99C979"

knnFull = "#C55A11"
knnTest = "#ff5811"
normalizedKnnFull = "#4A76C6"
normalizedKnnTest = "#19C3FF"

average = "#FFC000"
popularity = "#F18F8F"
random = "#BFBFBF"

# Lookup from the recommender label (as used for the data columns) to its
# colour. "kNN (full/test)" deliberately shares the colour of "kNN (full)".
rec_colors_dict = {
    "iMF (full)": imfFull,
    "iMF (test)": imfTest,
    "kNN (full/test)": knnFull,
    "kNN (full)": knnFull,
    "kNN (test)": knnTest,
    "Normalized kNN (full)": normalizedKnnFull,
    "Normalized kNN (test)": normalizedKnnTest,
    "Average Rating": average,
    "Popularity": popularity,
    "Random": random,
}

# The same colours as an ordered palette (one entry per named constant).
rec_colors = (
    imfFull,
    imfTest,
    knnFull,
    knnTest,
    normalizedKnnFull,
    normalizedKnnTest,
    average,
    popularity,
    random,
)


In [13]:
def plot_system_rankings(data, ax):
    """Plot the rank of every recommender at each target size on ``ax``.

    Every column of ``data`` after the first is ranked row-wise with
    ``ascending=False``, so within a row the largest value gets rank 1.
    One line per recommender is then drawn against the row position
    (0, 1, 2, ...), coloured via ``rec_colors_dict``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a "Target size" column; all columns after the first are
        treated as recommenders whose names are keys of ``rec_colors_dict``.
        NOTE(review): the code assumes "Target size" is the first column.
    ax : matplotlib Axes
        Axes to draw on; its bounds, ticks and spines are modified.

    Returns
    -------
    list
        The ``Line2D`` artists, in recommender column order.
    """
    ranks = data.iloc[:, 1:].rank(axis=1, ascending=False)
    ranks.insert(0, "Target size", data["Target size"])

    artists = []
    for rec in ranks.columns[1:]:
        x = list(range(len(ranks[rec])))
        artist = plt.Line2D(
            xdata=x, ydata=[ranks[rec]], lw=1, color=rec_colors_dict[rec], marker="o"
        )
        artists.append(artist)
        # NOTE(review): add_artist (unlike add_line) does not feed matplotlib's
        # autoscaling, which is presumably why the bounds are set by hand below.
        ax.add_artist(artist)

    ax.set_ybound([0.2, len(artists) + 0.5])
    ax.set_xbound([-0.2, len(ranks["Target size"]) + 0.06])

    hide_spines(ax)

    # One tick per row, labelled with its target size; the label of the last
    # row is replaced by "Full". (The original first set the raw labels and
    # then immediately overwrote them; only the effective call is kept.)
    ax.set_xticks(list(range(len(data["Target size"]))))
    ax.set_xticklabels(
        list(itertools.chain(ranks["Target size"].values[:-1], ["Full"]))
    )

    # Ticks at ranks 1..number of recommenders, with rank 1 at the top.
    ax.set_yticks(list(range(1, len(ranks.keys()))))
    ax.invert_yaxis()

    for tick in ax.get_xticklabels():
        tick.set_rotation(90)

    return artists


def plot_metrics(data, ax):
    """Draw one line per recommender with its metric values on ``ax``.

    Every column of ``data`` after the first is plotted against the row
    position (0, 1, 2, ...) in the colour given by ``rec_colors_dict``. The
    y range is set to run from 0 to 110% of the largest plotted value, the
    spines are hidden, one x tick is placed per row, and the x tick labels
    are rotated by 90 degrees.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a "Target size" column; all columns after the first are
        treated as recommenders whose names are keys of ``rec_colors_dict``.
    ax : matplotlib Axes
        Axes to draw on.

    Returns
    -------
    list
        The ``Line2D`` artists, in recommender column order.
    """
    lines = []
    for name in data.columns[1:]:
        values = data[name]
        line = plt.Line2D(
            xdata=list(range(len(values))),
            ydata=[values],
            lw=1,
            color=rec_colors_dict[name],
            marker="o",
        )
        lines.append(line)
        ax.add_artist(line)

    # Leave 10% headroom above the largest value across all recommenders.
    peak = data.iloc[:, 1:].max().max()
    ax.set_ybound([0, peak * 1.1])

    hide_spines(ax)

    row_positions = list(range(len(data["Target size"])))
    ax.set_xticks(row_positions)

    for label in ax.get_xticklabels():
        label.set_rotation(90)

    return lines


def hide_spines(ax):
    """Make the top, right, bottom and left spines of ``ax`` invisible."""
    for side in ("top", "right", "bottom", "left"):
        ax.spines[side].set_visible(False)


def figure303(data, axes):
    """Fill a 4x2 grid of axes: metric values on the left, rankings on the right.

    Rows are, top to bottom, "P@10", "Recall@10", "nDCG@10" and "FScore@10".
    Column 0 of each row is drawn with ``plot_metrics`` and column 1 with
    ``plot_system_rankings``. The bottom axes of both columns get the x label
    ``$|N_{u}|$``.

    Parameters
    ----------
    data : mapping
        Maps each of the four metric keys above to its table.
    axes : 2-D indexable of Axes
        Indexed as ``axes[row][column]`` with at least 4 rows and 2 columns.

    Returns
    -------
    list
        The artists returned by ``plot_system_rankings`` for "FScore@10".
    """
    # Look up all four tables first so a missing key fails before any drawing.
    tables = [data[key] for key in ("P@10", "Recall@10", "nDCG@10", "FScore@10")]

    for row, table in enumerate(tables):
        plot_metrics(table, axes[row][0])
    axes[3][0].set_xlabel("$|N_{u}|$")

    artists = None
    for row, table in enumerate(tables):
        # Only the artists of the last row (FScore@10) are kept.
        artists = plot_system_rankings(table, axes[row][1])
    axes[3][1].set_xlabel("$|N_{u}|$")

    return artists


def get_figure3_data(dataset, split, results_dir=r"C:\Projects\RecSys2020\results_\\"):
    """Load the per-metric tables stored in one ``figure303`` results file.

    The file ``figure303.<dataset><split>.txt`` inside ``results_dir`` is
    scanned for lines ending in ``@10``; each such line names a metric. The
    tab-separated table following each name is read with ``pd.read_table``.

    Parameters
    ----------
    dataset : str
        Dataset name used in the file name (e.g. "ml1m").
    split : str
        Split suffix used in the file name (e.g. "" or "-male").
    results_dir : str, optional
        Directory holding the results files. Defaults to the location that
        was previously hard-coded, so existing calls behave as before.

    Returns
    -------
    dict
        Maps each metric name to its ``pandas.DataFrame``, in file order.

    Raises
    ------
    ValueError
        If fewer than three metric headers are found (the table height is
        derived from the distance between the second and third header).
    """
    file = os.path.join(results_dir, "figure303." + dataset + split + ".txt")
    print(file)

    # Read the metric names straight from the file inside a context manager:
    # this closes the handle and avoids linecache, which would keep serving
    # the old content if the file is regenerated while the kernel is alive.
    with open(file) as handle:
        lines_with_metrics = [
            (i, line.strip()) for i, line in enumerate(handle) if line.endswith("@10\n")
        ]
    print(lines_with_metrics)

    if len(lines_with_metrics) < 3:
        raise ValueError(
            "expected at least 3 metric headers ending in '@10' in %s, found %d"
            % (file, len(lines_with_metrics))
        )

    # NOTE(review): the "- 3" presumably discounts the metric-name line, the
    # column-header line and one blank separator line per table - confirm
    # against the file format. All tables are assumed to have equal height.
    rows = lines_with_metrics[2][0] - lines_with_metrics[1][0] - 3

    data = dict()
    for j, (metric_line, metric_name) in enumerate(lines_with_metrics):
        # read_table's ``header`` counts lines with blank lines skipped, so the
        # raw line index is shifted by j. NOTE(review): this presumes exactly
        # one blank line before the first table and one per table after it.
        data[metric_name] = pd.read_table(
            file, sep="\t", nrows=rows, header=metric_line - j
        )
    return data


def figure303_final(splits, size, legend=True):
    """Build and save one multi-panel figure per dataset.

    For every entry of the module-level ``datasets`` a grid with one row per
    entry of ``metrics`` and two columns per split is created. Each split's
    column pair is filled by ``figure303`` from the data returned by
    ``get_figure3_data``. The figure is resized, laid out tightly and written
    as a 300 dpi PNG. Figures are left open so the caller can show them.

    Parameters
    ----------
    splits : list of str
        Split suffixes (e.g. ``["", "-male", "-female"]``).
    size : tuple
        Figure size in inches, passed to ``fig.set_size_inches``.
    legend : bool, optional
        When true, add a "Recommender" legend built from the artists and the
        "nDCG@10" columns of the last split processed.
    """
    blank_space = "                                  "

    for dataset in datasets:
        n_columns = 2 * len(splits)
        n_rows = len(metrics)
        fig, axes = plt.subplots(n_rows, n_columns, sharex="row")

        # The leftmost axes of every row get an inverted x axis and carry the
        # metric name as their y label.
        first_column = axes[:, 0]
        for left_ax in first_column:
            left_ax.invert_xaxis()
        for title, left_ax in zip(metrics, first_column):
            left_ax.set_ylabel(title)

        for position, split in enumerate(splits):
            start = 2 * position
            # The title sits on the left axes of the pair; the leading blanks
            # shift it to the right.
            axes[0, start].set_title(blank_space + dataset + split)

            data = get_figure3_data(dataset, split)
            artists = figure303(data, axes[:, start : start + 2])

        if legend:
            plt.legend(
                artists,
                data["nDCG@10"].columns[1:],
                bbox_to_anchor=(1.05, 1),
                loc="upper left",
                title="Recommender",
                title_fontsize="x-large",
            )

        fig.set_size_inches(size)
        fig.tight_layout()

        png_name = "figure3." + dataset + "".join(splits) + ".png"
        png = os.path.join(r"C:\Projects\RecSys2020\results_\_mbd", png_name)
        fig.savefig(png, format="png", dpi=300, transparent=False)


# Row labels of the figures (one grid row per entry) and the datasets to plot.
metrics = ["Precision@10", "Recall@10", "nDCG@10", "FScore@10"]
datasets = ["ml1m", "ml100k"]

# Split suffixes as they appear in the results file names; "" is the unsplit data.
splits_full = ["", "-male", "-female", "-young", "-old"]
splits_gender = ["", "-male", "-female"]
splits_age = ["", "-young", "-old"]

# Figure sizes in inches (width, height).
small_size = (15, 8)
big_size = (42, 12)
normal_size = (22, 8)
a4_size = (11, 8)
a3_size = (16, 11)

# Other layouts used during exploration: a single split with
# figure303_final([suffix], small_size), or every split at once with
# figure303_final(splits_full, big_size).
for split_group in (splits_gender, splits_age):
    figure303_final(split_group, a3_size, legend=False)
plt.show()

C:\Projects\RecSys2020\results_\\figure303.ml1m.txt
[(4, 'P@10'), (20, 'Recall@10'), (36, 'nDCG@10'), (52, 'FScore@10')]
C:\Projects\RecSys2020\results_\\figure303.ml1m-male.txt
[(4, 'P@10'), (20, 'Recall@10'), (36, 'nDCG@10'), (52, 'FScore@10')]
C:\Projects\RecSys2020\results_\\figure303.ml1m-female.txt
[(4, 'P@10'), (20, 'Recall@10'), (36, 'nDCG@10'), (52, 'FScore@10')]
C:\Projects\RecSys2020\results_\\figure303.ml100k.txt
[(4, 'P@10'), (19, 'Recall@10'), (34, 'nDCG@10'), (49, 'FScore@10')]
C:\Projects\RecSys2020\results_\\figure303.ml100k-male.txt
[(4, 'P@10'), (19, 'Recall@10'), (34, 'nDCG@10'), (49, 'FScore@10')]
C:\Projects\RecSys2020\results_\\figure303.ml100k-female.txt
[(4, 'P@10'), (19, 'Recall@10'), (34, 'nDCG@10'), (49, 'FScore@10')]
C:\Projects\RecSys2020\results_\\figure303.ml1m.txt
[(4, 'P@10'), (20, 'Recall@10'), (36, 'nDCG@10'), (52, 'FScore@10')]
C:\Projects\RecSys2020\results_\\figure303.ml1m-young.txt
[(4, 'P@10'), (20, 'Recall@10'), (36, 'nDCG@10'), (52, 'FScore@10

In [None]:
# Placeholder cell: only prints a marker string.
print("empty")