In [1]:
import linecache
import os
import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats as stats
import seaborn as sns

# Shared pretty-printer (4-space indent) for inspecting nested structures.
pp = pprint.PrettyPrinter(indent=4)

# Log the library versions this notebook was run with.
for template, module in (
    ("Using pandas %s version", pd),
    ("Using seaborn %s version", sns),
    ("Using scipy %s version", scipy),
):
    print(template % module.__version__)

Using pandas 1.2.0 version
Using seaborn 0.11.1 version
Using scipy 1.6.0 version


In [2]:
# Palette: one fixed hex color per recommender.
imfFull, imfTest = "#548235", "#99C979"

knnFullTest = "#C55A11"
normalizedKnnFull, normalizedKnnTest = "#4A76C6", "#19C3FF"

# NOTE: `random` here is a color string; it would shadow the stdlib module of
# the same name if that module were ever imported in this notebook.
average, popularity, random = "#FFC000", "#F18F8F", "#BFBFBF"

# Earlier palette, kept for reference:
# colors = ('red','blue','green','magenta', 'black', 'purple', 'grey', 'orange')

# The tuple order is significant: the plotting code consumes it positionally.
rec_colors = tuple(
    [
        imfFull,
        imfTest,
        knnFullTest,
        normalizedKnnFull,
        normalizedKnnTest,
        average,
        popularity,
        random,
    ]
)

In [3]:
# Tab-separated results behind figure 1.
figure1_path = r"C:\Projects\RecSys2020\results\ShuffleSplit\figure1.txt"

# header=4: pandas uses parsed row 4 (0-based) as the column names and
# starts the data on the row after it.
fig1 = pd.read_table(figure1_path, sep="\t", header=4)

fig1.head()

Unnamed: 0,Recommender,Full,Test
0,iMF (full),0.22949,0.511424
1,iMF (test),0.101401,0.528172
2,kNN (full/test),0.190464,0.510805
3,Normalized kNN (full),0.11604,0.503576
4,Normalized kNN (test),0.048652,0.518202


In [5]:
def fig111(data=None, legend=False, title="", ax1=None, ax2=None):
    """Plot one metric as grouped bars (``ax1``) and as a ranking chart (``ax2``).

    Parameters
    ----------
    data : pandas.DataFrame
        One row per recommender. The first column holds the recommender
        names; the ``Full`` and ``Test`` columns hold the metric values.
    legend : bool, default False
        When true, add a legend of recommender names to ``ax2``, placed to
        the right of the axes.
    title : str, default ""
        Y-axis label of the bar chart (callers pass the metric name).
    ax1 : matplotlib.axes.Axes, optional
        Axes for the bar chart; forwarded unchanged to ``DataFrame.plot``.
    ax2 : matplotlib.axes.Axes
        Axes that receives the ranking lines. Required.

    Raises
    ------
    ValueError
        If ``data`` or ``ax2`` is not supplied.
    """
    if data is None:
        raise ValueError("fig111 requires a DataFrame in 'data'")
    if ax2 is None:
        raise ValueError("fig111 requires an Axes in 'ax2' for the ranking chart")

    names = data.iloc[:, 0]
    scores = data[["Full", "Test"]]

    # One bar group per evaluation setting (Full / Test), one bar per
    # recommender. Selecting the score columns by name keeps them numeric
    # instead of going through a mixed-type object array.
    bars = pd.DataFrame(
        scores.to_numpy().T,
        index=["Full", "Test"],
        columns=names.to_numpy(),
    )
    bars.plot(kind="bar", color=rec_colors, legend=None, ax=ax1, ylabel=title)

    # Rank within each column; rank 1 is the highest score.
    ranks = scores.rank(ascending=False).to_numpy()

    # One line per recommender from its Full rank (x=0) to its Test rank (x=1).
    # zip() stops at the shorter input, so recommenders beyond the length of
    # rec_colors are not drawn.
    artists = [
        plt.Line2D(xdata=[0, 1], ydata=pair, lw=1, color=color, marker="o")
        for pair, color in zip(ranks, rec_colors)
    ]
    for artist in artists:
        ax2.add_artist(artist)

    # Ranks lie in [1, number of rows]; pad slightly so the end markers are
    # not clipped.
    ax2.set_ybound(0.8, len(data) + 0.2)
    for side in ("top", "right", "bottom", "left"):
        ax2.spines[side].set_visible(False)
    # Put rank 1 at the top.
    ax2.invert_yaxis()

    # Address ax2 explicitly: pyplot's "current axes" is not necessarily ax2
    # when this function is called for panels of a larger grid.
    ax2.set_ylabel("System ranking")
    if legend:
        ax2.legend(
            artists,
            list(names),
            bbox_to_anchor=(1.05, 1),
            loc="upper left",
            # title="Recommender",
            title_fontsize="xx-large",
        )


# Figure 1: metric bars on the first axes, system ranking on the second.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex="row")

fig111(
    data=fig1,
    legend=True,
    title="Precision@10",
    ax1=ax1,
    ax2=ax2,
)
fig.tight_layout()
plt.show()

In [13]:
datasets = ["ml1m", "ml100k"]
splits = ["", "-male", "-female"]

# No trailing separator here: os.path.join inserts exactly one.
results_dir = r"C:\Projects\RecSys2020\results"

# Each results file holds four metric tables. Per table:
# (1-based file line with the metric name, pandas `header` row of the table).
metric_blocks = [(5, 4), (16, 14), (27, 24), (38, 34)]

# One figure per dataset/split: a row per metric, bars left and ranking right.
for dataset in datasets:
    for split in splits:
        file = os.path.join(results_dir, "figure101." + dataset + split + ".txt")
        print(file)

        fig, axes = plt.subplots(len(metric_blocks), 2, sharex="row")

        for row, (label_line, header_row) in enumerate(metric_blocks):
            # getline() keeps the trailing newline; strip it so the axis
            # label does not end in an empty line.
            metric = linecache.getline(file, label_line).strip()
            # A separate name keeps the notebook-level `fig1` intact, so the
            # figure-1 cell can be re-run after this one.
            metric_table = pd.read_table(file, sep="\t", header=header_row, nrows=8)
            fig111(
                data=metric_table,
                title=metric,
                ax1=axes[row, 0],
                ax2=axes[row, 1],
            )

        fig.suptitle(dataset + split)
        fig.set_size_inches(5, 12)
        fig.tight_layout()

plt.show()

C:\Projects\RecSys2020\results\\figure101.ml1m.txt
C:\Projects\RecSys2020\results\\figure101.ml1m-male.txt
C:\Projects\RecSys2020\results\\figure101.ml1m-female.txt
C:\Projects\RecSys2020\results\\figure101.ml100k.txt
C:\Projects\RecSys2020\results\\figure101.ml100k-male.txt
C:\Projects\RecSys2020\results\\figure101.ml100k-female.txt


In [None]:
datasets = ["ml1m", "ml100k"]
splits = ["", "-male", "-female"]

# No trailing separator here: os.path.join inserts exactly one.
results_dir = r"C:\Projects\RecSys2020\results"

# Each results file holds four metric tables. Per table:
# (1-based file line with the metric name, pandas `header` row of the table).
metric_blocks = [(5, 4), (16, 14), (27, 24), (38, 34)]

# One figure per dataset: a row per metric, and for every split a pair of
# columns (bars, then ranking).
for dataset in datasets:
    fig, axes = plt.subplots(len(metric_blocks), 2 * len(splits), sharex="row")

    for split_index, split in enumerate(splits):
        bar_col = 2 * split_index
        rank_col = bar_col + 1

        file = os.path.join(results_dir, "figure101." + dataset + split + ".txt")
        print(file)

        axes[0, bar_col].set_title(dataset + split)

        for row, (label_line, header_row) in enumerate(metric_blocks):
            # getline() keeps the trailing newline; strip it so the axis
            # label does not end in an empty line.
            metric = linecache.getline(file, label_line).strip()
            # A separate name keeps the notebook-level `fig1` intact, so the
            # figure-1 cell can be re-run after this one.
            metric_table = pd.read_table(file, sep="\t", header=header_row, nrows=8)
            fig111(
                data=metric_table,
                title=metric,
                ax1=axes[row, bar_col],
                ax2=axes[row, rank_col],
            )

    # Size and lay out once per figure, after all of its panels are drawn.
    # plt.suptitle(dataset + split)
    fig.set_size_inches(12, 12)
    fig.tight_layout()
plt.show()

C:\Projects\RecSys2020\results\\figure101.ml1m.txt
C:\Projects\RecSys2020\results\\figure101.ml1m-male.txt
C:\Projects\RecSys2020\results\\figure101.ml1m-female.txt
C:\Projects\RecSys2020\results\\figure101.ml100k.txt
C:\Projects\RecSys2020\results\\figure101.ml100k-male.txt
C:\Projects\RecSys2020\results\\figure101.ml100k-female.txt
