In [1]:
import linecache
import os
import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats as stats
import seaborn as sns

pp = pprint.PrettyPrinter(indent=4)

print("Using pandas %s version" % pd.__version__)
print("Using seaborn %s version" % sns.__version__)
print("Using scipy %s version" % scipy.__version__)

Using pandas 1.2.0 version
Using seaborn 0.11.1 version
Using scipy 1.6.0 version


In [71]:
imfFull = "#548235"
imfTest = "#99C979"

knnFull = "#C55A11"
knnTest = "#ff5811"
normalizedKnnFull = "#4A76C6"
normalizedKnnTest = "#19C3FF"

average = "#FFC000"
popularity = "#F18F8F"
random = "#BFBFBF"

rec_colors_dict = {
    "iMF (full)": "#548235",
    "iMF (test)": "#99C979",
    "kNN (full/test)": "#C55A11",
    "kNN (full)": "#C55A11",
    "kNN (test)": "#ff5811",
    "Normalized kNN (full)": "#4A76C6",
    "Normalized kNN (test)": "#19C3FF",
    "Average Rating": "#FFC000",
    "Popularity": "#F18F8F",
    "Random": "#BFBFBF",
}
# colors = ('red','blue','green','magenta', 'black', 'purple', 'grey', 'orange')
rec_colors = (
    imfFull,
    imfTest,
    knnFull,
    knnTest,
    normalizedKnnFull,
    normalizedKnnTest,
    average,
    popularity,
    random,
)

Unnamed: 0,Recommender,Full,Test
0,iMF (full),0.22949,0.511424
1,iMF (test),0.101401,0.528172
2,kNN (full/test),0.190464,0.510805
3,Normalized kNN (full),0.11604,0.503576
4,Normalized kNN (test),0.048652,0.518202


In [75]:
def fig111(data=None, legend=False, title="", ax1=None, ax2=None):
    pp.pprint(data)
    f1 = np.array(data)

    df1 = pd.DataFrame(data=f1.T[1:], columns=f1.T[0], index=["Full", "Test"])
    ax11 = df1.plot(
        kind="bar",
        color=rec_colors,
        legend=None,
        ax=ax1,
        ylabel=title,
    )

    linedata = np.array(
        [
            fig1.Recommender,
            data.Full.rank(ascending=False),
            data.Test.rank(ascending=False),
        ]
    ).T
    artists = []
    for row in linedata:
        pp.pprint([row[0], row[1:], color, rec_colors_dict[row[0]]])
        artists.append(
            plt.Line2D(
                xdata=[0, 1],
                ydata=row[1:],
                lw=1,
                color=rec_colors_dict[row[0]],
                marker="o",
            )
        )

    for artist in artists:
        a = ax2.add_artist(artist)

    ax2.set_ybound([0.8, len(artists) + 0.2])
    ax2.spines["top"].set_visible(False)
    ax2.spines["right"].set_visible(False)
    ax2.spines["bottom"].set_visible(False)
    ax2.spines["left"].set_visible(False)
    ax2.invert_yaxis()

    plt.ylabel("System ranking")
    if legend:
        plt.legend(
            artists,
            data.Recommender,
            bbox_to_anchor=(1.05, 1),
            loc="upper left",
            # title="Recommender",
            title_fontsize="xx-large",
        )


fig1 = pd.read_table(
    r"C:\Projects\RecSys2020\results\ShuffleSplit\figure1.txt", sep="\t", header=4
)

fig1.head()
fig, (ax1, ax2) = plt.subplots(1, 2, sharex="row")

fig111(data=fig1, legend=True, title="Precision@10", ax1=ax1, ax2=ax2)
fig.tight_layout()
plt.show()

             Recommender      Full      Test
0             iMF (full)  0.229490  0.511424
1             iMF (test)  0.101401  0.528172
2        kNN (full/test)  0.190464  0.510805
3  Normalized kNN (full)  0.116040  0.503576
4  Normalized kNN (test)  0.048652  0.518202
5         Average Rating  0.024510  0.515205
6             Popularity  0.115189  0.486745
7                 Random  0.005381  0.444354
['iMF (full)', array([1.0, 4.0], dtype=object), '#BFBFBF', '#548235']
['iMF (test)', array([5.0, 1.0], dtype=object), '#BFBFBF', '#99C979']
['kNN (full/test)', array([2.0, 5.0], dtype=object), '#BFBFBF', '#C55A11']
['Normalized kNN (full)', array([3.0, 6.0], dtype=object), '#BFBFBF', '#4A76C6']
['Normalized kNN (test)', array([6.0, 2.0], dtype=object), '#BFBFBF', '#19C3FF']
['Average Rating', array([7.0, 3.0], dtype=object), '#BFBFBF', '#FFC000']
['Popularity', array([4.0, 7.0], dtype=object), '#BFBFBF', '#F18F8F']
['Random', array([8.0, 8.0], dtype=object), '#BFBFBF', '#BFBFBF']


In [6]:
datasets = ["ml1m", "ml100k"]
splits = ["", "-male", "-female"]
for dataset, split in [(x, y) for x in datasets for y in splits]:
    file = os.path.join(
        r"C:\Projects\RecSys2020\results", "figure101." + dataset + split + ".txt"
    )
    print(file)

    fig, axes = plt.subplots(4, 2, sharex="row")

    metric = linecache.getline(file, 5)
    fig1 = pd.read_table(file, sep="\t", header=4, nrows=8)
    pp.pprint(fig1)
    fig111(data=fig1, title=metric, ax1=axes[0, 0], ax2=axes[0, 1])

    metric = linecache.getline(file, 16)
    fig1 = pd.read_table(file, sep="\t", header=14, nrows=8)
    fig111(data=fig1, title=metric, ax1=axes[1, 0], ax2=axes[1, 1])

    metric = linecache.getline(file, 27)
    fig1 = pd.read_table(file, sep="\t", header=24, nrows=8)
    fig111(data=fig1, title=metric, ax1=axes[2, 0], ax2=axes[2, 1])

    metric = linecache.getline(file, 38)
    fig1 = pd.read_table(file, sep="\t", header=34, nrows=8)
    fig111(data=fig1, title=metric, ax1=axes[3, 0], ax2=axes[3, 1])
    plt.suptitle(dataset + split)
    fig.set_size_inches(5, 12)
    fig.tight_layout()

plt.show()

C:\Projects\RecSys2020\results\figure101.ml1m.txt
             Recommender      Full      Test
0             iMF (full)  0.354255  0.659066
1             iMF (test)  0.178182  0.684209
2             kNN (full)  0.314387  0.658278
3             kNN (test)  0.311377  0.659278
4  Normalized kNN (full)  0.207798  0.642113
5  Normalized kNN (test)  0.079613  0.673354
6         Average Rating  0.043305  0.665235
7             Popularity  0.193507  0.623808


ValueError: Empty data passed with indices specified.

In [77]:
datasets = ["ml1m", "ml100k"]
splits = ["", "-male", "-female"]

for dataset in datasets:
    fig, axes = plt.subplots(4, 6, sharex="row")
    i = 0
    for split in splits:

        file = os.path.join(
            r"C:\Projects\RecSys2020\results", "figure101." + dataset + split + ".txt"
        )
        print(file)
        lines_with_metrics = [
            (i, linecache.getline(file, i + 1))
            for i, line in enumerate(open(file))
            if line.endswith("@10\n")
        ]
        print(lines_with_metrics)

        params = dict(filepath_or_buffer=file, sep="\t", nrows=9)
        axes[0, 0 + i].set_title(dataset + split)

        for j, metric in enumerate(lines_with_metrics):
            metric_name = metric[1]
            metric_line = metric[0]
            fig1 = pd.read_table(header=metric_line - j, **params)
            pp.pprint([dataset, split, metric_name, metric_line, fig1])
            fig111(data=fig1, title=metric_name, ax1=axes[j, 0 + i], ax2=axes[j, 1 + i])

        # plt.suptitle(dataset + split)
        fig.set_size_inches(22, 12)
        fig.tight_layout()
        i = i + 2
plt.show()

C:\Projects\RecSys2020\results\figure101.ml1m.txt
[(4, 'P@10\n'), (16, 'Recall@10\n'), (28, 'nDCG@10\n'), (40, 'FScore@10\n')]
[   'ml1m',
    '',
    'P@10\n',
    4,
                 Recommender      Full      Test
0             iMF (full)  0.354255  0.659066
1             iMF (test)  0.178182  0.684209
2             kNN (full)  0.314387  0.658278
3             kNN (test)  0.311377  0.659278
4  Normalized kNN (full)  0.207798  0.642113
5  Normalized kNN (test)  0.079613  0.673354
6         Average Rating  0.043305  0.665235
7             Popularity  0.193507  0.623808
8                 Random  0.010189  0.550010]
             Recommender      Full      Test
0             iMF (full)  0.354255  0.659066
1             iMF (test)  0.178182  0.684209
2             kNN (full)  0.314387  0.658278
3             kNN (test)  0.311377  0.659278
4  Normalized kNN (full)  0.207798  0.642113
5  Normalized kNN (test)  0.079613  0.673354
6         Average Rating  0.043305  0.665235
7             Pop

KeyError: 'Recall@10'

In [66]:
data = fig1
f1 = np.array(data)

df1 = pd.DataFrame(data=f1.T[1:], columns=f1.T[0], index=["Full", "Test"])

linedata = np.array(
    [fig1.Recommender, data.Full.rank(ascending=False), data.Test.rank(ascending=False)]
).T
artists = []
for row, color in zip(linedata, rec_colors):
    pp.pprint([row[0], row[1:], color, rec_colors_dict[row[0]]])

['iMF (full)', array([1.0, 3.0], dtype=object), '#548235', '#548235']
['iMF (test)', array([4.0, 1.0], dtype=object), '#99C979', '#99C979']
['kNN (full)', array([2.0, 5.0], dtype=object), '#C55A11', '#C55A11']
['kNN (test)', array([3.0, 4.0], dtype=object), '#ff5811', '#ff5811']
['Normalized kNN (full)', array([5.0, 7.0], dtype=object), '#4A76C6', '#4A76C6']
['Normalized kNN (test)', array([7.0, 2.0], dtype=object), '#19C3FF', '#19C3FF']
['Average Rating', array([8.0, 6.0], dtype=object), '#FFC000', '#FFC000']
['Popularity', array([6.0, 8.0], dtype=object), '#F18F8F', '#F18F8F']
['Random', array([9.0, 9.0], dtype=object), '#BFBFBF', '#BFBFBF']
