# Fair Analysis

An analysis of the fairness properties of various recommendation systems.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Select seaborn's "whitegrid" style as the theme for the plots drawn below.
sns.set_theme(style="whitegrid")

In [None]:
# Declarations.
def plot_scores_paper(dataset):
    """Plot the "Scores" vs. "Rounds" curve of every file in ``scores_files``.

    For each file name in the module-level ``scores_files`` list, reads
    ``<dataset>/<file name>`` as CSV and draws its "Scores" column against
    its "Rounds" column on the current matplotlib figure. No title is set.

    Args:
        dataset: Directory that contains the score CSV files.
    """
    if not scores_files:
        # Nothing to draw; leave the current figure untouched.
        return

    for sf in scores_files:
        df = pd.read_csv(f"{dataset}/{sf}")
        # Legend label: second "_"-separated token of the file stem,
        # e.g. "sc_node2vec.csv" -> "node2vec".
        name = sf.split(".")[0].split("_")[1]
        plt.plot(df["Rounds"], df["Scores"], label=name, linestyle=':',
                 marker="*", linewidth=3, markersize=12)

    # Axis styling and the legend do not depend on the individual file, so
    # they are applied once after all curves have been drawn.
    plt.ylabel('Scores', fontsize=20)
    plt.xlabel('Rounds', fontsize=20)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    # Anchor the legend below the axes, in a single frameless row.
    plt.legend(bbox_to_anchor=(0.5, -0.5), loc='lower center', ncol=5,
               fontsize=18, frameon=False)

def plot_unique_targets_paper(dataset, rounds=11):
    """Plot the number of unique edge targets per file in ``scores_files``.

    For each file name in the module-level ``scores_files`` list, reads
    ``<dataset>/<file name>`` as CSV, parses the "Edges" entries with index
    labels ``1 .. rounds - 1`` and collects the distinct second elements of
    the parsed pairs. The counts are drawn as a seaborn bar plot with the
    count on the x axis and the short policy name on the y axis.

    Args:
        dataset: Directory that contains the score CSV files.
        rounds: Exclusive upper bound of the "Edges" row labels that are
            read (row 0 is always skipped). Defaults to 11.
    """
    unique_targets_num = dict()
    for sf in scores_files:
        df = pd.read_csv(f"{dataset}/{sf}")
        targets = set()
        for r in range(1, rounds):
            # NOTE(review): eval() executes whatever text the CSV cell holds.
            # Acceptable only while these files are produced by this project;
            # consider ast.literal_eval if the cells are plain literals.
            edges = eval(df["Edges"][r])
            targets.update(target for _, target in edges)
        # Short label: second "_"-separated token of the file stem.
        name = sf.split(".")[0].split("_")[1]
        unique_targets_num[name] = len(targets)

    # Two unnamed columns: 0 holds the policy name, 1 the unique-target count.
    b = pd.DataFrame(unique_targets_num.items())
    sns.barplot(x=b[1], y=b[0])
    plt.xlabel("#Unique Targets", fontsize=20)
    plt.xticks(fontsize=18)
    plt.yticks(ticks=None, fontsize=20)
    # NOTE(review): the bars are drawn without explicit labels, so this legend
    # is presumably empty - confirm whether the call is still needed.
    plt.legend(bbox_to_anchor=(0.5, -0.5), loc='lower center', ncol=5,
               fontsize=18, frameon=False)


## Simple recommenders

Evaluate the fairness of simple recommenders.

In [None]:
# Score files of the simple recommenders; read by the plotting helpers
# through the module-level name ``scores_files``.
scores_files = [
    "sc_adamic_adar.csv",
    "sc_jaccard_coefficient.csv",
    "sc_resource_allocation.csv",
    "sc_preferential_attachment.csv",
    "sc_node2vec.csv",
]

In [None]:
# Plot "Scores" against "Rounds" for every file in scores_files, read from the "books" directory.
plot_scores_paper("books")

In [None]:
# Plot "Scores" against "Rounds" for every file in scores_files, read from the "blogs" directory.
plot_scores_paper("blogs")

In [None]:
# Plot "Scores" against "Rounds" for every file in scores_files, read from the "dblp_course" directory.
plot_scores_paper("dblp_course")

In [None]:
# Plot "Scores" against "Rounds" for every file in scores_files, read from the "twitter" directory.
plot_scores_paper("twitter")

## Fair recommenders

Evaluate the fairness of the fairness-aware recommenders and compare them with baselines such as node2vec and random policies.

### Evolution of the network's fairness across rounds

In [None]:
# Score files of the fair recommenders and their node2vec-based baselines;
# this rebinds the module-level ``scores_files`` used by the plotting helpers.
scores_files = [
    "sc_fair.csv",
    "sc_dyadic_fair.csv",
    "sc_node2vec.csv",
    "sc_hybrid_node2vec.csv",
]

In [None]:
# Plot "Scores" against "Rounds" for every file in scores_files, read from the "books" directory.
plot_scores_paper("books")

In [None]:
# Plot "Scores" against "Rounds" for every file in scores_files, read from the "blogs" directory.
plot_scores_paper("blogs")

In [None]:
# Plot "Scores" against "Rounds" for every file in scores_files, read from the "dblp_course" directory.
plot_scores_paper("dblp_course")

In [None]:
# Plot "Scores" against "Rounds" for every file in scores_files, read from the "twitter" directory.
plot_scores_paper("twitter")

### Evolution of the red personalized PageRank distribution

In [None]:
# File-name prefixes of the per-round personalized result files, one entry
# per algorithm.
personalized_files = [
    "sc_personalized_dyadic_fair.csv_round_",
    "sc_personalized_fair.csv_round_",
    "sc_personalized_node2vec.csv_round_",
    "sc_personalized_fairwalk.csv_round_",
    "sc_personalized_hybrid_node2vec.csv_round_",
]

In [None]:
def _red_blue_distance(frame):
    """Return the Wasserstein distance between the two ``Cat`` groups.

    Compares the "Red Personalized Pagerank" values of the rows with
    ``Cat == 1`` against those of the rows with ``Cat == 0``.
    """
    # Imported locally so the helper works even when the notebook's import
    # cell does not bring this name into scope.
    from scipy.stats import wasserstein_distance

    scores = frame["Red Personalized Pagerank"]
    return wasserstein_distance(scores[frame["Cat"] == 1],
                                scores[frame["Cat"] == 0])


def plot_personalized_dist(dataset, algorithm, rounds=10):
    """Plot per-round "Red Personalized Pagerank" distributions by category.

    Reads ``<dataset>/out_community.txt`` (columns named "Node" and "Cat")
    and the tab-separated files
    ``<dataset>/sc_personalized_<algorithm>.csv_round_<r>.csv`` for
    ``r = -1, 0, ..., rounds - 1``. Each round file is inner-joined with the
    community table on "Node", and all rounds are drawn as one split violin
    plot (x: round, hue: "Cat") on a new figure.

    Args:
        dataset: Directory that contains the input files.
        algorithm: Algorithm name used to build the round file names.
        rounds: Number of round files read in addition to the ``-1`` file.
            Defaults to 10.

    Returns:
        A list with one Wasserstein distance per round (first entry for
        round ``-1``), each measured between the ``Cat == 1`` and
        ``Cat == 0`` rows of that round.
    """
    file_prefix = f"sc_personalized_{algorithm}.csv_round_"
    community_path = f"{dataset}/out_community.txt"
    try:
        cat = pd.read_csv(community_path, sep=" ", names=["Node", "Cat"], header=0)
    except Exception:
        # Best-effort fallback: retry as tab-separated when the
        # space-separated attempt fails. A genuine I/O problem is raised
        # again by this second read.
        cat = pd.read_csv(community_path, sep="\t", names=["Node", "Cat"], header=0)

    frames = []
    distribution_distance = []
    for round_idx in [-1, *range(rounds)]:
        round_df = pd.read_csv(f"{dataset}/{file_prefix}{round_idx}.csv", sep="\t")
        round_df = round_df.merge(cat, how="inner", on="Node")
        distribution_distance.append(_red_blue_distance(round_df))
        round_df["round"] = round_idx
        frames.append(round_df)
    # One concat instead of repeated DataFrame.append, which pandas 2.0 removed.
    df = pd.concat(frames)

    fig = plt.figure(figsize=(15, 8))
    fig.suptitle(f"Dataset: {dataset} | Algorithm: {algorithm}")
    sns.violinplot(x="round", y="Red Personalized Pagerank", hue="Cat", data=df, split=True)

    return distribution_distance

In [None]:
# Per-round distance series of each algorithm on the "books" dataset,
# gathered into a DataFrame with one column per algorithm.
dataset = "books"
dist_dist = pd.DataFrame.from_dict({
    algorithm: plot_personalized_dist(dataset, algorithm)
    for algorithm in ("node2vec", "hybrid_node2vec", "fair", "dyadic_fair")
})

In [None]:
# Line plot of the distance series stored in dist_dist.
sns.lineplot(data = dist_dist)

In [None]:
# Per-round distance series of each algorithm on the "blogs" dataset,
# gathered into a DataFrame with one column per algorithm.
dataset = "blogs"
dist_dist = pd.DataFrame.from_dict({
    algorithm: plot_personalized_dist(dataset, algorithm)
    for algorithm in ("node2vec", "hybrid_node2vec", "fair", "dyadic_fair")
})

In [None]:
# Line plot of the distance series stored in dist_dist.
sns.lineplot(data = dist_dist)

In [None]:
# Per-round distance series of each algorithm on the "twitter" dataset,
# gathered into a DataFrame with one column per algorithm.
dataset = "twitter"
dist_dist = pd.DataFrame.from_dict({
    algorithm: plot_personalized_dist(dataset, algorithm)
    for algorithm in ("node2vec", "hybrid_node2vec", "fair", "dyadic_fair")
})

In [None]:
# Line plot of the distance series stored in dist_dist.
sns.lineplot(data = dist_dist)

In [None]:
# Per-round distance series of each algorithm on the "dblp_course" dataset,
# gathered into a DataFrame with one column per algorithm.
dataset = "dblp_course"
dist_dist = pd.DataFrame.from_dict({
    algorithm: plot_personalized_dist(dataset, algorithm)
    for algorithm in ("node2vec", "hybrid_node2vec", "fair", "dyadic_fair")
})

In [None]:
# Line plot of the distance series stored in dist_dist.
sns.lineplot(data = dist_dist)

### Pagerank evolution on ranking through rounds.