In [1]:
import birankpy
from birank_bias_regularization import *
from birank_reg_utils import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import scipy.sparse as spa
from networkx.algorithms import bipartite
import scipy.io
import random
from scipy import stats
%matplotlib inline

# Seed every global RNG this notebook has in scope so Restart & Run All
# reproduces the same graphs. Seeding NumPy alone is not enough when a
# generator draws from the stdlib `random` module (networkx does so by
# default when no explicit seed is passed).
SEED = 420
np.random.seed(SEED)
random.seed(SEED)

### Fairness metric: BiRank vs. BiRank with regularization on synthetic bipartite graphs

In [2]:
# Experiment: for each synthetic bipartite graph, run plain BiRank and the
# regularized variant, compute the fairness metric for both, and collect one
# result row per configuration.

# Each entry is (graph type, number of users, number of products).
graph_configs = [
    ("Random Graph", 100, 50),
    ("Random Graph", 1000, 100),
    ("Random Graph", 10000, 500),
    ("Random Graph", 100, 500),
    ("Power-Law Graph", 100, 50),
    ("Power-Law Graph", 1000, 100),
    ("Power-Law Graph", 10000, 500),
    ("Power-Law Graph", 100, 500),
]

# Hyperparameters, named once so they can be tuned in a single place.
RANDOM_GRAPH_DENSITY = 0.1
POWER_LAW_USER_EXPONENT = 1.8
ALPHA = 0.85
BETA = 0.85
TOL = 1e-4
# NOTE(review): the baseline and the regularized run use different iteration
# caps (100 vs 1000) - confirm this asymmetry is deliberate.
BIRANK_MAX_ITER = 100
REG_MAX_ITER = 1000
REG_GAMMA = 1e-9
# Third positional argument of calculate_disparity; presumably the activity
# threshold separating high- from low-activity users - verify against the helper.
ACTIVITY_THRESHOLD = 0.01


def _index_of_first_occurrence(nodes):
    """Map each node to the position of its first occurrence in ``nodes``.

    Gives the same answer as ``nodes.index(node)`` but in O(1) per lookup
    after a single O(n) pass.
    """
    positions = {}
    for position, node in enumerate(nodes):
        positions.setdefault(node, position)
    return positions


results = []

for graph_type, num_users, num_products in graph_configs:

    if graph_type == "Random Graph":
        B, users, products = generate_random_bipartite_graph(
            num_users, num_products, density=RANDOM_GRAPH_DENSITY
        )
    elif graph_type == "Power-Law Graph":
        B, users, products = generate_power_law_bipartite_graph(
            num_users, num_products, user_exponent=POWER_LAW_USER_EXPONENT
        )
    else:
        raise ValueError("Unknown graph type")

    # Precompute node -> row/column lookups once per graph. The previous
    # per-edge `in` / `.index()` calls on lists were linear scans, which made
    # the matrix construction quadratic on the larger configurations.
    user_index = _index_of_first_occurrence(users)
    product_index = _index_of_first_occurrence(products)

    # Empty user x product adjacency matrix (rows: users, columns: products).
    adj_matrix = spa.dok_matrix((len(users), len(products)), dtype=np.int8)

    # Populate the adjacency matrix from the edges of B. An undirected graph
    # may report an edge as (product, user), so both orientations are
    # accepted instead of silently dropping the reversed ones.
    for first, second in B.edges():
        if first in user_index and second in product_index:
            u_idx, p_idx = user_index[first], product_index[second]
        elif second in user_index and first in product_index:
            u_idx, p_idx = user_index[second], product_index[first]
        else:
            continue  # edge does not connect a listed user to a listed product
        adj_matrix[u_idx, p_idx] = 1  # Assuming all reviews are weighted equally

    # Convert to a CSR (Compressed Sparse Row) matrix
    adj_matrix_csr = adj_matrix.tocsr()

    # Baseline BiRank scores.
    user_scores, product_scores = birankpy.birank(
        adj_matrix_csr,
        normalizer='BiRank',
        alpha=ALPHA,
        beta=BETA,
        max_iter=BIRANK_MAX_ITER,
        tol=TOL,
    )

    # BiRank with the regularization term.
    user_scores_reg, product_scores_reg = birank_with_regularization(
        adj_matrix_csr,
        normalizer='BiRank',
        gamma=REG_GAMMA,
        alpha=ALPHA,
        beta=BETA,
        max_iter=REG_MAX_ITER,
        tol=TOL
    )

    # Each user's share of the total user-side degree, in the order of `users`.
    total_degree = sum(B.degree(node) for node in users)
    relative_user_activity = {node: B.degree(node) / total_degree for node in users}
    relative_user_activity_array = np.array([relative_user_activity[node] for node in users])

    # Fairness metric for the baseline scores.
    high_activity_avg_rank, low_activity_avg_rank = calculate_disparity(
        user_scores, relative_user_activity_array, ACTIVITY_THRESHOLD
    )
    gini_coefficient_birank = calculate_gini_coefficient(product_scores)
    metric_score_birank = fairness_metric(
        high_activity_avg_rank, low_activity_avg_rank, gini_coefficient_birank
    )

    # Fairness metric for the regularized scores.
    high_activity_avg_rank_reg, low_activity_avg_rank_reg = calculate_disparity(
        user_scores_reg, relative_user_activity_array, ACTIVITY_THRESHOLD
    )
    gini_coefficient_reg = calculate_gini_coefficient(product_scores_reg)
    metric_score_reg = fairness_metric(
        high_activity_avg_rank_reg, low_activity_avg_rank_reg, gini_coefficient_reg
    )

    results.append((f"{graph_type} ({num_users},{num_products})", metric_score_birank, metric_score_reg))

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [4]:
# One row per graph configuration: its label, the fairness metric for plain
# BiRank, and the fairness metric for BiRank with regularization.
result_columns = ["Graph Type / (#Users, #Products)", "BiRank", "BiRank with Reg"]
df_results = pd.DataFrame(data=results, columns=result_columns)

# Render the table as LaTeX source (row index omitted) for the write-up.
latex_table = df_results.to_latex(
    index=False,
    caption="Fairness Metrics for BiRank and BiRank with Regularization",
    label="tab:fairness_metrics",
)

  latex_table = df_results.to_latex(index=False, caption="Fairness Metrics for BiRank and BiRank with Regularization", label="tab:fairness_metrics")
