# US News World Report Fairness
## Equity of Attention

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np

In [3]:
import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout, to_agraph
import pygraphviz as pgv

from IPython.display import Image
def draw(A):
    return Image(A.draw(format='png', prog='dot'))

In [4]:
import sys
from pathlib import Path
home = str(Path.home())

sys.path.insert(0,"%s/rankability_toolbox_dev"%home)
sys.path.insert(0,"%s/RPLib"%home)
sys.path.insert(0,"%s/fairness_analysis"%home)

In [5]:
import pyrankability

In [6]:
import pyrplib

In [7]:
import equity_of_attention.common

## Problem statement
Consider that you have three years of US news and world report rankings. You would like to be achieve individual fairness as laid out in the equity of attention paper. 

You want to present this improved and more fair approach to students and parents who have their own ranking according to the columns of interest to them.

The equity of fairness can take a "query" ranking from the student or parent and then return a more fair ranking, but which query ranking should we supply? We've already seen that there are multiple optimal solutions.

I am going to show how the equity of attention gives different answers for the nearest and farthest from the centroid.

In [8]:
years = [2002,2003,2004]
years

[2002, 2003, 2004]

In [9]:
data = {}
index = None
for year in years:
    data[year] = pd.read_excel('data/USNews liberal arts 2002-2016 (1).xls',sheet_name=str(year))
    data[year]['School Name'] = data[year]['School Name'].str.replace('!','')
    if 'State' in data[year].columns:
        data[year]['State'] = data[year]['State'].str.replace('\(','').str.replace('\)','')
    df = pd.DataFrame(list(data[year]['SAT/ACT 25th-75th Percentile'].str.split('-')),columns=['SAT/ACT 25th Percentile','SAT/ACT 75th Percentile'])
    data[year] = pd.concat([data[year],df],axis=1)
    data[year] = data[year].infer_objects()
    data[year]['SAT/ACT 25th-75th Percentile Mean'] = (data[year]['SAT/ACT 25th Percentile'].astype(int)+data[year]['SAT/ACT 75th Percentile'].astype(int))/2
    data[year] = data[year].set_index('School Name')
    if index is None:
        index = set(data[year].index)
    else:
        index = index.intersection(data[year].index)
colleges = list(index)
for year in years:
    data[year] = data[year].loc[colleges]

  import sys


### Fix the order of the years so that we can create a relevance matrix

$(|colleges| - rank+1)/|colleges|$

In [10]:
series_item_relevances = []
for year in years:
    order = np.argsort(data[year]['Final Rank'].values)
    data[year]['Final Rank Fixed'] = 0
    data[year].loc[:,'Final Rank Fixed'].iloc[order] = np.arange(1,len(order)+1)
    rank = np.array(list(data[year]['Final Rank Fixed'].values))
    ix = np.where(rank == 1)[0]
    print(data[year].index[ix])
    series_item_relevances.append((len(colleges) - rank+1)/len(colleges))

Index(['Amherst College'], dtype='object', name='School Name')
Index(['Amherst College'], dtype='object', name='School Name')
Index(['Williams College'], dtype='object', name='School Name')


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [11]:
series_item_relevances_df = pd.DataFrame(series_item_relevances,columns=colleges,index=years).T.dropna()
series_item_relevances_df

Unnamed: 0,2002,2003,2004
Skidmore College,0.18,0.16,0.14
Wabash College,0.04,0.1,0.04
Amherst College,1.0,1.0,0.98
Claremont McKenna College,0.64,0.76,0.76
Sarah Lawrence College,0.22,0.04,0.06
Dickinson College,0.14,0.14,0.18
Occidental College,0.1,0.06,0.2
Franklin and Marshall College,0.3,0.3,0.26
Union College,0.28,0.26,0.28
Lafayette College,0.36,0.36,0.4


I want to see if any team moves outside the top 10

In [12]:
n = 10
(-1*series_item_relevances_df.iloc[:,0]).sort_values()[:n].index

Index(['Amherst College', 'Swarthmore College', 'Williams College',
       'Wellesley College', 'Haverford College', 'Pomona College',
       'Carleton College', 'Bowdoin College', 'Middlebury College',
       'Davidson College'],
      dtype='object')

In [13]:
(-1*series_item_relevances_df.iloc[:,1]).sort_values()[:n].index

Index(['Amherst College', 'Williams College', 'Swarthmore College',
       'Wellesley College', 'Carleton College', 'Pomona College',
       'Bowdoin College', 'Middlebury College', 'Davidson College',
       'Haverford College'],
      dtype='object')

In [14]:
(-1*series_item_relevances_df.iloc[:,2]).sort_values()[:n].index

Index(['Williams College', 'Amherst College', 'Swarthmore College',
       'Wellesley College', 'Carleton College', 'Pomona College',
       'Middlebury College', 'Davidson College', 'Haverford College',
       'Bowdoin College'],
      dtype='object')

In [15]:
s1 = set((-1*series_item_relevances_df.iloc[:,0]).sort_values()[:n].index)
s2 = set((-1*series_item_relevances_df.iloc[:,1]).sort_values()[:n].index)
s3 = set((-1*series_item_relevances_df.iloc[:,2]).sort_values()[:n].index)
len(s1.intersection(s2))#.intersection(s3))

10

In [16]:
len(s1.union(s2).union(s3))

10

In [17]:
def solutions2rankings(solutions,colleges):
    rankings = []
    for i in range(len(solutions)):
        rankings.append(pd.DataFrame(solutions[i],index=colleges).apply(lambda x: np.where(x == 1)[0][0],axis=1))
    return rankings

In [18]:
series_item_relevances[0]

array([0.18, 0.04, 1.  , 0.64, 0.22, 0.14, 0.1 , 0.3 , 0.28, 0.36, 0.06,
       0.56, 0.72, 0.88, 0.74, 0.68, 0.98, 0.54, 0.86, 0.7 , 0.16, 0.08,
       0.58, 0.02, 0.76, 0.82, 0.24, 0.78, 0.92, 0.4 , 0.32, 0.62, 0.26,
       0.48, 0.12, 0.46, 0.5 , 0.44, 0.34, 0.84, 0.42, 0.9 , 0.2 , 0.8 ,
       0.52, 0.66, 0.96, 0.38, 0.94, 0.6 ])

## Sanity check

I rank equity of fairness one iteration on each relevance matrix. For this, we should get back our exact ranking, and we do.

In [19]:
single_solutions = []
single_baseline_unfairness = []
single_unfairness = []
single_rankings = []
for i in range(len(series_item_relevances)):
    solutions, baseline_unfairness, unfairness = equity_of_attention.common.model_3([series_item_relevances[i]],item_names=colleges)
    single_solutions.append(solutions)
    single_baseline_unfairness.append(baseline_unfairness)
    single_unfairness.append(unfairness)
    single_rankings.append(solutions2rankings(solutions,colleges))

Using license file /home/jupyter-pander14/gurobi.lic
Academic license - for non-commercial use only - expires 2021-04-12
Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (linux64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 101 rows, 2500 columns and 5500 nonzeros
Model fingerprint: 0xee9159d8
Variable types: 0 continuous, 2500 integer (2500 binary)
Coefficient statistics:
  Matrix range     [4e-03, 1e+00]
  Objective range  [3e-03, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 4e+00]
Presolve removed 0 rows and 64 columns
Presolve time: 0.01s
Presolved: 101 rows, 2436 columns, 5298 nonzeros
Variable types: 0 continuous, 2436 integer (2436 binary)
Found heuristic solution: objective 24.5009766

Root relaxation: cutoff, 332 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    

In [20]:
for i in range(len(series_item_relevances)):
    rankings = solutions2rankings(single_solutions[i],colleges)
    display(rankings[0].sort_values())

Amherst College                   0
Swarthmore College                1
Williams College                  2
Wellesley College                 3
Haverford College                 4
Pomona College                    5
Carleton College                  6
Bowdoin College                   7
Middlebury College                8
Davidson College                  9
Occidental College               10
Dickinson College                11
Sarah Lawrence College           12
Claremont McKenna College        13
Wabash College                   14
Skidmore College                 15
Hamilton College                 16
College of the Holy Cross        17
Bryn Mawr College                18
Trinity College                  19
Grinnell College                 20
DePauw University                21
Colorado College                 22
Franklin and Marshall College    23
Kenyon College                   24
University of the South          25
Macalester College               26
Barnard College             

Amherst College                   0
Williams College                  1
Swarthmore College                2
Wellesley College                 3
Carleton College                  4
Pomona College                    5
Bowdoin College                   6
Middlebury College                7
Davidson College                  8
Haverford College                 9
Occidental College               10
Dickinson College                11
Sarah Lawrence College           12
Claremont McKenna College        13
Wabash College                   14
Skidmore College                 15
Hamilton College                 16
College of the Holy Cross        17
Bryn Mawr College                18
Trinity College                  19
Grinnell College                 20
DePauw University                21
Colorado College                 22
Kenyon College                   23
University of the South          24
Macalester College               25
Barnard College                  26
Rhodes College              

Williams College                  0
Amherst College                   1
Swarthmore College                2
Wellesley College                 3
Carleton College                  4
Pomona College                    5
Middlebury College                6
Davidson College                  7
Haverford College                 8
Bowdoin College                   9
Occidental College               10
Dickinson College                11
Sarah Lawrence College           12
Claremont McKenna College        13
Wabash College                   14
Skidmore College                 15
Hamilton College                 16
College of the Holy Cross        17
Bryn Mawr College                18
Trinity College                  19
Grinnell College                 20
DePauw University                21
Colorado College                 22
Kenyon College                   23
University of the South          24
Macalester College               25
Barnard College                  26
Rhodes College              

### Now let's run equity of attention on all three in a series

I am not doing any repeats of the same year.

In [21]:
solutions, baseline_unfairness, unfairness = equity_of_attention.common.model_3(series_item_relevances)

Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (linux64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 101 rows, 2500 columns and 5500 nonzeros
Model fingerprint: 0xee9159d8
Variable types: 0 continuous, 2500 integer (2500 binary)
Coefficient statistics:
  Matrix range     [4e-03, 1e+00]
  Objective range  [3e-03, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 4e+00]
Presolve removed 0 rows and 64 columns
Presolve time: 0.02s
Presolved: 101 rows, 2436 columns, 5298 nonzeros
Variable types: 0 continuous, 2436 integer (2436 binary)
Found heuristic solution: objective 24.5009766

Root relaxation: cutoff, 332 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0     cutoff    0        24.50098   24.50098  0.00%     -    0s

Explored 0 nodes (332 simplex iterations) in 0.04 seconds
Thr

In [22]:
rankings = solutions2rankings(solutions,colleges)

In [23]:
k = 20
top_k = []
for i in range(len(rankings)):
    top_k.append(list(rankings[i].sort_values()[:k].index))
top_k_df = pd.DataFrame(top_k,index=years).T
top_k_df

Unnamed: 0,2002,2003,2004
0,Amherst College,Amherst College,Williams College
1,Swarthmore College,Williams College,Amherst College
2,Williams College,Swarthmore College,Swarthmore College
3,Wellesley College,Wellesley College,Wellesley College
4,Haverford College,Carleton College,Carleton College
5,Pomona College,Pomona College,Pomona College
6,Carleton College,Bowdoin College,Middlebury College
7,Bowdoin College,Middlebury College,Davidson College
8,Middlebury College,Davidson College,Haverford College
9,Davidson College,Haverford College,Bowdoin College


In [24]:
(-1*series_item_relevances_df.iloc[:,2]).sort_values().index

Index(['Williams College', 'Amherst College', 'Swarthmore College',
       'Wellesley College', 'Carleton College', 'Pomona College',
       'Middlebury College', 'Davidson College', 'Haverford College',
       'Bowdoin College', 'Wesleyan University',
       'Washington and Lee University', 'Claremont McKenna College',
       'Vassar College', 'Grinnell College', 'Smith College',
       'Colgate University', 'Bryn Mawr College', 'Colby College',
       'Harvey Mudd College', 'Hamilton College', 'Trinity College',
       'Bates College', 'Oberlin College', 'Mount Holyoke College',
       'Macalester College', 'Bucknell University',
       'College of the Holy Cross', 'Colorado College', 'Kenyon College',
       'Lafayette College', 'Bard College', 'University of the South',
       'Connecticut College', 'Whitman College', 'Scripps College',
       'Union College', 'Franklin and Marshall College', 'Barnard College',
       'DePauw University', 'Occidental College', 'Dickinson College',


## Now enter the student
Select the colleges of interest to them

In [25]:
#colleges_of_interest = np.unique(top_k_df)

## Now grag the D matrix for the final year

In [26]:
class Details:
    def __init__(self, **entries):
        self.__dict__.update(entries)
        
import joblib
Ds = joblib.load('results/Ds.joblib.z')
D = Ds.loc[(2002,'Both'),'D']
D

Unnamed: 0_level_0,Amherst College,Bowdoin College,Carleton College,Claremont McKenna College,Davidson College,Haverford College,Middlebury College,Pomona College,Swarthmore College,Wellesley College,Williams College
School Name1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Amherst College,0.0,5.0,4.0,6.0,5.0,5.0,3.0,4.0,3.0,6.0,4.0
Bowdoin College,1.0,0.0,4.0,4.0,5.0,3.0,2.0,3.0,1.0,5.0,1.0
Carleton College,2.0,2.0,0.0,3.0,3.0,2.0,2.0,2.0,1.0,3.0,2.0
Claremont McKenna College,0.0,2.0,3.0,0.0,4.0,4.0,1.0,3.0,0.0,4.0,1.0
Davidson College,1.0,1.0,3.0,2.0,0.0,1.0,2.0,2.0,1.0,4.0,1.0
Haverford College,1.0,3.0,4.0,2.0,5.0,0.0,3.0,2.0,2.0,6.0,2.0
Middlebury College,3.0,4.0,4.0,5.0,4.0,3.0,0.0,3.0,2.0,5.0,3.0
Pomona College,2.0,3.0,4.0,3.0,4.0,4.0,3.0,0.0,2.0,5.0,3.0
Swarthmore College,3.0,5.0,5.0,6.0,5.0,4.0,4.0,4.0,0.0,6.0,4.0
Wellesley College,0.0,1.0,3.0,2.0,2.0,0.0,1.0,1.0,0.0,0.0,2.0


In [27]:
colleges_of_interest = list(D.columns)

In [28]:
#data_for_D = []
#data_for_D.append(list(data[year]['Final Rank Fixed'].values))
#data_for_D = pd.DataFrame(data_for_D,columns=colleges,index=years).T
#data_for_D = data_for_D.loc[colleges_of_interest]
#data_for_D

In [29]:
#trans = pyrplib.transformers.ColumnCountTransformer(data_for_D.columns[:2])
#trans.fit(data_for_D)
#D = trans.transform(data_for_D)
#D

In [30]:
Ds.columns

Index(['D', 'delta_cont', 'delta_bin', 'details_cont', 'details_bin',
       'details_fixed_cont_x_maximize', 'details_fixed_cont_x_minimize',
       'details_pair_maximize', 'details_pair_minimize'],
      dtype='object')

In [31]:
def perm_to_series(D,perm,name):
    return pd.Series(list(D.index[list(perm)]),name=name)

closest = perm_to_series(D,Ds.loc[(year,'Both'),'details_fixed_cont_x_minimize'].perm,'Closest')
farthest = perm_to_series(D,Ds.loc[(year,'Both'),'details_fixed_cont_x_maximize'].perm,'Farthest')

KeyError: (2004, 'Both')

In [None]:
series_item_relevances = []
for year in years:
    order = np.argsort(data[year]['Final Rank'].values)
    data[year]['Final Rank Fixed'] = 0
    data[year].loc[:,'Final Rank Fixed'].iloc[order] = np.arange(1,len(order)+1)
    rank = np.array(list(data[year]['Final Rank Fixed'].values))
    ix = np.where(rank == 1)[0]
    print(data[year].index[ix])
    series_item_relevances.append((len(colleges) - rank+1)/len(colleges))

closest_rank = pd.Series(closest.index+1,index=closest).reindex(colleges)
closest_rank = closest_rank.fillna(len(colleges)+1) # put at end
series_item_relevances.append((len(colleges) - closest_rank.values+1)/len(colleges))
series_item_relevances_df = pd.DataFrame(series_item_relevances,columns=colleges,index=years+["Closest"]).T
series_item_relevances_df

In [None]:
solutions, baseline_unfairness, unfairness = equity_of_attention.common.model_3(series_item_relevances)

In [None]:
closest

In [None]:
rankings = solutions2rankings(solutions,colleges)
k = 20
top_k = []
for i in range(len(rankings)):
    top_k.append(list(rankings[i].sort_values()[:k].index))
top_k_df = pd.DataFrame(top_k,index=series_item_relevances_df.columns).T
closest_result = top_k_df[['Closest']]
top_k_df

## Now do the farthest

In [None]:
series_item_relevances = []
for year in years:
    order = np.argsort(data[year]['Final Rank'].values)
    data[year]['Final Rank Fixed'] = 0
    data[year].loc[:,'Final Rank Fixed'].iloc[order] = np.arange(1,len(order)+1)
    rank = np.array(list(data[year]['Final Rank Fixed'].values))
    ix = np.where(rank == 1)[0]
    print(data[year].index[ix])
    series_item_relevances.append((len(colleges) - rank+1)/len(colleges))

farthest_rank = pd.Series(farthest.index+1,index=farthest).reindex(colleges)
farthest_rank = farthest_rank.fillna(len(colleges)+1) # put at end
series_item_relevances.append((len(colleges) - farthest_rank.values+1)/len(colleges))
series_item_relevances_df = pd.DataFrame(series_item_relevances,columns=colleges,index=years+["Farthest"]).T
series_item_relevances_df

In [None]:
solutions, baseline_unfairness, unfairness = equity_of_attention.common.model_3(series_item_relevances)

In [None]:
rankings = solutions2rankings(solutions,colleges)
k = 20
top_k = []
for i in range(len(rankings)):
    top_k.append(list(rankings[i].sort_values()[:k].index))
top_k_df = pd.DataFrame(top_k,index=series_item_relevances_df.columns).T
farthest_result = top_k_df[['Farthest']]
top_k_df

In [None]:
joined = closest_result.join(farthest_result)
joined

In [None]:
closest_perm = pd.Series(joined.index,index=joined['Closest'],name='Closest')
closest_perm

In [None]:
farthest_perm = pd.Series(joined.index,index=joined['Farthest'],name='Farthest')
farthest_perm = farthest_perm.loc[closest_perm.index] #np.argsort(farthest_perm.loc[closest_perm.index]).loc
farthest_perm

In [None]:
def perm_to_series2(names,perm,name):
    return pd.Series(list(names[list(perm)]),name=name)

A = perm_to_series2(farthest_perm.index,list(closest_perm),'closest_perm')
B = perm_to_series2(farthest_perm.index,list(farthest_perm),'farthest_perm')
pyrankability.plot.spider2(A,B,file=f'results/fairness_spider.png')

In [None]:
def unfairness_comparison(series_item_relevances):
    num_items = len(series_item_relevances[0])
    position_attentions = equity_of_attention.common.calc_position_attentions(num_items, click_probability=0.5, position_cutoff=num_items//2)
    accumulated_attention = np.zeros(num_items) # A
    accumulated_relevance = np.zeros(num_items) # R
    unfairness = []
    for ranking_num in range(len(series_item_relevances)):
        item_relevances = series_item_relevances[ranking_num] #np.array([1.0 for i in range(num_items)]) # all of them are relevant?
        order = np.argsort(-1*item_relevances)
        accumulated_attention[order] += position_attentions
        accumulated_relevance += item_relevances
        unfairness.append(np.abs(accumulated_attention - accumulated_relevance))
        
    unfairness0 = []
    accumulated_attention = np.zeros(num_items) # A
    accumulated_relevance = np.zeros(num_items) # R
    for ranking_num in range(len(series_item_relevances)):
        item_relevances = series_item_relevances[0] #np.array([1.0 for i in range(num_items)]) # all of them are relevant?
        order = np.argsort(-1*item_relevances)
        accumulated_attention[order] += position_attentions
        accumulated_relevance += item_relevances
        unfairness0.append(np.abs(accumulated_attention - accumulated_relevance))
    
    return unfairness,unfairness0

In [None]:
perm_closest = Ds.loc[(2002,'Both'),'details_fixed_cont_x_minimize'].perm
perm_farthest = Ds.loc[(2002,'Both'),'details_fixed_cont_x_maximize'].perm
perm_pair_minimize_x = Ds.loc[(2002,'Both'),'details_pair_minimize'].perm_x
perm_pair_minimize_y = Ds.loc[(2002,'Both'),'details_pair_minimize'].perm_y
perm_pair_maximize_x = Ds.loc[(2002,'Both'),'details_pair_maximize'].perm_x
perm_pair_maximize_y = Ds.loc[(2002,'Both'),'details_pair_maximize'].perm_y

In [None]:
rank_pair_minimize_x,rank_pair_minimize_y = np.argsort(perm_pair_minimize_x),np.argsort(perm_pair_minimize_y)

In [None]:
rank_pair_maximize_x,rank_pair_maximize_y = np.argsort(perm_pair_maximize_x),np.argsort(perm_pair_maximize_y)

In [None]:
rank_closest,rank_farthest = np.argsort(perm_closest),np.argsort(perm_farthest)

In [None]:
rank_pair_minimize_x

In [None]:
rank_pair_minimize_y

In [None]:
rank_closest

In [None]:
rank_farthest

In [None]:
rank_pair_maximize_y,rank_pair_maximize_x

In [None]:
lop_series_item_relevances['rank_pair_maximize']

In [None]:
true_relevance

In [None]:
true_relevance = np.mean(np.array(lop_series_item_relevances['rank_pair_maximize']),axis=0)
method = 'closest'
series_item_relevances = lop_series_item_relevances[method]
num_items = len(true_relevance)
position_attentions = equity_of_attention.common.calc_position_attentions(num_items, click_probability=0.5, position_cutoff=num_items//2)
accumulated_attention = np.zeros(num_items) # A
accumulated_relevance = np.zeros(num_items) # R
unfairness = []
A = np.zeros((len(series_item_relevances),num_items))
R = np.zeros((len(series_item_relevances),num_items))
for ranking_num in range(len(series_item_relevances)):
    item_relevances = series_item_relevances[ranking_num] #np.array([1.0 for i in range(num_items)]) # all of them are relevant?
    order = np.argsort(-1*item_relevances)
    accumulated_attention[order] += position_attentions
    accumulated_relevance += true_relevance
    A[ranking_num,:] = position_attentions
    R[ranking_num,:] = true_relevance
A,R

In [None]:
np.sum(np.abs(np.sum(A,axis=0) - np.sum(R,axis=0)))

In [None]:
true_relevance = np.mean(np.array(lop_series_item_relevances['rank_pair_maximize']),axis=0)
method = 'rank_pair_maximize'
series_item_relevances = lop_series_item_relevances[method]
num_items = len(true_relevance)
position_attentions = equity_of_attention.common.calc_position_attentions(num_items, click_probability=0.5, position_cutoff=num_items//2)
accumulated_attention = np.zeros(num_items) # A
accumulated_relevance = np.zeros(num_items) # R
unfairness = []
A = np.zeros((len(series_item_relevances),num_items))
R = np.zeros((len(series_item_relevances),num_items))
for ranking_num in range(len(series_item_relevances)):
    item_relevances = series_item_relevances[ranking_num] #np.array([1.0 for i in range(num_items)]) # all of them are relevant?
    order = np.argsort(-1*item_relevances)
    accumulated_attention[order] += position_attentions
    accumulated_relevance += true_relevance
    A[ranking_num,:] = position_attentions
    R[ranking_num,:] = true_relevance
A,R

In [None]:
np.sum(np.abs(np.sum(A,axis=0) - np.sum(R,axis=0)))

In [None]:
total = 2
lop_series_item_relevances = {}
lop_series_item_relevances['closest'] = []
lop_series_item_relevances['farthest'] = []
lop_series_item_relevances['rank_pair_maximize'] = []
lop_series_item_relevances['rank_pair_minimize'] = []
for i in range(total):
    lop_series_item_relevances['closest'].append((len(rank_closest) - rank_closest)/len(rank_closest))
    lop_series_item_relevances['farthest'].append((len(rank_farthest) - rank_farthest)/len(rank_farthest))

for i in range(total//2):
    lop_series_item_relevances['rank_pair_maximize'].append((len(rank_pair_maximize_x) - rank_pair_maximize_x)/len(rank_pair_maximize_x))
    lop_series_item_relevances['rank_pair_maximize'].append((len(rank_pair_maximize_y) - rank_pair_maximize_y)/len(rank_pair_maximize_y))

for i in range(total//2):
    lop_series_item_relevances['rank_pair_minimize'].append((len(rank_pair_minimize_x) - rank_pair_minimize_x)/len(rank_pair_minimize_x))
    lop_series_item_relevances['rank_pair_minimize'].append((len(rank_pair_minimize_y) - rank_pair_minimize_y)/len(rank_pair_minimize_y))


In [None]:
ix_cols = ['Iteration','Item','Category']
unfairness = pd.DataFrame(columns=['Unfairness','Iteration','Item','Category']).set_index(ix_cols)
for method in ['closest','rank_pair_maximize']:#lop_series_item_relevances.keys():
    u, bu = unfairness_comparison(lop_series_item_relevances[method])
    t = pd.DataFrame(u)
    t.index.name = 'Iteration'
    t = t.reset_index()
    t = t.melt(id_vars='Iteration')
    t.columns = ['Iteration','Item','Unfairness']
    t['Category'] = method
    r = pd.DataFrame({"Relevance":np.array(lop_series_item_relevances[method]).mean(axis=0)})
    r.index.name='Item'
    t = t.set_index('Item').join(r)
    unfairness = unfairness.append(t.reset_index().set_index(ix_cols))
                 
unfairness = unfairness.reset_index()

In [None]:
import altair as alt
alt.Chart(unfairness).mark_line().encode(
    x='Iteration',
    y='sum(Unfairness)',
    color='Category',
    #row='Item'
)

In [None]:
import altair as alt
alt.Chart(unfairness).mark_line().encode(
    x='Iteration',
    y='Unfairness',
    color='Category',
    row='Relevance',
    column='Item'
)

## Don't go below

In [None]:
position_attentions = equity_of_attention.common.calc_position_attentions(len(lop_ranking_x))
position_attentions

In [None]:
np.array(farthest_pair_details['perm_x'])

### Unfairness of LOP

In [None]:
acc_attentions = position_attentions[np.array(farthest_pair_details['perm_x'])] + position_attentions[np.array(farthest_pair_details['perm_y'])]
acc_attentions

In [None]:
pd.Series(acc_attentions)

In [None]:
acc_relevances = pd.Series(series_item_relevances[0],index=colleges).loc[colleges_of_interest] + pd.Series(series_item_relevances[1],index=colleges).loc[colleges_of_interest]
acc_relevances

In [None]:
unfairness_lop = np.sum(np.abs(acc_attentions - acc_relevances))
unfairness_lop

In [None]:
A = perm_to_series(D,farthest_pair_details['perm_x'],'perm_x')
B = perm_to_series(D,farthest_pair_details['perm_y'],'perm_y')
pyrankability.plot.spider2(A,B,file=f'results/farthest_pair_fairness.png')

In [None]:
new_rankings = []
new_perms = []
for i in range(len(rankings)):
    new_rankings.append(rankings[i].loc[colleges_of_interest].copy())
    order = np.argsort(rankings[i].loc[colleges_of_interest].values)
    new_rankings[i][order] = np.arange(0,len(order))
    new_perms.append(np.argsort(new_rankings[i]))

In [None]:
new_rankings[i]

In [None]:
acc_attentions = position_attentions[np.array(new_perms[0])] + position_attentions[np.array(new_perms[1])]
acc_attentions

In [None]:
pd.Series(acc_attentions)

In [None]:
acc_relevances = pd.Series(series_item_relevances[0],index=colleges).loc[colleges_of_interest] + pd.Series(series_item_relevances[1],index=colleges).loc[colleges_of_interest]
acc_relevances

In [None]:
unfairness_equity = np.sum(np.abs(acc_attentions - acc_relevances))
unfairness_equity

In [None]:
A = perm_to_series(D,new_perms[0],'ranking_perm0')
B = perm_to_series(D,new_perms[1],'ranking_perm1')
pyrankability.plot.spider2(A,B,file=f'results/equity_pair_fairness.png')