In [None]:
import csv
import numpy as np
import pandas as pd

# Load the decision table. The first CSV column holds the alternative's name;
# every remaining column holds a numeric criterion value.
ids = []
data = []
# newline="" is what the csv module asks for, so that quoted fields containing
# line breaks are parsed the same way on every platform.
with open("../data.csv", "r", newline="") as f:
    reader = csv.reader(f)
    header = next(reader)[1:]  # criterion names; the label of the name column is dropped
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line (e.g. a trailing newline);
            # skip it instead of failing on row[0].
            continue
        name = row[0]
        ids.append(name)
        data.append(list(map(float, row[1:])))

# Build the index directly rather than through a temporary 'name' column, so a
# criterion that happens to be called 'name' cannot be overwritten.
df = pd.DataFrame(data, columns=header, index=pd.Index(ids, name='name'))
df

In [None]:
# Criterion directions: a GAIN criterion is better when larger, a COST criterion when smaller.
GAIN, COST = 0, 1
# One entry per criterion column, in column order: five gain criteria followed by two cost criteria.
criteria_types = [GAIN] * 5 + [COST] * 2

# Preferential information

In [None]:
# Pairwise comparisons of alternatives, written as (alternative, relation, alternative).
# ">" means the left alternative is strictly preferred, "~" means the two are indifferent.
preferences = [
    ("Samsung Galaxy S22 Ultra", "~", "Pixel 7"),
    ("Samsung Galaxy S22 Ultra", ">", "Samsung S24+"),
    ("Pixel 7", ">", "Pixel 8"),
    ("Samsung S24 Ultra", "~", "iPhone 15 Pro Max"),
    ("Samsung S23 FE", ">", "iPhone 15 Pro"),
    ("Vivo X80 Pro", "~", "Samsung S23 FE"),
    ("Pixel 8 Pro", "~", "Pixel 8"),
    # Inconsistency: a cycle A > B, B > C, C > A.
    ("iPhone 15", ">", "iPhone 15 Pro"),
    ("iPhone 15 Pro", ">", "iPhone 15 Pro Max"),
    ("iPhone 15 Pro Max", ">", "iPhone 15"),
    # Inconsistency: a direct contradiction A > B, B > A.
    ("iPhone SE the 3rd", ">", "Samsung S24+"),
    ("Samsung S24+", ">", "iPhone SE the 3rd"),
]

In [None]:
import pulp
from itertools import pairwise

# PuLP does not allow spaces in problem names: it replaces them with "_" and
# emits a warning. Using the underscore form directly gives the same name
# without the warning.
prob = pulp.LpProblem("Resolve_Inconsistencies", pulp.LpMinimize)

MAX_WEIGHT = 0.5   # upper bound applied to every marginal-utility variable
MIN_WEIGHT = 0.05  # lower bound on the utility of each criterion's best value


def _utility_var(registry, criterion, level, low=0, up=MAX_WEIGHT):
    """Return the variable named u_<criterion>_<level>, creating it on first use.

    `low` and `up` are applied only when the variable is created; a variable
    that already exists in `registry` is returned unchanged.
    """
    key = f'u_{criterion}_{level}'
    if key not in registry:
        registry[key] = pulp.LpVariable(key, lowBound=low, upBound=up)
    return registry[key]


u_vars = {}  # variable name -> LpVariable, one per (criterion index, distinct value)
best_u = []  # utility variable of each criterion's best value
# strict=True: if criteria_types and the columns of df differ in length, plain
# zip would silently drop the surplus and leave part of the model unbuilt.
for i, (c, ctype) in enumerate(zip(df, criteria_types, strict=True)):
    # Distinct values ordered from worst to best: ascending for GAIN (0),
    # descending for COST (1). Repeated values share one variable, so keeping
    # them would only add trivial `u <= u` constraints.
    series = sorted(set(df[c]), reverse=bool(ctype))

    # Worst values for certain criteria should be zero
    worst = series[0]
    _utility_var(u_vars, i, worst, low=0, up=0)

    # Best values for certain criteria should be at most MAX_WEIGHT and sum to 1
    best = series[-1]
    best_u.append(_utility_var(u_vars, i, best, low=MIN_WEIGHT, up=MAX_WEIGHT))

    # Monotonicity constraints: a better value never has a lower utility
    for a, b in pairwise(series):
        prob += _utility_var(u_vars, i, a) <= _utility_var(u_vars, i, b)

# Normalization constraint of best values in each criteria
constraint = pulp.LpConstraint(pulp.lpSum(best_u), sense=pulp.LpConstraintEQ, rhs=1)
prob += constraint

Non-negativity constraints for `u` are already included by setting `lowBound` to 0.

## Resolving inconsistencies

In [None]:
binary_variables = []  # one relaxation variable per entry of `preferences`, in the same order

EPSILON = 1e-6  # minimum utility gap required for a strict preference


def _utility_terms(alternative):
    """Return the marginal-utility variables of `alternative`, one per criterion column."""
    terms = []
    for i, v in enumerate(df.loc[alternative]):
        identifier = f"u_{i}_{v}"
        if identifier not in u_vars:
            # Fallback kept from the original cell. A variable created here has
            # only its bounds; no monotonicity constraint refers to it.
            u_vars[identifier] = pulp.LpVariable(identifier, lowBound=0, upBound=MAX_WEIGHT)
        terms.append(u_vars[identifier])
    return terms


# Reject unknown relation symbols before touching `prob`: previously anything
# other than '>' (for example a mistyped '<') was silently handled as '~'.
unknown = [p for p in preferences if p[1] not in ('>', '~')]
if unknown:
    raise ValueError(f"Unknown preference relation in: {unknown!r}")

for a, relation, b in preferences:
    # v_ab = 1 switches the comparison off: utilities sum to at most 1, so
    # subtracting 1 from the right-hand side makes the constraint slack.
    v_ab = pulp.LpVariable(f'v_{a},{b}', cat=pulp.LpBinary)
    binary_variables.append(v_ab)
    A = _utility_terms(a)
    B = _utility_terms(b)

    if relation == '>':
        # Strict preference: U(a) exceeds U(b) by at least EPSILON
        prob += pulp.lpSum(A) >= pulp.lpSum(B) + EPSILON - v_ab
    else:
        # Indifference: U(a) == U(b), written as two inequalities
        prob += pulp.lpSum(A) >= pulp.lpSum(B) - v_ab
        prob += pulp.lpSum(B) >= pulp.lpSum(A) - v_ab

# Objective: switch off as few comparisons as possible
prob += pulp.lpSum(binary_variables)

In [None]:
# Solve the model with PuLP's default solver; the value returned by solve()
# is the last expression of the cell and is therefore displayed as its output.
prob.solve()

Inconsistent comparisons to remove

In [None]:
# Print every comparison whose relaxation variable is switched on in the solution.
for preference, v in zip(preferences, binary_variables):
    # Solver output is floating point, so a binary variable can come back as
    # e.g. 0.9999999 and fail an exact `== 1` test; value() is None if the
    # variable has no solution value yet.
    if (v.value() or 0) > 0.5:
        print(*preference)