In [15]:
### Function to generate random rankings with low dispersion between one another, to simulate real rankings in which the same "top" universities tend to stay near the top

In [13]:
import random
from typing import List, Optional

def generate_rankings_correlated(n: int, m: int,*,phi: float = 0.3,top_k: int = 0,seed: Optional[int] = None) -> List[List[str]]:
    """
    Generate `n` rankings of `m` items that are similar to each other.

    Rankings are sampled from a Mallows model (Kendall distance) centered at a base
    order. Lower `phi` => rankings stay close to the center; higher `phi` => more noise.

    Args:
        n: Number of rankings to generate.
        m: Number of items per ranking (0..26, labelled A–Z).
        phi: Dispersion in [0, 1]. 0 -> identical to center; 1 -> uniform over
             all permutations.
        top_k: Must be in [0, m]. NOTE: the labels are already created in
               alphabetical order, so the center order is A, B, C, ... for every
               valid `top_k`; the value is validated but does not currently
               change the sampled rankings.
        seed: Optional random seed for reproducibility. When given, a private
              generator is seeded, so the module-level `random` state is left
              untouched. When omitted, the module-level generator is used.

    Returns:
        List of `n` permutations (each a list of str).

    Raises:
        ValueError: if `m` is outside [0, 26], `phi` is outside [0, 1], or
            `top_k` is outside [0, m].
    """
    if m < 0:
        raise ValueError("m must be non-negative")
    if m > 26:
        raise ValueError("m cannot exceed 26 (A–Z)")
    if not (0.0 <= phi <= 1.0):
        raise ValueError("phi must be in [0, 1]")
    if top_k < 0 or top_k > m:
        raise ValueError("top_k must be in [0, m]")

    # random.Random(seed) yields the same stream as random.seed(seed) would,
    # but without reseeding the generator shared by the rest of the notebook.
    rng = random.Random(seed) if seed is not None else random

    labels = [chr(ord('A') + i) for i in range(m)]
    # labels[:top_k] + labels[top_k:] is always just `labels`, so the center
    # order is the alphabetical one regardless of top_k.
    center = labels

    def _sample_mallows_kendall(center_order: List[str], phi_val: float) -> List[str]:
        # Repeated Insertion Model (RIM) sampler for Mallows (Kendall) distribution.
        # Items are inserted from the last center item to the first; inserting at
        # index j puts the new item behind j items it should precede, i.e. it
        # creates j inversions, hence the weight phi**j.
        powers = [phi_val ** k for k in range(len(center_order))]
        perm: List[str] = []
        for item in reversed(center_order):
            L = len(perm)
            if phi_val == 1.0:
                # All insertion slots are equally likely.
                j = rng.randint(0, L)
            else:
                weights = powers[:L + 1]
                s = sum(weights)
                r = rng.random() * s
                acc = 0.0
                j = 0
                # Inverse-CDF lookup: first slot whose cumulative weight reaches r.
                for idx, w in enumerate(weights):
                    acc += w
                    if r <= acc:
                        j = idx
                        break
            perm.insert(j, item)
        return perm

    return [_sample_mallows_kendall(center, phi) for _ in range(n)]

In [14]:
#test
# Smoke test: draw three independent batches of rankings with the default phi.
# NOTE: no `seed` is passed, so the printed rankings change from run to run.
# `A` is reused further down in this notebook as input to build_pairwise_matrix.
A=generate_rankings_correlated(4,4)
print(A)
B=generate_rankings_correlated(4,4)
print(B)
C=generate_rankings_correlated(5,6)
print(C)

[['A', 'B', 'C', 'D'], ['A', 'B', 'D', 'C'], ['A', 'B', 'D', 'C'], ['B', 'A', 'C', 'D']]
[['A', 'B', 'C', 'D'], ['A', 'B', 'D', 'C'], ['C', 'D', 'B', 'A'], ['B', 'A', 'D', 'C']]
[['A', 'D', 'B', 'C', 'F', 'E'], ['B', 'A', 'C', 'D', 'E', 'F'], ['B', 'A', 'E', 'C', 'D', 'F'], ['A', 'B', 'C', 'D', 'E', 'F'], ['A', 'C', 'B', 'D', 'E', 'F']]


In [None]:
###Function to build the pairwise matrix: this function merges all rankings into a single pairwise comparison table
##that records, for each pair of universities, the net number of times one is ranked above the other.

In [16]:
from typing import Iterable, Mapping, Sequence, Dict, List, Optional, Tuple, Union, Set
import itertools

# ---- helpers (used only by build_pairwise_matrix) ----

RankLike = Union[Sequence[str], Mapping[str, int]]
MarginMatrix = Dict[str, Dict[str, float]]

#Turns a list (ranking) into a dictionary, wto know the position of each item
def _as_position_map(ranking: RankLike) -> Dict[str, float]:
    """Accepts list/tuple (1 is best) or dict {item: position} (lower is better)."""
    if isinstance(ranking, Mapping):
        return {str(k): float(v) for k, v in ranking.items()}
    elif isinstance(ranking, Sequence):
        return {str(x): float(i + 1) for i, x in enumerate(ranking)}
    raise TypeError("ranking must be a sequence or a mapping")

#Checks, for a single ranking, whether "a" is ranked higher than "b":
def _pair_pref(pos: Mapping[str, float], a: str, b: str,
               *, missing: str, default_pos: float) -> Optional[int]:
    """
    Preference of a over b in one ranking:
      +1 if a<b, -1 if b<a, 0 if tie, None if ignore due to missing.
    """
    a_in, b_in = a in pos, b in pos
    if missing == "ignore" and (not a_in or not b_in):
        return None
    pa = pos.get(a, default_pos)
    pb = pos.get(b, default_pos)
    if pa < pb:  return 1
    if pb < pa:  return -1
    return 0

# ---- main function ----

#It does this for all pairs of items (A vs B, A vs C, B vs C, etc.)
#and sums the results from all rankings.
#If A is above B in most rankings, the margin M[A][B] is positive.
#If B beats A more often, the margin is negative.
def build_pairwise_matrix(
    rankings: Iterable[RankLike],
    *,
    weights: Optional[Iterable[float]] = None,
    items: Optional[Iterable[str]] = None,
    missing: str = "bottom",
) -> Tuple[List[str], MarginMatrix]:
    """
    Merge all rankings into the pairwise *margins* matrix M.

    M[i][j] = (weighted # times i preferred over j) - (weighted # times j over i)

    The matrix is antisymmetric (M[j][i] == -M[i][j]) and has no diagonal entries.

    Robust to:
      - different ranking lengths,
      - missing items (controlled by `missing`: 'bottom' or 'ignore').

    Args:
        rankings: rankings as sequences (best first) or {item: position} mappings.
        weights: optional weight per ranking; defaults to 1.0 for each.
        items: optional explicit universe of labels; defaults to the union of
            all labels found in `rankings`.
        missing: 'bottom' places an item absent from a ranking one position
            below that ranking's worst position; 'ignore' skips every pair in
            which either item is absent from the ranking.

    Returns:
        items_list, M -- the sorted labels and the margins matrix. With no
        rankings at all the result is ([], {}).

    Raises:
        ValueError: if `missing` is not 'bottom' or 'ignore', or if the number
            of weights differs from the number of rankings.
    """
    # Reject unknown modes up front; previously a typo silently meant 'bottom'.
    if missing not in ("bottom", "ignore"):
        raise ValueError("missing must be 'bottom' or 'ignore'")

    ranks = [_as_position_map(r) for r in rankings]
    if not ranks:
        return [], {}

    # Universe of items
    if items is None:
        universe: Set[str] = set().union(*[set(r.keys()) for r in ranks])
    else:
        universe = set(map(str, items))
    items_list = sorted(universe)

    # Weights
    if weights is None:
        w = [1.0] * len(ranks)
    else:
        w = [float(x) for x in weights]
        if len(w) != len(ranks):
            raise ValueError("weights length must match number of rankings")

    # Default positions for missing='bottom': one past each ranking's worst position
    defaults = [(max(r.values()) if r else 0.0) + 1.0 for r in ranks]

    # Initialize margins
    M: MarginMatrix = {i: {j: 0.0 for j in items_list if j != i} for i in items_list}

    # Tally each unordered pair once; the reverse entry is its negation.
    for a, b in itertools.combinations(items_list, 2):
        margin = 0.0
        for r, wt, defpos in zip(ranks, w, defaults):
            pref = _pair_pref(r, a, b, missing=missing, default_pos=defpos)
            if pref is None:
                continue
            margin += wt * pref  # pref is already +1, -1 or 0
        M[a][b] = margin
        # `0.0 - margin` rather than `-margin` so an even split stays 0.0, not -0.0.
        M[b][a] = 0.0 - margin

    return items_list, M

In [23]:
#Test
# build_pairwise_matrix returns (items_list, M): the sorted item labels first,
# then the pairwise margins matrix. Despite its name, `Matrix` holds the list
# of labels; `M` is the actual matrix.
Matrix,M=build_pairwise_matrix(A)
#Getting the elements of the rankings, then the pairwise margins matrix
#Each row of M shows how many more times one item beats another.
print(Matrix)
print(M)

['A', 'B', 'C', 'D']
{'A': {'B': 2.0, 'C': 4.0, 'D': 4.0}, 'B': {'A': -2.0, 'C': 4.0, 'D': 4.0}, 'C': {'A': -4.0, 'B': -4.0, 'D': 0.0}, 'D': {'A': -4.0, 'B': -4.0, 'C': 0.0}}


In [None]:
#Kemeny Score
#For every pair (i,j) of items, if i appears before j in your candidate order, M[i][j] is added to the total.
#The score is therefore the sum of the pairwise margins for all pairs, taken in the direction of your candidate order.
    #The larger the number, the better this order fits all the rankings.
    #A smaller (or negative) score means your order disagrees more often.

In [18]:
from typing import Sequence, Dict

# Pairwise margins, indexed as matrix[i][j]. Same alias as the one defined
# alongside build_pairwise_matrix; repeated so this cell runs on its own.
MarginMatrix = Dict[str, Dict[str, float]]

def calculate_kemeny_score(
    order: Sequence[str],  # candidate ranking, best -> worst
    matrix: MarginMatrix,  # pairwise margins M[i][j]
    *,
    strict: bool = False
) -> float:
    """
    Kemeny objective: sum M[i][j] for all pairs where i appears before j in `order`.
    Higher score = better agreement with the input rankings.

    Args:
        order: candidate ranking (best -> worst). Can be any iterable of labels;
            each label is converted with str().
        matrix: pairwise *margins* matrix as returned by build_pairwise_matrix().
            Every label appearing in an inner dict is expected to also be a
            top-level key.
        strict: if True, require `order` to contain exactly the matrix items,
                each exactly once (raise on missing/extra/duplicated labels).
                If False, extra labels are ignored, repeated labels keep their
                first occurrence, and missing items are appended to the end in
                matrix key order.

    Returns:
        Total Kemeny score (float).

    Raises:
        ValueError: in strict mode, when `order` is not a permutation of the
            matrix items.
    """
    items = list(matrix.keys())
    order = [str(x) for x in order]

    # One pass over the candidate: keep first occurrences, remember repeats.
    seen = set()
    duplicates = set()
    unique_order = []
    for label in order:
        if label in seen:
            duplicates.add(label)
        else:
            seen.add(label)
            unique_order.append(label)

    if strict:
        s_items = set(items)
        # A set comparison alone would accept an order with repeated labels.
        if seen != s_items or duplicates:
            missing = sorted(s_items - seen)
            extra   = sorted(seen - s_items)
            detail = f"missing={missing}, extra={extra}"
            if duplicates:
                detail += f", duplicates={sorted(duplicates)}"
            raise ValueError(
                f"Order must contain exactly these items: {sorted(s_items)}; "
                + detail
            )
        full_order = unique_order
    else:
        # keep only valid labels (already de-duplicated, given order preserved)
        full_order = [x for x in unique_order if x in matrix]
        # append any items not mentioned; set lookup avoids rescanning the list
        placed = set(full_order)
        full_order += [x for x in items if x not in placed]

    pos = {x: i for i, x in enumerate(full_order)}

    score = 0.0
    for i in items:
        for j, m_ij in matrix[i].items():
            # Only pairs the candidate orders as "i before j" contribute.
            if i != j and pos[i] < pos[j]:
                score += m_ij
    return float(score)

In [26]:
#test with candidate orders (proposed rankings) and the margins matrix M
# M comes from the build_pairwise_matrix test cell earlier in this notebook,
# so the printed scores depend on the rankings drawn for A in that run.
Score1=calculate_kemeny_score(['A', 'B', 'C', 'D'],M)
print(Score1)
Score2=calculate_kemeny_score(['B', 'A', 'C', 'D'],M)
print(Score2)
Score3=calculate_kemeny_score(['C', 'A', 'D', 'B'],M)
print(Score3)

18.0
14.0
-6.0
