In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from package.RankAMIP.logistic import run_logistic_regression
from package.RankAMIP.logistic import LogisticAMIP

### Generate Table of Games

In [2]:
# Set seed for reproducibility
np.random.seed(42)

# --- Step 1: Define teams and latent strengths ---
teams = ['A', 'B', 'C', 'D', 'E', 'F']
team_ids = {name: i for i, name in enumerate(teams)}
true_betas = np.array([0.05, 0.04, 0.03, -0.00, -0.01, -0.02])  # (note: we should try out different signal levels later).


n_games = 10000
matchups = []

for _ in range(n_games):
    i, j = np.random.choice(6, size=2, replace=False) # randomly choose 2 teams to compete.
    beta_diff = true_betas[i] - true_betas[j]
    prob_win_i = 1 / (1 + np.exp(-beta_diff))
    winner = i if np.random.rand() < prob_win_i else j
    matchups.append((i, j, winner))


X = np.zeros((n_games, 5))  # We fix beta_0 = 0 and estimate beta_1 and beta_2.
y = np.zeros(n_games)

for idx, (i, j, winner) in enumerate(matchups):
    # Map to reduced index space (beta_0 = 0)
    def reduced(k): return k - 1 if k > 0 else None
    
    if winner == i:
        y[idx] = 1
        if reduced(i) is not None:
            X[idx, reduced(i)] += 1
            # print(f"i, reduced(i): {i, reduced(i)}")
        if reduced(j) is not None:
            X[idx, reduced(j)] -= 1
    else:
        y[idx] = 0
        if reduced(j) is not None:
            X[idx, reduced(j)] += 1
        if reduced(i) is not None:
            X[idx, reduced(i)] -= 1

In [3]:
matchups[:5] # (i,j,winner)


[(0, 1, 1), (5, 0, 0), (4, 5, 5), (4, 2, 4), (1, 2, 2)]

In [4]:
full_model = run_logistic_regression(X, y)
full_model.coef_[0]
pos_p_hats = full_model.predict_proba(X)[:, 1]

In [5]:
full_model.coef_[0]

array([0.04854473, 0.010578  , 0.078925  , 0.01943386, 0.06144881])

In [6]:
def find_closest_matchups(scores: np.ndarray, K: int) -> 'list[tuple[int,int,float]]':
    """
    For each top-index t in [0..K-1] and each rest-index r in [K..P-1],
    compute (t, r, scores[t] - scores[r]) and return as a list.
    """
    P = scores.shape[0]
    top  = scores[:K]         # shape (K,)
    rest = scores[K:]         # shape (P-K,)

    # diffs[t, r-K] = scores[t] - scores[r]
    diffs = top[:, None] - rest[None, :]  # shape (K, P-K)

    # build flat index arrays of length K*(P-K)
    t_idx = np.repeat(np.arange(K), P - K)  # [0,0,…,1,1,…,K-1, …]
    r_idx = np.tile(np.arange(K, P), K)  # [K,K+1,…,K,K+1,…, …]

    matchups = list(zip(
        t_idx.tolist(),
        r_idx.tolist(),
        diffs.ravel().tolist()
    ))
    # sort the matchups by the difference.
    sorted_matchups = sorted(matchups, key=lambda x: x[2])
    
    return sorted_matchups

In [7]:
scores = np.array([10.0, 8.5, 7.2, 5.0, 4.8])
K = 2
matchups = find_closest_matchups(scores, K)
matchups

[(1, 2, 1.2999999999999998),
 (0, 2, 2.8),
 (1, 3, 3.5),
 (1, 4, 3.7),
 (0, 3, 5.0),
 (0, 4, 5.2)]

In [12]:
myAMIP = LogisticAMIP(X, y, fit_intercept=False, penalty=None)
# inputs: alphaN, player1, player2.
# returns: sign_change_amip, sign_change_refit, original_beta_diff, new_beta_diff_amip, new_beta_diff_refit, indices of top alphaN matches.
results = myAMIP.AMIP_sign_change(40, 0, 1)
print(results)

(True, True, 0.0379667285577653, -0.00111148602931576, -0.0018768168345102733, array([4999, 4756, 4770, 4786,  996, 4800, 8523, 8595, 8495, 8471, 8444,
       8422, 1074, 1078, 4852, 4829, 8603, 3386, 3390, 8797, 3471, 8783,
       3452, 4639, 8700, 4659,  871,  884, 4689, 4706, 8642,  924,  925,
       8624, 4853, 8800, 4865, 3313, 1303, 8015]))


In [13]:
def isRankingRobust(k, alphaN, X, y):
    '''
    Check if the rank of the top k players/models is robust to data-dropping.
    Arg: 
        k, int, number of top players to consider. 
        alphaN, int, amount of data willing to drop.
        X, np.ndarray, design matrix.
        y, np.ndarray, response vector.
    Return:
        playerA, playerB: int, indices of players/models.
        new_beta_diff_refit: float, new beta difference.
        indices: list, indices of dropped data.
    '''
    # run logistic regression on X, y
    myAMIP = LogisticAMIP(X, y, fit_intercept=False, penalty=None)
    player_scores = myAMIP.model.coef_[0] # (p,)

    close_matchups = find_closest_matchups(player_scores, k)
    for playerA, playerB, diff in close_matchups: # a list of k(p-k) matchups.
        sign_change_amip, sign_change_refit, original_beta_diff,new_beta_diff_amip, new_beta_diff_refit, indices = myAMIP.AMIP_sign_change(alphaN, playerA, playerB)
        if sign_change_refit:
            return playerA, playerB, original_beta_diff, new_beta_diff_refit, indices
    
    return -1, -1, -1, -1, -1, [-1]

In [14]:
k = 2
alphaN = 15
thisPlayerA, thisPlayerB, thisOriginalBetaDiff, thisNewBetaDiff, thisIndices = isRankingRobust(k, alphaN, X, y)

In [16]:
print(thisPlayerA, thisPlayerB, thisOriginalBetaDiff,thisNewBetaDiff, thisIndices)

0 4 -0.012904076785887723 0.0022168489323046367 [5700 6085 4175 1678 1680 4178 7949  387 6059 9533 4820 3236 3883 9545
 1704]
