In [31]:
import numpy as np
import matplotlib.pyplot as plt

from collections import deque

from rashomon import loss
from rashomon import count_pools
from rashomon.cache import RashomonSubproblemCache
from rashomon.tva import enumerate_policies
from rashomon.extract_pools import extract_pools


%load_ext autoreload
%autoreload 2
# %matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Function to pool based on $\Sigma$ matrix

In [4]:
# Example pooling matrix. Its row count M matches the number of entries in
# each policy tuple; its column count n fixes R = n + 2.
sigma = np.array(
    [
        [1, 1, 0],
        [0, 1, 1],
    ],
    dtype=np.float64,
)

M, n = sigma.shape
R = n + 2

# Enumerate all policies and group them into pools according to sigma.
# pi_pools maps pool id -> list of policy indices; pi_policies maps a policy
# index back to its pool id.
policies = enumerate_policies(M, R)
num_policies = (R - 1) ** M
pi_pools, pi_policies = extract_pools(policies, sigma)

for pool_id, pool in pi_pools.items():
    print(f"{pool_id} : {pool}")

0 : [0, 4, 8]
1 : [1, 2, 3, 5, 6, 7, 9, 10, 11]
2 : [12]
3 : [13, 14, 15]


### Generate data

In [5]:
# Fix the seed so the simulated outcomes are reproducible.
np.random.seed(3)

# One mean and one variance per pool.
num_pools = len(pi_pools)
mu = np.random.uniform(0, 4, size=num_pools)
var = [1] * num_pools

# Number of simulated observations per policy.
n_per_pol = 10

# The buffers below are only fully written if there is exactly one block of
# n_per_pol rows per policy.
assert len(policies) == num_policies, "policies does not match num_policies"

num_data = num_policies * n_per_pol
# X holds the policy of each observation, D the policy index, y the outcome.
# np.empty is the supported way to allocate uninitialised buffers; every row
# is overwritten in the loop below.
X = np.empty(shape=(num_data, M))
D = np.empty(shape=(num_data, 1), dtype='int_')
y = np.empty(shape=(num_data, 1))

for idx, policy in enumerate(policies):
    pool_i = pi_policies[idx]
    mu_i = mu[pool_i]
    var_i = var[pool_i]
    # np.random.normal expects the standard deviation (scale), not the
    # variance, so convert before sampling.
    y_i = np.random.normal(mu_i, np.sqrt(var_i), size=(n_per_pol, 1))

    start_idx = idx * n_per_pol
    end_idx = (idx + 1) * n_per_pol

    X[start_idx:end_idx] = policy
    D[start_idx:end_idx] = idx
    y[start_idx:end_idx] = y_i
    

In [6]:
# Per-policy means computed from the data (D, y).
# This function needs to be called only once.
policy_means = loss.compute_policy_means(D, y, num_policies)

# Pool-level means derived from the policy means and the current pools.
# This function needs to be called every time the pools change.
mu_pools = loss.compute_pool_means(policy_means, pi_pools)

# Value Q for the pooling described by sigma.
# This function needs to be called every time the pools change.
# NOTE(review): the trailing 1 sits in the slot RAggregate fills with `reg`;
# presumably the regularization strength -- confirm against rashomon.loss.
Q = loss.compute_Q(D, y, sigma, policies, policy_means, 1)

print(Q)

4.895456436046057


In [7]:
# Position (row i, column j) of sigma passed to compute_B and, in the next
# section, to the subproblem cache.
i = 0
j = 0

# NOTE(review): B is presumably a bound on Q for the subproblem at (i, j);
# the trailing 1 sits in the slot RAggregate fills with `reg` -- confirm
# against rashomon.loss.
B = loss.compute_B(D, y, sigma, i, j, policies, policy_means, 1)
print(B)

4.859163572719872


### Caching object

In [8]:
# Use the class that is actually imported at the top of the notebook
# (RashomonSubproblemCache); the name `RashomonCache` is never imported and
# raises NameError on a fresh kernel.
seen_sigma = RashomonSubproblemCache(sigma.shape)
seen_sigma.insert(sigma, i, j)

# Should be True: this exact (sigma, i, j) was just inserted
print(seen_sigma.seen(sigma, i, j))

# Should be False: sigma2 differs from sigma at entry [1, 1]
sigma2 = np.copy(sigma)
sigma2[1, 1] = 1 - sigma2[1, 1]
print(seen_sigma.seen(sigma2, i, j))

# Should be True: sigma3 differs from sigma only at entry [0, 2]
# NOTE(review): presumably the cache ignores entries of row i from column j
# onwards when comparing subproblems -- confirm against rashomon.cache.
sigma3 = np.copy(sigma)
sigma3[0, 2] = 1 - sigma3[0, 2]
print(seen_sigma.seen(sigma3, i, j))



True
False
True


### RAggregate

In [27]:
class RashomonSet:
    """
    Insertion-ordered collection of distinct sigma matrices of one fixed shape.

    Matrices are de-duplicated by value. Each inserted matrix is stored as a
    private copy, so mutating the caller's array after `insert` cannot change
    the contents of the set.

    Some discussion on how to hash a numpy array
    https://stackoverflow.com/questions/16589791/most-efficient-property-to-hash-for-numpy-array
    """

    def __init__(self, shape: tuple[int, int]):
        """Create an empty set that accepts only arrays of shape `shape`."""
        self.shape = shape
        # Byte keys of the stored matrices, used for membership tests.
        self.P_hash = set()
        # Stored matrices, in insertion order.
        self.P_qe = []

    def insert(self, sigma: np.ndarray) -> None:
        """
        Add `sigma` to the set unless an equal matrix is already stored.

        Raises RuntimeError if `sigma` does not have the expected shape.
        """
        self.__verify_shape__(sigma)
        key = self.__process__(sigma)
        if key in self.P_hash:
            return
        self.P_hash.add(key)
        # Keep a copy: callers may keep mutating their array (e.g. while
        # branching on its entries) and that must not alter stored members.
        self.P_qe.append(np.copy(sigma))

    def seen(self, sigma: np.ndarray) -> bool:
        """
        Return True if a matrix equal to `sigma` has been inserted.

        Raises RuntimeError if `sigma` does not have the expected shape.
        """
        self.__verify_shape__(sigma)
        return self.__process__(sigma) in self.P_hash

    @property
    def size(self):
        """Number of distinct matrices stored."""
        return len(self.P_qe)

    def __process__(self, sigma: np.ndarray):
        """
        Return the membership key for `sigma`.

        The key is the raw byte string of the array converted to float64, so
        equal-valued arrays of different dtypes map to the same key. The bytes
        themselves are used (not `hash(bytes)`), so two different matrices can
        never be confused by a hash collision.
        """
        return np.asarray(sigma, dtype=np.float64).tobytes()

    def __verify_shape__(self, sigma: np.ndarray):
        """Raise RuntimeError if `sigma` does not match the configured shape."""
        # Distinct arrays of different shapes may have same byte representation
        if self.shape != sigma.shape:
            raise RuntimeError(
                f"Expected array of dimensions {self.shape}. Received {sigma.shape}"
            )

    def __iter__(self):
        """Iterate over the stored matrices in insertion order."""
        return iter(self.P_qe)

    def __repr__(self):
        return repr(self.P_qe)

In [33]:
class RashomonProblemCache:
    """
    Caching object to keep track of full problems (whole sigma matrices)
    that have already been processed.

    Only a byte key per matrix is stored, not the matrix itself.

    Some discussion on how to hash a numpy array
    https://stackoverflow.com/questions/16589791/most-efficient-property-to-hash-for-numpy-array
    """

    def __init__(self, shape: tuple[int, int]):
        """Create an empty cache that accepts only arrays of shape `shape`."""
        self.shape = shape
        # Byte keys of every matrix inserted so far.
        self.C = set()

    def insert(self, sigma: np.ndarray) -> None:
        """
        Record `sigma` as seen.

        Raises RuntimeError if `sigma` does not have the expected shape.
        """
        self.__verify_shape__(sigma)
        self.C.add(self.__process__(sigma))

    def seen(self, sigma: np.ndarray) -> bool:
        """
        Return True if a matrix equal to `sigma` has been inserted.

        Raises RuntimeError if `sigma` does not have the expected shape.
        """
        self.__verify_shape__(sigma)
        return self.__process__(sigma) in self.C

    @property
    def size(self):
        """Number of distinct matrices recorded."""
        return len(self.C)

    def __process__(self, sigma: np.ndarray):
        """
        Return the membership key for `sigma`.

        The key is the raw byte string of the array converted to float64, so
        equal-valued arrays of different dtypes map to the same key. The bytes
        themselves are used (not `hash(bytes)`), so a hash collision can never
        make an unseen matrix look seen.
        """
        return np.asarray(sigma, dtype=np.float64).tobytes()

    def __verify_shape__(self, sigma: np.ndarray):
        """Raise RuntimeError if `sigma` does not match the configured shape."""
        # Distinct arrays of different shapes may have same byte representation
        if self.shape != sigma.shape:
            raise RuntimeError(
                f"Expected array of dimensions {self.shape}. Received {sigma.shape}"
            )


In [41]:
def RAggregate(M, R, H, D, y, theta, reg=1):
    """
    Breadth-first search over sigma matrices, collecting those whose Q value
    is at most `theta`.

    The search starts from the all-ones matrix of shape (M, R - 2). Each
    subproblem (sigma, i, j) branches on entry [i, j], producing one matrix
    with that entry set to 1 and one with it set to 0.

    Parameters
    ----------
    M, R : int
        Passed to `enumerate_policies`; sigma has shape (M, R - 2).
    H : int
        Subproblems whose sigma has more than `H` pools (as counted by
        `count_pools.num_pools`) are pruned.
    D, y : np.ndarray
        Data arrays forwarded to the functions in `loss`.
    theta : float
        Threshold: subproblems with B > theta are pruned, and matrices with
        Q <= theta are kept.
    reg : optional
        Forwarded unchanged to `loss.compute_B` and `loss.compute_Q`.

    Returns
    -------
    RashomonSet
        The evaluated matrices with Q <= theta.
    """
    policies = enumerate_policies(M, R)
    policy_means = loss.compute_policy_means(D, y, len(policies))

    sigma = np.ones(shape=(M, R - 2))

    P_qe = RashomonSet(sigma.shape)
    Q_seen = RashomonProblemCache(sigma.shape)
    problems = RashomonSubproblemCache(sigma.shape)

    queue = deque([(sigma, 0, 0)])

    while queue:
        sigma, i, j = queue.popleft()

        # Cache problem
        if problems.seen(sigma, i, j):
            continue
        problems.insert(sigma, i, j)

        if count_pools.num_pools(sigma) > H:
            continue

        B = loss.compute_B(D, y, sigma, i, j, policies, policy_means, reg)
        if B > theta:
            continue

        # Branch on entry [i, j]. Both branches are fresh copies: the dequeued
        # array may also be referenced by other queue entries (and by the
        # result set), so it must never be modified in place.
        sigma_1 = np.copy(sigma)
        sigma_1[i, j] = 1
        sigma_0 = np.copy(sigma)
        sigma_0[i, j] = 0
        branches = (sigma_1, sigma_0)

        # Check if the pooling already satisfies the Rashomon threshold
        for candidate in branches:
            if Q_seen.seen(candidate):
                continue
            Q_seen.insert(candidate)
            Q = loss.compute_Q(D, y, candidate, policies, policy_means, reg)
            if Q <= theta:
                P_qe.insert(candidate)

        # Add children problems to the queue
        if j < R - 3:  # j < R - 2 in math notation
            for candidate in branches:
                if not problems.seen(candidate, i, j + 1):
                    queue.append((candidate, i, j + 1))

        for m in range(M):
            for candidate in branches:
                if not problems.seen(candidate, m, 0):
                    queue.append((candidate, m, 0))

    return P_qe

# Use the M and R that generated D and y instead of repeating them as
# literals, so this call cannot drift out of sync with the data.
# H=4 is the pool-count limit and theta=5 the threshold used by RAggregate.
P_set = RAggregate(M, R, H=4, D=D, y=y, theta=5, reg=1)
print(P_set.size)

# Is the sigma defined at the top of the notebook in the returned set?
P_set.seen(sigma)

48
24
20


True