In [1]:
import numpy as np
from typing import Tuple

## Functions


In [2]:
def score(x_1: np.ndarray, x_2: np.ndarray) -> int:
    """
    Compares two 1D NumPy arrays and returns a score.

    Parameters:
      x_1: The first 1D NumPy array.
      x_2: The second 1D NumPy array.

    Returns:
      The score, which is the number of elements in x_1 that are greater than or equal to the corresponding elements in x_2.
    """

    score = 0
    for i in range(x_1.shape[0]):
        score += np.sum(x_1[i] >= x_2)

    return score

## step 1


In [14]:
n, p = (100, 15)
X = np.random.randn(n, p)
y = np.random.randint(0, 2, size=n)
print(X.shape)
print(np.unique(y, return_counts=True))

(100, 15)
(array([0, 1]), array([52, 48], dtype=int64))


## step 2


In [15]:
_, (n0, n1) = np.unique(y, return_counts=True)

s_max = n0 * n1  # Maximum possible score
s_min = 0
# Step 2: Sign-flip operation
scores = np.zeros(p)
new_scores = np.zeros(p)
for i in range(p):
    xi = X[:, i]
    class_0_values, class_1_values = xi[y == 0], xi[y == 1]
    scores[i] = score(class_0_values, class_1_values)
    if scores[i] > s_max / 2:
        X[:, i] *= -1
    new_scores[i] = min(scores[i], s_max - scores[i])
X.shape, new_scores.shape, s_max

((100, 15), (15,), 2496)

## step 3


In [16]:
cluster = []
cluster_index = []
margins = np.min(X[y == 1], axis=0) - np.max(X[y == 0], axis=0)

if len(cluster) == 0:
    min_value = np.min(new_scores)
    min_indices = np.where(new_scores == min_value)[0]

    i_star = (
        np.argmin(new_scores)
        if min_indices.size == 1
        else np.argmax(margins[min_indices])
    )
    cluster = np.expand_dims(X[:, i_star], axis=1)
    cluster_index.append(i_star)
    initial_cluster_mean = X[:, i_star]
else:
    # Random selected genes
    pass

cluster.shape, cluster_index,i_star

((100, 1), [1], 1)

## step 4-5

In [17]:
# forward search

cluster_mean_score = score(initial_cluster_mean[y == 0], initial_cluster_mean[y == 1])
cluster_mean_margin = np.min(initial_cluster_mean[y == 1], axis=0) - np.max(
    initial_cluster_mean[y == 0], axis=0
)

while True:
    scores_with_gene = np.zeros(p)
    margins_with_gene = np.zeros(p)
    for i in range(p):
        if i in cluster_index:
            scores_with_gene[i] = s_max
            margins_with_gene[i] = s_min
            continue
        temp_cluster = np.concatenate(
            [cluster, np.expand_dims(X[:, i], axis=1)], axis=1
        )
        temp_cluster_avg = np.mean(temp_cluster, axis=1)
        scores_with_gene[i] = score(temp_cluster_avg[y == 0], temp_cluster_avg[y == 1])
        margins_with_gene[i] = np.min(temp_cluster_avg[y == 1]) - np.max(
            temp_cluster_avg[y == 0]
        )

    min_value = np.min(scores_with_gene)
    min_indices = np.where(scores_with_gene == min_value)[0]

    i_star = (
        min_indices[0]
        if min_indices.size == 1
        else np.argmax(margins_with_gene[min_indices])
    )

    if scores_with_gene[i_star] > cluster_mean_score:
        break

    if scores_with_gene[i_star] == cluster_mean_score:
        if margins_with_gene[i_star] <= cluster_mean_margin:
            break

    cluster_index.append(i_star)
    cluster = np.concatenate([cluster, np.expand_dims(X[:, i_star], axis=1)], axis=1)
    cluster_mean_score = scores_with_gene[i_star]
    cluster_mean_margin = margins_with_gene[i_star]


In [18]:
cluster.shape, cluster_index

((100, 7), [1, 12, 3, 14, 11, 9, 13])

## step 6-7

In [12]:
# Backward search
# there is error

initial_cluster_mean
cluster_mean_score = score(initial_cluster_mean[y == 0], initial_cluster_mean[y == 1])
cluster_mean_margin = np.min(initial_cluster_mean[y == 1], axis=0) - np.max(
    initial_cluster_mean[y == 0], axis=0
)

while True:
    
    pass
cluster.shape,new_cluster_index

((100, 6), array([11,  2,  6, 14,  8, 13,  4,  1,  9, 10,  5], dtype=int64))

In [13]:
cluster_index

[11, 2, 6, 14, 8, 13, 7, 4, 1, 9, 10, 5]