## Step 1

In [1]:
import numpy as np
n = 5
p = 10

def generate_matrix(n, p):
  X = np.random.rand(n, p)
  means = np.mean(X, axis=0)
  stds = np.std(X, axis=0)
  X = (X - means) / stds
  y = np.random.randint(0, 2, p)
  return X,y

X,y = generate_matrix(n, p)

print("X", X)
print("y", y)

X [[ 0.17792082  1.17138209  0.80976414  1.4001992  -1.44197537  1.81236127
  -1.37434054  0.13901821  0.86217824  0.14698969]
 [-0.03588597 -0.70758008 -1.71563934  0.45089211 -0.50578642  0.36398456
  -0.6953512  -1.27082802 -0.76041604  1.06459005]
 [-1.23697768  0.62422059  1.12496505 -0.71951683  0.82475303 -0.70221634
   0.91452543  0.77732488 -0.91931364  1.07339804]
 [ 1.73858281  0.49176051 -0.34546561 -1.48181224  1.38433342 -0.77722206
   1.32728758 -0.98435264  1.52660473 -1.34533979]
 [-0.64363997 -1.57978311  0.12637576  0.35023776 -0.26132466 -0.69690742
  -0.17212127  1.33883756 -0.70905329 -0.93963799]]
y [1 0 1 1 1 1 1 0 0 1]


## Step 2

In [2]:
smax = 0.5

def determine_scores_and_flip(X, smax):
    scores = np.linalg.norm(X, axis=1)
    
    flip_indices = np.where(scores > smax / 2)[0]
    
    X_flipped = np.copy(X)
    X_flipped[flip_indices] = -X_flipped[flip_indices]
    
    flipped_scores = np.linalg.norm(X_flipped, axis=1)
    scores = np.minimum(scores, smax - flipped_scores)
    
    return X_flipped, scores


X_flipped, scores = determine_scores_and_flip(X, smax)

print("Flipped Expression Matrix:")
print(X_flipped)
print("\nScores:")
print(scores)


Flipped Expression Matrix:
[[-0.17792082 -1.17138209 -0.80976414 -1.4001992   1.44197537 -1.81236127
   1.37434054 -0.13901821 -0.86217824 -0.14698969]
 [ 0.03588597  0.70758008  1.71563934 -0.45089211  0.50578642 -0.36398456
   0.6953512   1.27082802  0.76041604 -1.06459005]
 [ 1.23697768 -0.62422059 -1.12496505  0.71951683 -0.82475303  0.70221634
  -0.91452543 -0.77732488  0.91931364 -1.07339804]
 [-1.73858281 -0.49176051  0.34546561  1.48181224 -1.38433342  0.77722206
  -1.32728758  0.98435264 -1.52660473  1.34533979]
 [ 0.64363997  1.57978311 -0.12637576 -0.35023776  0.26132466  0.69690742
   0.17212127 -1.33883756  0.70905329  0.93963799]]

Scores:
[-2.97233483 -2.30126502 -2.38344464 -3.36925953 -2.10967101]


## Step 3

In [3]:
C = np.random.rand(10, n)
def compute_starting_values(X, C=None):
    scores = np.linalg.norm(X, axis=1)

    if C is None:
        min_score = np.min(scores) 
        genes_with_min_score = np.where(scores == min_score)[0]

        if len(genes_with_min_score) > 1:
            margins = compute_margins(X, scores)
            max_margin_gene = genes_with_min_score[np.argmax(margins)]
            initial_cluster_mean = X[max_margin_gene]
        else:
            initial_cluster_mean = X[genes_with_min_score[0]]
    else:
        initial_cluster_mean = np.mean(C, axis=0)

    return initial_cluster_mean


def compute_margins(X, scores):
    norms = np.linalg.norm(X, axis=1)
    max_norm = np.max(norms)
    margins = (scores - (max_norm - norms)) / max_norm
    return margins


starting_values = compute_starting_values(X)
print("Initial Cluster Mean:")
print(starting_values)

starting_values_with_cluster = compute_starting_values(X, C=C)
print("\nInitial Cluster Mean with Given Cluster:")
print(starting_values_with_cluster)


Initial Cluster Mean:
[-0.64363997 -1.57978311  0.12637576  0.35023776 -0.26132466 -0.69690742
 -0.17212127  1.33883756 -0.70905329 -0.93963799]

Initial Cluster Mean with Given Cluster:
[0.48390733 0.45232286 0.42692797 0.61851177 0.32854953]


## Step 4

In [6]:
import numpy as np

def forward_search(X, C):
    p = X.shape[0]
    scores = np.zeros(p)

    for i in range(p):
        Ci_plus = (C + X[i]) / (len(C) + 1)
        scores[i] = compute_score(Ci_plus)

    min_score = np.min(scores) 
    winning_genes = np.where(scores == min_score)[0]

    if len(winning_genes) > 1:
        margins = compute_margins(X[winning_genes], scores[winning_genes])
        max_margin_gene = winning_genes[np.argmax(margins)]
        winning_gene = X[max_margin_gene]
    else:
        winning_gene = X[winning_genes[0]]

    return winning_gene, scores

def compute_score(C):
    return np.sum(C)


def compute_margins(X, scores):
    norms = np.linalg.norm(X, axis=1)
    max_norm = np.max(norms)
    margins = (scores - (max_norm - norms))
    return margins

winning_gene = forward_search(X, C)
print("Winning Gene:")
print(winning_gene)


ValueError: operands could not be broadcast together with shapes (10,5) (10,) 

## Step 5

In [5]:
def repeat_forward_search(X):
    p, n = X.shape
    C = np.zeros(n)
    accepted = True

    while accepted:
        winning_gene = forward_search(X, C)

        updated_score = compute_score((C + winning_gene) / (len(C) + 1))

        if updated_score > compute_score(C) or (
                updated_score == compute_score(C) and np.min(compute_margins(X, scores)) < np.min(compute_margins(X, [compute_score(C)]))):
            accepted = False
        else:
            C = (C + winning_gene) / (len(C) + 1)

    return C

final_cluster = repeat_forward_search(X)
print("Final Cluster Expression Profile:")
print(final_cluster)


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

## Step 6

In [24]:
def backward_search(X, C):
    p = X.shape[0]
    scores = np.zeros(p)

    for i in range(p):
        Ci_minus = np.delete(C, i, axis=0)
        Ci_minus_avg = np.mean(Ci_minus, axis=0)
        scores[i] = compute_score(Ci_minus_avg)

    min_score = np.min(scores)
    winning_genes = np.where(scores == min_score)[0]

    if len(winning_genes) > 1:
        margins = compute_margins(X[winning_genes], scores[winning_genes])
        max_margin_gene = winning_genes[np.argmax(margins)]
        winning_gene = X[max_margin_gene]
    else:
        winning_gene = X[winning_genes[0]]

    return winning_gene

winning_gene_backward = backward_search(X, C)

print("Winning gene (backward search):", winning_gene_backward)


Winning gene (backward search): [0.99197527 0.37211552 0.98458852 0.07418504 0.39711494 0.10028973
 0.45892427 0.52209353 0.64070543 0.27467434]
