## Step 1

In [2]:
import numpy as np
n = 5
p = 10

def generate_matrix(n, p):
  X = np.random.rand(n, p)
  means = np.mean(X, axis=0)
  stds = np.std(X, axis=0)
  X = (X - means) / stds
  y = np.random.randint(0, 2, p)
  return X,y

X,y = generate_matrix(n, p)

print("X", X)
print("y", y)

X [[ 0.91713176 -0.90243893  1.44528202  0.52352804 -1.83636012 -0.26904765
   1.28346913  0.2490139   1.02789573  1.21501068]
 [ 0.2025701   1.94689289 -0.14614529  0.47173717  0.42828842  0.24825837
  -0.38479923 -1.48500035 -1.72938369 -1.51412506]
 [-1.92477155 -0.20526708 -0.30084902 -1.45804949  0.06126227  0.94735186
  -1.3584403   0.48744529 -0.42778408 -0.6483552 ]
 [ 0.61350943 -0.39768265  0.57347027 -0.83406305  1.18995562  0.86699058
  -0.56181615  1.42665031  0.30769682  0.89915257]
 [ 0.19156026 -0.44150424 -1.57175798  1.29684732  0.15685381 -1.79355316
   1.02158656 -0.67810916  0.82157523  0.048317  ]]
y [0 1 1 1 0 1 0 1 0 1]


## Step 2

In [3]:
smax = 0.5

def determine_scores_and_flip(X, smax):
    scores = np.linalg.norm(X, axis=1)
    
    flip_indices = np.where(scores > smax / 2)[0]
    
    X_flipped = np.copy(X)
    X_flipped[flip_indices] = -X_flipped[flip_indices]
    
    flipped_scores = np.linalg.norm(X_flipped, axis=1)
    scores = np.minimum(scores, smax - flipped_scores)
    
    return X_flipped, scores


X_flipped, scores = determine_scores_and_flip(X, smax)

print("Flipped Expression Matrix:")
print(X_flipped)
print("\nScores:")
print(scores)


Flipped Expression Matrix:
[[-0.91713176  0.90243893 -1.44528202 -0.52352804  1.83636012  0.26904765
  -1.28346913 -0.2490139  -1.02789573 -1.21501068]
 [-0.2025701  -1.94689289  0.14614529 -0.47173717 -0.42828842 -0.24825837
   0.38479923  1.48500035  1.72938369  1.51412506]
 [ 1.92477155  0.20526708  0.30084902  1.45804949 -0.06126227 -0.94735186
   1.3584403  -0.48744529  0.42778408  0.6483552 ]
 [-0.61350943  0.39768265 -0.57347027  0.83406305 -1.18995562 -0.86699058
   0.56181615 -1.42665031 -0.30769682 -0.89915257]
 [-0.19156026  0.44150424  1.57175798 -1.29684732 -0.15685381  1.79355316
  -1.02158656  0.67810916 -0.82157523 -0.048317  ]]

Scores:
[-2.92128266 -2.9578929  -2.59044557 -2.14213225 -2.63146691]


## Step 3

In [4]:
C = np.random.rand(10, n)
def compute_starting_values(X, C=None):
    scores = np.linalg.norm(X, axis=1)

    if C is None:
        min_score = np.min(scores) 
        genes_with_min_score = np.where(scores == min_score)[0]

        if len(genes_with_min_score) > 1:
            margins = compute_margins(X, scores)
            max_margin_gene = genes_with_min_score[np.argmax(margins)]
            initial_cluster_mean = X[max_margin_gene]
        else:
            initial_cluster_mean = X[genes_with_min_score[0]]
    else:
        initial_cluster_mean = np.mean(C, axis=0)

    return initial_cluster_mean


def compute_margins(X, scores):
    norms = np.linalg.norm(X, axis=1)
    max_norm = np.max(norms)
    margins = (scores - (max_norm - norms)) / max_norm
    return margins


starting_values = compute_starting_values(X)
print("Initial Cluster Mean:")
print(starting_values)

starting_values_with_cluster = compute_starting_values(X, C=C)
print("\nInitial Cluster Mean with Given Cluster:")
print(starting_values_with_cluster)


Initial Cluster Mean:
[ 0.61350943 -0.39768265  0.57347027 -0.83406305  1.18995562  0.86699058
 -0.56181615  1.42665031  0.30769682  0.89915257]

Initial Cluster Mean with Given Cluster:
[0.54905875 0.54055961 0.49127443 0.63613172 0.48151707]


## Step 4

In [5]:
import numpy as np

def forward_search(X, C):
    p = X.shape[0]
    scores = np.zeros(p)

    for i in range(p):
        Ci_plus = (C + X[i]) / (len(C) + 1)
        scores[i] = compute_score(Ci_plus)

    min_score = np.min(scores) 
    winning_genes = np.where(scores == min_score)[0]

    if len(winning_genes) > 1:
        margins = compute_margins(X[winning_genes], scores[winning_genes])
        max_margin_gene = winning_genes[np.argmax(margins)]
        winning_gene = X[max_margin_gene]
    else:
        winning_gene = X[winning_genes[0]]

    return winning_gene, scores

def compute_score(C):
    return np.sum(C)


def compute_margins(X, scores):
    norms = np.linalg.norm(X, axis=1)
    max_norm = np.max(norms)
    margins = (scores - (max_norm - norms))
    return margins

winning_gene = forward_search(X, C)
print("Winning Gene:")
print(winning_gene)


ValueError: operands could not be broadcast together with shapes (10,5) (10,) 

## Step 5

In [6]:
def repeat_forward_search(X):
    p, n = X.shape
    C = np.zeros(n)
    accepted = True

    while accepted:
        winning_gene = forward_search(X, C)

        updated_score = compute_score((C + winning_gene) / (len(C) + 1))

        if updated_score > compute_score(C) or (
                updated_score == compute_score(C) and np.min(compute_margins(X, scores)) < np.min(compute_margins(X, [compute_score(C)]))):
            accepted = False
        else:
            C = (C + winning_gene) / (len(C) + 1)

    return C

final_cluster = repeat_forward_search(X)
print("Final Cluster Expression Profile:")
print(final_cluster)


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

## Step 6

In [7]:
def backward_search(X, C):
    p = X.shape[0]
    scores = np.zeros(p)

    for i in range(p):
        Ci_minus = np.delete(C, i, axis=0)
        Ci_minus_avg = np.mean(Ci_minus, axis=0)
        scores[i] = compute_score(Ci_minus_avg)

    min_score = np.min(scores)
    winning_genes = np.where(scores == min_score)[0]

    if len(winning_genes) > 1:
        margins = compute_margins(X[winning_genes], scores[winning_genes])
        max_margin_gene = winning_genes[np.argmax(margins)]
        winning_gene = X[max_margin_gene]
    else:
        winning_gene = X[winning_genes[0]]

    return winning_gene

winning_gene_backward = backward_search(X, C)

print("Winning gene (backward search):", winning_gene_backward)


Winning gene (backward search): [ 0.19156026 -0.44150424 -1.57175798  1.29684732  0.15685381 -1.79355316
  1.02158656 -0.67810916  0.82157523  0.048317  ]
