In [1]:
import numpy as np
from simanneal import Annealer
from scipy.spatial import distance
import random
import time

class BinaryVectorOptimization(Annealer):
    """Anneal a flat 0/1 state so its vectors match a target distance matrix.

    The state is a flat sequence of ``dim * num_vectors`` bits. Before calling
    ``anneal()`` the caller must assign the target pairwise distance matrix to
    ``self.distance_matrix``; its number of rows tells ``energy`` how many
    vectors the state encodes.
    """

    def move(self):
        """Perform a single mutation: flip one randomly chosen bit in place."""
        idx = random.randint(0, len(self.state) - 1)
        self.state[idx] = not self.state[idx]

    def energy(self):
        """Return the squared error between achieved and target distances.

        The state is reshaped to ``(dim, num_vectors)`` and transposed so that
        each row is one binary vector; pairwise Hamming distances between the
        rows are then compared element-wise with ``self.distance_matrix``.

        Raises:
            ValueError: if the state length is not a multiple of the number
                of vectors.
        """
        target = np.asarray(self.distance_matrix, dtype=float)
        state_len = len(self.state)
        if target.ndim == 2:
            # The target has one row per vector, so it fixes the vector count
            # even when dim != num_vectors (a square-root guess only works
            # when the two happen to be equal).
            num_vectors = target.shape[0]
        else:
            # No usable shape information: keep the historical assumption of
            # a square layout (dim == num_vectors).
            num_vectors = int(np.sqrt(state_len))
        if num_vectors == 0 or state_len % num_vectors:
            raise ValueError(
                f"state of length {state_len} cannot be split into "
                f"{num_vectors} equally sized vectors"
            )
        dim = state_len // num_vectors
        binary_vectors = np.transpose(np.reshape(self.state, (dim, num_vectors)))
        distances = distance.squareform(distance.pdist(binary_vectors, 'hamming'))
        return np.sum((distances - target) ** 2)


def generate_initial_state(dim, num_vectors):
    """Return a random flat array of 0/1 values of length ``dim * num_vectors``."""
    total_bits = dim * num_vectors
    return np.random.randint(0, 2, size=total_bits)


def find_best_binary_vectors_sa(dim, num_vectors, distance_matrix, num_trials):
    """Run independent annealing trials and keep the lowest-error result.

    Each trial starts from a fresh random state, anneals it against
    ``distance_matrix``, and scores the final state by the summed squared
    difference between its pairwise Hamming distances and the target.

    Returns:
        A tuple ``(vectors, calculated_distances, error)`` for the best trial,
        where ``vectors`` has one binary vector per row. If no trial runs
        (``num_trials`` of 0), the tuple is ``(None, None, inf)``.
    """
    winner = (None, None, float('inf'))

    for _ in range(num_trials):
        annealer = BinaryVectorOptimization(generate_initial_state(dim, num_vectors))
        annealer.distance_matrix = distance_matrix
        final_state, _energy = annealer.anneal()

        # One row per vector, same layout the annealer's energy function uses.
        vectors = np.reshape(final_state, (dim, num_vectors)).T
        achieved = distance.squareform(distance.pdist(vectors, 'hamming'))
        sq_error = np.sum((achieved - distance_matrix) ** 2)

        if sq_error < winner[2]:
            winner = (vectors, achieved, sq_error)

    return winner


# One-letter amino-acid codes, in alphabetical order (20 entries).
AA_list = list('ACDEFGHIKLMNPQRSTVWY')

# simmat and simmatblo are assumed to already exist: the similarity matrices
# for GRANTHAM1974 and BLOSUM62 respectively, indexable by a pair of codes.
# Each inner list fixes the second index and sweeps the first over AA_list.
grantham = [[simmat[first, second] for first in AA_list] for second in AA_list]
blosum = [[simmatblo[first, second] for first in AA_list] for second in AA_list]

def norm_mat(matrix, zero=False):
    """Linearly rescale ``matrix`` so that its maximum maps to 1.

    Args:
        matrix: array-like of numbers (a NumPy array or nested lists).
        zero: when true, use 0 as the lower anchor instead of the matrix
            minimum, so values are simply divided by the maximum.

    Returns:
        A float NumPy array ``(matrix - low) / (max - low)`` where ``low`` is
        0 if ``zero`` is set and ``min(matrix)`` otherwise. If ``max`` equals
        ``low`` the division is by zero and the result is not finite.
    """
    # Coerce first: a plain nested list does not support ``list - 0``, which
    # made the ``zero=True`` path fail for list input.
    values = np.asarray(matrix, dtype=float)
    min_value = 0 if zero else np.min(values)
    max_value = np.max(values)
    return (values - min_value) / (max_value - min_value)

# Turn the normalised BLOSUM similarities into distances: subtract 1 and take
# the absolute value, then force every self-distance on the diagonal to 0.
norm_blosum = np.absolute(norm_mat(blosum) - 1)
np.fill_diagonal(norm_blosum, 0)
# Grantham values are scaled against 0 rather than against their minimum.
norm_grant = norm_mat(grantham, zero=True)

# Generate binary vectors using the desired pairwise similarities.
distance_matrix = norm_blosum
dim, num_vectors, num_trials = 10000, 20, 1

start = time.time()
best_vectors, best_calculated_distance_matrix, best_error = find_best_binary_vectors_sa(
    dim=dim,
    num_vectors=num_vectors,
    distance_matrix=distance_matrix,
    num_trials=num_trials,
)

# Report the target, the achieved distances, the error and the vectors.
print("Desired pairwise similarities (BLOSUM BIGG):", norm_blosum, sep="\n")
print("\nBest calculated pairwise similarities:", best_calculated_distance_matrix, sep="\n")
print(f"\nBest error: {best_error}", best_vectors, sep="\n")

print(f"\nTime taken: {time.time() - start}")


NameError: name 'simmat' is not defined