# In [None]:
import pandas as pd

# In [None]:
import hashlib
from Bio.Align import substitution_matrices
from Bio.SubsMat import MatrixInfo

def calculate_blosum_similarity(seq1, seq2, matrix):
    """Return the summed substitution score of two sequences, position by position.

    Residues are paired with ``zip``, so only the first
    ``min(len(seq1), len(seq2))`` positions contribute to the total.

    Args:
        seq1: First residue sequence.
        seq2: Second residue sequence.
        matrix: Container keyed by ``(residue, residue)`` tuples. A pair that
            is not present as ``(a, b)`` is looked up as ``(b, a)`` instead,
            which lets a matrix that stores only one orientation be used.

    Returns:
        The sum of the per-position scores (``0`` when no positions pair up).

    Raises:
        KeyError: From the reversed lookup on a dict-like ``matrix`` when the
            pair is stored under neither orientation.
    """
    return sum(
        matrix[(left, right)] if (left, right) in matrix else matrix[(right, left)]
        for left, right in zip(seq1, seq2)
    )

def generate_sequence_hash(sequence:str, matrix):
    """Hash a sequence by the summed substitution scores of its adjacent residues.

    Every consecutive residue pair ``(sequence[i], sequence[i + 1])`` is scored
    with ``matrix``; the scores are summed and the SHA-256 hex digest of the
    total's decimal string is returned. Only the total is hashed, so different
    sequences with the same total produce the same digest.

    Args:
        sequence: Residue string. With fewer than two residues there are no
            adjacent pairs and the total is ``0``.
        matrix: Mapping from ``(residue, residue)`` tuples to scores. A pair
            missing as ``(a, b)`` is retried as ``(b, a)``, so half matrices
            that store each unordered pair once are supported.

    Returns:
        The hexadecimal SHA-256 digest of ``str(total)``.

    Raises:
        KeyError: If a pair is stored under neither orientation.
    """

    def pair_score(first, second):
        # Fall back to the mirrored key: a half matrix holds only one
        # orientation of each unordered pair.
        try:
            return matrix[(first, second)]
        except KeyError:
            return matrix[(second, first)]

    # zip against the one-shifted sequence walks the adjacent pairs.
    total = sum(pair_score(a, b) for a, b in zip(sequence, sequence[1:]))
    return hashlib.sha256(str(total).encode()).hexdigest()

# Two demo peptides of equal length, compared position by position below.
sequence1, sequence2 = "MDSRGEWVG", "MDSPLEWVK"

# BLOSUM62 scores as exposed by the MatrixInfo module.
blosum62 = MatrixInfo.blosum62

# Position-wise substitution score between the two peptides.
similarity_score = calculate_blosum_similarity(sequence1, sequence2, blosum62)

# One digest per peptide, derived from its adjacent-residue scores.
hash1 = generate_sequence_hash(sequence1, blosum62)
hash2 = generate_sequence_hash(sequence2, blosum62)

# Report each result as a "label value" line, in a fixed order.
for _label, _value in (
    ("Similarity Score:", similarity_score),
    ("Hash of Sequence 1:", hash1),
    ("Hash of Sequence 2:", hash2),
):
    print(_label, _value)