In [3]:
from enum import Enum

AminoAcid = Enum("AminoAcid", "A R N D C E Q G H I L K M F P S T W Y V B Z X")

class Sequence:
    """Represents a sequence of amino acids (with no further semantic).
    It is suited to compute the best alignment between two sequences."""
    
    GAP = None
    def __init__(self, filename, sequenceId):
        """Constructor. Loads the sequence from a file.
        
        Parameters:
            filename: the filename of the file containing the sequence.
            sequenceId: the identifier of the sequence in the file.
                This is the string between the two first bars "|" on the line preceding the sequence.
            """
        self.loadFromFile(filename, sequenceId)
    
    def insertAminoAcid(self, aminoAcid, index):
        """Adds an amino acid to the sequence at the specific index."""
        self.sequence.insert(index, aminoAcid)
    
    def insertGap(self, index):
        """Inserts a gap in the sequence at the specified index."""
        self.sequence.insert(index, self.GAP)
    
    def getElement(self, index):
        """Returns the elements at the specified index."""
        return self.sequence[index]

    def loadFromFile(self, filename, sequenceId):
        """Loads the sequence data from a file.
        
        Parameters:
            filename: the filename of the file containing the sequence.
            sequenceId: the identifier of the sequence in the file.
                This is the string between the two first bars "|" on the line preceding the sequence.
        """
        found = False
        with open(filename) as file:
            for line in file:
                # If we found the needed identifier line
                if line[0] == ">" and line.split("|")[1] == sequenceId:
                    found = True
                    sequence = file.readline()
                    break

        if not found:
            raise RuntimeError("Sequence not found in the specified file.")

        try:
            sequence = [AminoAcid[character] for character in sequence if not character.isspace()]
        except KeyError as e:
            raise RuntimeError("An amino acid is not valid in the sequence: " + str(e))

        # We didn't worked on self.sequence directly to preserve the internal state in case of failure
        self.sequence = sequence
    
class Score:
    """Class used to compute the score of substitution between two amino acids, by using well known matrix
    such as the BLOSUM."""
    
    def __init__(self, filename):
        """Contructor. Loads the matrix from a file.
        
        Parameter:
            filename: the filename of the matrix to load. It must be a .iij file as given in the blosum archive.
        """
        self.loadFromFile(filename)
    
    def getScore(self, aminoAcidA, aminoAcidB):
        """Returns the score of matching between two amino acids according to the loaded matrix."""
        indexA = self.indices.index(aminoAcidA)
        indexB = self.indices.index(aminoAcidB)
        
        # The matrix is triangular, so we need to test the two possibilities for indexing
        # since one of the two would result in an index error
        if len(self.matrix[indexA]) > indexB:
            return self.matrix[indexA][indexB]
        else:
            return self.matrix[indexB][indexA]
    
    def loadFromFile(self, filename):
        """Loads a substitution matrix from a file.
        
        Parameter:
            filename: the filename of the matrix to load. It must be a .iij file as given in the blosum archive.
        """
        self.matrix = []
        self.indeices = []
        
        with open(filename) as file:
            foundAminoAcidList = False
            for line in file:
                # Skip commentary lines
                if line.strip().startswith("#"):
                    continue
                
                if not foundAminoAcidList:
                    # The first non-commentary line is the list of amino acids
                    # We reload this list because some files may order the acids differently
                    self.indices = [AminoAcid[character] for character in line.split()]
                    foundAminoAcidList = True
                else:
                    self.matrix.append([float(number) for number in line.split()])
                

In [6]:
score = Score("blosum/blosum40.iij")
sequence = Sequence("SH3-sequence.fasta", "Q8IZP0")
print(sequence.sequence)

RuntimeError: An amino acid is not valid in the sequence: '\n'