In [1]:
from Bio import SeqIO
from Bio import AlignIO 
from enum import Enum

In [2]:
# Single-character symbols accepted by checkSequenceIsNucleicAcid ("-" is the gap mark).
nucleicAcids = list("ACGTURYKMSWBDHVNX-")
# Single-character symbols accepted by checkSequenceIsAminoAcid ("*" and "-" included).
aminoAcids = list("ABCDEFGHIKLMNOPQRSTUVWYZX*-")

class SequenceType(Enum):
    """Classification of a sequence, as returned by checkSequence."""
    # Every symbol of the sequence is in `nucleicAcids`.
    NUCLEIC_ACID = 1
    # Every symbol of the sequence is in `aminoAcids` (and the nucleic-acid check failed).
    AMINO_ACID = 2


In [34]:
def checkSequenceIsNucleicAcid(seq):
    """Return True when every element yielded by `seq` is in `nucleicAcids`.

    An empty `seq` yields no elements and therefore returns True.
    """
    return all(symbol in nucleicAcids for symbol in seq)

def checkSequenceIsAminoAcid(seq):
    """Return True when every element yielded by `seq` is in `aminoAcids`.

    An empty `seq` yields no elements and therefore returns True.
    """
    return all(symbol in aminoAcids for symbol in seq)

def checkSequence(seq):
    """Classify `seq` as a nucleic-acid or amino-acid sequence.

    Args:
        seq: iterable of single-character symbols that also exposes an `id`
            attribute (used only in the error message).

    Returns:
        SequenceType.NUCLEIC_ACID or SequenceType.AMINO_ACID.

    Raises:
        Exception: if `seq` contains a symbol outside both alphabets.
    """
    # Order matters: every symbol in `nucleicAcids` also appears in
    # `aminoAcids`, so the narrower nucleic-acid test has to run first.
    classifiers = (
        (checkSequenceIsNucleicAcid, SequenceType.NUCLEIC_ACID),
        (checkSequenceIsAminoAcid, SequenceType.AMINO_ACID),
    )
    for matches, kind in classifiers:
        if matches(seq):
            return kind
    raise Exception(f"Sequence not valid: {seq.id}")

def checkAllSameType(sequences):
    """Verify that every sequence has the same SequenceType and return it.

    Args:
        sequences: iterable of records accepted by checkSequence. It may be a
            one-shot iterator/generator; it is traversed exactly once.

    Returns:
        The SequenceType shared by all records.

    Raises:
        Exception: "Not found sequences" if `sequences` is falsy or yields no
            records; a "Sequence ... is not a ..." error for the first record
            whose type differs from the first one; whatever checkSequence
            raises for an invalid record.
    """
    if not sequences:
        raise Exception("Not found sequences")
    sequenceType = None
    for sequence in sequences:
        # Classify each record once (the first one used to be checked twice).
        currentType = checkSequence(sequence)
        if sequenceType is None:
            sequenceType = currentType
        elif currentType != sequenceType:
            raise Exception(f"Sequence {sequence.id} is not a {sequenceType}")
    # Generators are always truthy, so the guard above cannot detect an empty
    # one; without this check an empty iterator silently returned None.
    if sequenceType is None:
        raise Exception("Not found sequences")
    return sequenceType

In [4]:
def isAlignment(sequences):
    """Tell whether `sequences` already looks like an alignment.

    The records count as an alignment when they all have the same length and
    at least one of them contains the gap mark '-'.

    Args:
        sequences: iterable of records exposing a `seq` attribute that
            supports `len()` and `in` (e.g. Biopython SeqRecord objects).

    Returns:
        True if all lengths match and a '-' was found; False otherwise,
        including for empty input.
    """
    seqLength = None
    containsAlignmentMarks = False
    for sequence in sequences:
        # Biopython records expose their length through len(), not a
        # `length` attribute, so measure the sequence itself.
        currentLength = len(sequence.seq)
        if seqLength is None:
            # `is None` (not truthiness) so a zero-length first record is
            # still used as the reference length.
            seqLength = currentLength
        elif currentLength != seqLength:
            return False
        if not containsAlignmentMarks and '-' in sequence.seq:
            containsAlignmentMarks = True
    return containsAlignmentMarks


def readFasta(fileName, outputFileName="example.sth"):
    """Read a FASTA file, validate it, and report whether it is an alignment.

    All records must share one sequence type (see checkAllSameType). If the
    records do not already look like an alignment (see isAlignment), the file
    is re-read through AlignIO and written to `outputFileName`.

    Args:
        fileName: path of the FASTA file to read.
        outputFileName: path written when the input is not an alignment.
            Defaults to "example.sth", the previously hard-coded value.

    Raises:
        Exception: propagated from checkAllSameType for empty, invalid or
            mixed-type input. Errors raised by Biopython while parsing or
            writing are not caught here.
    """
    # SeqIO.parse yields records only once; materialise them so that both the
    # type check and the alignment check see the full set of records.
    sequences = list(SeqIO.parse(fileName, "fasta"))
    checkAllSameType(sequences)
    if not isAlignment(sequences):
        print("Not Alignment")
        # Context managers make sure both file handles are closed.
        with open(fileName) as input_handle, open(outputFileName, "w") as output_handle:
            sequences = AlignIO.parse(input_handle, "fasta")
            # NOTE(review): the default output name ends in ".sth" but the
            # data is written in "fasta" format - confirm the intended format.
            AlignIO.write(sequences, output_handle, "fasta")
    print("Alignment", sequences)


In [37]:
# Run the check on the sample input; "fasta.txt" is resolved relative to the working directory.
readFasta("fasta.txt")

Not Alignment
Alignment <generator object parse at 0x000001F263BA8848>
