## Archivo:

### Header

 * GenBank                           gi|gi-number|gb|accesión|location

 * EMBL Data Library                 gi|gi-number|emb|accesión|location
 
 * DDBJ, DNA Database of Japan       gi|gi-number|dbj|accesión|location (lat=xXx,lng=xXx)
 
https://classroom.google.com/u/0/c/NTQ5MzQ3MDU2NzNa/a/MTA5Mjg2OTUyOTc0/details

In [93]:
from enum import Enum
import re

class SequenceType(Enum):
    """Kinds of biological sequence distinguished in this file."""

    # Sequence written with nucleotide symbols.
    NUCLEIC_ACID = 1

    # Sequence written with amino-acid symbols.
    AMINO_ACID = 2

class Header:
    """Parsed form of a pipe-delimited sequence identifier.

    The identifier is split on ``|`` and must yield at least six fields.
    Field 0 is ignored; fields 1-5 become ``genBankId``, ``db``,
    ``accession``, ``locus`` and ``location`` respectively, the last one
    being parsed into a ``Location`` object.
    """

    def __init__(self, seqHeader):
        """Split *seqHeader* and populate the header attributes.

        Args:
            seqHeader: the raw identifier string of a sequence record.

        Raises:
            Exception: "Error on Header: ..." when fewer than six
                ``|``-separated fields are present, or
                "Error on Location: ..." when the sixth field cannot be
                parsed as a location (the underlying error is chained as
                ``__cause__``).
        """
        fields = seqHeader.split('|')
        if len(fields) < 6:
            raise Exception(f"Error on Header: {seqHeader}")
        self.genBankId = fields[1]
        self.db = fields[2]
        self.accession = fields[3]
        self.locus = fields[4]
        try:
            self.location = Location(fields[5])
        except Exception as err:
            # Chain the original failure so the root cause stays visible
            # in the traceback instead of being replaced by this message.
            raise Exception(f"Error on Location: {seqHeader}") from err

    def __repr__(self):
        """Return the five parsed fields joined by ' - '."""
        return f"{self.genBankId} - {self.db} - {self.accession} - {self.locus} - {self.location}"

class Location:
    """Latitude/longitude pair extracted from a ``lat=...,lng=...`` string.

    Both coordinates are kept as the matched text (strings), not converted
    to numbers.
    """

    def __init__(self, location):
        """Find the first ``lat=<value>,lng=<value>`` occurrence in *location*.

        Each value is an optional sign followed by any run of digits, dots
        and commas, so negative coordinates such as ``lat=-34.6`` are
        accepted.

        Args:
            location: text containing the coordinates, e.g.
                ``"(lat=12.5,lng=-3.25)"``.

        Raises:
            Exception: "Location not found" when the pattern is absent.
        """
        match = re.search(r'lat=([-+]?[0-9.,]*),lng=([-+]?[0-9.,]*)', location)
        if match is None:
            raise Exception("Location not found")
        self.lat = match.group(1)
        self.lng = match.group(2)

    def __repr__(self):
        """Return the coordinates as ``(lat=<lat>, lng=<lng>)``."""
        return f"(lat={self.lat}, lng={self.lng})"

class Sequence:
    """A sequence record together with its parsed header and detected type."""

    def __init__(self, seq):
        """Build the wrapper from a record exposing ``id`` and ``seq``.

        The record is classified first, then its ``id`` is parsed into a
        ``Header``; either step may raise, in that order.
        """
        self.sequenceType = checkSequence(seq)
        self.header = Header(seq.id)
        self.seq = seq.seq

    def size(self):
        """Return the number of symbols in the stored sequence."""
        return len(self.seq)

    def __repr__(self):
        """Return header, type and sequence separated by ' | '."""
        template = "Header: {} | Type: {} | Seq: {}"
        return template.format(self.header, self.sequenceType, self.seq)


In [103]:
# Single-letter symbols accepted in a nucleic-acid sequence ('-' included).
nucleicAcids = list("ACGTURYKMSWBDHVNX-")
# Single-letter symbols accepted in an amino-acid sequence ('*' and '-' included).
aminoAcids = list("ABCDEFGHIKLMNOPQRSTUVWYZX*-")

def checkSequenceIsNucleicAcid(seq):
    """Return True when every element of *seq* is listed in ``nucleicAcids``.

    An empty *seq* yields True, since no element violates the alphabet.
    """
    return all(symbol in nucleicAcids for symbol in seq)

def checkSequenceIsAminoAcid(seq):
    """Return True when every element of *seq* is listed in ``aminoAcids``.

    An empty *seq* yields True, since no element violates the alphabet.
    """
    return all(symbol in aminoAcids for symbol in seq)

def checkSequence(seq):
    """Classify *seq* as nucleic acid or amino acid.

    The nucleic-acid alphabet is tried first, so a sequence valid under
    both alphabets is reported as ``SequenceType.NUCLEIC_ACID``.

    Raises:
        Exception: "Sequence not valid: <id>" when neither alphabet fits;
            the message reads ``seq.id``.
    """
    # Ordered (validator, result) pairs; the first validator that accepts wins.
    classifiers = (
        (checkSequenceIsNucleicAcid, SequenceType.NUCLEIC_ACID),
        (checkSequenceIsAminoAcid, SequenceType.AMINO_ACID),
    )
    for accepts, kind in classifiers:
        if accepts(seq):
            return kind
    raise Exception(f"Sequence not valid: {seq.id}")

def isAlignment(sequences):
    """Tell whether *sequences* look like an alignment.

    The collection counts as an alignment when all sequences have the
    same size and at least one of them contains an alignment mark
    ('X', '*' or '-').

    Args:
        sequences: list of objects exposing ``size()`` and a ``seq``
            attribute that supports the ``in`` operator.

    Returns:
        True for an alignment, False otherwise (including an empty list).
    """
    if not sequences:
        # Nothing to compare; previously this raised IndexError.
        return False

    expectedLength = sequences[0].size()
    if any(sequence.size() != expectedLength for sequence in sequences):
        return False

    # Each mark is tested on its own: ('X' or '*' or '-') evaluates to
    # just 'X', which silently ignored '*' and '-'.
    alignmentMarks = ('X', '*', '-')
    return any(
        mark in sequence.seq
        for sequence in sequences
        for mark in alignmentMarks
    )


In [81]:
from Bio import SeqIO

In [82]:
def readFasta(fileName):
    """Parse a FASTA file and report when it is not an alignment.

    Every record yielded by ``SeqIO.parse(fileName, "fasta")`` is wrapped
    in a ``Sequence``; errors raised while building a ``Sequence``
    propagate to the caller.

    Args:
        fileName: path of the FASTA file to read.

    Returns:
        None. Prints "not Aligment" when the records do not form an
        alignment.
    """
    sequences = [Sequence(record) for record in SeqIO.parse(fileName, "fasta")]
    # The alignment branch had no body in the original (a syntax error) and
    # no behaviour is defined for it yet, so only the negative case acts.
    if not isAlignment(sequences):
        print("not Aligment")

In [104]:
# Run the reader on the sample file; it prints a message when the file
# is not an alignment.
readFasta("1fasta.txt")

not Aligment
