In [1]:
import numpy as np
from enum import Enum
from IPython.core.display import HTML,display
import pandas as pd

# TP final Heurística GRASP para MSA

## Needleman-Wunsch del TP anterior

In [2]:
class op(Enum):
    MA_MM = 0
    GAP_A = 1
    GAP_B = 2
    EMPTY = 3


In [3]:
score_mtx = np.full((4,4),-1) + np.identity(4) *2
score_mtx

array([[ 1., -1., -1., -1.],
       [-1.,  1., -1., -1.],
       [-1., -1.,  1., -1.],
       [-1., -1., -1.,  1.]])

In [4]:
amino= "ACGT"

### TRACEBACK

In [5]:

def traceback(solution_table, seqA, seqB):
    seqA_align = ""
    seqB_align = ""
    i = len(seqA)
    j = len(seqB)
    while i > 0 or j > 0:
        if solution_table[i, j] == op.GAP_A:
            seqA_align = "-" + seqA_align
            seqB_align = seqB[j-1] + seqB_align
            j -= 1
        elif solution_table[i, j] == op.GAP_B:
            seqA_align = seqA[i-1] + seqA_align
            seqB_align = "-" + seqB_align
            i -= 1
        else: # es decir, match / mismatch
            i -= 1
            j -= 1
            seqA_align = seqA[i] + seqA_align
            seqB_align = seqB[j] + seqB_align
    return seqA_align, seqB_align



### Needleman-Wunsch

In [6]:
def nw(seqA_str, seqB_str, score_mtx, gap_score = 0):
    dim_A = len(seqA_str)
    dim_B = len(seqB_str)
    
    table = get_init_value_table(dim_A,dim_B,gap_score)
    memorization_table = get_init_memo_table(dim_A,dim_B)
    for i in range(1,dim_A+1):
        for j in range(1,dim_B+1):
            aminoacid_A = amino.index(seqA_str[i-1])
            aminoacid_B =amino.index(seqB_str[j-1])
            
            match_or_mismatch_value = table[i-1][j-1] + score_mtx[aminoacid_A][aminoacid_B]
            
            gap_A_value = table[i][j-1] + gap_score
            gap_B_value = table[i-1][j] + gap_score
            values = [match_or_mismatch_value, gap_A_value, gap_B_value]
            index_min = max(range(len(values)), key=values.__getitem__)
            table[i][j] = values[index_min]
                        
            memorization_table[i][j] = op(index_min)                     
            
    aln_a, aln_b = traceback(memorization_table,seqA_str, seqB_str)
    return aln_a, aln_b, table[dim_A, dim_B]


def get_init_memo_table(dim_A,dim_B):
    table = np.full((dim_A+1,dim_B+1),op.EMPTY)
    for i in range(1,dim_B+1):
        table[0][i] = op.GAP_A
    for j in range(1,dim_A+1):
        table[j][0] = op.GAP_B
    return table
    

def get_init_value_table(x,y,gap_score):
    table = np.zeros((x+1, y+1))
    for i in range(0,y+1):
        table[0,i] = gap_score * i
    for j in range(0,x+1): 
        table[j,0] = gap_score * j
    return table



-------

# Profile

La clase Profile contendra:
- El conteo de los nucleotidos de las secuencias alineadas
- Una lista con todas las alineaciones al momento
- La cantidad de alineaciones alojadas en profile
- El tamaño de las secuencias alojadas
- Un parser sobre nucleotidos.

In [7]:
class Profile:

    def __init__(self):
        self.counter = [[],[],[],[]]
        self.sequences = []
        self.size_sequences= 0
        self.seq_numbers = 0
        self.parser= {"A":0,"C":1,"G":2,"T":3}

    def add_sequence(self, sequence):
        if(self.size_sequences < len(sequence)):
            for a in range(0,4):
                self.counter[a] = self.counter[a] + ([0] * (len(sequence)-self.size_sequences))
            self.size_sequences= len(sequence)
        self.seq_numbers += 1
        self.sequences.append(sequence)
        for idx, val in enumerate(sequence):
            if(val != "-"):
                self.counter[self.parser[val]][idx] += 1
    
    def add_gap_counter(self, position):
        for idx, seq in enumerate(self.sequences):
            self.sequences[idx] = self.insert_gap(seq,position)
        for a in range(0,4):
            self.counter[a].insert(position,0)
        self.size_sequences += 1
    
    def insert_gap(self, a_string, position):
        return a_string[:position] + "-" + a_string[position:]
    
    def get_count_position(self,position):
        count = []
        for a in range(0,4):
            count.append( self.counter[a][position] / self.seq_numbers)
        return tuple(count)
        #return self.counter[0,position], self.counter[1,position], self.counter[2,position], self.counter[3,position]
        
    def print_seq(self):
        print(np.array(self.sequences).reshape(self.seq_numbers,1))
        display(headers_table(np.array(self.counter),"ACGT",range(1,self.size_sequences+1)))
        
    def print_seq_period(self,i,f):
        print(np.array(self.sequences).reshape(self.seq_numbers,1))
        display(headers_table(np.array(self.counter)[i:f],"ACGT",range(1,self.size_sequences+1)[i:f]))
        
            
def headers_table(table, header,col_header):
    row_labels = [label for label in header]
    column_labels = [label for label in col_header]
    df = pd.DataFrame(table,index=row_labels,columns=column_labels)
    table_html = df.to_html()
    return HTML(table_html)

#### Pruebas sobre profile

In [8]:
p = Profile()
p.add_sequence("TA-TA")
p.add_sequence("T-CT")

p.add_gap_counter(2)
p.add_gap_counter(5)
p.print_seq()
p.size_sequences
p.get_count_position(0)

[['TA--T-A']
 ['T--CT-']]


Unnamed: 0,1,2,3,4,5,6,7
A,0,1,0,0,0,0,1
C,0,0,0,1,0,0,0
G,0,0,0,0,0,0,0
T,2,0,0,0,2,0,0


(0.0, 0.0, 0.0, 1.0)

In [9]:
a_string= [0,0,0,0,0]
a_string.insert(2,2)
a_string

[0, 0, 2, 0, 0, 0]

### Needleman-Wunsch MSA

In [10]:
def nw_msa(seq_str, profile, score_mtx, gap_score = 0):
    dim_seq = len(seq_str)
    dim_profile = profile.size_sequences
    
    table = get_init_value_table(dim_seq,dim_profile,gap_score)
    memorization_table = get_init_memo_table(dim_seq,dim_profile)
    for i in range(1,dim_seq+1):
        for j in range(1,dim_profile+1):
            nucleotide = nucleotide_index(seq_str[i-1])
            
            a, c, g, t = profile.get_count_position(j-1)
            score_position = a * score_mtx[nucleotide][nucleotide_index("A")] + c * score_mtx[nucleotide][nucleotide_index("C")]+ g * score_mtx[nucleotide][nucleotide_index("G")]+ t * score_mtx[nucleotide][nucleotide_index("T")]
            
            match_or_mismatch_value = table[i-1][j-1] + score_position
            
            gap_seq_value = table[i][j-1] + gap_score
            gap_profile_value = table[i-1][j] + gap_score
            values = [match_or_mismatch_value, gap_seq_value, gap_profile_value]
            index_min = max(range(len(values)), key=values.__getitem__)
            table[i][j] = values[index_min]
                        
            memorization_table[i][j] = op(index_min)                     
    aln_seq, gaps_profile = traceback_msa(memorization_table,seq_str, profile)
    return aln_seq, gaps_profile, table[dim_seq, dim_profile]

nucleotides= "ACGT"

def nucleotide_index(nucleotide):
    return nucleotides.index(nucleotide)

### traceback msa

In [11]:

def traceback_msa(solution_table, seq, profile):
    seq_align = ""
    gaps_profile = []
    i = len(seq)
    j = profile.size_sequences
    while i > 0 or j > 0:
        if solution_table[i, j] == op.GAP_A:
 
            seq_align = "-" + seq_align
            j -= 1
        elif solution_table[i, j] == op.GAP_B:
            seq_align = seq[i-1] + seq_align
            gaps_profile.append(i-1)
            i -= 1
        else: # es decir, match / mismatch
            i -= 1
            j -= 1
            seq_align = seq[i] + seq_align
    return seq_align, gaps_profile

### Pruebas sobre Needleman-Wunsch MSA

In [12]:
seq1= "TATA"
seq2= "TCT"
alig1,alig2,score = nw(seq1,seq2,score_mtx,0)

In [13]:
p = Profile()

p.add_sequence(alig1) #"TA-TA"
p.add_sequence(alig2) #"T-CT-"

p.print_seq()

[['TA-TA']
 ['T-CT-']]


Unnamed: 0,1,2,3,4,5
A,0,1,0,0,1
C,0,0,1,0,0
G,0,0,0,0,0
T,2,0,0,2,0


In [14]:
seq_align, gaps_profile, score = nw_msa("TTTT", p, score_mtx, -1)

for position_gap in gaps_profile:
    p.add_gap_counter(position_gap)
p.add_sequence(seq_align)

p.print_seq()

[['TA-TA']
 ['T-CT-']
 ['T-TTT']]


Unnamed: 0,1,2,3,4,5
A,0,1,0,0,1
C,0,0,1,0,0
G,0,0,0,0,0
T,3,0,1,3,1


### Prueba datos reales

cargamos los datos de un archivo y lo parseamos para obtener solo sus secuencias.

In [15]:
def read_fasta(fp):
        name, seq = None, []
        for line in fp:
            line = line.rstrip()
            if line.startswith(">"):
                if name: yield (name, ''.join(seq))
                name, seq = line, []
            else:
                seq.append(line)
        if name: yield (name, ''.join(seq))
seqs = []
with open('10.fas') as fp:
    for name, seq in read_fasta(fp):
        seqs.append(seq)
seqs

['GCGGGTCACTGAGGGCTGGGATGAGGACGGCCACCACTTCGAGGAGTCCCTTCACTACGAGGGCAGGGCCGTGGACATCACCACGTCAGACAGGGACAAGAGCAAGTACGGCACCCTGTCCAGACTGGCGGTGGAAGCTGGGTTCGACTGGGTCTACTATGAGTCCAAAGCGCACATCCACTGCTCTGTGAAAGCAGAAAGCTCAGTCGCTGCAAAGTCGGGCGGTTGCTTCCCAGGATCCTCCACGGTCACCCTGGAAAATGGCACCCAGAGGCCCGTCAAAGATCTCCAACCCGGGGACAGAGTACTGGCCGCGGATTACGACGGAAACCCGGTTTATACCGACTTCATCATGTTCAA',
 'CTACGGCAGAAGAAGACATCCGAAAAAGCTGACACCTCTCGCCTACAAGCAGTTCATACCTAATGTCGCGGAGAAGACCTTAGGGGCCAGCGGCAGATACGAGGGCAAGATAACGCGCAATTCGGAGAGATTTAAAGAACTTACTCCAAATTACAATCCCGACATTATCTTTAAGGATGAGGAGAACACG',
 'CTACGGCAGAAGAAGACATCCCAAGAAGCTGACACCTCTCGCCTACAAGCAGTTTATACCTAATGTCGCGGAGAAGACCTTAGGGGCCAGCGGCAGATACGAGGGCAAGATCACGCGCAATTCGGAGAGATTTAAAGAACTTACTCCAAATTACAATCCCGACATTATCTTTAAGGATGAGGAGAACACT',
 'TGCTGCTGCTGGCGAGATGTCTGCTGGTGCTGCTTGTCTCCTCGCTGTTGATGTGCTCGGGGCTGGCGTGCGGACCCGGCAGGGGATTTGGCAAGAGGCGGAACCCCAAAAAGCTGACCCCTTTAGCCTACAAGCAGTTTATCCCCAACGTGGCGGAGAAGACCCTAGGGGCCAGTGGAAGATATGAGGGGAAGATCACCAGAAACTCAGAGCGATTTAAGGAACTCACCCCCAATTACAACC

### Creacion de entorno

Alineamos las dos primeras secuencias

In [16]:
seq1= seqs[0]
seq2= seqs[1]
alig1,alig2,score = nw(seq1,seq2,score_mtx,-3)

Creamos nuestro profile y agregamos los dos alineamientos al profile

In [17]:
p = Profile()

p.add_sequence(alig1) 
p.add_sequence(alig2)

p.print_seq()

[['GCGGGTCACTGAGGGCTGGGATGAGGACGGCCACCACTTCGAGGAGTCCCTTCACTACGAGGGCAGGGCCGTGGACATCACCACGTCAGACAGGGACAAGAGCAAGTACGGCACCCTGTCCAGACTGGCGGTGGAAGCTGGGTTCGACTGGGTCTACTATGAGTCCAAAGCGCACATCCACTGCTCTGTGAAAGCAGAAAGCTCAGTCGCTGCAAAGTCGGGCGGTTGCTTCCCAGGATCCTCCACGGTCACCCTGGAAAATGGCACCCAGAGGCCCGTCAAAGATCTCCAACCCGGGGACAGAGTACTGGCCGCGGATTACGACGGAAACCCGGTTTATACCGACTTCATCATGTTCAA']
 ['-C---T-AC----GGC----A-GA--A--G--A--A----GA-CA-T-CC--GA--A--A----A-AG-C-T-G--A-CACCTC-TC-G-C--CTAC-A-AGC-AGTTC-ATA-CC--T--A-A-TGTC-GCGG-A-----G-AAGAC-----CT--TA-GGGGCCAGCG-GCAGAT--AC-G---AG-G---GC---AAGAT-A-ACGC-GC-AA-T---TCGG-AG------A-GA---T------T-----T---AAA--G-A-ACTTA----C-TCCAA-AT-TACAATCC--CGAC--ATTA-T-----C--TTTA--A-GG--ATGAGG-----A--GA----A-CA----C-G']]


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360
A,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,2,0,0,2,0,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,2,0,0,2,0,0,0,0,2,0,1,0,0,0,0,0,0,0,1,0,2,0,0,2,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,2,0,1,2,0,2,0,0,1,2,0,0,1,0,0,1,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,0,2,0,0,1,0,0,0,0,2,1,1,0,0,0,0,2,0,2,0,0,0,2,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,2,2,0,1,0,0,2,0,1,0,0,0,0,0,0,1,2,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,2,2,2,0,0,0,0,2,0,1,0,1,0,2,0,0,0,0,0,0,0,0,1,2,2,0,2,0,0,0,1,0,2,2,0,0,0,0,0,0,0,2,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,2,0,0,0,1,1,2,0,0,1,0,0,0,0,0,1,0,2,0,0,0,2,0,0,0,0,2,0,0,2,0,0,0,0,0,1,1
C,0,2,0,0,0,0,1,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,2,2,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,2,0,0,0,0,0,1,0,0,2,0,2,2,0,2,0,0,2,0,0,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0,0,0,2,0,0,1,0,1,2,2,0,0,0,1,1,0,0,0,1,0,0,0,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,1,0,2,0,1,0,0,1,1,0,2,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,2,0,2,0,0,2,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,2,0,0,0,0,0,1,1,2,0,0,2,1,0,0,0,0,0,1,0,1,2,0,0,1,2,2,0,0,1,0,0,2,0,0,0,0,0,0,1,0,0,0,1,1,0,2,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,2,0,0,0,0,0,2,0,0
G,1,0,1,1,1,0,0,0,0,0,1,0,1,2,2,0,0,1,1,1,0,0,2,0,1,1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,2,0,0,1,0,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2,0,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,2,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,2,1,0,1,2,0,2,2,0,0,1,0,0,1,1,2,0,0,0,2,0,0,0,1,1,1,0,0,0,0,0,0,0,0,2,1,2,1,0,0,0,1,0,2,0,2,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,2,0,2,0,0,0,2,0,0,1,0,0,0,2,0,0,0,0,1,0,0,2,0,0,2,0,0,0,0,1,0,0,1,1,1,0,2,2,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,2,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,2,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,2,2,0,0,0,0,1,0,2,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1
T,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,2,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,2,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,2,0,0,2,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,1,0,0,0,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,2,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0


#### Vamos a alinear las secuencias restantes

In [18]:
for fa in seqs[2:]:
    seq_align, gaps_profile, score = nw_msa(fa, p, score_mtx, -10)

    for position_gap in gaps_profile:
        p.add_gap_counter(position_gap)
    p.add_sequence(seq_align)
        
p.print_seq()

[['GCGGGTCACTGAGGGCTGGGATGAGGACGGCCACCACTTCGAGGAGTCCCTTCACTACGAGGGCAGGGCCGTGGACATCACCACGTCAGACAGGGACAAGAGCAAGTACGGCACCCTGTCCAGACTGGCGGTGGAAGCTGGGTTCGACTGGGTCTACTATGAGTCCAAAGCGCACATCCACTGCTCTGTGAAAGCAGAAAGCTCAGTCGCTGCAAAGTCGGGCGGTTGCTTCCCAGGATCCTCCACGGTCACCCTGGAAAATGGCACCCAGAGGCCCGTCAAAGATCTCCAACCCGGGGACAGAGTACTGGCCGCGGATTACGACGGAAACCCGGTTTATACCGACTTCATCATGTTCAA']
 ['-C---T-AC----GGC----A-GA--A--G--A--A----GA-CA-T-CC--GA--A--A----A-AG-C-T-G--A-CACCTC-TC-G-C--CTAC-A-AGC-AGTTC-ATA-CC--T--A-A-TGTC-GCGG-A-----G-AAGAC-----CT--TA-GGGGCCAGCG-GCAGAT--AC-G---AG-G---GC---AAGAT-A-ACGC-GC-AA-T---TCGG-AG------A-GA---T------T-----T---AAA--G-A-ACTTA----C-TCCAA-AT-TACAATCC--CGAC--ATTA-T-----C--TTTA--A-GG--ATGAGG-----A--GA----A-CA----C-G']
 ['-C---T-AC----GGC----A-GA--A--G--A--A----GA-CA-TCCC---A--A-GA----A--G-C-T-G--A-CACCTC-TC-G-C--CTAC-A-AGC-AGTT-TATA-CC--T--A-A-TGTC-GCGG-A-----G-AAGAC-----CT--TA-GGGGCCAGCG-GCAGAT--AC-G---AG-G---GC---AAGATCA--CGC-GC-AA-T---TCGG-AG------A-GA---T------T-----T---AAA--G-

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360
A,0,0,0,0,0,0,0,3,0,0,0,1,0,0,0,0,0,0,1,0,4,0,0,3,0,0,3,0,0,0,0,0,3,0,0,3,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,0,3,0,0,3,0,0,3,0,0,0,0,4,0,1,0,0,0,0,0,0,0,1,0,3,0,0,3,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,3,0,1,3,0,3,0,0,1,3,0,0,1,0,0,2,0,4,0,0,0,0,0,0,0,0,3,0,4,0,0,0,0,0,0,0,0,0,0,1,4,0,0,0,0,0,0,0,3,3,0,4,0,0,0,0,0,0,0,0,2,0,0,4,0,0,1,0,0,0,0,4,2,2,1,1,0,0,3,0,4,0,0,0,3,0,0,0,0,0,0,3,0,0,0,1,1,2,0,0,2,1,1,3,4,0,2,0,0,4,0,1,0,0,0,0,0,0,2,4,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,1,0,4,0,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,1,3,4,4,0,0,0,0,4,0,3,0,1,0,4,0,0,0,0,0,0,0,0,1,4,4,0,4,0,0,0,2,0,3,4,0,0,0,0,1,0,0,4,0,1,0,4,0,0,4,0,0,0,1,0,1,0,0,0,1,1,0,0,4,0,0,4,0,0,0,1,1,4,0,0,3,0,0,0,0,0,2,0,3,0,0,0,4,1,0,0,0,4,0,0,4,0,1,0,0,0,1,1
C,0,3,0,0,1,0,1,0,4,0,0,1,0,0,0,4,0,0,0,0,0,0,0,0,1,0,0,2,0,0,1,1,0,2,1,0,2,0,0,1,0,0,0,3,0,0,0,2,4,4,0,1,1,0,2,0,0,1,0,0,0,0,0,1,0,0,0,0,1,3,0,0,0,0,0,1,0,0,3,0,3,3,0,4,0,0,4,0,0,0,3,0,0,2,0,0,4,0,0,0,0,0,4,0,0,0,0,0,3,0,0,1,0,2,4,4,0,0,0,1,2,0,0,0,1,0,0,0,3,0,0,2,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,3,0,0,1,0,0,3,0,0,1,0,0,1,0,0,0,1,4,4,0,0,2,0,1,0,4,0,1,0,0,2,2,1,4,0,0,1,0,1,0,0,1,0,1,0,0,0,4,0,0,0,1,0,0,1,0,2,0,0,0,4,0,4,1,0,4,0,0,1,0,0,1,0,0,0,4,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,2,2,0,1,1,0,1,0,0,0,2,0,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,3,0,0,0,0,0,1,1,3,0,0,3,2,0,0,0,0,0,1,0,1,4,0,0,1,4,4,0,0,2,0,0,3,0,0,0,0,0,0,2,0,1,0,1,1,0,4,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,2,2,1,0,1,0,0,1,0,0,4,0,0,0,1,1,4,0,0
G,1,0,1,2,1,0,1,0,0,0,2,0,1,4,4,0,0,2,1,2,0,0,4,0,1,1,1,0,1,4,1,0,1,0,0,1,0,0,0,0,4,0,1,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,3,0,1,1,2,0,0,1,1,4,0,0,1,0,1,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,4,0,0,0,2,2,2,0,0,0,0,2,0,4,0,0,0,4,0,1,0,2,2,0,0,0,0,0,0,2,0,1,0,0,1,0,0,0,4,1,0,1,4,0,4,4,0,0,1,0,0,1,2,4,0,0,0,4,0,0,1,2,1,2,1,0,0,0,0,0,0,0,3,2,3,2,0,0,0,2,0,3,0,4,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,4,0,3,0,0,0,3,0,0,1,1,0,0,4,0,0,0,0,1,0,0,3,0,0,3,0,0,0,0,2,0,0,2,2,1,0,4,4,0,0,4,0,0,0,0,0,0,0,1,4,0,0,0,0,0,0,0,0,1,2,2,1,0,0,0,0,1,0,2,2,0,0,0,0,0,1,4,0,0,0,0,0,0,2,0,2,2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,1,1,4,0,0,0,1,0,1,0,0,0,0,1,1,1,0,2,0,2,1,0,0,0,0,0,1,0,0,4,4,0,0,0,0,2,0,3,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1
T,0,0,1,0,0,4,0,0,0,2,0,0,1,0,0,0,1,0,0,0,0,2,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,2,2,0,0,0,1,0,0,0,4,0,0,0,2,1,0,0,0,2,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,4,0,0,0,0,0,1,0,0,0,0,2,0,0,4,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,4,2,0,1,0,2,0,0,0,0,1,0,3,0,0,0,0,0,0,3,0,2,0,0,0,1,0,0,0,0,0,1,2,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,3,0,0,3,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,3,0,0,0,0,2,0,0,2,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,4,1,0,0,2,0,0,0,1,0,0,0,0,0,0,4,0,0,0,2,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,4,0,0,0,0,0,0,3,0,0,0,0,0,4,0,0,0,0,0,0,1,0,0,0,0,1,0,0,3,2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,3,0,4,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,3,0,0,4,0,0,0,0,0,0,0,0,3,4,4,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,2,1,0,1,0,0,0,0,0,0,2,2,0,0,1,0,0,1,0,1,1,0,1,0


### Guardamos los datos obtenidos en un archivo output.txt

In [19]:
data = ""
for idx, align in enumerate(p.sequences):
    data += "seq"+str(idx) +"\n"+ align +"\n" 
f = open("output.txt", "w")
f.write(data)
f.close()