In [4]:
!pip install biopython



In [5]:
from Bio import SeqIO
import numpy as np
import plotly.graph_objects as go
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.SeqUtils import MeltingTemp as mt
from Bio import pairwise2 as p2

In [6]:
import warnings
warnings.filterwarnings("ignore")

##Sequência 1

###Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome

In [7]:
sequencia1 = SeqIO.read("/content/sequence1.fasta", "fasta")

In [8]:
sequencia1

SeqRecord(seq=Seq('ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGT...AAA'), id='MN908947.3', name='MN908947.3', description='MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome', dbxrefs=[])

In [9]:
print("Tamanho da sequência 1: {}".format(len(sequencia1)))

Tamanho da sequência 1: 29903


In [10]:
print(sequencia1.description)

MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome


In [11]:
teste = sequencia1.seq

In [12]:
a = teste.count("A")
c = teste.count("C")
g = teste.count("G")
t = teste.count("T")
u = teste.count("U")

bases = ['A', 'C', 'G', 'T', 'U']
num = [a, c, g, t, u]

layout = go.Layout(title="Frequência da Cada Base Nitrogenada na Sequência 1",
                   yaxis=go.layout.YAxis(title='Frequência de cada base'),
                   xaxis=go.layout.XAxis(title='Bases'))

data = [go.Bar(x=bases, y=num,  hoverinfo='x',
               text=num,
               marker=dict(color=num, colorscale='YlGnBu'),
               textposition='inside',textfont=dict(
                                                  family="sans serif",
                                                  size=16,
                                                  color="white"))]

fig = go.Figure(data=data, layout=layout)
fig.show()

In [13]:
print('Temperatura GC: %0.2f' % mt.Tm_GC(sequencia1.seq))

Temperatura GC: 75.20


In [14]:
seq1_traduzida = sequencia1.translate()
print("Sequência traduzida: ")
print(str(seq1_traduzida.seq))

Sequência traduzida: 
IKGLYLPR*QTNQLSISCRSVL*TNFKICVAVTRLHA*CTHAV*LITNYCR*QDTSNSSIFCRLLTVSSVLQPIISTSRFRPGVTER*DGEPCPWFQRENTRPTQFACFTGSRRARTWLWRLRGGGLIRGTSTS*RWHLWLSRS*KRRFAST*TALCVHQTFGCSNCTSWSCYG*AGSRTRRHSVRS*W*DTWCPCPSCGRNTSGLPQGSSS*ER**RSWWP*LRRRSKVI*LRRRAWH*SL*RFSRKLEH*T*QWCYP*THA*A*RRGIHSLCR*QLLWP*WLPS*VH*RPSSTCW*SFMHFVRTTGLY*H*EGCILLP*T*A*NCLVHGTF*KEL*IADTF*N*IGKEI*HLQWGMSKFCISLKFHNQDYSTKG*KEKA*WLYG*NSICLSSCVTK*MQPNVPFNSHEV*SLW*NFMADGRFC*SHLRILWH*EFD*RRCHYLWLLTPKCCC*NLLSSMSQFRSRT*A*SCRIP**IWLENHSS*GWSHYCLWRLCVLLCWLP*QVCLLGSTC*R*HRL*PYRCCWRRFRRS**QPS*NTPKRESQHQYCW*L*T**RDRHYFGIFFCFHKCFCGNCERFGL*SIQTNC*ILW*F*SYKRKS*KRCLEYW*TEINTESSLCICIRGCSCCTINFLPHS*NCSKFCACFTEGRYNNTRWNFTVFTETH*CYDVHI*FGY*QSSCNGLHYRWCCSVDFAVAN*HLWHCL*KTQTRP*LA*REV*GRCRVS*RRLGNC*IYLNLCL*NCRWTNCHLCKGN*GECSDIL*ACK*IFGFVC*LYHYWWS*T*SLEFR*NICHALKGIVQKVC*IQRRNWPTHASKSPKRNYLLRGRNTSHRSVNRGSCLENW*FTTIRTTY**SC*SSIGWYTSLY*RAYVARNQRHRKVLCPCT*YDGNKQYLHTQRRCTNKGYFW**HCDRSARLQECEYHF*T**KD**ST**EVLCLYS*TRYRSK*VRLCCGRCCHKNFATSI*IT

In [15]:
x = ProteinAnalysis(str(seq1_traduzida.seq))

In [16]:
dic_amino = x.count_amino_acids()

In [17]:
print(type(dic_amino))

<class 'dict'>


In [18]:
lista_nomes = dic_amino.keys()
lista_nomes = list(lista_nomes)

In [19]:
lista_amino = list(dic_amino.values())

In [20]:
layout = go.Layout(title="Frequência dos Aminoácidos na Sequência 1",
                   yaxis=go.layout.YAxis(title='Frequência'),
                   xaxis=go.layout.XAxis(title='Aminoácidos'))

data = [go.Bar(x=lista_nomes, y=lista_amino,  hoverinfo='x',
               text=lista_amino,
               marker=dict(color=lista_amino, colorscale='YlGnBu'),
               textposition='outside',textfont=dict(
                                                  family="sans serif",
                                                  size=16,
                                                  color="gray"))]

fig = go.Figure(data=data, layout=layout)
fig.show()

###Fração Secudária

In [21]:
x.secondary_structure_fraction()

(0.32416976020868865, 0.1974515902478178, 0.1653456406140263)

##Sequência 2 - Genoma da SARS-CoV-2

###Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IND/29/2020, complete genome

In [22]:
sequencia2 = SeqIO.read("/content/sequence2.fasta", "fasta")

In [23]:
seq2 = str(sequencia2.seq)

In [24]:
print("Tamanho da sequência 2: {}".format(len(seq2)))

Tamanho da sequência 2: 29854


In [25]:
print(sequencia2.description)

MT012098.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IND/29/2020, complete genome


In [26]:
a = seq2.count("A")
c = seq2.count("C")
g = seq2.count("G")
t = seq2.count("T")
u = seq2.count("U")

bases = ['A', 'C', 'G', 'T', 'U']
num = [a, c, g, t, u]

layout = go.Layout(title="Frequência da Cada Base Nitrogenada no genoma da SARS-CoV-2",
                   yaxis=go.layout.YAxis(title='Frequência de cada base'),
                   xaxis=go.layout.XAxis(title='Bases'))

data = [go.Bar(x=bases, y=num,  hoverinfo='x',
               text=num,
               marker=dict(color=num, colorscale='sunsetdark'),
               textposition='inside',textfont=dict(
                                                  family="sans serif",
                                                  size=16,
                                                  color="white"))]

fig = go.Figure(data=data, layout=layout)
fig.show()

##Temperatura GC

In [27]:
print('Temperatura GC: %0.2f' % mt.Tm_GC(sequencia2.seq))

Temperatura GC: 75.22


In [28]:
seq2_traduzida = sequencia2.translate()
print("Sequência traduzida: ")
print(str(seq2_traduzida.seq))

Sequência traduzida: 
TFPGNKPTNFRSLVDLFSKRTLKSVWLSLGCMLSALTQYN**LITVVDRTRVTRLSSAGCLRFRPCCSRSSAHLGFVRV*PKGKMESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGGAYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKETKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGGAPTKVTFGDDTVIEVQGYKSVNITFELDERIDKVLNEKCSAYTVELGTEVNEFACVVADAVIKTLQPVSELLTPLG

In [29]:
y = ProteinAnalysis(str(seq2_traduzida.seq))

In [30]:
dic_amino = y.count_amino_acids()

In [31]:
lista_nomes_y = list(dic_amino.keys())
lista_amino_y = list(dic_amino.values())

In [32]:
layout = go.Layout(title="Frequência de cada Aminoácido no genoma da SARS-CoV-2",
                   yaxis=go.layout.YAxis(title='Frequência'),
                   xaxis=go.layout.XAxis(title='Aminoácidos'))

data = [go.Bar(x=lista_nomes_y, y=lista_amino_y,  hoverinfo='x',
               text=lista_amino_y,
               marker=dict(color=lista_amino_y, colorscale='sunsetdark'),
               textposition='inside',textfont=dict(
                                                  family="sans serif",
                                                  size=16,
                                                  color="white"))]

fig = go.Figure(data=data, layout=layout)
fig.show()

In [33]:
y.secondary_structure_fraction()

(0.3559441262184705, 0.2023917194251834, 0.24690985830569792)

#Sequência 3 
Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IRN/K1r-26/2020 surface glycoprotein (S) gene, complete cds

In [34]:
sequencia3 = SeqIO.read("/content/sequence3.fasta", "fasta")

In [35]:
seq3 = str(sequencia3.seq)

In [36]:
print("Tamanho da sequência 3: {}".format(len(seq3)))

Tamanho da sequência 3: 3822


In [37]:
print(sequencia3.description)

MW090854.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IRN/K1r-26/2020 surface glycoprotein (S) gene, complete cds


In [38]:
print(sequencia3.name)

MW090854.1


In [39]:
a = seq3.count("A")
c = seq3.count("C")
g = seq3.count("G")
t = seq3.count("T")
u = seq3.count("U")

bases = ['A', 'C', 'G', 'T', 'U']
num = [a, c, g, t, u]

layout = go.Layout(title="Frequência da Cada Base Nitrogenada no genoma da SARS-CoV-2/human/IRN/K1r-26/2020",
                   yaxis=go.layout.YAxis(title='Frequência de cada base'),
                   xaxis=go.layout.XAxis(title='Bases'))

data = [go.Bar(x=bases, y=num,  hoverinfo='x',
               text=num,
               marker=dict(color=num, colorscale='icefire'),
               textposition='outside',textfont=dict(
                                                  family="sans serif",
                                                  size=16,
                                                  color="black"))]

fig = go.Figure(data=data, layout=layout)
fig.show()

In [40]:
print('Temperatura GC: %0.2f' % mt.Tm_GC(sequencia3.seq))

Temperatura GC: 74.76


In [41]:
seq3_traduzida = sequencia3.translate()
print("Sequência traduzida: ")
print(str(seq3_traduzida.seq))

Sequência traduzida: 
MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCVFQFCNDPFLGVYYHKNNKSWMESEFRIYSNANNCTFEYVSFPFLIYLEGKQGNFNHLIDFLLKNTDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVESFNCYFPLQSYGFQPTNGVGYQPYRVVVFSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQGVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYADCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLN

In [42]:
w = ProteinAnalysis(str(seq3_traduzida.seq))

In [43]:
dic_amino = w.count_amino_acids()

In [44]:
lista_nomes_w = list(dic_amino.keys())
lista_amino_w = list(dic_amino.values())

In [45]:
layout = go.Layout(title="Frequência de cada Aminoácido no genoma da SARS-CoV-2/human/IRN/K1r-26/2020",
                   yaxis=go.layout.YAxis(title='Frequência'),
                   xaxis=go.layout.XAxis(title='Aminoácidos'))

data = [go.Bar(x=lista_nomes_w, y=lista_amino_w,  hoverinfo='x',
               text=lista_amino_w,
               marker=dict(color=lista_amino_w, colorscale='icefire'),
               textposition='outside',textfont=dict(
                                                  family="sans serif",
                                                  size=16,
                                                  color="gray"))]

fig = go.Figure(data=data, layout=layout)
fig.show()

##Sequência 4

###Middle East respiratory syndrome-related coronavirus isolate HCoV-EMC/2012, complete genome

In [46]:
sequencia4 = SeqIO.read("/content/sequence4.fasta", "fasta")

In [47]:
seq4 = str(sequencia4.seq)
print("Tamanho da sequência 4: {}".format(len(seq4)))

Tamanho da sequência 4: 30119


In [48]:
print(sequencia4.description)

NC_019843.3 Middle East respiratory syndrome-related coronavirus isolate HCoV-EMC/2012, complete genome


In [49]:
a = seq4.count("A")
c = seq4.count("C")
g = seq4.count("G")
t = seq4.count("T")
u = seq4.count("U")

bases = ['A', 'C', 'G', 'T', 'U']
num = [a, c, g, t, u]

layout = go.Layout(title="Frequência da Cada Base Nitrogenada no genoma da HCoV-EMC/2012",
                   yaxis=go.layout.YAxis(title='Frequência de cada base'),
                   xaxis=go.layout.XAxis(title='Bases'))

data = [go.Bar(x=bases, y=num,  hoverinfo='x',
               text=num,
               marker=dict(color=num, colorscale='viridis'),
               textposition='inside',textfont=dict(
                                                  family="sans serif",
                                                  size=16,
                                                  color="black"))]

fig = go.Figure(data=data, layout=layout)
fig.show()

In [50]:
print('Temperatura GC: %0.2f' % mt.Tm_GC(sequencia4.seq))

Temperatura GC: 76.54


In [51]:
seq4_traduzida = sequencia4.translate()
print("Sequência traduzida: ")
print(str(seq4_traduzida.seq))

Sequência traduzida: 
DLSE*LGYLTSPRSLAEL*F*RT*IKALLFSVSLHLSGGIVALICLLI*AVDICSTLGIILIEYYFSVRASCLLYVSVTIHGFVRCVAIRGTSCLSWLV*PRKVRAVRIEQRSTLKNIKTMCL*LCHSVVQETWLKNFHHGSWMAKMPMKW*RPCYLKRSHFSMCPSGWLDTLDTSQVLVCTWLRGSLLVKIHSWLTNWLIALVQMAAWLAQLCRASLLVCSSLMTSNLSQESKIFSCASMAVVVITTPHSTMSETTPLALSGWTILRRILKANMPRICLRS*LAVMSLQLTNTCVALMENPLVPTHF*WPRME*PNWLMLKRTSQHVLMTKASSH*RTIYIDWFGMLSVKTFHILSNLFLLLIVWSKRMVLKTLLLTILLLDAKF*RSPHATSGVAFLTCPSNKNSFTPSMVRSHLRTQPTFTTPHSLSVEVVVMIPGLQGMLSKGLPVDVGHHIQLMMSKSNHLA*LSQMLFFVLLAPLLRVIAVLLIANIQLLSWLVTFLNAVMLLLILSPSHLSLVA*LTPTLDVRKVLCTLCLELSLLSQGLETPSLQAVLALGTRSLKLLTCSWNRLSIPLTLWESSLSTMLSSQFSLEPQLMLTKYASFSKVSPLTSCVII*LTMT*QSLPAHSWIMLLMLVVQDYSMPPLLHLM*FSLA*VSPLRKLQPYRIRFATLLRILWLIMLTACCTEFFLMTWILVCHPLVNYFLIALIFQ*LLPIF*SASCKIRLATLCLQLLLPAKLLLVSF*IHVLKLQKQHLTSC*IWQDCSESFSAMPMCTLHKGLWWSMAKFLHLSNKC*TCLIRVCNFCIQRSPGLVLKSLLLSTAAGSL*YSHREPITVSPLRLSPFNKILTLFCLVSFPRSS*DCSNLLTILQLLVLLYPVTWLKLLWVNLSKLICIVLML**VTMSLLVKNCLCVVRKKTDLPSTLLALMVMLYRLSLDLREVHL*KK*PLAVIKYMRLLL*EVLLSSTTFMLY*

In [52]:
z = ProteinAnalysis(str(seq4_traduzida.seq))

In [53]:
dic_amino = z.count_amino_acids()

In [54]:
lista_nomes_z = list(dic_amino.keys())
lista_amino_z = list(dic_amino.values())

In [55]:
layout = go.Layout(title="Frequência da cada Aminoácido no genoma da HCoV-EMC/2012",
                   yaxis=go.layout.YAxis(title='Frequência'),
                   xaxis=go.layout.XAxis(title='Aminoácidos'))

data = [go.Bar(x=lista_nomes_z, y=lista_amino_z,  hoverinfo='x',
               text=lista_amino_z,
               marker=dict(color=lista_amino_z, colorscale='viridis'),
               textposition='outside',textfont=dict(
                                                  family="sans serif",
                                                  size=16,
                                                  color="black"))]

fig = go.Figure(data=data, layout=layout)
fig.show()

In [56]:
z.secondary_structure_fraction()

(0.3915728658232892, 0.1806952883753362, 0.2574957665106086)

#Alinhamento


In [57]:
seq1_curta = str(sequencia1.seq)[:800] 

In [58]:
len(seq1_curta)

800

In [59]:
seq1_curta

'ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG'

In [60]:
seq2_curta = str(sequencia2.seq)[:800] 
seq3_curta = str(sequencia3.seq)[:800] 
seq4_curta = str(sequencia4.seq)[:800] 

In [61]:
alinhamento1 = p2.align.globalxx(seq1_curta, seq2_curta)
for i in alinhamento1:
  print(p2.format_alignment(*i))

ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG-------------
             |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||

In [64]:
alinhamento2 = p2.align.globalxx(seq1_curta, seq3_curta)
for i in alinhamento2:
  print(p2.format_alignment(*i))

ATTAAAG---GTTTATAC-CTTCCCAGGTAACAAACCA---A---CCAACTTTCGA-TCTCTT-GT-AGATC-TGTTC--TCT-A-AAC--GAACTTTAA-AATCTGTGTGG-C----TGTCACT-CGG-CTGCA-TG-CTTAGTGCACTCACGCA-G-TA-TAATTAATA-AC--T-A-ATTACT-GTCGTTGA-CAGGA-CACG--AGTAACTCGTCTATC-TTC---TGCAGG-CT-G--CTTACGG-TTTCGT---CCG--TGTTGCAGCCGATCATCAG---CACAT-CTAGGTTT-C--GTC-CGGGTGTG-ACCGAAA-GGTA--AGATGGAGAGCCTT-G-T--CCCTGGTTTCAACGAGAAA-ACACACGTCC--AACTC-A-G-T-TTGCCTG-TTTTACAGG-TTCGCGACGTGCTC-GTAC-GTGGCT----T--TGGA-GACTCCG--TGGA-----GG-AGG-T-CTTATCAGA---G--GCAC---GTCA---AC--ATCT-T-AA-AGATG-GC-ACTTG--TGGCTTAGTAGA-AGTTGAAAAAGG-C-GT-T-TTGCCTCAACTT--GA-ACA-GC--CCTAT----G--TGTTC-AT--C-A-AA---C-------GTTCGGATGCTCGAAC--TGCACC-T-CATGGTCA-TGTTATGGTTGAGCTGGTA--GCAGAACTCGAAG--GCA-TTCAGTACGGTCG--TAGTGGTGAGACA-CTTGGTGTCCTTGTCCCTC--ATGTG------G--GCGAAATACCAGTGGCT--T---A--C--C-GCAAGGTT--CTT-CTTCGTA-AGAACGGTAATAA-AGGAGCTGGTGGCC-AT---AG---T-TA--CG---GCG-C-CG---ATCTAA---AGT-CATT-TGA-CT---T-AGGCGA-----CGAGCTTGGCACT-GAT-CCTTAT-GA--AGATTTT-C-AA--G--A--AA-A-C--T-GG----AACACT

In [65]:
alinhamento3 = p2.align.globalxx(seq1_curta, seq4_curta)
for i in alinhamento3:
  print(p2.format_alignment(*i))

-ATTA-AAGGTTT-A-TAC-CTTCCCAGG-TAA-CAA--AC--CAACCAACTTTCGAT-CTCTTGT-AGAT-CT--G-TTCTCTAAACGAACTT---TAAAAT-C--TGTGTG----GCTGTCACTCGGCT-GCATGCTTAGTGCACTCACGCAGT---ATAAT-TA---ATAACTAATTA--C-TG-TCG-T-TGA--CAG-G-ACACG-A-G-T-AACT-C-G--TC-TA-T-CT--TCTGCAGGC-TG-CTTACGGTTT--C-GT-----C--CGTGT-TGCA--GC--CGATCATCAG--CACA-T-CTA-GGTTTCGTCCGGGTGTGACCGAAA-GGTA-AGAT--GGAGAGC-C-T--TGTCCCTGGTTTCAACGA-GAAAACACACGTCCAACTCAGT-T-TG-CCTGTTTTAC--A-GGTT-CGCGACGTGCT-CGTACGT-G-GCTTT-G-G---AGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAA--C-AT----CT-TAAAGA-TG-GC-ACT-TGTGGCTT-AGTAGAA----G-TTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCT-CGAAC--TGCACCTCATGGTCATGTTA-TGGTTGA-G-C--TGGT-AGC--AGAACTCGAAGG--C-A-TTCAGTAC---G-G----TC-GTAG-TGG-TGAGACACTTGG--T-GT-C-CTTG-TCCC---TCA-T-GTGGGC-GAAATACCAGTGGCTTACCGCA-AGGT-TC-TT-CTTCGT-AAGAA-C-------GGTAATAA--AGGAGCTGGTGGCC--ATAG-T-TA---C----GGC-GCC-GA-TCT---A-AAGTCATTTGACTTAGGCGACGA-GC-T-T-GGC-ACTGAT-C--C--TTATGA-A--GAT--TT-TCA-AGA-AAACTGGAACACTAAA----C--------A--TA-G-CA-GTGGTG-T

In [66]:
alinhamento4 = p2.align.globalxx(seq2_curta, seq3_curta)
for i in alinhamento4:
  print(p2.format_alignment(*i))

ACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCT-GTTCTCTAAACGAACTTTAAA---ATCT-GTG-TGGCT-GTCACTCGGCTGCATGCTTAGTGCA-CT--CA-CGCAGTA--T-AATTA--------ATA-ACTAATTACTGT-C----GT--TG---AC--AG----GACACGA-GT----A-A-C-TC-GTCTAT--C-TTC---TGCAGG-CT-G--CTTACGG-TTTCGT---CCG--TGTTGCAGCCGATCATCAG---CACAT-CTAGGTTT-C--GTC-CGGGTGTG-ACCGAAA-GGTA--AGATGGAGAGCCTT-G-T--CCCTGGTTTCAACGAGAAA-ACACACGTCC--AACTC-A-G-T-TTGCCTG-TTTTACAGG-TTCGCGACGTGCTC-GTAC-GTGGCT----T--TGGA-GACTCCG--TGGA-----GG-AGG-T-CTTATCAGA---G--GCAC---GTCA---AC--ATCT-T-AA-AGATG-GC-ACTTG--TGGCTTAGTAGA-AGTTGAAAAAGG-C-GT-T-TTGCCTCAACTT--GA-ACA-GC--CCTAT----G--TGTTC-AT--C-A-AA---C-------GTTCGGATGCTCGAAC--TGCACC-T-CATGGTCA-TGTTATGGTTGAGCTGGTA--GCAGAACTCGAAG--GCA-TTCAGTACGGTCG--TAGTGGTGAGACA-CTTGGTGTCCTTGTCCCTC--ATGTG------G--GCGAAATACCAGTGGCT--T---A--C--C-GCAAGGTT--CTT-CTTCGTA-AGAACGGTAATAA-AGGAGCTGGTGGCC-AT---AG---T-TA--CG---GCG-C-CG---ATCTAA---AGT-CATT-TGA-CT---T-AGGCGA-----CGAGCTTGGCACT-GAT-CCTTAT-GA--AGATTTT-C-AA--G--A--AA-A-C--T-GG----AACACTA--A--

In [67]:
alinhamento5 = p2.align.globalxx(seq2_curta, seq4_curta)
for i in alinhamento5:
  print(p2.format_alignment(*i))

-ACCTTCCCAGGTAAC---AA-A-C----CA-A-C-CAACTT----TCGAT-CTCTTGT-AGAT-CT--G-TTCTCTAAACGAACTT---TAAAAT-C--TGTGTG----GCTGTCACTCGGCT-GCATGCTTAGTGCACTCACGCAGT---ATAAT-TA---ATAACTAATTA--C-TG-TCG-T-TGA--CAG-G-ACACG-A-G-T-AACT-C-G--TC-TA-T-CT--TCTGCAGGC-TG-CTTACGGTTT--C-GT-----C--CGTGT-TGCA--GC--CGATCATCAG--CACA-T-CTA-GGTTTCGTCCGGGTGTGACCGAAA-GGTA-AGAT--GGAGAGC-C-T--TGTCCCTGGTTTCAACGA-GAAAACACACGTCCAACTCAGT-T-TG-CCTGTTTTAC--A-GGTT-CGCGACGTGCT-CGTACGT-G-GCTTT-G-G---AGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAA--C-AT----CT-TAAAGA-TG-GC-ACT-TGTGGCTT-AGTAGAA----G-TTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCT-CGAAC--TGCACCTCATGGTCATGTTA-TGGTTGA-G-C--TGGT-AGC--AGAACTCGAAGG--C-A-TTCAGTAC---G-G----TC-GTAG-TGG-TGAGACACTTGG--T-GT-C-CTTG-TCCC---TCA-T-GTGGGC-GAAATACCAGTGGCTTACCGCA-AGGT-TC-TT-CTTCGT-AAGAA-C-------GGTAATAA--AGGAGCTGGTGGCC--ATAG-T-TA---C----GGC-GCC-GA-TCT---A-AAGTCATTTGACTTAGGCGACGA-GC-T-T-GGC-ACTGAT-C--C--TTATGA-A--GAT--TT-TCA-AGA-AAACTGGAACACTAAA----C--------A--TA-G-CA-GTGGTG-TTAC-C-C

In [69]:
alinhamento6 = p2.align.globalxx(seq3_curta, seq4_curta)
for i in alinhamento6:
  print(p2.format_alignment(*i))

ATG-TTT--GTTTT---T--CTTGT--T-T-T-A-TTGCCAC-TA-GT-CTCTAGT-CAGTG---TGTTA-ATCTTAC-AACCAGAACTC-AAT-TACC----CCCTGCATAC-----A-C-TAATTCT-TTCA-CACGTGGTGT-T--T---ATTACCC-TGA-CA--AAGTTTT-CA--GA-TC--CT---CAGTTTT--ACAT-T-C--AACTCA--GGAC-T-TG--TTCTTACC-TT---T-CT-TTTC--CAATGTTA---CTTG--GT-TCCATGCTAT--ACATGTCTCTGGGA-C-CAATGGTACTAAGA-GGTTT-GATAACCCTG-TC-C-TAC--CATTTAATGAT-GGTGTTTATTTTGCTTCCACTGAGA--A-GTCTAACATAATAA-GA-GGCTGGATTTTTGGT-ACTACTTTAGATTCG-AAGACCCAGT-C-C-C--TACTTATTGTTAAT--A--A-CGCTACTAATG-T-TGTTATTAAAGTCTGTGTATTTCAATTTTGT-AATGATCCATTTTTGGGTGT-T-TATTAC----CACAAAAACAA-CAAAAGT-TGGAT---GGAAAG--TGAGTTCAGAA----TTT-A---T--TCT-A---ATG-CGAATAATTGCACT-T----T--TGAAT----ATGTCT-CTTTT---------CCTT--TTCT-TATT-TA-CC--T-----TGAAG--GAA-A-ACAGGG-TA-ATTTCAA--TC--A--TC-T-GAT-TG-ACTTTCTT--TTAAAGAATACAGATGGT-T-ATT--TT---AAAAT--AT--AT--TCTAAGCACACGCCTATTAA---TT-TAG-TGCGTGA-T-C---T--C--CCTCAGGGTTTTTCGGC-----TTT--AG---AA-CC-ATTGGTA-GATT-TG-CCAA-TAGGTATTA-ACATC-ACTAGG-TT-TCA-A--A---CTT----TACTTGCTTTACA-TAG---AAGTTATT

In [None]:
alinhamento1 = p2.align.globalxx(seq1_curta, seq2_curta)
for i in alinhamento1:
  print(p2.format_alignment(*i))