**Ejercicios de Biopython**

Ejercicio 1:

In [None]:
from Bio.Seq import Seq
# Semiconservative replication demo: start from a duplex made of hebra1 (top)
# and its base-wise complement hebra2 (bottom).
hebra1 = Seq("ATGCCGTTAGCT")
hebra2 = hebra1.complement()

# Each old strand is paired with a freshly built complementary strand.
# complement() swaps bases position by position and does not reverse the order.
nueva_hebra1 = hebra1.complement()
nueva_hebra2 = hebra2.complement()  # complement of the bottom strand = top-strand sequence

# One entry per daughter duplex: (title, row for the 5'->3' line, row for the 3'->5' line).
for titulo, fila_superior, fila_inferior in (
    (
        "\nDúplex 1:",
        ("5' -", hebra1, "- 3'   (vieja)"),
        ("3' -", nueva_hebra1, "- 5'   (nueva)"),
    ),
    (
        "\nDúplex 2:",
        ("5' -", nueva_hebra2, "- 3'   (nueva)"),
        ("3' -", hebra2, "- 5'   (vieja)"),
    ),
):
    print(titulo)
    print(*fila_superior)
    print(*fila_inferior)


Dúplex 1:
5' - ATGCCGTTAGCT - 3'   (vieja)
3' - TACGGCAATCGA - 5'   (nueva)

Dúplex 2:
5' - ATGCCGTTAGCT - 3'   (nueva)
3' - TACGGCAATCGA - 5'   (vieja)


Ejercicio 2:

In [2]:
from Bio import SeqIO

for secuencia in SeqIO.parse("gene.fna", "fasta"):
    dna = secuencia.seq

    # Header line followed by the DNA sequence exactly as read from the file.
    print("Secuencia de ADN original (" + secuencia.id + "): ", dna, sep="\n")

    # Transcription to mRNA.
    arnm = dna.transcribe()
    print("\n Secuencia de ARNm:", arnm, sep="\n")

    # Strand orientation switch: transcribe the reverse complement instead.
    arnm_complementaria = dna.reverse_complement().transcribe()
    print(
        "\n Cambio Orientación: \n Secuencia de ARNm (hebra complementaria inversa):",
        arnm_complementaria,
        "\n",  # trailing separator between records (same output as a final print("\n"))
        sep="\n",
    )


Secuencia de ADN original (NC_000017.11:c7687490-7668421): 
CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGCTGGGAGCGTGCTTTCCACGACGGTGACACGCTTCCCTGGATTGGGTAAGCTCCTGACTGAACTTGATGAGTCCTCTCTGAGTCACGGGCTCTCGGCTCCGTGTATTTTCAGCTCGGGAAAATCGCTGGGGCTGGGGGTGGGGCAGTGGGGACTTAGCGAGTTTGGGGGTGAGTGGGATGGAAGCTTGGCTAGAGGGATCATCATAGGAGTTGCATTGTTGGGAGACCTGGGTGTAGATGATGGGGATGTTAGGACCATCCGAACTCAAAGTTGAACGCCTAGGCAGAGGAGTGGAGCTTTGGGGAACCTTGAGCCGGCCTAAAGCGTACTTCTTTGCACATCCACCCGGTGCTGGGCGTAGGGAATCCCTGAAATAAAAGATGCACAAAGCATTGAGGTCTGAGACTTTTGGATCTCGAAACATTGAGAACTCATAGCTGTATATTTTAGAGCCCATGGCATCCTAGTGAAAACTGGGGCTCCATTCCGAAATGATCATTTGGGGGTGATCCGGGGAGCCCAAGCTGCTAAGGTCCCACAACTTCCGGACCTTTGTCCTTCCTGGAGCGATCTTTCCAGGCAGCCCCCGGCTCCGCTAGATGGAGAAAATCCAATTGAAGGCTGTCAGTCGTGGAAGTGAGAAGTGCTAAACCAGGGGTTTGCCCGCCAGGCCGAGGAGGACCGTCGCAATCTGAGAGGCCCGGCAGCCCTGTTATTGTTTGGCTCCACATTTACATTTCTGCCTCTTGCAGCAGCATTTCCGGTTTCTTTTTGCCGGAGCAGCTCACTATTCACCCGATGAGAGGGGAGGAGAGAGAGAGAAAATGTCCTTTAGGCCGGTTCCTCTTACTTGGCAGAGGGAGGCTGCTA

Ejercicio 3:

In [5]:
from Bio.Seq import Seq
# RNA read as codons: AUG UAU GCU UAA.
arn = Seq("AUGUAUGCUUAA")

# to_stop=True ends translation at the first stop codon and leaves it out of the result.
proteina = arn.translate(to_stop=True)
print("Proteína:", proteina)

Proteína: MYA


Ejercicio 4:

In [1]:
import re
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Align import PairwiseAligner


FASTA = "Homo_sapiens_FGFR2_sequence.fa"


# Load the FASTA records and keep only those whose description mentions CDS or cDNA.
buckets = {"cds": [], "cdna": []}
for rec in SeqIO.parse(FASTA, "fasta"):
    desc = (rec.description or rec.id).lower()

    # "cds" takes precedence over "cdna" when both substrings are present.
    if "cds" in desc:
        tipo = "cds"
    elif "cdna" in desc:
        tipo = "cdna"
    else:
        tipo = None
    if tipo is None:
        continue

    # Use the isoform tag (e.g. FGFR2-206) as the id when the description has one;
    # otherwise fall back to the first whitespace-separated token of the id.
    hit = re.search(r"\bFGFR2-\d+\b", rec.description)
    if hit:
        rec.id = hit.group(0)
    else:
        rec.id = rec.id.split()[0]

    # Normalise to an uppercase DNA alphabet (any U becomes T).
    secuencia_dna = str(rec.seq).upper().replace("U", "T")
    rec.seq = Seq(secuencia_dna)
    buckets[tipo].append(rec)


# Deduplicate (keep the longest record per isoform)
def dedupe(recs):
    """Return one record per distinct ``id``, keeping the longest sequence.

    Args:
        recs: iterable of objects exposing ``id`` and ``seq`` attributes.

    Returns:
        A list with one record per id, ordered by the first appearance of
        each id. On a length tie the earlier record is kept.
    """
    longest = {}
    for rec in recs:
        current = longest.get(rec.id)
        # Replace only on a strictly longer sequence so that ties keep the first one seen.
        if current is None or len(current.seq) < len(rec.seq):
            longest[rec.id] = rec
    return [*longest.values()]


# Prefer the CDS records; fall back to the cDNA records only when no CDS was kept.
cds = dedupe(buckets["cds"])
if not cds:
    cds = dedupe(buckets["cdna"])
if len(cds) < 2:
    # Pairwise comparison needs at least two isoforms.
    raise SystemExit("Necesito ≥2 isoformas (CDS o cDNA).")


# Global aligner: +1 per match, 0 per mismatch, -1 to open a gap, -0.5 to extend it.
aln = PairwiseAligner()
aln.mode = "global"
aln.match_score = 1
aln.mismatch_score = 0
aln.open_gap_score = -1
aln.extend_gap_score = -0.5


# --- Percent-identity helper ---
def pct_id(seq1, seq2):
    """Return the percent identity between two sequences.

    The sequences are aligned with the module-level aligner ``aln`` and the
    first alignment it returns is used. The two gapped rows are rebuilt from
    the alignment coordinates, and identity is the number of columns holding
    the same character in both rows divided by the total number of alignment
    columns (gap columns count in the denominator), times 100.
    """
    alignment = aln.align(seq1, seq2)[0]
    target = str(alignment.target)
    query = str(alignment.query)
    coords = alignment.coordinates

    row_t, row_q = [], []
    # Consecutive coordinate columns delimit one segment of the alignment.
    for col in range(coords.shape[1] - 1):
        t0, t1 = int(coords[0, col]), int(coords[0, col + 1])
        q0, q1 = int(coords[1, col]), int(coords[1, col + 1])
        if t1 > t0 and q1 > q0:
            # Both sequences advance: aligned (match/mismatch) segment.
            row_t.append(target[t0:t1])
            row_q.append(query[q0:q1])
        elif t1 > t0:
            # Only the target advances: gap in the query row.
            row_t.append(target[t0:t1])
            row_q.append("-" * (t1 - t0))
        else:
            # Otherwise: gap in the target row.
            row_t.append("-" * (q1 - q0))
            row_q.append(query[q0:q1])

    s1 = "".join(row_t)
    s2 = "".join(row_q)
    iguales = sum(x == y for x, y in zip(s1, s2))
    return 100 * iguales / len(s1)




# Nucleotide-level identity for every unordered pair of isoforms.
print("Comparaciones por pares (nucleótidos):")
for pos, izq in enumerate(cds):
    for der in cds[pos + 1:]:
        pid = pct_id(izq.seq, der.seq)
        print(f"{izq.id} vs {der.id}: {pid:.2f}% | len {len(izq)}/{len(der)}")


# Translate each isoform up to (not including) its first stop codon.
# NOTE(review): translation starts at position 0 of each record; if the cDNA
# fallback is ever used, the reading frame may not start there. Confirm.
prot = {rec.id: rec.seq.translate(to_stop=True) for rec in cds}

# Protein-level identity for the same pairs.
print("\nComparaciones por pares (proteínas):")
for pos, izq in enumerate(cds):
    for der in cds[pos + 1:]:
        prot_izq, prot_der = prot[izq.id], prot[der.id]
        pid = pct_id(prot_izq, prot_der)
        print(f"{izq.id} vs {der.id}: {pid:.2f}% | aa {len(prot_izq)}/{len(prot_der)}")


Comparaciones por pares (nucleótidos):
FGFR2-206 vs FGFR2-203: 99.76% | len 2466/2460
FGFR2-206 vs FGFR2-210: 90.59% | len 2466/2304
FGFR2-203 vs FGFR2-210: 90.34% | len 2460/2304

Comparaciones por pares (proteínas):
FGFR2-206 vs FGFR2-203: 99.76% | aa 821/819
FGFR2-206 vs FGFR2-210: 90.29% | aa 821/768
FGFR2-203 vs FGFR2-210: 90.05% | aa 819/768


Ejercicio 6:

Sin Biopython:

In [None]:

# Replication -> transcription -> translation pipeline using only built-ins.

with open("gene.fna", "r") as f:
    lineas = f.readlines()  # read every line of the file

# Drop FASTA header lines (they start with '>') and join the rest into one
# uppercase string. All records in the file end up concatenated.
adn = "".join(l.strip().upper() for l in lineas if not l.startswith(">"))

print("Secuencia original:")
print(adn)
print("\n")

# Replication (building the complementary strands).
# The table covers the IUPAC ambiguity codes as well as A/C/G/T, so a sequence
# containing e.g. 'N' no longer raises KeyError. Characters missing from the
# table are passed through unchanged by str.translate.
pares = {
    'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C',
    'R': 'Y', 'Y': 'R', 'K': 'M', 'M': 'K',
    'S': 'S', 'W': 'W',
    'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D',
    'N': 'N',
}
complementario = adn.translate(str.maketrans(pares))  # base-by-base complement, same order as adn
molde = complementario[::-1]                          # reversed complement

# NOTE(review): complementario is built position by position from adn, so if
# adn is written 5'->3' this string reads 3'->5' and molde reads 5'->3'. The
# orientation labels printed below look swapped; confirm before changing them.
print("Paso 1: Replicación")
print(f"Hebra complementaria (5'->3'): {complementario}")
print(f"Hebra molde (3'->5'):         {molde}")
print("\n")

# Transcription (DNA to mRNA): here this is only the T -> U substitution on molde.
arnm = molde.replace('T', 'U')
print("Paso 2: Transcripción")
print(f"ARNm: {arnm}")
print("\n")

# Translation (mRNA to protein): codon -> one-letter amino acid, 'STOP' for stop codons.
codones = {
    'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L',
    'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
    'UAU': 'Y', 'UAC': 'Y', 'UAA': 'STOP', 'UAG': 'STOP',
    'UGU': 'C', 'UGC': 'C', 'UGA': 'STOP', 'UGG': 'W',

    'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
    'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
    'CAU': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',

    'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M',
    'ACU': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
    'AAU': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGU': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',

    'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
    'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
    'GAU': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
}

# arnm is already uppercase because adn was uppercased when it was read.
aminoacidos = []
# Stepping to len(arnm) - 2 visits only complete codons; a trailing partial codon is ignored.
for i in range(0, len(arnm) - 2, 3):
    codon = arnm[i:i + 3]
    # Codons missing from the table (e.g. containing an ambiguous base) become
    # 'X' rather than being dropped, so residue positions stay in step with codons.
    aa = codones.get(codon, 'X')
    if aa == 'STOP':  # stop codon: translation ends here
        break
    aminoacidos.append(aa)
proteina = "".join(aminoacidos)

print("Paso 3: Traducción")
print(f"Proteína traducida: {proteina}")
print("\n====================================================\n")



Secuencia original:
CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGCTGGGAGCGTGCTTTCCACGACGGTGACACGCTTCCCTGGATTGGGTAAGCTCCTGACTGAACTTGATGAGTCCTCTCTGAGTCACGGGCTCTCGGCTCCGTGTATTTTCAGCTCGGGAAAATCGCTGGGGCTGGGGGTGGGGCAGTGGGGACTTAGCGAGTTTGGGGGTGAGTGGGATGGAAGCTTGGCTAGAGGGATCATCATAGGAGTTGCATTGTTGGGAGACCTGGGTGTAGATGATGGGGATGTTAGGACCATCCGAACTCAAAGTTGAACGCCTAGGCAGAGGAGTGGAGCTTTGGGGAACCTTGAGCCGGCCTAAAGCGTACTTCTTTGCACATCCACCCGGTGCTGGGCGTAGGGAATCCCTGAAATAAAAGATGCACAAAGCATTGAGGTCTGAGACTTTTGGATCTCGAAACATTGAGAACTCATAGCTGTATATTTTAGAGCCCATGGCATCCTAGTGAAAACTGGGGCTCCATTCCGAAATGATCATTTGGGGGTGATCCGGGGAGCCCAAGCTGCTAAGGTCCCACAACTTCCGGACCTTTGTCCTTCCTGGAGCGATCTTTCCAGGCAGCCCCCGGCTCCGCTAGATGGAGAAAATCCAATTGAAGGCTGTCAGTCGTGGAAGTGAGAAGTGCTAAACCAGGGGTTTGCCCGCCAGGCCGAGGAGGACCGTCGCAATCTGAGAGGCCCGGCAGCCCTGTTATTGTTTGGCTCCACATTTACATTTCTGCCTCTTGCAGCAGCATTTCCGGTTTCTTTTTGCCGGAGCAGCTCACTATTCACCCGATGAGAGGGGAGGAGAGAGAGAGAAAATGTCCTTTAGGCCGGTTCCTCTTACTTGGCAGAGGGAGGCTGCTATTCTCCGCCTGCATTTCTTTTTCTGGATTACTTAGTTATG

Con Biopython:

In [3]:
from Bio.Seq import Seq
from Bio import SeqIO

for secuencia in SeqIO.parse("gene.fna", "fasta"):
    dna = secuencia.seq
    print(f"Secuencia original ({secuencia.id}):", dna, "\n", sep="\n")

    # Replication (building the complementary strands).
    complementario = dna.complement()
    molde = dna.reverse_complement()
    print(
        "Paso 1: Replicación",
        f"Hebra complementaria (5'->3'): {complementario}",
        f"Hebra molde (3'->5'):         {molde}",
        "\n",
        sep="\n",
    )

    # Transcription (DNA to mRNA).
    arnm = molde.transcribe()
    print("Paso 2: Transcripción", f"ARNm: {arnm}", "\n", sep="\n")

    # Translation (mRNA to protein), only up to the first stop codon.
    proteina = arnm.translate(to_stop=True)
    print(
        "Paso 3: Traducción",
        f"Proteína traducida: {proteina}",
        "\n====================================================\n",
        sep="\n",
    )


Secuencia original (NC_000017.11:c7687490-7668421):
CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGCTGGGAGCGTGCTTTCCACGACGGTGACACGCTTCCCTGGATTGGGTAAGCTCCTGACTGAACTTGATGAGTCCTCTCTGAGTCACGGGCTCTCGGCTCCGTGTATTTTCAGCTCGGGAAAATCGCTGGGGCTGGGGGTGGGGCAGTGGGGACTTAGCGAGTTTGGGGGTGAGTGGGATGGAAGCTTGGCTAGAGGGATCATCATAGGAGTTGCATTGTTGGGAGACCTGGGTGTAGATGATGGGGATGTTAGGACCATCCGAACTCAAAGTTGAACGCCTAGGCAGAGGAGTGGAGCTTTGGGGAACCTTGAGCCGGCCTAAAGCGTACTTCTTTGCACATCCACCCGGTGCTGGGCGTAGGGAATCCCTGAAATAAAAGATGCACAAAGCATTGAGGTCTGAGACTTTTGGATCTCGAAACATTGAGAACTCATAGCTGTATATTTTAGAGCCCATGGCATCCTAGTGAAAACTGGGGCTCCATTCCGAAATGATCATTTGGGGGTGATCCGGGGAGCCCAAGCTGCTAAGGTCCCACAACTTCCGGACCTTTGTCCTTCCTGGAGCGATCTTTCCAGGCAGCCCCCGGCTCCGCTAGATGGAGAAAATCCAATTGAAGGCTGTCAGTCGTGGAAGTGAGAAGTGCTAAACCAGGGGTTTGCCCGCCAGGCCGAGGAGGACCGTCGCAATCTGAGAGGCCCGGCAGCCCTGTTATTGTTTGGCTCCACATTTACATTTCTGCCTCTTGCAGCAGCATTTCCGGTTTCTTTTTGCCGGAGCAGCTCACTATTCACCCGATGAGAGGGGAGGAGAGAGAGAGAAAATGTCCTTTAGGCCGGTTCCTCTTACTTGGCAGAGGGAGGCTGCTATTCTCCGC

