### ANÁLISE DAS SEQUÊNCIAS E DAS FEATURES DO GENE NO NCBI

In [25]:
from Bio import Seq
from Bio import SeqIO

In [26]:
# Parse the GenBank file for the HHEX mRNA; SeqIO.read expects the file to
# contain exactly one record and returns it as a SeqRecord.
record = SeqIO.read("mRNA_seq_HHEX.gb", "genbank")
# Bare expression so the notebook displays the SeqRecord repr as cell output.
record

SeqRecord(seq=Seq('AGCTCTGCGAGGGGCCGGAGCGCGGCGGAGCCATGCAGTACCCGCACCCCGGGC...GTT'), id='NM_002729.5', name='NM_002729', description='Homo sapiens hematopoietically expressed homeobox (HHEX), mRNA', dbxrefs=[])

In [27]:
# Display the sequence (Seq object) carried by the record.
record.seq

Seq('AGCTCTGCGAGGGGCCGGAGCGCGGCGGAGCCATGCAGTACCCGCACCCCGGGC...GTT')

In [28]:
# Summarise the record, one value per line: id, name, description,
# number of annotations, source organism and number of features.
record_summary = (
    record.id,
    record.name,
    record.description,
    len(record.annotations),
    record.annotations["source"],
    len(record.features),
)
for summary_value in record_summary:
    print(summary_value)

NM_002729.5
NM_002729
Homo sapiens hematopoietically expressed homeobox (HHEX), mRNA
13
Homo sapiens (human)
13


Verificar as anotações dos genes de interesse

In [29]:
# List every annotation of the record as "key  :  value".
for annotation_key in record.annotations:
    print(annotation_key, " : ", record.annotations[annotation_key])

molecule_type  :  mRNA
topology  :  linear
data_file_division  :  PRI
date  :  21-OCT-2022
accessions  :  ['NM_002729', 'NM_001529']
sequence_version  :  5
keywords  :  ['RefSeq', 'MANE Select']
source  :  Homo sapiens (human)
organism  :  Homo sapiens
taxonomy  :  ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo']
references  :  [Reference(title='CK2-induced cooperation of HHEX with the YAP-TEAD4 complex promotes colorectal tumorigenesis', ...), Reference(title='Unraveling the Influence of HHEX Risk Polymorphism rs7923837 on Multiple Sclerosis Pathogenesis', ...), Reference(title='Integrated single-cell transcriptomics and epigenomics reveals strong germinal center-associated etiology of autoimmune risk loci', ...), Reference(title='Hhex inhibits cell migration via regulating RHOA/CDC42-CFL1 axis in human lung cancer cells', ...), Reference(title='Inter

In [30]:
# Print the free-text "comment" annotation of the GenBank entry.
refseq_comment = record.annotations["comment"]
print(refseq_comment)

REVIEWED REFSEQ: This record has been curated by NCBI staff. The
reference sequence was derived from BC015110.1 and BM543214.1.
On Nov 22, 2018 this sequence version replaced NM_002729.4.
Summary: This gene encodes a member of the homeobox family of
transcription factors, many of which are involved in developmental
processes. Expression in specific hematopoietic lineages suggests
that this protein may play a role in hematopoietic differentiation.
[provided by RefSeq, Jul 2008].
Publication Note:  This RefSeq record includes a subset of the
publications that are available for this gene. Please see the Gene
record to access additional publications.
COMPLETENESS: full length.


In [31]:
# Report the length of the record's sequence in base pairs.
seq_length = len(record)
print("Tamanho da seq: ", seq_length, "bp")

Tamanho da seq:  1724 bp


Verificar e analisar a informação complementar fornecida pela lista de features e seus 
qualifiers

In [32]:
# Print every feature in full (type, location, qualifiers),
# followed by the total number of features in the record.
all_features = record.features
for feature in all_features:
    print("-->", feature)
print(f"Número de features: {len(all_features)}")

--> type: source
location: [0:1724](+)
qualifiers:
    Key: chromosome, Value: ['10']
    Key: db_xref, Value: ['taxon:9606']
    Key: map, Value: ['10q23.33']
    Key: mol_type, Value: ['mRNA']
    Key: organism, Value: ['Homo sapiens']

--> type: gene
location: [0:1724](+)
qualifiers:
    Key: db_xref, Value: ['GeneID:3087', 'HGNC:HGNC:4901', 'MIM:604420']
    Key: gene, Value: ['HHEX']
    Key: gene_synonym, Value: ['HEX; HMPH; HOX11L-PEN; PRH; PRHX']
    Key: note, Value: ['hematopoietically expressed homeobox']

--> type: exon
location: [0:393](+)
qualifiers:
    Key: gene, Value: ['HHEX']
    Key: gene_synonym, Value: ['HEX; HMPH; HOX11L-PEN; PRH; PRHX']
    Key: inference, Value: ['alignment:Splign:2.1.0']

--> type: CDS
location: [32:845](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['CCDS:CCDS7423.1', 'GeneID:3087', 'HGNC:HGNC:4901', 'MIM:604420']
    Key: gene, Value: ['HHEX']
    Key: gene_synonym, Value: ['HEX; HMPH; HOX11L-PEN; PRH; PRHX']
   

Localização e tipo

In [33]:
from Bio.SeqFeature import SeqFeature, FeatureLocation
# Compact overview: the type and the location of each feature,
# printed on two consecutive lines.
for feature in record.features:
    print(feature.type, feature.location, sep="\n")

source
[0:1724](+)
gene
[0:1724](+)
exon
[0:393](+)
CDS
[32:845](+)
misc_feature
[32:443](+)
misc_feature
[188:191](+)
misc_feature
[440:842](+)
misc_feature
[611:842](+)
exon
[393:572](+)
exon
[572:623](+)
exon
[623:1724](+)
regulatory
[1691:1697](+)
polyA_site
[1723:1724](+)


Regiões codificantes

In [34]:
# Positions (within record.features) of every feature typed as CDS.
featcds = [
    index
    for index, feature in enumerate(record.features)
    if feature.type == "CDS"
]

# First pass: the location of each CDS.
for cds_index in featcds:
    print(record.features[cds_index].location)

# Second pass: the nucleotide sequence each CDS extracts from the record.
for cds_index in featcds:
    print(record.features[cds_index].extract(record.seq))

[32:845](+)
ATGCAGTACCCGCACCCCGGGCCGGCGGCGGGCGCCGTGGGGGTGCCGCTGTACGCGCCCACGCCGCTGCTGCAACCCGCACACCCGACGCCCTTTTACATCGAGGACATCCTGGGCCGCGGGCCCGCCGCGCCCACGCCCGCCCCCACGCTGCCGTCCCCCAACTCCTCCTTCACCAGCCTCGTGTCCCCCTACCGGACCCCGGTGTACGAGCCCACGCCGATCCATCCAGCCTTCTCGCACCACTCCGCCGCCGCGCTGGCCGCTGCCTACGGACCCGGCGGCTTCGGGGGCCCTCTGTACCCCTTCCCGCGGACGGTGAACGACTACACGCACGCCCTGCTCCGCCACGACCCCCTGGGCAAACCTCTACTCTGGAGCCCCTTCTTGCAGAGGCCTCTGCATAAAAGGAAAGGCGGCCAGGTGAGATTCTCCAACGACCAGACCATCGAGCTGGAGAAGAAATTCGAGACGCAGAAATATCTCTCTCCGCCCGAGAGGAAGCGTCTGGCCAAGATGCTGCAGCTCAGCGAGAGACAGGTCAAAACCTGGTTTCAGAATCGACGCGCTAAATGGAGGAGACTAAAACAGGAGAACCCTCAAAGCAATAAAAAAGAAGAACTGGAAAGTTTGGACAGTTCCTGTGATCAGAGGCAAGATTTGCCCAGTGAACAGAATAAAGGTGCTTCTTTGGATAGCTCTCAATGTTCGCCCTCCCCTGCCTCCCAGGAAGACCTTGAATCAGAGATTTCAGAGGATTCTGATCAGGAAGTGGACATTGAGGGCGATAAAAGCTATTTTAATGCTGGATGA


In [35]:
# Show each CDS feature in full, qualifiers included.
cds_features = (feature for feature in record.features if feature.type == "CDS")
for cds in cds_features:
    print(cds)

type: CDS
location: [32:845](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['CCDS:CCDS7423.1', 'GeneID:3087', 'HGNC:HGNC:4901', 'MIM:604420']
    Key: gene, Value: ['HHEX']
    Key: gene_synonym, Value: ['HEX; HMPH; HOX11L-PEN; PRH; PRHX']
    Key: note, Value: ['homeobox, hematopoietically expressed; proline-rich homeodomain-containing transcription factor; homeobox protein HEX; homeobox protein PRH']
    Key: product, Value: ['hematopoietically-expressed homeobox protein HHEX']
    Key: protein_id, Value: ['NP_002720.1']
    Key: translation, Value: ['MQYPHPGPAAGAVGVPLYAPTPLLQPAHPTPFYIEDILGRGPAAPTPAPTLPSPNSSFTSLVSPYRTPVYEPTPIHPAFSHHSAAALAAAYGPGGFGGPLYPFPRTVNDYTHALLRHDPLGKPLLWSPFLQRPLHKRKGGQVRFSNDQTIELEKKFETQKYLSPPERKRLAKMLQLSERQVKTWFQNRRAKWRRLKQENPQSNKKEELESLDSSCDQRQDLPSEQNKGASLDSSQCSPSPASQEDLESEISEDSDQEVDIEGDKSYFNAG']



In [36]:
# Display the list of CDS feature indices stored in featcds.
featcds

[3]

Proteína codificada e o seu significado biológico

In [37]:
from Bio.SeqFeature import SeqFeature, FeatureLocation
# Print the "product" qualifier of each CDS feature.
for feature in record.features:
    if feature.type != 'CDS':
        continue
    print(feature.qualifiers['product'])

['hematopoietically-expressed homeobox protein HHEX']


In [38]:
# Print the "note" qualifier attached to each gene feature.
gene_features = [feature for feature in record.features if feature.type == 'gene']
for gene in gene_features:
    print(gene.qualifiers["note"])

['hematopoietically expressed homeobox']


In [39]:
# Print the "translation" qualifier (protein sequence) of each CDS feature.
for feature in filter(lambda candidate: candidate.type == "CDS", record.features):
    print(feature.qualifiers['translation'])

['MQYPHPGPAAGAVGVPLYAPTPLLQPAHPTPFYIEDILGRGPAAPTPAPTLPSPNSSFTSLVSPYRTPVYEPTPIHPAFSHHSAAALAAAYGPGGFGGPLYPFPRTVNDYTHALLRHDPLGKPLLWSPFLQRPLHKRKGGQVRFSNDQTIELEKKFETQKYLSPPERKRLAKMLQLSERQVKTWFQNRRAKWRRLKQENPQSNKKEELESLDSSCDQRQDLPSEQNKGASLDSSQCSPSPASQEDLESEISEDSDQEVDIEGDKSYFNAG']


In [40]:
from Bio import SeqIO
# Re-read the GenBank file and write its records out in FASTA format;
# SeqIO.write returns the number of records written, which is reported below.
genbank_path, fasta_path = "mRNA_seq_HHEX.gb", "HHEX.fasta"
records = SeqIO.parse(genbank_path, "genbank")
count = SeqIO.write(records, fasta_path, "fasta")
print(f'Foi convertido {count} registo para FASTA.')

Foi convertido 1 registo para FASTA.
