In [1]:
from Bio import Entrez
from Bio import SeqIO
from Bio import Seq
# Contact address attached to the Entrez (NCBI E-utilities) requests made by this notebook.
Entrez.email = "joanaoliveira1000@gmail.com"

In [2]:
# Accession and coordinates of the genomic region analysed in this notebook.
# They are passed to Entrez.efetch as id, seq_start and seq_stop respectively.
id_ncbi = "NC_000011.10"
inicio = "2445008"
fim = "2849110"

In [4]:
# Download the GenBank record restricted to the region [inicio, fim] of id_ncbi.
handle = Entrez.efetch(
    db="nucleotide",
    rettype="gb",
    retmode="text",
    id=id_ncbi,
    seq_start=inicio,
    seq_stop=fim,
)
try:
    # SeqIO.read expects exactly one record in the handle.
    seq_record = SeqIO.read(handle, "gb")
finally:
    # Release the network connection even if parsing fails.
    handle.close()
print(seq_record)

ID: NC_000011.10
Name: NC_000011
Description: Homo sapiens chromosome 11, GRCh38.p14 Primary Assembly
Database cross-references: BioProject:PRJNA168, Assembly:GCF_000001405.40
Number of features: 39
/molecule_type=DNA
/topology=linear
/data_file_division=CON
/date=06-APR-2022
/accessions=['NC_000011', 'REGION:', '2445008..2849110']
/sequence_version=10
/keywords=['RefSeq']
/source=Homo sapiens (human)
/organism=Homo sapiens
/taxonomy=['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo']
/references=[Reference(title='Human chromosome 11 DNA sequence and analysis including novel gene identification', ...), Reference(title='Finishing the euchromatic sequence of the human genome', ...), Reference(title='Initial sequencing and analysis of the human genome', ...)]
/comment=REFSEQ INFORMATION: The reference sequence is identical to
CM000673.2.
On Feb 3, 2014 this 

In [6]:
# Save the fetched region to a local GenBank file; the next cell reloads the record from it.
SeqIO.write(seq_record, "KCNQ1.gb", "genbank")

1

In [7]:
# Reload the record from the local GenBank file instead of querying NCBI again,
# then display its description as the cell output.
seq_record = SeqIO.read("KCNQ1.gb", "gb")
seq_record.description

'Homo sapiens chromosome 11, GRCh38.p14 Primary Assembly'

In [8]:
# Total number of features annotated on the record.
print(len(seq_record.features))


39


In [9]:
# Print the type and location of every feature, one blank line between entries.
for feature in seq_record.features:
    print(feature.type, ":", feature.location, "\n")

source : [0:404103](+) 

gene : [0:404103](+) 

mRNA : join{[0:477](+), [82920:83011](+), [125620:125747](+), [126317:126396](+), [127005:127102](+), [127838:127979](+), [138427:138538](+), [140204:140300](+), [142562:142685](+), [143705:143847](+), [216953:217074](+), [323836:323912](+), [330952:331047](+), [331978:332025](+), [332968:333030](+), [402759:404098](+)} 

CDS : join{[91:477](+), [82920:83011](+), [125620:125747](+), [126317:126396](+), [127005:127102](+), [127838:127979](+), [138427:138538](+), [140204:140300](+), [142562:142685](+), [143705:143847](+), [216953:217074](+), [323836:323912](+), [330952:331047](+), [331978:332025](+), [332968:333030](+), [402759:402996](+)} 

misc_feature : [91:175](+) 

misc_feature : [169:172](+) 

misc_feature : [274:343](+) 

misc_feature : join{[454:477](+), [82920:82960](+)} 

misc_feature : join{[82975:83011](+), [125620:125647](+)} 

misc_feature : join{[125731:125747](+), [126317:126364](+)} 

misc_feature : join{[126388:126396](+),

In [10]:
# Positions (indices into seq_record.features) of every CDS feature.
cds = [
    posicao
    for posicao, feature in enumerate(seq_record.features)
    if feature.type == "CDS"
]
print("Número de CDS:", len(cds))
print()

# For each CDS show its index, its location and the nucleotide sequence
# obtained by extracting that location from the full record sequence.
for indice in cds:
    feature_cds = seq_record.features[indice]
    print(indice)
    print("Localização:")
    print(feature_cds.location)
    print("Sequência:", feature_cds.extract(seq_record.seq), "\n")

Número de CDS: 2

3
Localização:
join{[91:477](+), [82920:83011](+), [125620:125747](+), [126317:126396](+), [127005:127102](+), [127838:127979](+), [138427:138538](+), [140204:140300](+), [142562:142685](+), [143705:143847](+), [216953:217074](+), [323836:323912](+), [330952:331047](+), [331978:332025](+), [332968:333030](+), [402759:402996](+)}
Sequência: ATGGCCGCGGCCTCCTCCCCGCCCAGGGCCGAGAGGAAGCGCTGGGGTTGGGGCCGCCTGCCAGGCGCCCGGCGGGGCAGCGCGGGCCTGGCCAAGAAGTGCCCCTTCTCGCTGGAGCTGGCGGAGGGCGGCCCGGCGGGCGGCGCGCTCTACGCGCCCATCGCGCCCGGCGCCCCAGGTCCCGCGCCCCCTGCGTCCCCGGCCGCGCCCGCCGCGCCCCCAGTTGCCTCCGACCTTGGCCCGCGGCCGCCGGTGAGCCTAGACCCGCGCGTCTCCATCTACAGCACGCGCCGCCCGGTGTTGGCGCGCACCCACGTCCAGGGCCGCGTCTACAACTTCCTCGAGCGTCCCACCGGCTGGAAATGCTTCGTTTACCACTTCGCCGTCTTCCTCATCGTCCTGGTCTGCCTCATCTTCAGCGTGCTGTCCACCATCGAGCAGTATGCCGCCCTGGCCACGGGGACTCTCTTCTGGATGGAGATCGTGCTGGTGGTGTTCTTCGGGACGGAGTACGTGGTCCGCCTCTGGTCCGCCGGCTGCCGCAGCAAGTACGTGGGCCTCTGGGGGCGGCTGCGCTTTGCCCGGAAGCCCATTTCCATCATCGACCTCATCGTGGTCGTGGCCTCCATGGTGGTCCTCT

# Tipos de Features

In [19]:
# Feature types tallied in this cell, in the order they are reported.
tipos_de_interesse = [
    "gene",
    "mRNA",
    "regulatory",
    "ncRNA",
    "CDS",
    "misc_feature",
    "source",
    "sig_peptide",
]

# Map each feature type to the list of indices (into seq_record.features)
# of the features of that type; other feature types are ignored.
indices_por_tipo = {tipo: [] for tipo in tipos_de_interesse}
for posicao, feature in enumerate(seq_record.features):
    if feature.type in indices_por_tipo:
        indices_por_tipo[feature.type].append(posicao)

# Expose the per-type index lists under the names used by the later cells.
genes = indices_por_tipo["gene"]
mrna = indices_por_tipo["mRNA"]
regulatory = indices_por_tipo["regulatory"]
ncRNA = indices_por_tipo["ncRNA"]
cds = indices_por_tipo["CDS"]
miscfeature = indices_por_tipo["misc_feature"]
source = indices_por_tipo["source"]
sigpeptide = indices_por_tipo["sig_peptide"]

print("Número de genes:", len(genes))
print("Número de mRNA:", len(mrna))
print("Número de regulatory:", len(regulatory))
print("Número de ncRNA:", len(ncRNA))
print("Número de CDS:", len(cds))
print("Número de misc_feature:", len(miscfeature))
print("Número de source:", len(source))
print("Número de sigpeptide:", len(sigpeptide))

Número de genes: 6
Número de mRNA: 2
Número de regulatory: 2
Número de ncRNA: 4
Número de CDS: 2
Número de misc_feature: 21
Número de source: 1
Número de sigpeptide: 1


In [20]:
# Report, for each CDS, the protein id, product name, translated sequence and its length.
print("Isoformas", "\n")
for indice in cds:
    qualificadores = seq_record.features[indice].qualifiers
    # Each qualifier value is a list; it is unpacked into the call as in a single-entry list.
    print("Protein_id:", *qualificadores["protein_id"])
    print("Produto:", *qualificadores["product"])
    print("Sequência:", *qualificadores["translation"])
    print("Tamanho:", len(*qualificadores["translation"]), '\n')

Isoformas 

Protein_id: NP_000209.2
Produto: potassium voltage-gated channel subfamily KQT member 1 isoform 1
Sequência: MAAASSPPRAERKRWGWGRLPGARRGSAGLAKKCPFSLELAEGGPAGGALYAPIAPGAPGPAPPASPAAPAAPPVASDLGPRPPVSLDPRVSIYSTRRPVLARTHVQGRVYNFLERPTGWKCFVYHFAVFLIVLVCLIFSVLSTIEQYAALATGTLFWMEIVLVVFFGTEYVVRLWSAGCRSKYVGLWGRLRFARKPISIIDLIVVVASMVVLCVGSKGQVFATSAIRGIRFLQILRMLHVDRQGGTWRLLGSVVFIHRQELITTLYIGFLGLIFSSYFVYLAEKDAVNESGRVEFGSYADALWWGVVTVTTIGYGDKVPQTWVGKTIASCFSVFAISFFALPAGILGSGFALKVQQKQRQKHFNRQIPAAASLIQTAWRCYAAENPDSSTWKIYIRKAPRSHTLLSPSPKPKKSVVVKKKKFKLDKDNGVTPGEKMLTVPHITCDPPEERRLDHFSVDGYDSSVRKSPTLLEVSMPHFMRTNSFAEDLDLEGETLLTPITHISQLREHHRATIKVIRRMQYFVAKKKFQQARKPYDVRDVIEQYSQGHLNLMVRIKELQRRLDQSIGKPSLFISVSEKSKDRGSNTIGARLNRVEDKVTQLDQRLALITDMLHQLLSLHGGSTPGSGGPPREGGAHITQPCGSGGSVDPELFLPSNTLPTYEQLTVPRRGPDEGS
Tamanho: 676 

Protein_id: NP_861463.1
Produto: potassium voltage-gated channel subfamily KQT member 1 isoform 2 precursor
Sequência: MDFLIVLVCLIFSVLSTIEQYAALATGTLFWMEIVLVVFFGTEYVVRLWSAGCRSKYVGLWGRLRFAR

In [21]:
# Collect the translated sequence and the protein id of each CDS, in CDS order.
proteinas, id_proteinas = [], []
for indice in cds:
    qualificadores = seq_record.features[indice].qualifiers
    # Unpacking into append() requires each qualifier list to hold exactly one value.
    proteinas.append(*qualificadores["translation"])
    id_proteinas.append(*qualificadores["protein_id"])

In [22]:
# Indices of every "gene" feature in the record.
genes = [
    posicao
    for posicao, feature in enumerate(seq_record.features)
    if feature.type == "gene"
]

# For each gene print the strand of its location followed by its "gene" qualifier.
for indice in genes:
    feature_gene = seq_record.features[indice]
    print(feature_gene.location.strand)
    print(feature_gene.qualifiers["gene"])

1
['KCNQ1']
-1
['LOC124902613']
-1
['KCNQ1OT1']
1
['COX6CP18']
-1
['LOC124902614']
-1
['KCNQ1-AS1']


# Proteína

Isoforma de interesse biológico segundo a literatura: potassium voltage-gated channel subfamily KQT member 1 isoform 1 (NP_000209.2)

In [25]:
# Contact address for Entrez requests; it must be a well-formed e-mail address
# (a single "@"), matching the one set at the top of the notebook.
Entrez.email = "joanaoliveira1000@gmail.com"

# Download the GenPept record of the isoform of interest.
handle = Entrez.efetch(db="protein", rettype="gb", retmode="text", id="NP_000209.2")
try:
    # SeqIO.read expects exactly one record in the handle.
    seq_record_pro = SeqIO.read(handle, "gb")
finally:
    # Release the network connection even if parsing fails.
    handle.close()
print(seq_record_pro)

ID: NP_000209.2
Name: NP_000209
Description: potassium voltage-gated channel subfamily KQT member 1 isoform 1 [Homo sapiens]
Number of features: 24
/topology=linear
/data_file_division=PRI
/date=28-DEC-2022
/accessions=['NP_000209']
/sequence_version=2
/db_source=REFSEQ: accession NM_000218.3
/keywords=['RefSeq', 'MANE Select']
/source=Homo sapiens (human)
/organism=Homo sapiens
/taxonomy=['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo']
/references=[Reference(title='Purification and membrane interactions of human KCNQ1100-370 potassium ion channel', ...), Reference(title="To Modify or Not to Modify: Allele-Specific Effects of 3'UTR-KCNQ1 Single Nucleotide Polymorphisms on Clinical Phenotype in a Long QT 1 Founder Population Segregating a Dominant-Negative Mutation", ...), Reference(title='KCNJ11 and KCNQ1 Gene Polymorphisms and Placental Expression in 

Guardar sequência da proteína em formato fasta

In [26]:
# Write the protein record to a FASTA file named after the product (the file name contains spaces).
SeqIO.write(seq_record_pro, "potassium voltage-gated channel subfamily KQT member 1 isoform 1.fasta", "fasta")

1

Referências externas

In [27]:
# Database cross-references stored on the record.
# NOTE(review): this reads seq_record (the nucleotide region), not seq_record_pro,
# although the cell sits in the protein section — confirm which record was intended.
ref_externas =seq_record.dbxrefs
print(ref_externas)

['BioProject:PRJNA168', 'Assembly:GCF_000001405.40']
