### **TCF7L2** ###

In [5]:
from Bio import Entrez
Entrez.email = 'pg49846@alunos.uminho.pt'
from Bio import SeqIO
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML
from Bio import SearchIO
import re

#### Criação de ficheiros genbank e fasta ####

In [30]:
# Fetch nucleotide entry NG_012631 from NCBI in GenBank text format and parse it
# into a single SeqRecord.
handle = Entrez.efetch(db="nucleotide", id='NG_012631', rettype="gb", retmode="text")
try:
    record = SeqIO.read(handle, 'gb')
finally:
    # Always release the network handle, even if parsing fails.
    handle.close()

# Save the record locally as GenBank and FASTA files.
SeqIO.write(record, 'TCF7L2_nucleotide.gb', 'gb')        # GenBank file (nucleotides)
SeqIO.write(record, 'TCF7L2_nucleotide.fasta', 'fasta')  # FASTA file (nucleotides)

1

#### Análise da sequência ####

In [31]:
# --- Record summary: identifiers, origin, length and a 100-base preview ---
preview = record.seq[0:100] + '...'
n_features = len(record.features)
print(f'''**ANNOTATIONS**
Sequence ID: {record.id}
Description: {record.description}
Source: {record.annotations['source']}
Sequence length: {len(record.seq)} bp
Sequence: {preview}
{n_features} features
''')

# --- Tally of features per type, in first-seen order ---
feat_types = {}
for feature in record.features:
    kind = feature.type
    feat_types[kind] = 1 + feat_types.get(kind, 0)
print('Feature count:', feat_types,'\n')

# --- Details for every gene and CDS feature ---
# A feature has exactly one type, so the two branches are mutually exclusive.
for feature in record.features:
    if feature.type == 'gene':
        quals = feature.qualifiers
        print('**Feature GENE**')
        print('Gene:', quals['gene'])
        print('Refs:', quals['db_xref'])
        print('Location:', feature.location)
        print()
    elif feature.type == 'CDS':
        quals = feature.qualifiers
        print('**Feature CDS**')
        print('Product:', quals['product'])
        print('Protein ID:', quals['protein_id'])
        print('Location:', feature.location)
        # Only the first 100 residues of the translation are shown.
        print('Translated sequence:', quals['translation'][0][:100], '...')
        print()

# --- Free-text comment block attached to the record ---
print(f'''\n**COMMENTS**
{record.annotations['comment']}
''')

**ANNOTATIONS**
Sequence ID: NG_012631.1
Description: Homo sapiens transcription factor 7 like 2 (TCF7L2), RefSeqGene on chromosome 10
Source: Homo sapiens (human)
Sequence length: 224429 bp
Sequence: ATGCAGACTTGGCTTCAGGCAATAAAACGTCATAGACGTATAACTGTAGCGAGTGAAATTTCTAGGGAGCAAATTAGGCTGAAAACATCTTCTAGAGACA...
33 features

Feature count: {'source': 1, 'gene': 3, 'mRNA': 1, 'exon': 15, 'CDS': 1, 'misc_feature': 11, 'ncRNA': 1} 

**Feature GENE**
Gene: ['TCF7L2']
Refs: ['GeneID:6934', 'HGNC:HGNC:11641', 'MIM:602228']
Location: [4997:222429](+)

**Feature CDS**
Product: ['transcription factor 7-like 2 isoform 17']
Protein ID: ['NP_001354872.1']
Location: join{[5507:5696](+), [5957:6024](+), [6233:6358](+), [19306:19375](+), [94775:94877](+), [195934:196067](+), [198673:198776](+), [200761:200848](+), [205748:205874](+), [206475:206635](+), [207083:207191](+), [212771:212820](+), [213417:213468](+), [214670:214743](+), [220305:220723](+)}
Translated sequence: MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSA

### Criação de ficheiros genbank e fasta - proteína ###

In [6]:
# Fetch protein entry NP_001354872 from NCBI in GenBank text format and parse it
# into a single SeqRecord.
# NOTE: this rebinds `record`, replacing the nucleotide record loaded earlier in the notebook.
Entrez.email = 'pg49846@alunos.uminho.pt'  # same address as in the imports cell; repeated here
handle = Entrez.efetch(db="protein", id='NP_001354872', rettype="gb", retmode="text")
try:
    record = SeqIO.read(handle, 'gb')
finally:
    # Always release the network handle, even if parsing fails.
    handle.close()

# Save the record locally as GenBank and FASTA files.
SeqIO.write(record, 'TCF7L2_prot.gb', 'gb')        # GenBank file (amino acids)
SeqIO.write(record, 'TCF7L2_prot.fasta', 'fasta')  # FASTA file (amino acids)

1