In [1]:
#importação dos diferentes packages

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqFeature

In [2]:
#ler o ficheiro

import re

fonte = open("MRSA.gb",'r')
MRSA = fonte.read()
fonte.close()
print( MRSA )

LOCUS       MA355827                 442 bp    DNA     linear   PAT 31-AUG-2018
DEFINITION  JP 2018068315-A/160: SEQUENCES FOR DETECTION AND IDENTIFICATION OF
            METHICILLIN-RESISTANT STAPHYLOCOCCUS AUREUS (MRSA) OF MREJ TYPE
            XXI.
ACCESSION   MA355827
VERSION     MA355827.1
KEYWORDS    JP 2018068315-A/160.
SOURCE      Staphylococcus aureus
  ORGANISM  Staphylococcus aureus
            Bacteria; Bacillota; Bacilli; Bacillales; Staphylococcaceae;
            Staphylococcus.
REFERENCE   1  (bases 1 to 442)
  AUTHORS   Menard,C. and Roger-dalbert,C.
  TITLE     SEQUENCES FOR DETECTION AND IDENTIFICATION OF METHICILLIN-RESISTANT
            STAPHYLOCOCCUS AUREUS (MRSA) OF MREJ TYPE XXI
  JOURNAL   Patent: JP 2018068315-A 160 10-MAY-2018;
            GENEOHM SCIENCES CANADA INC
COMMENT     OS   Staphylococcus aureus
            PN   JP 2018068315-A/160
            PD   10-MAY-2018
            PF   26-DEC-2017 JP 2017250217
            PR   06-APR-2012 US 61/621368
      

In [3]:
#imprimir o link NCBI

existe = re.search(r'ACCESSION\s+[^\s]+', MRSA)
if existe:
    m = re.match(r'ACCESSION\s+([^\s]+)', existe[0] )
    if m:
        id = m.group(1)
        print( "https://www.ncbi.nlm.nih.gov/nuccore/{}".format( id ) )
else:
    print( "Padrão não encontrado" )

https://www.ncbi.nlm.nih.gov/nuccore/MA355827


In [4]:
#imprimir o título

existe = re.findall(r'TITLE\s+.*?(?=JOURNAL)', MRSA, re.DOTALL)
if existe:
    for title in existe:
        m = re.match( r'TITLE\s+(.+)', title, re.DOTALL )
        print( re.sub(r'\s+', ' ', m.group(1) ) )

SEQUENCES FOR DETECTION AND IDENTIFICATION OF METHICILLIN-RESISTANT STAPHYLOCOCCUS AUREUS (MRSA) OF MREJ TYPE XXI 


In [6]:
#imprimir o locus e a source

flag = False
source = ""
locusid = ""
fonte = open("MRSA.gb",'r')
for linha in fonte:
    if re.search(r"^LOCUS", linha):
        s = re.match(r"LOCUS\s+([^ ]+)", linha)
        if s:
            locusid = s.group(1)    
    if re.search(r"^SOURCE", linha):
        s = re.match(r"SOURCE\s+(.+)", linha)
        if s:
            source = s.group(1)
    if re.search(r"//", linha):
        flag = False  
    if re.search(r"ORIGIN", linha):
        flag = True
fonte.close()
print( locusid, source)

MA355827 Staphylococcus aureus


In [7]:
#ler o ficheiro
MRSA = SeqIO.read("MRSA.gb", "genbank")

#Bactéria
print("ID: ", MRSA.id)
print("Descrição: ", MRSA.description)

#Locus
print("Locus: ", MRSA.name)
print("bp: ", len(MRSA))

#Sequência
print("Tamanho da sequência: ", len(MRSA), "pb")

#Features
print("Features: ", MRSA.features)
print("Tamanho das features: ", len(MRSA.features))
#Identificação de features do tipo CDS e a sua localização:
featcds = [] 
for i in range(len(MRSA.features)):
    if MRSA.features[i].type=='CDS':
        featcds.append(i)
for k in featcds:
    print("Features do tipo CDS e a sua localização:\n",MRSA.features[k].location)
    print(len(featcds))

featgenes = []  #número de genes anotados no registo (feature tipo “gene”)
for i in range(len(MRSA.features)):
    if MRSA.features[i].type == "gene":
        featgenes.append(i)
print(len(featgenes))

#Reference
print("Referências bibliográficas relacionadas com registo:\n", MRSA.annotations['references'])

#Anotations
print(len(MRSA.annotations) )
print("from:", MRSA.annotations["source"])
print(MRSA.letter_annotations)
print("Organismo:", MRSA.annotations["organism"])
print("Taxonomia:", MRSA.annotations["taxonomy"])
print("Tipo de molécula: ", MRSA.annotations['molecule_type'])
print("Código do registo: ", MRSA.annotations['accessions'])
print ("Keywords:\n",MRSA.annotations.keys())


print(MRSA.dbxrefs)


ID:  MA355827.1
Descrição:  JP 2018068315-A/160: SEQUENCES FOR DETECTION AND IDENTIFICATION OF METHICILLIN-RESISTANT STAPHYLOCOCCUS AUREUS (MRSA) OF MREJ TYPE XXI
Locus:  MA355827
bp:  442
Tamanho da sequência:  442 pb
Features:  [SeqFeature(FeatureLocation(ExactPosition(0), ExactPosition(442), strand=1), type='source')]
Tamanho das features:  1
0
Referências bibliográficas relacionadas com registo:
 [Reference(title='SEQUENCES FOR DETECTION AND IDENTIFICATION OF METHICILLIN-RESISTANT STAPHYLOCOCCUS AUREUS (MRSA) OF MREJ TYPE XXI', ...)]
12
from: Staphylococcus aureus
{}
Organismo: Staphylococcus aureus
Taxonomia: ['Bacteria', 'Bacillota', 'Bacilli', 'Bacillales', 'Staphylococcaceae', 'Staphylococcus']
Tipo de molécula:  DNA
Código do registo:  ['MA355827']
Keywords:
 dict_keys(['molecule_type', 'topology', 'data_file_division', 'date', 'accessions', 'sequence_version', 'keywords', 'source', 'organism', 'taxonomy', 'references', 'comment'])
[]


In [8]:
print(MRSA.seq)

GATCAATCTTTGTCGGTACACGATATTCTTCACGACTAAATAAACGCTCATTCGCGATTTTATAAATGAATGTTGATAACAATGTTGTATTATCTACTGAAATCTCATTACGTTGCATCGGAAACATTGTGTTCTGTATGTAAAAGCCGTCTTGATAATCTTTAGTAGTACCGAAGCTGGTCATACGAGAGTTATATTTTCCAGCCAAAACGATATTTTTATAATCATTACGTGAAAAAGGTTTCCCTTCATTATCACACAAATATTTTAGCTTTTCAGTTTCTATATCAACTGTAGCTTCTTTATCCATACGTTGAATAATTGTACGATTCTGACGCACCATCTTTTGCACACCTTTAATGTTATTTGTTTTAAAAGCATGAATAAGTTTTTCAACACAACGATGTGAATCTTCTAAGAAGTCACCGTAAAATGAAGGATC


Bactérias\
    id_bacteria: \
    id: MA355827.1\
    classificacao: IACS / Resistentes\
    descrição: JP 2018068315-A/160: SEQUENCES FOR DETECTION AND IDENTIFICATION OF METHICILLIN-RESISTANT STAPHYLOCOCCUS AUREUS (MRSA) OF MREJ TYPE XXI\
    link_NCBI: https://www.ncbi.nlm.nih.gov/nuccore/MA355827
    
Locus\
    id_locus:
    id_l: MA355827\
    bp: 442
    
Sequência\
    id_seq\
    len_seq: 442 bp

Features\
    id_features\
    tamanho: 1\
    location\
    type\
        CDS: 0\
        gene: 0

Reference\
    id_reference\
    journal\
    title:\
        SEQUENCES FOR DETECTION AND IDENTIFICATION OF METHICILLIN-RESISTANT STAPHYLOCOCCUS AUREUS (MRSA) OF MREJ TYPE XXI\
    author
    
Anotations\
    id_anotations\
    tamanho das anotations: 12\
    source: Staphylococcus aureus\
    letter_anotations: {} \
    organism: Staphylococcus aureus\
    taxonomy: 'Bacteria', 'Bacillota', 'Bacilli', 'Bacillales', 'Staphylococcaceae', 'Staphylococcus'\
    mol_type: DNA\
    keywords: 'molecule_type', 'topology', 'data_file_division', 'date', 'accessions', 'sequence_version', 'keywords', 'source', 'organism', 'taxonomy', 'references', 'comment'\
    accessions: MA355827
    
dbxrefs: []