In [None]:
from Bio import SeqIO


In [None]:
help(SeqIO)
# Gives an overview of how we can use the SeqIO module

In [None]:
# To visit every sequence in a multi-record file, iterate over SeqIO.parse with
# a for-loop. If the file holds just one sequence, it is advisable to use
# SeqIO.read('filename', 'fileType') instead.

covid = SeqIO.parse('400 Zim hCoV Genome Sequences.fasta', 'fasta')
for seq_record in covid:
    # One line each: record id, repr of the sequence, sequence length.
    print(seq_record.id, repr(seq_record.seq), len(seq_record), sep='\n')

In [None]:
# Same walk-through as for the FASTA file, this time over a GenBank file:
# print the id, the repr of the sequence and the length of every record.
for seq_record in SeqIO.parse('ls_orchid.gbk', 'genbank'):
    print(f'{seq_record.id}\n{seq_record.seq!r}\n{len(seq_record)}')

In [2]:
# When the sequence file has multiple records but you only want the first one,
# calling next() on the parser is very concise. The remaining records are
# simply never read.
from Bio import SeqIO

# Open the file ourselves so it is closed as soon as the first record has been
# read. A parser that is handed a filename and never run to exhaustion keeps
# its file open until it is garbage-collected.
with open('400 Zim hCoV Genome Sequences.fasta') as handle:
    first_record = next(SeqIO.parse(handle, 'fasta'))
first_record

SeqRecord(seq=Seq('ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGT...AAA'), id='hCoV-19/Wuhan/WIV04/2019|EPI_ISL_402124|2019-12-30', name='hCoV-19/Wuhan/WIV04/2019|EPI_ISL_402124|2019-12-30', description='hCoV-19/Wuhan/WIV04/2019|EPI_ISL_402124|2019-12-30', dbxrefs=[])

In [None]:
# A for-loop only walks the records in file order. To reach records in any
# order, load them all into a list first and index into it.
from Bio import SeqIO
records = list(SeqIO.parse('ls_orchid.gbk', 'genbank'))
print('Found %i records' % len(records))

print('The last record')
last_record = records[-1]  # a negative index counts from the end of the list
print(last_record.id, repr(last_record.seq), len(last_record), sep='\n')

print('The first record')
first_record = records[0]  # remember, Python counts from zero
print(first_record.id, repr(first_record.seq), len(first_record), sep='\n')


In [None]:
from Bio import SeqIO

# Take only the first record of the GenBank file and look at the dict-like
# annotations attached to it: its values, its keys, the whole mapping, and
# finally the single 'source' entry.
record_iterator = SeqIO.parse('ls_orchid.gbk', 'genbank')
first_record = next(record_iterator)

annotations = first_record.annotations
print(annotations.values())

print(annotations.keys())

print(annotations)
print(annotations['source'])

In [None]:
# You can access online databases to fetch sequences; however, for reuse it is
# advisable to download the sequences first.
from Bio import Entrez
from Bio import SeqIO

Entrez.email = 'mskambarami@gmail.com'

# Request a single nucleotide entry as GenBank text and read it as one record.
fetch_args = dict(db='nucleotide', rettype='gb', retmode='text', id='6273291')
with Entrez.efetch(**fetch_args) as handle:
    seq_record = SeqIO.read(handle, 'gb')

feature_count = len(seq_record.features)
print('%s with %i features' % (seq_record.id, feature_count))


In [None]:
# Writing a record to a file, given the file name, in FASTA format.
# NOTE: first_record is not created in this cell; it must already exist from an
# earlier cell (e.g. the one that calls next() on the 'ls_orchid.gbk' parser).
from Bio import SeqIO
SeqIO.write (first_record, 'writingfas.faa', 'fasta')

In [None]:
# Converting between sequence formats: parse the records in one format and
# hand them straight to SeqIO.write in another.

from Bio import SeqIO
records = SeqIO.parse(
    'ls_orchid.gbk',
    'genbank',
)
count = SeqIO.write(
    records,
    'format_converted.fasta',
    'fasta',
)
print(f'Converted {count} records')

In [None]:
# Format conversion is such a common task that Biopython offers
# SeqIO.convert as a shortcut: input file and format, then output file and format.
from Bio import SeqIO
count = SeqIO.convert(
    'ls_orchid.gbk', 'genbank',
    'formatconvert.fasta', 'fasta',
)
print(f' Converted {count} records')

In [None]:
# Show the built-in help text for SeqIO.convert.
help(SeqIO.convert)