# Biopython - Python tools for Computational Molecular Biology

More information: http://biopython.org

## SeqIO  - Sequence Input/Output

More information: http://biopython.org/wiki/SeqIO

### Basic usage of SeqIO

In [38]:
from Bio import SeqIO

# SeqIO.read is only for files that contain a single sequence record
record = SeqIO.read('one_seq_prot.fasta', 'fasta')

In [None]:
type(record)

In [None]:
record.name

In [None]:
print(record.seq)

In [None]:
# list all available methods and attributes of the selected object
dir(record)

In [None]:
print(record)

In [None]:
record.id

In [None]:
record.seq

In [None]:
print(record.seq)

In [None]:
record.description

In [None]:
record = SeqIO.read('one_seq_nucl.fasta', 'fasta')

In [None]:
print(record.seq)

In [None]:
# reverse_complement() gives the reverse complement strand of the record;
# here we print only its sequence
print(record.reverse_complement().seq)

In [None]:
# SeqIO.parse is for files with more than one sequence
records = SeqIO.parse('rab20.fasta', 'fasta')

In [None]:
type(records)

In [None]:
# collect (ID, sequence length) for every record whose sequence is longer than 220
filtered = [
    (record.id, len(record.seq))
    for record in SeqIO.parse('rab20_ncbi.fasta', 'fasta')
    if len(record.seq) > 220
]

In [41]:
# write every record longer than 220 to filtered.fasta;
# the header line holds the ID and the sequence length separated by a tab
with open('filtered.fasta', 'w') as res:
    for record in SeqIO.parse('rab20_ncbi.fasta', 'fasta'):
        # skip sequences that are not strictly longer than the threshold
        if len(record.seq) <= 220:
            continue
        res.write(f'>{record.id}\t{len(record.seq)}\n{record.seq}\n')

In [42]:
# sort in place by sequence length (the second element of each tuple), shortest first
filtered.sort(key=lambda x: x[1])

In [None]:
# walk the list from its last element to its first and print each entry
for i in reversed(filtered):
    print(i)

In [None]:
records = SeqIO.parse('rab20.fasta', 'fasta')

In [None]:
next(records)

In [None]:
# iterate over all records and print their IDs;
# note: the loop consumes the iterator, so `records` is exhausted afterwards
records = SeqIO.parse('rab20.fasta', 'fasta')
for record in records:
    print(record.id)

In [None]:
# we can use next() to work with only one sequence;
# the iterator has to be created again first, because an iterator that was
# already looped over is exhausted and next() on it raises StopIteration
records = SeqIO.parse('rab20.fasta', 'fasta')
record = next(records)

In [None]:
record.description

In [None]:
# another way to get the first record: read all records into a list and take index 0
# (this loads every record of the file into memory, unlike next())
records = SeqIO.parse('rab20.fasta', 'fasta')
record = list(records)[0]

In [None]:
# SeqIO.to_dict turns the parsed fasta into a dictionary in the format ID: record
# (each value is a whole record, so its sequence is available as value.seq)
from Bio import SeqIO
record_dict = SeqIO.to_dict(SeqIO.parse('rab20.fasta', 'fasta'))

In [None]:
# making the same dictionary (ID: record) from a fasta file without SeqIO.to_dict
record_dict = {}
for record in SeqIO.parse('rab20.fasta', 'fasta'):
    # store the whole record (not only record.seq) so the values match what
    # SeqIO.to_dict produces and still expose .id, .description and .seq
    record_dict[record.id] = record

In [None]:
record_dict['NP_001086022.1']

In [None]:
record_dict['NP_001086022.1'].seq

In [None]:
# dump every record of rab20.fasta as a two-line entry: '>ID' header, then the sequence
with open('test_out.fasta', 'w') as res:
    res.writelines(
        f'>{record.id}\n{record.seq}\n'
        for record in SeqIO.parse("rab20.fasta", 'fasta')
    )

In [None]:
# write to the result file only those sequences that are longer than 230 amino acids
sequences = SeqIO.parse('rab20_ncbi.fasta', 'fasta')

with open("longer_than_230.fasta", 'w') as res:
    for record in sequences:
        # skip everything that is not strictly longer than the threshold
        if len(record.seq) <= 230:
            continue
        res.write('>{} longer than 230 \n{}\n'.format(record.name, record.seq))

In [None]:
# SeqIO can also be used for format conversion:
# here the GenBank file NP_035357.1.gb is converted to a fasta file
from Bio import SeqIO
SeqIO.convert("NP_035357.1.gb", "genbank", "NP_035357.1.fasta", "fasta")