nuin / beginning-python-for-bioinformatics

repository for the code featured in the blog

This URL has Read+Write access

beginning-python-for-bioinformatics / scripts / motifs / fasta.py
100644 48 lines (40 sloc) 1.077 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#! /usr/bin/env python
 
class Fasta:
    """A single FASTA record: a header paired with its sequence.

    Both constructor arguments are stored unchanged as public attributes:

    name     -- the record's header text
    sequence -- the record's sequence text
    """

    def __init__(self, name, sequence):
        self.name = name
        self.sequence = sequence

    def __repr__(self):
        # Show both fields so records are readable when printed or inspected
        # in a debugger instead of appearing as an opaque instance address.
        return '%s(name=%r, sequence=%r)' % (
            self.__class__.__name__, self.name, self.sequence)
 
def read_fasta(file):
    """Parse FASTA-formatted lines into a list of ``Fasta`` records.

    ``file`` is any iterable of text lines (an open file, a list of
    strings, ...).  A line starting with ">" opens a new record; the whole
    header line, stripped of surrounding whitespace but still including the
    leading ">", becomes the record's ``name``.  Every following line up to
    the next header is stripped of surrounding whitespace and concatenated
    to form the record's ``sequence``.

    Lines that appear before the first header are ignored, and input that
    contains no header at all (including empty input) yields an empty list.

    Returns the records in the order they appear in the input.
    """
    items = []
    name = None   # header of the record being read; None until one is seen
    chunks = []   # sequence pieces of the current record, joined once
    for line in file:
        if line.startswith(">"):
            # A new header closes the previous record, if there was one.
            if name is not None:
                items.append(Fasta(name, ''.join(chunks)))
            name = line.strip()
            chunks = []
        elif name is not None:
            chunks.append(line.strip())

    # Flush the last record; nothing to flush when no header was found.
    if name is not None:
        items.append(Fasta(name, ''.join(chunks)))
    return items
 
def read_seqs(file):
    """Return the sequences of a FASTA input as a list of strings.

    ``file`` is any iterable of text lines.  Lines starting with ">" only
    act as record separators; their text is discarded.  All other lines are
    stripped of surrounding whitespace (so "\\n", "\\r\\n" and a missing
    final newline are all handled) and concatenated per record.

    Sequence lines found before the first header are kept and become part
    of the first returned sequence, so a header-less input yields a single
    sequence.  Input with neither a header nor any sequence data yields an
    empty list.  A header with no sequence lines yields an empty string.
    """
    items = []
    chunks = []          # pieces of the sequence currently being read
    seen_header = False
    for line in file:
        if line.startswith(">"):
            # Only a second or later header closes a record; the first one
            # must not flush, so that leading header-less data is kept.
            if seen_header:
                items.append(''.join(chunks))
                chunks = []
            seen_header = True
        else:
            chunks.append(line.strip())

    seq = ''.join(chunks)
    # Avoid reporting a phantom empty sequence for empty/blank input.
    if seen_header or seq:
        items.append(seq)
    return items
 
def format_output(sequence, length):
    """Break ``sequence`` into newline-separated rows of ``length`` characters.

    The final row holds whatever remains and may be shorter.  An empty
    ``sequence`` produces an empty string.
    """
    starts = range(0, len(sequence), length)
    rows = [sequence[start:start + length] for start in starts]
    return '\n'.join(rows)