In [1]:
seqA = "CCGGTTTTT"
seqB = "AGTTTAA"
seqC = "AGGTTT"
sequences = [seqA, seqB, seqC]
print(sequences)

['CCGGTTTTT', 'AGTTTAA', 'AGGTTT']


In [2]:
import numpy as np
coordinates = np.array([[1, 3, 4, 7, 9], [0, 2, 2, 5, 5], [0, 2, 3, 6, 6]])
print(coordinates)

[[1 3 4 7 9]
 [0 2 2 5 5]
 [0 2 3 6 6]]


In [3]:
from Bio.Align import Alignment
alignment = Alignment(sequences, coordinates)
alignment 

<Alignment object (3 rows x 8 columns) at 0x106e93bc0>

In [4]:
alignment.sequences

['CCGGTTTTT', 'AGTTTAA', 'AGGTTT']

In [5]:
alignment.coordinates

array([[1, 3, 4, 7, 9],
       [0, 2, 2, 5, 5],
       [0, 2, 3, 6, 6]])

In [6]:
print(alignment)

                  1 CGGTTTTT 9
                  0 AG-TTT-- 5
                  0 AGGTTT-- 6



In [7]:
lines = ["CGGTTTTT", "AG-TTT--", "AGGTTT--"]
for line in lines:
    print(line)

CGGTTTTT
AG-TTT--
AGGTTT--


In [8]:
lines = [line.encode() for line in lines]  # convert to bytes
lines

[b'CGGTTTTT', b'AG-TTT--', b'AGGTTT--']

In [9]:
sequences, coordinates = Alignment.parse_printed_alignment(lines)
sequences

[b'CGGTTTTT', b'AGTTT', b'AGGTTT']

In [10]:
sequences = [sequence.decode() for sequence in sequences]
sequences

['CGGTTTTT', 'AGTTT', 'AGGTTT']

In [11]:
print(coordinates)

[[0 2 3 6 8]
 [0 2 2 5 5]
 [0 2 3 6 6]]


In [12]:
from Bio.Seq import Seq
sequences[0] = "C" + sequences[0]
sequences[1] = sequences[1] + "AA"
sequences

['CCGGTTTTT', 'AGTTTAA', 'AGGTTT']

In [13]:
coordinates[0, :] += 1
print(coordinates)

[[1 3 4 7 9]
 [0 2 2 5 5]
 [0 2 3 6 6]]


In [14]:
alignment = Alignment(sequences, coordinates)
print(alignment)

                  1 CGGTTTTT 9
                  0 AG-TTT-- 5
                  0 AGGTTT-- 6



In [15]:
ungapped_alignment = Alignment(["ACGTACGT", "AAGTACGT", "ACGTACCT"])
ungapped_alignment  

<Alignment object (3 rows x 8 columns) at 0x105ffaea0>

In [16]:
print(ungapped_alignment.coordinates)

[[0 8]
 [0 8]
 [0 8]]


In [17]:
print(ungapped_alignment)

                  0 ACGTACGT 8
                  0 AAGTACGT 8
                  0 ACGTACCT 8



In [18]:
print(alignment)

                  1 CGGTTTTT 9
                  0 AG-TTT-- 5
                  0 AGGTTT-- 6



In [19]:
alignment.length

8

In [20]:
alignment[0]

'CGGTTTTT'

In [21]:
alignment[1]

'AG-TTT--'

In [22]:
alignment[2]

'AGGTTT--'

In [23]:
alignment[0, :]

'CGGTTTTT'

In [24]:
alignment[1, :]

'AG-TTT--'

In [25]:
alignment[0, 1:-1]

'GGTTTT'

In [26]:
alignment[1, 1:-1]

'G-TTT-'

In [27]:
alignment[0, (1, 2, 4)]

'GGT'

In [28]:
alignment[1, range(0, 5, 2)]

'A-T'

In [29]:
alignment[0, 2]

'G'

In [30]:
alignment[2, 6]

'-'

In [31]:
alignment[:, 0]

'CAA'

In [32]:
alignment[:, 1]

'GGG'

In [33]:
alignment[:, 2]

'G-G'

In [34]:
alignment[1:]

<Alignment object (2 rows x 6 columns) at 0x107560f20>

In [35]:
print(alignment[1:])

target            0 AG-TTT 5
                  0 ||-||| 6
query             0 AGGTTT 6



In [36]:
alignment[:, :4] 

<Alignment object (3 rows x 4 columns) at 0x107258980>

In [37]:
print(alignment[:, :4])

                  1 CGGT 5
                  0 AG-T 3
                  0 AGGT 4



In [38]:
alignment[:, -6:] 

<Alignment object (3 rows x 6 columns) at 0x1075615e0>

In [39]:
print(alignment[:, -6:])

                  3 GTTTTT 9
                  2 -TTT-- 5
                  2 GTTT-- 6



In [40]:
print(alignment[:, (1, 3, 0)])

                  0 GTC 3
                  0 GTA 3
                  0 GTA 3



In [41]:
len(alignment)

3

In [42]:
alignment.length

8

In [43]:
alignment.shape

(3, 8)

In [44]:
pairwise_alignment = alignment[:2, :]
print(pairwise_alignment)

target            1 CGGTTTTT 9
                  0 .|-|||-- 8
query             0 AG-TTT-- 5



In [45]:
print(pairwise_alignment.aligned)

[[[1 3]
  [4 7]]

 [[0 2]
  [2 5]]]


In [46]:
pairwise_alignment1 = Alignment(["AAACAAA", "AAAGAAA"],
                                np.array([[0, 3, 4, 4, 7], [0, 3, 3, 4, 7]]))  # fmt: skip

pairwise_alignment2 = Alignment(["AAACAAA", "AAAGAAA"],
                                np.array([[0, 3, 3, 4, 7], [0, 3, 4, 4, 7]]))  # fmt: skip

print(pairwise_alignment1)

target            0 AAAC-AAA 7
                  0 |||--||| 8
query             0 AAA-GAAA 7



In [47]:
print(pairwise_alignment2)

target            0 AAA-CAAA 7
                  0 |||--||| 8
query             0 AAAG-AAA 7



In [48]:
pairwise_alignment1.aligned

array([[[0, 3],
        [4, 7]],

       [[0, 3],
        [4, 7]]])

In [49]:
pairwise_alignment2.aligned

array([[[0, 3],
        [4, 7]],

       [[0, 3],
        [4, 7]]])

In [50]:
print(alignment)

                  1 CGGTTTTT 9
                  0 AG-TTT-- 5
                  0 AGGTTT-- 6



In [51]:
alignment.indices

array([[ 1,  2,  3,  4,  5,  6,  7,  8],
       [ 0,  1, -1,  2,  3,  4, -1, -1],
       [ 0,  1,  2,  3,  4,  5, -1, -1]])

In [52]:
alignment.sequences

['CCGGTTTTT', 'AGTTTAA', 'AGGTTT']

In [53]:
alignment.inverse_indices  

[array([-1,  0,  1,  2,  3,  4,  5,  6,  7]),
 array([ 0,  1,  3,  4,  5, -1, -1]),
 array([0, 1, 2, 3, 4, 5])]

In [54]:
print(pairwise_alignment)

target            1 CGGTTTTT 9
                  0 .|-|||-- 8
query             0 AG-TTT-- 5



In [55]:
pairwise_alignment.counts()

AlignmentCounts(gaps=3, identities=4, mismatches=1)

In [56]:
print(alignment)

                  1 CGGTTTTT 9
                  0 AG-TTT-- 5
                  0 AGGTTT-- 6



In [57]:
alignment.counts()

AlignmentCounts(gaps=8, identities=14, mismatches=2)

In [58]:
alignment.frequencies

{'C': array([1., 0., 0., 0., 0., 0., 0., 0.]),
 'G': array([0., 3., 2., 0., 0., 0., 0., 0.]),
 'T': array([0., 0., 0., 3., 3., 3., 1., 1.]),
 'A': array([2., 0., 0., 0., 0., 0., 0., 0.]),
 '-': array([0., 0., 1., 0., 0., 0., 2., 2.])}

In [59]:
m = alignment.substitutions
print(m)

    A   C   G   T
A 1.0 0.0 0.0 0.0
C 2.0 0.0 0.0 0.0
G 0.0 0.0 4.0 0.0
T 0.0 0.0 0.0 9.0



In [60]:
m["C", "A"]

2.0

In [61]:
m["A", "C"]

0.0

In [62]:
m += m.transpose()
m /= 2.0
print(m)

    A   C   G   T
A 1.0 1.0 0.0 0.0
C 1.0 0.0 0.0 0.0
G 0.0 0.0 4.0 0.0
T 0.0 0.0 0.0 9.0



In [63]:
m["A", "C"]

1.0

In [64]:
m["C", "A"]

1.0

In [65]:
align_array = np.array(alignment)
align_array.shape

(3, 8)

In [66]:
align_array

array([[b'C', b'G', b'G', b'T', b'T', b'T', b'T', b'T'],
       [b'A', b'G', b'-', b'T', b'T', b'T', b'-', b'-'],
       [b'A', b'G', b'G', b'T', b'T', b'T', b'-', b'-']], dtype='|S1')

In [67]:
align_array = np.array(alignment, dtype="U")
print(align_array.shape)
print(align_array)

(3, 8)
[['C' 'G' 'G' 'T' 'T' 'T' 'T' 'T']
 ['A' 'G' '-' 'T' 'T' 'T' '-' '-']
 ['A' 'G' 'G' 'T' 'T' 'T' '-' '-']]


In [68]:
print(alignment)

                  1 CGGTTTTT 9
                  0 AG-TTT-- 5
                  0 AGGTTT-- 6



In [69]:
alignment.sort()
print(alignment)

                  0 AGGTTT-- 6
                  0 AG-TTT-- 5
                  1 CGGTTTTT 9



In [70]:
from Bio.SeqUtils import gc_fraction
alignment.sort(key=gc_fraction)
print(alignment)

                  0 AG-TTT-- 5
                  0 AGGTTT-- 6
                  1 CGGTTTTT 9



In [71]:
alignment.sort(key=gc_fraction, reverse=True)
print(alignment)

                  1 CGGTTTTT 9
                  0 AGGTTT-- 6
                  0 AG-TTT-- 5



In [72]:
alignment.sequences

['CCGGTTTTT', 'AGGTTT', 'AGTTTAA']

In [73]:
rc_alignment = alignment.reverse_complement()
print(rc_alignment.sequences)

['AAAAACCGG', 'AAACCT', 'TTAAACT']


In [74]:
print(rc_alignment)

                  0 AAAAACCG 8
                  0 --AAACCT 6
                  2 --AAA-CT 7



In [75]:
alignment[:, :4].sequences

['CCGGTTTTT', 'AGGTTT', 'AGTTTAA']

In [76]:
print(alignment[:, :4])

                  1 CGGT 5
                  0 AGGT 4
                  0 AG-T 3



In [77]:
rc_alignment = alignment[:, :4].reverse_complement()
rc_alignment[:, :4].sequences

['AAAAACCGG', 'AAACCT', 'TTAAACT']

In [78]:
print(rc_alignment[:, :4])

                  4 ACCG 8
                  2 ACCT 6
                  4 A-CT 7



In [79]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
a1 = SeqRecord(Seq("AAAAC"), id="Alpha")
b1 = SeqRecord(Seq("AAAC"), id="Beta")
c1 = SeqRecord(Seq("AAAAG"), id="Gamma")
a2 = SeqRecord(Seq("GTT"), id="Alpha")
b2 = SeqRecord(Seq("TT"), id="Beta")
c2 = SeqRecord(Seq("GT"), id="Gamma")
left = Alignment(
    [a1, b1, c1], coordinates=np.array([[0, 3, 4, 5], [0, 3, 3, 4], [0, 3, 4, 5]])
)
left.annotations = {"tool": "demo", "name": "start"}
left.column_annotations = {"stats": "CCCXC"}
right = Alignment(
    [a2, b2, c2], coordinates=np.array([[0, 1, 2, 3], [0, 0, 1, 2], [0, 1, 1, 2]])
)
right.annotations = {"tool": "demo", "name": "end"}
right.column_annotations = {"stats": "CXC"}

In [80]:
print(left)

Alpha             0 AAAAC 5
Beta              0 AAA-C 4
Gamma             0 AAAAG 5



In [81]:
print(right)

Alpha             0 GTT 3
Beta              0 -TT 2
Gamma             0 G-T 2



In [82]:
combined = left + right
print(combined)

Alpha             0 AAAACGTT 8
Beta              0 AAA-C-TT 6
Gamma             0 AAAAGG-T 7



In [83]:
len(right)

3

In [84]:
len(left)

3

In [85]:
len(combined)

3

In [86]:
combined.annotations

{'tool': 'demo'}

In [87]:
combined.column_annotations

{'stats': 'CCCXCCXC'}

In [88]:
chromosome = "AAAAAAAACCCCCCCAAAAAAAAAAAGGGGGGAAAAAAAA"
transcript = "CCCCCCCGGGGGG"
sequences1 = [chromosome, transcript]
coordinates1 = np.array([[8, 15, 26, 32], [0, 7, 7, 13]])
alignment1 = Alignment(sequences1, coordinates1)
print(alignment1)

target            8 CCCCCCCAAAAAAAAAAAGGGGGG 32
                  0 |||||||-----------|||||| 24
query             0 CCCCCCC-----------GGGGGG 13



In [89]:
rnaseq = "CCCCGGGG"
sequences2 = [transcript, rnaseq]
coordinates2 = np.array([[3, 11], [0, 8]])
alignment2 = Alignment(sequences2, coordinates2)
print(alignment2)

target            3 CCCCGGGG 11
                  0 ||||||||  8
query             0 CCCCGGGG  8



In [90]:
alignment3 = alignment1.map(alignment2)
print(alignment3)

target           11 CCCCAAAAAAAAAAAGGGG 30
                  0 ||||-----------|||| 19
query             0 CCCC-----------GGGG  8



In [91]:
print(alignment3.coordinates)

[[11 15 26 30]
 [ 0  4  4  8]]


In [92]:
format(alignment3, "psl")

'8\t0\t0\t0\t0\t0\t1\t11\t+\tquery\t8\t0\t8\ttarget\t40\t11\t30\t2\t4,4,\t0,4,\t11,26,\n'

In [93]:
from Bio import Align
chain = Align.read(r"/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/panTro5ToPanTro6.over.chain", "chain")
chain.sequences[0].id

FileNotFoundError: [Errno 2] No such file or directory: '/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/panTro5ToPanTro6.over.chain'

In [None]:
len(chain.sequences[0].seq)

228573443

In [None]:
chain.sequences[1].id

'chr1'

In [None]:
len(chain.sequences[1].seq)

224244399

In [None]:
import numpy as np
np.set_printoptions(threshold=5)  # print 5 array elements per row
print(chain.coordinates)

[[122250000 122250400 122250400 ... 122909818 122909819 122909835]
 [111776384 111776784 111776785 ... 112019962 112019962 112019978]]


In [None]:
transcript = Align.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/est.panTro5.psl", "psl")
transcript.sequences[0].id

'chr1'

In [None]:
len(transcript.sequences[0].seq)

228573443

In [None]:
transcript.sequences[1].id

'DC525629'

In [None]:
len(transcript.sequences[1].seq)

407

In [None]:
print(transcript.coordinates)

[[122835789 122835847 122840993 122841145 122907212 122907314]
 [       32        90        90       242       242       344]]


In [None]:
len(chain.sequences[0].seq) == len(transcript.sequences[0].seq)

True

In [None]:
chain = chain[::-1]
chain.sequences[0].id

'chr1'

In [None]:
len(chain.sequences[0].seq)

224244399

In [None]:
chain.sequences[1].id

'chr1'

In [None]:
len(chain.sequences[1].seq)

228573443

In [None]:
print(chain.coordinates) 

[[111776384 111776784 111776785 ... 112019962 112019962 112019978]
 [122250000 122250400 122250400 ... 122909818 122909819 122909835]]


In [None]:
np.set_printoptions(threshold=1000)  # reset the print options

In [None]:
lifted_transcript = chain.map(transcript)
lifted_transcript.sequences[0].id

'chr1'

In [None]:
len(lifted_transcript.sequences[0].seq)

224244399

In [None]:
lifted_transcript.sequences[1].id

'DC525629'

In [None]:
len(lifted_transcript.sequences[1].seq)

407

In [None]:
print(lifted_transcript.coordinates)

[[111982717 111982775 111987921 111988073 112009200 112009302]
 [       32        90        90       242       242       344]]


In [None]:
from Bio import Align
path = "/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/panTro5.maf"
genome_alignment = Align.read(path, "maf")
for record in genome_alignment.sequences:
    print(record.id, len(record.seq))

panTro5.chr1 228573443
hg19.chr1 249250621
rheMac8.chr1 225584828
calJac3.chr18 47448759
mm10.chr3 160039680
rn6.chr2 266435125


In [None]:
print(genome_alignment.coordinates)

[[133922962 133922962 133922970 133922970 133922972 133922972 133922995
  133922998 133923010]
 [155784573 155784573 155784581 155784581 155784583 155784583 155784606
  155784609 155784621]
 [130383910 130383910 130383918 130383918 130383920 130383920 130383943
  130383946 130383958]
 [  9790455   9790455   9790463   9790463   9790465   9790465   9790488
    9790491   9790503]
 [ 88858039  88858036  88858028  88858026  88858024  88858020  88857997
   88857997  88857985]
 [188162970 188162967 188162959 188162959 188162957 188162953 188162930
  188162930 188162918]]


In [None]:
print(genome_alignment)

panTro5.c 133922962 ---ACTAGTTA--CA----GTAACAGAAAATAAAATTTAAATAGAAACTTAAAggcc
hg19.chr1 155784573 ---ACTAGTTA--CA----GTAACAGAAAATAAAATTTAAATAGAAACTTAAAggcc
rheMac8.c 130383910 ---ACTAGTTA--CA----GTAACAGAAAATAAAATTTAAATAGAAACTTAAAggcc
calJac3.c   9790455 ---ACTAGTTA--CA----GTAACAGAAAATAAAATTTAAATAGAAGCTTAAAggct
mm10.chr3  88858039 TATAATAATTGTATATGTCACAGAAAAAAATGAATTTTCAAT---GACTTAATAGCC
rn6.chr2  188162970 TACAATAATTG--TATGTCATAGAAAAAAATGAATTTTCAAT---AACTTAATAGCC

panTro5.c 133923010
hg19.chr1 155784621
rheMac8.c 130383958
calJac3.c   9790503
mm10.chr3  88857985
rn6.chr2  188162918



In [None]:
paths = [
    "/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/panTro5ToPanTro6.chain",
    "/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/hg19ToHg38.chain",
    "/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/rheMac8ToRheMac10.chain",
    "/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/calJac3ToCalJac4.chain",
    "/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/mm10ToMm39.chain",
    "/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/rn6ToRn7.chain",
]
liftover_alignments = [Align.read(path, "chain") for path in paths]
for liftover_alignment in liftover_alignments:
    print(liftover_alignment.target.id, liftover_alignment.coordinates[0, :])


chr1 [133919957 133924947 133924947 133926309 133926312 133932620]
chr1 [155184381 156354347 156354348 157128497 157128497 157137496]
chr1 [130382477 130383872 130383872 130384222 130384222 130388520]
chr18 [9786631 9787941 9788508 9788508 9795062 9795065 9795737]
chr3 [66807541 74196805 74196831 94707528 94707528 94708176 94708178 94708718]
chr2 [188111581 188158351 188158351 188171225 188171225 188228261 188228261
 188236997]


In [None]:
genome_alignment = genome_alignment.mapall(liftover_alignments)
for record in genome_alignment.sequences:
    print(record.id, len(record.seq))

chr1 224244399
chr1 248956422
chr1 223616942
chr18 47031477
chr3 159745316
chr2 249053267


In [None]:
print(genome_alignment.coordinates)

[[130611000 130611000 130611008 130611008 130611010 130611010 130611033
  130611036 130611048]
 [155814782 155814782 155814790 155814790 155814792 155814792 155814815
  155814818 155814830]
 [ 95186253  95186253  95186245  95186245  95186243  95186243  95186220
   95186217  95186205]
 [  9758318   9758318   9758326   9758326   9758328   9758328   9758351
    9758354   9758366]
 [ 88765346  88765343  88765335  88765333  88765331  88765327  88765304
   88765304  88765292]
 [174256702 174256699 174256691 174256691 174256689 174256685 174256662
  174256662 174256650]]


In [None]:
from Bio import SeqIO
names = ("panTro6", "hg38", "rheMac10", "calJac4", "mm39", "rn7")
for i, name in enumerate(names):
    filename = f"/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/{name}.2bit"
    genome = SeqIO.parse(filename, "twobit")
    chromosome = genome_alignment.sequences[i].id
    assert len(genome_alignment.sequences[i]) == len(genome[chromosome])
    genome_alignment.sequences[i] = genome[chromosome]
    genome_alignment.sequences[i].id = f"{name}.{chromosome}"

print(genome_alignment)

panTro6.c 130611000 ---ACTAGTTA--CA----GTAACAGAAAATAAAATTTAAATAGAAACTTAAAggcc
hg38.chr1 155814782 ---ACTAGTTA--CA----GTAACAGAAAATAAAATTTAAATAGAAACTTAAAggcc
rheMac10.  95186253 ---ACTAGTTA--CA----GTAACAGAAAATAAAATTTAAATAGAAACTTAAAggcc
calJac4.c   9758318 ---ACTAGTTA--CA----GTAACAGAaaataaaatttaaatagaagcttaaaggct
mm39.chr3  88765346 TATAATAATTGTATATGTCACAGAAAAAAATGAATTTTCAAT---GACTTAATAGCC
rn7.chr2  174256702 TACAATAATTG--TATGTCATAGAAAAAAATGAATTTTCAAT---AACTTAATAGCC

panTro6.c 130611048
hg38.chr1 155814830
rheMac10.  95186205
calJac4.c   9758366
mm39.chr3  88765292
rn7.chr2  174256650



In [None]:
alignment_list = [alignment1, alignment2, alignment3]
alignment_iterator1 = iter(alignment_list)
alignment_iterator2 = iter(alignment_list)
next(alignment_iterator1)

<Alignment object (2 rows x 24 columns) at 0x10d0a2e70>

In [None]:
next(alignment_iterator2)

<Alignment object (2 rows x 24 columns) at 0x10d0a2e70>

In [None]:
next(alignment_iterator1)

<Alignment object (2 rows x 8 columns) at 0x10d0a3320>

In [None]:
next(alignment_iterator1)

<Alignment object (2 rows x 19 columns) at 0x10d0a2ae0>

In [None]:
next(alignment_iterator2)

<Alignment object (2 rows x 8 columns) at 0x10d0a3320>

In [None]:
next(alignment_iterator2)

<Alignment object (2 rows x 19 columns) at 0x10d0a2ae0>

In [None]:
from Bio.Align import Alignments
alignments = Alignments([alignment1, alignment2, alignment3])
alignment_iterator1 = iter(alignments)
alignment_iterator2 = iter(alignments)
alignment_iterator1 is alignment_iterator2

True

In [None]:
next(alignment_iterator1)

<Alignment object (2 rows x 24 columns) at 0x10d0a2e70>

In [None]:
next(alignment_iterator2)

<Alignment object (2 rows x 8 columns) at 0x10d0a3320>

In [None]:
next(alignment_iterator1)

<Alignment object (2 rows x 19 columns) at 0x10d0a2ae0>

In [None]:
for item in alignments:
    print(repr(item))  

<Alignment object (2 rows x 24 columns) at 0x10d0a2e70>
<Alignment object (2 rows x 8 columns) at 0x10d0a3320>
<Alignment object (2 rows x 19 columns) at 0x10d0a2ae0>


In [None]:
for item in alignments:
    print(repr(item))  

<Alignment object (2 rows x 24 columns) at 0x10d0a2e70>
<Alignment object (2 rows x 8 columns) at 0x10d0a3320>
<Alignment object (2 rows x 19 columns) at 0x10d0a2ae0>


In [None]:
# alignments.score = 100
# alignments.score

In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/ucsc_mm9_chr10.maf", "maf")
alignments

<Bio.Align.maf.AlignmentIterator at 0x10d070ec0>

In [None]:
alignments.metadata

{'MAF Version': '1', 'Scoring': 'autoMZ.v1'}

In [None]:
for a in alignments:
    print(len(a.sequences))

2
4
5
6
7
7
1
4
1
5
6
4
1
9
10
7
1
3
1
4
3
5
1
2
2
1
2
3
6
5
1
2
7
10
1
4
1
9
10
11
12
13
14
15
15
14
7
6


In [None]:
len(alignments)

48

In [None]:
alignment_list = list(alignments)
len(alignment_list)

48

In [None]:
alignment_list[27]

<Alignment object (3 rows x 91 columns) at 0x10d706390>

In [None]:
print(alignment_list[27])

mm9.chr10   3019377 CCCCAGCATTCTGGCAGACACAGTG-AAAAGAGACAGATGGTCACTAATAAAATCTGT-A
felCat3.s     46845 CCCAAGTGTTCTGATAGCTAATGTGAAAAAGAAGCATGTGCCCACCAGTAAGCTTTGTGG
canFam2.c  47545247 CCCAAGTGTTCTGATTGCCTCTGTGAAAAAGAAACATGGGCCCGCTAATAagatttgcaa

mm9.chr10   3019435 TAAATTAG-ATCTCAGAGGATGGATGGACCA  3019465
felCat3.s     46785 TGAACTAGAATCTCAGAGGATG---GGACTC    46757
canFam2.c  47545187 tgacctagaatctcagaggatg---ggactc 47545159



In [None]:
type(alignments)

Bio.Align.maf.AlignmentIterator

In [None]:
alignments = alignments[:]
type(alignments)

Bio.Align.Alignments

In [None]:
len(alignments)

48

In [None]:
print(alignments[11])

mm9.chr10   3014742 AAGTTCCCTCCATAATTCCTTCCTCCCACCCCCACA 3014778
calJac1.C      6283 AAATGTA-----TGATCTCCCCATCCTGCCCTG---    6311
otoGar1.s    175262 AGATTTC-----TGATGCCCTCACCCCCTCCGTGCA  175231
loxAfr1.s      9317 AGGCTTA-----TG----CCACCCCCCACCCCCACA    9290



In [None]:
alignments.metadata

{'MAF Version': '1', 'Scoring': 'autoMZ.v1'}

In [None]:
from Bio import Align
target = "myfile.txt"
Align.write(alignments, target, "clustal")

48

In [None]:
from Bio import Align
alignments = Align.Alignments(alignments)
metadata = {"Program": "Biopython", "Version": "1.81"}
alignments.metadata = metadata
target = "myfile.txt"
Align.write(alignments, target, "clustal")

48

In [None]:
str(alignment)

'                  1 CGGTTTTT 9\n                  0 AGGTTT-- 6\n                  0 AG-TTT-- 5\n'

In [None]:
format(alignment)

'                  1 CGGTTTTT 9\n                  0 AGGTTT-- 6\n                  0 AG-TTT-- 5\n'

In [None]:
print(format(alignment))

                  1 CGGTTTTT 9
                  0 AGGTTT-- 6
                  0 AG-TTT-- 5



In [None]:
format(alignment, "clustal")

'sequence_0                          CGGTTTTT\nsequence_1                          AGGTTT--\nsequence_2                          AG-TTT--\n\n\n'

In [None]:
print(format(alignment, "clustal"))

sequence_0                          CGGTTTTT
sequence_1                          AGGTTT--
sequence_2                          AG-TTT--





In [None]:
print(f"*** this is the alignment in Clustal format: ***\n{alignment:clustal}\n***")

*** this is the alignment in Clustal format: ***
sequence_0                          CGGTTTTT
sequence_1                          AGGTTT--
sequence_2                          AG-TTT--



***


In [None]:
format(alignment, "maf")

'a\ns sequence_0 1 8 + 9 CGGTTTTT\ns sequence_1 0 6 + 6 AGGTTT--\ns sequence_2 0 5 + 7 AG-TTT--\n\n'

In [None]:
print(format(alignment, "maf"))

a
s sequence_0 1 8 + 9 CGGTTTTT
s sequence_1 0 6 + 6 AGGTTT--
s sequence_2 0 5 + 7 AG-TTT--




In [None]:
print(pairwise_alignment)

target            1 CGGTTTTT 9
                  0 .|-|||-- 8
query             0 AG-TTT-- 5



In [None]:
print(format(pairwise_alignment, "bed"))  

target	1	7	query	0	+	1	7	0	2	2,3,	0,3,



In [None]:
print(pairwise_alignment.format("bed"))

target	1	7	query	0	+	1	7	0	2	2,3,	0,3,



In [None]:
print(pairwise_alignment.format("bed", bedN=3)) 

target	1	7



In [None]:
print(pairwise_alignment.format("bed", bedN=6)) 

target	1	7	query	0	+



In [None]:
from Bio import Align
alignment = Align.read("probcons.fa", "fasta")
alignment  

<Alignment object (5 rows x 101 columns) at 0x10d7aa180>

In [None]:
print(alignment)

plas_horv         0 D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQE
plas_chlr         0 --VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRD
plas_anav         0 --VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHK
plas_proh         0 VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNT
azup_achc         0 VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A------

plas_horv        57 EYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV 95
plas_chlr        56 DYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV 94
plas_anav        58 QLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV 99
plas_proh        56 KLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV 94
azup_achc        51 -FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV 88



In [None]:
print(format(alignment, "fasta"))

>plas_horvu
D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQEEYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV
>plas_chlre
--VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRDDYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV
>plas_anava
--VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV
>plas_proho
VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNTKLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV
>azup_achcy
VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A-------FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV



In [None]:
print(format(alignment, "clustal"))

plas_horvu                          D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-
plas_chlre                          --VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-
plas_anava                          --VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKS
plas_proho                          VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-
azup_achcy                          VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-

plas_horvu                          VD-VSKISQEEYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVT
plas_chlre                          VN-ADAISRDDYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKII
plas_anava                          ADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKIT
plas_proho                          ES-APALSNTKLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTIT
azup_achcy                          AE-A-------FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVE

plas_horvu                          V
plas_chlre                          V
plas_anava                          V
plas_proho    

In [None]:
alignment.sequences

[SeqRecord(seq=Seq('DVLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSGVDVSKI...VTV'), id='plas_horvu', name='<unknown name>', description='', dbxrefs=[]),
 SeqRecord(seq=Seq('VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSGVNADAIS...IIV'), id='plas_chlre', name='<unknown name>', description='', dbxrefs=[]),
 SeqRecord(seq=Seq('VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKS...ITV'), id='plas_anava', name='<unknown name>', description='', dbxrefs=[]),
 SeqRecord(seq=Seq('VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDKVPAGESAPALS...ITV'), id='plas_proho', name='<unknown name>', description='', dbxrefs=[]),
 SeqRecord(seq=Seq('VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDKGHNVETIKGMIPDGAEAFKS...VEV'), id='azup_achcy', name='<unknown name>', description='', dbxrefs=[])]

In [None]:
print(alignment.coordinates)

[[ 0  1  1 33 34 42 44 48 48 50 50 51 58 73 73 95]
 [ 0  0  0 32 33 41 43 47 47 49 49 50 57 72 72 94]
 [ 0  0  0 32 33 41 43 47 48 50 51 52 59 74 77 99]
 [ 0  1  2 34 35 43 43 47 47 49 49 50 57 72 72 94]
 [ 0  1  2 34 34 42 44 48 48 50 50 51 51 66 66 88]]


In [None]:
from io import StringIO
stream = StringIO()
Align.write(alignment, stream, "FASTA")

1

In [None]:
print(stream.getvalue())

>plas_horvu
D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQEEYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV
>plas_chlre
--VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRDDYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV
>plas_anava
--VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV
>plas_proho
VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNTKLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV
>azup_achcy
VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A-------FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV



In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/opuntia.aln", "clustal")

In [None]:
alignments.metadata

{'Program': 'CLUSTAL', 'Version': '2.1'}

In [None]:
alignment = next(alignments)
print(alignment)

gi|627328         0 TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAAAAAAATGAAT
gi|627328         0 TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAAAAAAATGAAT
gi|627328         0 TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAAAAAAATGAAT
gi|627328         0 TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAAAAAAATGAAT
gi|627329         0 TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAAAAAAATGAAT
gi|627328         0 TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAAAAAAATGAAT
gi|627329         0 TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAAAAAAATGAAT

gi|627328        60 CTAAATGATATACGATTCCACTATGTAAGGTCTTTGAATCATATCATAAAAGACAATGTA
gi|627328        60 CTAAATGATATACGATTCCACTATGTAAGGTCTTTGAATCATATCATAAAAGACAATGTA
gi|627328        60 CTAAATGATATACGATTCCACTATGTAAGGTCTTTGAATCATATCATAAAAGACAATGTA
gi|627328        60 CTAAATGATATACGATTCCACTATGTAAGGTCTTTGAATCATATCATAAAAGACAATGTA
gi|627329        60 CTAAATGATATAGGATTCCACTATGTAAGGTCTTTGAATCATATCATAAAAGACAATGTA
gi|627328        60 CTAAATG

In [None]:
from Bio import Align
alignment = Align.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/opuntia.aln", "clustal")

In [None]:
alignment.column_annotations

{'clustal_consensus': '******* **** *********************************************************** ****************************************************************************************************************************************************************************************************************************************************************************      ****** ********************************************************************************************************************** ** **************** ****************************************** * ******************          ********  **** ********* ********************************************* *********** ***************** ******************************************* ******************************* ********************************************************************************************************************************************************* *********'}

In [None]:
print(format(alignment, "clustal"))

gi|6273285|gb|AF191659.1|AF191      TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273284|gb|AF191658.1|AF191      TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273287|gb|AF191661.1|AF191      TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273286|gb|AF191660.1|AF191      TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273290|gb|AF191664.1|AF191      TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273289|gb|AF191663.1|AF191      TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273291|gb|AF191665.1|AF191      TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
                                    ******* **** *************************************

gi|6273285|gb|AF191659.1|AF191      AAAAATGAATCTAAATGATATACGATTCCACTATGTAAGGTCTTTGAATC
gi|6273284|gb|AF191658.1|AF191      AAAAATGAATCTAAATGATATACGATTCCACTATGTAAGGTCTTTGAATC
gi|6273287|gb|AF191661.1|AF191      AAAAATGAATCTAAATGATATACGATTCCACTATGTAAGGTCTTTGAATC
gi|6273286|gb|AF191660.1|AF191      AAAAAT

In [None]:
from io import StringIO
stream = StringIO()
Align.write(alignments, stream, "clustal")

1

In [None]:
print(stream.getvalue())

CLUSTAL 2.1 multiple sequence alignment


gi|6273285|gb|AF191659.1|AF191      TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273284|gb|AF191658.1|AF191      TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273287|gb|AF191661.1|AF191      TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273286|gb|AF191660.1|AF191      TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273290|gb|AF191664.1|AF191      TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273289|gb|AF191663.1|AF191      TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273291|gb|AF191665.1|AF191      TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
                                    ******* **** *************************************

gi|6273285|gb|AF191659.1|AF191      AAAAATGAATCTAAATGATATACGATTCCACTATGTAAGGTCTTTGAATC
gi|6273284|gb|AF191658.1|AF191      AAAAATGAATCTAAATGATATACGATTCCACTATGTAAGGTCTTTGAATC
gi|6273287|gb|AF191661.1|AF191      AAAAATGAATCTAAATGATATACGATTCCACTATGTAAGGTCTTTGAATC


In [None]:
from Bio import Align
alignment = Align.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/pfam2.seed.txt", "stockholm")
alignment

<Alignment object (3 rows x 59 columns) at 0x10d6d9280>

In [None]:
print(alignment)

DN7_METS5         0 KIKFKYKGQDLEVDISKVKKVWKVGKMVSFTYDD-NGKTGRGAVSEKDAPKELLNMIGK
DN7A_SACS         0 TVKFKYKGEEKQVDISKIKKVWRVGKMISFTYDEGGGKTGRGAVSEKDAPKELLQMLEK
DN7E_SULA         0 KVRFKYKGEEKEVDTSKIKKVWRVGKMVSFTYDD-NGKTGRGAVSEKDAPKELMDMLAR

DN7_METS5        58
DN7A_SACS        59
DN7E_SULA        58



In [None]:
print(format(alignment, "stockholm"))

# STOCKHOLM 1.0
#=GF ID   7kD_DNA_binding
#=GF AC   PF02294.20
#=GF DE   7kD DNA-binding domain
#=GF AU   Mian N;0000-0003-4284-4749
#=GF AU   Bateman A;0000-0002-6982-4660
#=GF SE   Pfam-B_8148 (release 5.2)
#=GF GA   25.00 25.00;
#=GF TC   26.60 46.20;
#=GF NC   23.20 19.20;
#=GF BM   hmmbuild HMM.ann SEED.ann
#=GF SM   hmmsearch -Z 57096847 -E 1000 --cpu 4 HMM pfamseq
#=GF TP   Domain
#=GF CL   CL0049
#=GF RN   [1]
#=GF RM   3130377
#=GF RT   Microsequence analysis of DNA-binding proteins 7a, 7b, and 7e from
#=GF RT   the archaebacterium Sulfolobus acidocaldarius.
#=GF RA   Choli T, Wittmann-Liebold B, Reinhardt R;
#=GF RL   J Biol Chem 1988;263:7087-7093.
#=GF DR   INTERPRO; IPR003212;
#=GF DR   SCOP; 1sso; fa;
#=GF DR   SO; 0000417; polypeptide_domain;
#=GF CC   This family contains members of the hyper-thermophilic
#=GF CC   archaebacterium  7kD DNA-binding/endoribonuclease P2 family. There
#=GF CC   are five 7kD DNA-binding proteins, 7a-7e, found as monomers in the
#=GF CC   cel

In [None]:
print(format(alignment, "fasta"))

>DN7_METS5/4-61
KIKFKYKGQDLEVDISKVKKVWKVGKMVSFTYDD-NGKTGRGAVSEKDAPKELLNMIGK
>DN7A_SACS2/3-61
TVKFKYKGEEKQVDISKIKKVWRVGKMISFTYDEGGGKTGRGAVSEKDAPKELLQMLEK
>DN7E_SULAC/3-60
KVRFKYKGEEKEVDTSKIKKVWRVGKMVSFTYDD-NGKTGRGAVSEKDAPKELMDMLAR



In [None]:
print(format(alignment, "phylip"))

3 59
DN7_METS5/KIKFKYKGQDLEVDISKVKKVWKVGKMVSFTYDD-NGKTGRGAVSEKDAPKELLNMIGK
DN7A_SACS2TVKFKYKGEEKQVDISKIKKVWRVGKMISFTYDEGGGKTGRGAVSEKDAPKELLQMLEK
DN7E_SULACKVRFKYKGEEKEVDTSKIKKVWRVGKMVSFTYDD-NGKTGRGAVSEKDAPKELMDMLAR



In [None]:
alignment.annotations["identifier"]

'7kD_DNA_binding'

In [None]:
alignment.annotations["clan"]

'CL0049'

In [None]:
alignment.annotations["database references"]

[{'reference': 'INTERPRO; IPR003212;'},
 {'reference': 'SCOP; 1sso; fa;'},
 {'reference': 'SO; 0000417; polypeptide_domain;'}]

In [None]:
for record in alignment.sequences:
    print("%s %s %s" % (record.id, record.annotations["accession"], record.dbxrefs))

DN7_METS5/4-61 A4YEA2.1 []
DN7A_SACS2/3-61 P61991.2 ['PDB; 1SSO A; 2-60;', 'PDB; 1JIC A; 2-60;', 'PDB; 2CVR A; 2-60;', 'PDB; 1B4O A; 2-60;']
DN7E_SULAC/3-60 P13125.2 []


In [None]:
alignment.sequences[0].letter_annotations

{}

In [None]:
alignment.sequences[1].letter_annotations

{'secondary structure': 'EEEEESSSSEEEEETTTEEEEEESSSSEEEEEE-SSSSEEEEEEETTTS-CHHHHHHTT'}

In [None]:
alignment.sequences[2].letter_annotations

{}

In [None]:
alignment.column_annotations 

{'consensus secondary structure': 'EEEEESSSSEEEEETTTEEEEEESSSSEEEEEE-SSSSEEEEEEETTTS-CHHHHHHTT',
 'consensus sequence': 'KVKFKYKGEEKEVDISKIKKVWRVGKMVSFTYDD.NGKTGRGAVSEKDAPKELLsMLuK'}

In [None]:
from Bio import Align
alignment = Align.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/sequential.phy", "phylip")
alignment  

<Alignment object (3 rows x 384 columns) at 0x10d704230>

In [None]:
alignment2 = Align.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/interlaced.phy", "phylip")
alignment2

<Alignment object (3 rows x 384 columns) at 0x10c81d040>

In [None]:
alignment == alignment2

True

In [None]:
alignment.shape

(3, 384)

In [None]:
print(alignment)

CYS1_DICD         0 -----MKVILLFVLAVFTVFVSS---------------RGIPPEEQ------------SQ
ALEU_HORV         0 MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALR
CATH_HUMA         0 ------MWATLPLLCAGAWLLGV--------PVCGAAELSVNSLEK------------FH

CYS1_DICD        28 FLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE
ALEU_HORV        60 FARFAVRYGKSYESAAEVRRRFRIFSESLEEVRSTN----RKGLPYRLGINRFSDMSWEE
CATH_HUMA        34 FKSWMSKHRKTY-STEEYHHRLQTFASNWRKINAHN----NGNHTFKMALNQFSDMSFAE

CYS1_DICD        87 FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRG-AVTPVKNQGQCGSCWSFS
ALEU_HORV       116 FQATRL-GAAQTCSATLAGNHLMRDA--AALPETKDWREDG-IVSPVKNQAHCGSCWTFS
CATH_HUMA        89 IKHKYLWSEPQNCSAT--KSNYLRGT--GPYPPSVDWRKKGNFVSPVKNQGACGSCWTFS

CYS1_DICD       146 TTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNG
ALEU_HORV       172 TTGALEAAYTQATGKNISLSEQQLVDCAGGFNNF--------GCNGGLPSQAFEYIKYNG
CATH_HUMA       145 TTGALESAIAIATGKMLSLAEQQLVDCAQDFNNY--------GCQGGLPSQAFEYILYNK

CYS1_DICD       206 GIQT

In [None]:
print(format(alignment, "phylip"))

3 384
CYS1_DICDI-----MKVILLFVLAVFTVFVSS---------------RGIPPEEQ------------SQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRG-AVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP-KNETVMAGYIVSTGPLAIAADAVE-WQFYIGGVF-DIPCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII--
ALEU_HORVUMAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRYGKSYESAAEVRRRFRIFSESLEEVRSTN----RKGLPYRLGINRFSDMSWEEFQATRL-GAAQTCSATLAGNHLMRDA--AALPETKDWREDG-IVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNF--------GCNGGLPSQAFEYIKYNGGIDTEESYPYKGVNGV-CHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAFQVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGV-----PYWLIKNSWGADWGDNGYFKMEMGKNMCAIATCASYPVVAA
CATH_HUMAN------MWATLPLLCAGAWLLGV--------PVCGAAELSVNSLEK------------FHFKSWMSKHRKTY-STEEYHHRLQTFASNWRKINAHN----NGNHTFKMALNQFSDMSFAEIKHKYLWSEPQNCSAT--KSNYLRGT--GPYPPSVDWRKKGNFVSPVKNQGACGSCWTFSTTGALESAIAIATG

In [None]:
from io import StringIO
stream = StringIO()
Align.write(alignment, stream, "phylip")

1

In [None]:
stream.seek(0)

0

In [None]:
alignment3 = Align.read(stream, "phylip")
alignment == alignment3

True

In [None]:
[record.id for record in alignment.sequences]

['CYS1_DICDI', 'ALEU_HORVU', 'CATH_HUMAN']

In [None]:
[record.id for record in alignment3.sequences]

['CYS1_DICDI', 'ALEU_HORVU', 'CATH_HUMAN']

In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/water.txt", "emboss")

In [None]:
alignments.metadata

{'Align_format': 'srspair',
 'Program': 'water',
 'Rundate': 'Wed Jan 16 17:23:19 2002',
 'Report_file': 'stdout'}

In [None]:
alignment = next(alignments)
alignment

<Alignment object (2 rows x 131 columns) at 0x10dacccb0>

In [None]:
alignment.shape

(2, 131)

In [None]:
print(alignment)

IXI_234           0 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTC
                  0 |||||||||||||||---------||||||||||||||||||||||||||||||||||||
IXI_235           0 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTC

IXI_234          60 TTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTG
                 60 ||||||||||||||----------||||||||||||||||||||||||||||||||||||
IXI_235          51 TTSTSTRHRGRSGW----------RASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTG

IXI_234         120 PPAWAGDRSHE 131
                120 ||||||||||| 131
IXI_235         101 PPAWAGDRSHE 112



In [None]:
print(alignment.coordinates)

[[  0  15  24  74  84 131]
 [  0  15  15  65  65 112]]


In [None]:
alignment[0]

'TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE'

In [None]:
alignment[1]

'TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE'

In [None]:
alignment[1, 10:30]

'GPSSR---------RPSPPG'

In [None]:
alignment.annotations

{'Matrix': 'EBLOSUM62',
 'Gap_penalty': 10.0,
 'Extend_penalty': 0.5,
 'Identity': 112,
 'Similarity': 112,
 'Gaps': 19,
 'Score': 591.5}

In [None]:
alignment.counts()

AlignmentCounts(gaps=19, identities=112, mismatches=0)

In [None]:
alignment.column_annotations

{'emboss_consensus': '|||||||||||||||         ||||||||||||||||||||||||||||||||||||||||||||||||||          |||||||||||||||||||||||||||||||||||||||||||||||'}

In [None]:
print(format(alignment, "phylip"))

2 131
IXI_234   TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE
IXI_235   TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE



In [None]:
alignment.target

SeqRecord(seq=Seq('TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWK...SHE'), id='IXI_234', name='<unknown name>', description='<unknown description>', dbxrefs=[])

In [None]:
alignment.query

SeqRecord(seq=Seq('TSPASIRPPAGPSSRRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTS...SHE'), id='IXI_235', name='<unknown name>', description='<unknown description>', dbxrefs=[])

In [None]:
from Bio import Align
alignment = Align.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/W_prot.msf", "msf")

In [None]:
alignment

<Alignment object (11 rows x 99 columns) at 0x10d6d8d40>

In [None]:
print(alignment)

W*01:01:0         0 GLTPFNGYTAATWTRTAVSSVGMNIPYHGASYLVRNQELRSWTAADKAAQMPWRRNRQSC
W*01:01:0         0 GLTPFNGYTAATWTRTAVSSVGMNIPYHGASYLVRNQELRSWTAADKAAQMPWRRNRQSC
W*01:01:0         0 GLTPFNGYTAATWTRTAVSSVGMNIPYHGASYLVRNQELRSWTAADKAAQMPWRRNRQSC
W*01:01:0         0 GLTPFNGYTAATWTRTAVSSVGMNIPYHGASYLVRNQELRSWTAADKAAQMPWRRNRQSC
W*01:01:0         0 GLTPFNGYTAATWTRTAVSSVGMNIPYHGASYLVRNQELRSWTAADKAAQMPWRRNRQSC
W*01:01:0         0 GLTPFNGYTAATWTRTAVSSVGMNIPYHGASYLVRNQELRSWTAADKAAQMPWRRNRQSC
W*02:01           0 GLTPSNGYTAATWTRTAASSVGMNIPYDGASYLVRNQELRSWTAADKAAQMPWRRNMQSC
W*03:01:0         0 GLTPSSGYTAATWTRTAVSSVGMNIPYHGASYLVRNQELRSWTAADKAAQMPWRRNRQSC
W*03:01:0         0 GLTPSSGYTAATWTRTAVSSVGMNIPYHGASYLVRNQELRSWTAADKAAQMPWRRNRQSC
W*04:01           0 GLTPSNGYTAATWTRTAASSVGMNIPYDGASYLVRNQELRSWTAADKAAQMPWRRNMQSC
W*05:01           0 GLTPSSGYTAATWTRTAVSSVGMNIPYHGASYLVRNQELRSWTAADKAAQMPWRRNRQSC

W*01:01:0        60 SKPTCREGGRSGSAKSLRMGRRGCSAQNPKDSHDPPPHL 99
W*01:01:0        60 SKPTCREGGRSGSAKSLRMGRRGCS

In [None]:
len(alignment.sequences)

11

In [None]:
alignment.sequences[0].id

'W*01:01:01:01'

In [None]:
alignment.sequences[0].seq

Seq('GLTPFNGYTAATWTRTAVSSVGMNIPYHGASYLVRNQELRSWTAADKAAQMPWR...PHL')

In [None]:
print(alignment.coordinates)

[[ 0 93 99]
 [ 0 93 99]
 [ 0 93 99]
 [ 0 93 99]
 [ 0 93 99]
 [ 0 93 99]
 [ 0 93 93]
 [ 0 93 93]
 [ 0 93 93]
 [ 0 93 93]
 [ 0 93 99]]


In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/exn_22_m_cdna2genome_vulgar.exn", "exonerate")

In [None]:
alignments.metadata

{'Program': 'exonerate',
 'Command line': 'exonerate -m cdna2genome ../scer_cad1.fa /media/Waterloo/Downloads/genomes/scer_s288c/scer_s288c.fa --bestn 3 --showalignment no --showcigar no --showvulgar yes',
 'Hostname': 'blackbriar'}

In [None]:
alignment = next(alignments)
alignment.score

6146.0

In [None]:
print(alignment.coordinates)

[[1319275 1319274 1319271 1318045]
 [      0       1       4    1230]]


In [None]:
print(alignment)

gi|330443   1319275 ????????????????????????????????????????????????????????????
                  0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
gi|296143         0 ????????????????????????????????????????????????????????????

gi|330443   1319215 ????????????????????????????????????????????????????????????
                 60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
gi|296143        60 ????????????????????????????????????????????????????????????

gi|330443   1319155 ????????????????????????????????????????????????????????????
                120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
gi|296143       120 ????????????????????????????????????????????????????????????

gi|330443   1319095 ????????????????????????????????????????????????????????????
                180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
gi|296143       180 ????????????????????????????????????????????????????????????

gi|330443   1319035 ????

In [None]:
print(format(alignment, "exonerate"))

vulgar: gi|296143771|ref|NM_001180731.1| 0 1230 + gi|330443520|ref|NC_001136.10| 1319275 1318045 - 6146 M 1 1 C 3 3 M 1226 1226



In [None]:
print(alignment.format("exonerate", "vulgar"))

vulgar: gi|296143771|ref|NM_001180731.1| 0 1230 + gi|330443520|ref|NC_001136.10| 1319275 1318045 - 6146 M 1 1 C 3 3 M 1226 1226



In [None]:
print(alignment.format("exonerate", "cigar"))

cigar: gi|296143771|ref|NM_001180731.1| 0 1230 + gi|330443520|ref|NC_001136.10| 1319275 1318045 - 6146 M 1 M 3 M 1226



In [None]:
alignment.operations

bytearray(b'MCM')

In [None]:
print(format(alignment, "bed"))

gi|330443520|ref|NC_001136.10|	1318045	1319275	gi|296143771|ref|NM_001180731.1|	6146	-	1318045	1319275	0	3	1226,3,1,	0,1226,1229,



In [None]:
print(format(alignment, "psl"))

1230	0	0	0	0	0	0	0	-	gi|296143771|ref|NM_001180731.1|	1230	0	1230	gi|330443520|ref|NC_001136.10|	1319275	1318045	1319275	3	1226,3,1,	0,1226,1229,	1318045,1319271,1319274,



In [None]:
del alignment.operations
print(format(alignment, "sam"))

gi|296143771|ref|NM_001180731.1|	16	gi|330443520|ref|NC_001136.10|	1318046	255	1226M3M1M	*	0	0	*	*	AS:i:6146



In [None]:
alignment = next(alignments)  # second alignment
alignment = next(alignments)  # third alignment
print(alignment)

gi|330443     85010 ???????????-???????????????--????-?-????????----????????????
                  0 |||||||||||-|||||||||||||||--||||-|-||||||||----||||||||||||
gi|296143         0 ????????????????????????????????????????????????????????????

gi|330443     85061 ????????????????????????????????????????????????????????????
                 60 |||||-------------------------------------------------------
gi|296143        60 ?????-------------------------------------------------------

gi|330443     85121 ????????????????????????????????????????????????????????????
                120 ------------------------------------------------------------
gi|296143        65 ------------------------------------------------------------

gi|330443     85181 ????????????????????????????????????????????????????????????
                180 ------------------------------------------------------------
gi|296143        65 ------------------------------------------------------------

gi|330443     85241 ????

In [None]:
print(format(alignment, "exonerate"))

vulgar: gi|296143771|ref|NM_001180731.1| 0 516 + gi|330443688|ref|NC_001145.3| 85010 667216 + 518 M 11 11 G 1 0 M 15 15 G 2 0 M 4 4 G 1 0 M 1 1 G 1 0 M 8 8 G 4 0 M 17 17 5 0 2 I 0 168904 3 0 2 M 4 4 G 0 1 M 8 8 G 2 0 M 3 3 G 1 0 M 33 33 G 0 2 M 7 7 G 0 1 M 102 102 5 0 2 I 0 96820 3 0 2 M 14 14 G 0 2 M 10 10 G 2 0 M 5 5 G 0 2 M 10 10 G 2 0 M 4 4 G 0 1 M 20 20 G 1 0 M 15 15 G 0 1 M 5 5 G 3 0 M 4 4 5 0 2 I 0 122114 3 0 2 M 20 20 G 0 5 M 6 6 5 0 2 I 0 193835 3 0 2 M 12 12 G 0 2 M 5 5 G 1 0 M 7 7 G 0 2 M 1 1 G 0 1 M 12 12 C 75 75 M 6 6 G 1 0 M 4 4 G 0 1 M 2 2 G 0 1 M 3 3 G 0 1 M 41 41



In [None]:
from Bio import Align
alignment = Align.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/codonposset.nex", "nexus")
print(alignment)

Aegothele         0 AAAAAGGCATTGTGGTGGGAAT 22
                  0 .........||||||||||||| 22
Aerodramu         0 ?????????TTGTGGTGGGAAT 22



In [None]:
alignment.shape

(2, 22)

In [None]:
alignment.sequences[0].id

'Aegotheles'

In [None]:
alignment.sequences[0].seq

Seq('AAAAAGGCATTGTGGTGGGAAT')

In [None]:
alignment.sequences[0].annotations

{'molecule_type': 'DNA'}

In [None]:
alignment.sequences[1].id

'Aerodramus'

In [None]:
alignment.sequences[1].seq

Seq('?????????TTGTGGTGGGAAT')

In [None]:
alignment.sequences[1].annotations

{'molecule_type': 'DNA'}

In [None]:
print(format(alignment, "nexus"))

#NEXUS
begin data;
dimensions ntax=2 nchar=22;
format datatype=dna missing=? gap=-;
matrix
Aegotheles AAAAAGGCATTGTGGTGGGAAT
Aerodramus ?????????TTGTGGTGGGAAT
;
end;



In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/nucleotide_m8CC.txt", "tabular")

In [None]:
alignments.metadata

{'Command line': 'fasta36 -m 8CC seq/mgstm1.nt seq/gst.nlib',
 'Program': 'FASTA',
 'Version': '36.3.8h May, 2020',
 'Database': 'seq/gst.nlib'}

In [None]:
alignment = next(alignments)
alignment = next(alignments)
alignment = next(alignments)
alignment = next(alignments)
print(alignment)

RABGSTB         156 ??????????????????????????????????--????????????????????????
                  0 |||||||||||||||--|||||||||||||||||--|||||||||||-||||||||||||
pGT875          158 ???????????????--??????????????????????????????-????????????

RABGSTB         214 ??????????????????????????????????????????????????????????-?
                 60 ||||||||||||||||||||||||||||||||||||||||||||||-|||||||||||-|
pGT875          215 ??????????????????????????????????????????????-?????????????

RABGSTB         273 ??????-???????? 287
                120 ||||||-|||||||| 135
pGT875          274 ??????????????? 289



In [None]:
print(alignment.coordinates)

[[156 171 173 190 190 201 202 260 261 272 272 279 279 287]
 [158 173 173 190 192 203 203 261 261 272 273 280 281 289]]


In [None]:
alignment.aligned

array([[[156, 171],
        [173, 190],
        [190, 201],
        [202, 260],
        [261, 272],
        [272, 279],
        [279, 287]],

       [[158, 173],
        [173, 190],
        [192, 203],
        [203, 261],
        [261, 272],
        [273, 280],
        [281, 289]]])

In [None]:
alignment.target

SeqRecord(seq=Seq(None, length=287), id='RABGSTB', name='<unknown name>', description='<unknown description>', dbxrefs=[])

In [None]:
alignment.query

SeqRecord(seq=Seq(None, length=657), id='pGT875', name='<unknown name>', description='<unknown description>', dbxrefs=[])

In [None]:
alignment.annotations

{'% identity': 66.93,
 'mismatches': 42,
 'gap opens': 8,
 'evalue': 3.2e-07,
 'bit score': 45.0}

In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/2uvo_hhblits.hhr", "hhr")

In [None]:
alignments.metadata

{'Match_columns': 171,
 'No_of_seqs': (1560, 4005),
 'Neff': 8.3,
 'Searched_HMMs': 34,
 'Rundate': 'Fri Feb 15 16:34:13 2019',
 'Command line': 'hhblits -i 2uvoAh.fasta -d /pdb70'}

In [None]:
alignments.query_name

'2UVO:A|PDBID|CHAIN|SEQUENCE'

In [None]:
for alignment in alignments:
    print(alignment.target.id)

2uvo_A
2wga
1ulk_A
1ulk_A
2uvo_A
2wga
1uha_A
1en2_A
1en2_A
1uha_A
1wkx_A
1p9g_A
1p9g_A
1wkx_A
4mpi_A
2lb7_A
4mpi_A
2lb7_A
2kus_A
2kus_A
1mmc_A
1mmc_A
2n1s_A
2n1s_A
2dkv_A
2dkv_A
4zxm_A
1wga
4zxm_A
4z8i_A
4z8i_A
1wga


In [None]:
alignments = iter(alignments)
alignment = next(alignments)
alignment

<Alignment object (2 rows x 171 columns) at 0x10da2eae0>

In [None]:
print(alignment)

2uvo_A            0 ERCGEQGSNMECPNNLCCSQYGYCGMGGDYCGKGCQNGACWTSKRCGSQAGGATCTNNQC
                  0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
2UVO:A|PD         0 ERCGEQGSNMECPNNLCCSQYGYCGMGGDYCGKGCQNGACWTSKRCGSQAGGATCTNNQC

2uvo_A           60 CSQYGYCGFGAEYCGAGCQGGPCRADIKCGSQAGGKLCPNNLCCSQWGFCGLGSEFCGGG
                 60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
2UVO:A|PD        60 CSQYGYCGFGAEYCGAGCQGGPCRADIKCGSQAGGKLCPNNLCCSQWGFCGLGSEFCGGG

2uvo_A          120 CQSGACSTDKPCGKDAGGRVCTNNYCCSKWGSCGIGPGYCGAGCQSGGCDG 171
                120 ||||||||||||||||||||||||||||||||||||||||||||||||||| 171
2UVO:A|PD       120 CQSGACSTDKPCGKDAGGRVCTNNYCCSKWGSCGIGPGYCGAGCQSGGCDG 171



In [None]:
alignment.target is alignment.sequences[0]

True

In [None]:
alignment.query is alignment.sequences[1]

True

In [None]:
alignment.query.id

'2UVO:A|PDBID|CHAIN|SEQUENCE'

In [None]:
alignment.target.id

'2uvo_A'

In [None]:
alignment.target.seq

Seq('ERCGEQGSNMECPNNLCCSQYGYCGMGGDYCGKGCQNGACWTSKRCGSQAGGAT...CDG')

In [None]:
alignment.query.seq

Seq('ERCGEQGSNMECPNNLCCSQYGYCGMGGDYCGKGCQNGACWTSKRCGSQAGGAT...CDG')

In [None]:
alignment.target.annotations

{'hmm_name': '2uvo_A',
 'hmm_description': 'Agglutinin isolectin 1; carbohydrate-binding protein, hevein domain, chitin-binding, GERM agglutinin, chitin-binding protein; HET: NDG NAG GOL; 1.40A {Triticum aestivum} PDB: 1wgc_A* 2cwg_A* 2x3t_A* 4aml_A* 7wga_A 9wga_A 2wgc_A 1wgt_A 1k7t_A* 1k7v_A* 1k7u_A 2x52_A* 1t0w_A*'}

In [None]:
alignment.query.letter_annotations

{'Consensus': '~~cg~~~~~~~c~~~~CCs~~g~CG~~~~~c~~~c~~~~c~~~~~Cg~~~~~~~c~~~~CCs~~g~CG~~~~~c~~~c~~~~~~~~~~Cg~~~~~~~c~~~~CCS~~G~CG~~~~~C~~~Cq~~~c~~~~~Cg~~~~~~~c~~~~CCS~~G~CG~~~~~C~~gCq~~~c~~'}

In [None]:
alignment.annotations

{'Probab': 99.95,
 'E-value': 3.7e-34,
 'Score': 210.31,
 'Identities': 100.0,
 'Similarity': 2.05,
 'Sum_probs': 166.9}

In [None]:
alignment.column_annotations

{'column score': '||||++.++..||++.|||+|+|||.+.+||+++||.+.|++..+|+++++.++|....|||.++||+.+.+||+.+||.+++++|+.|+...+++.||++.|||.|||||...+||+.+||+++|++|.+|++.+++++|..+.|||+++-||+...||+++||++.|||',
 'Confidence': '799999999999999999999999999999999999999999999999999999999999999999999999999999999999999999998899999999999999999999999999999999999999999999999999999999999999999999999999986'}

In [None]:
from Bio import Align
alignment = Align.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/probcons.a2m", "a2m")
alignment

<Alignment object (5 rows x 101 columns) at 0x10d6daff0>

In [None]:
print(alignment)

plas_horv         0 D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQE
plas_chlr         0 --VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRD
plas_anav         0 --VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHK
plas_proh         0 VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNT
azup_achc         0 VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A------

plas_horv        57 EYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV 95
plas_chlr        56 DYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV 94
plas_anav        58 QLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV 99
plas_proh        56 KLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV 94
azup_achc        51 -FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV 88



In [None]:
alignment.column_annotations

{'state': 'DIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDIDDDDDDDDDDDDDDDDDDDDDDDIIIDDDDDDDDDDDDDDDDDDDDDD'}

In [None]:
print(format(alignment, "a2m"))

>plas_horvu
D.VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG.VD.VSKISQEEYLTAPGETFSVTLTV...PGTYGFYCEPHAGAGMVGKVTV
>plas_chlre
-.VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG.VN.ADAISRDDYLNAPGETYSVKLTA...AGEYGYYCEPHQGAGMVGKIIV
>plas_anava
-.VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKsADlAKSLSHKQLLMSPGQSTSTTFPAdapAGEYTFYCEPHRGAGMVGKITV
>plas_proho
VqIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG.ES.APALSNTKLRIAPGSFYSVTLGT...PGTYSFYCTPHRGAGMVGTITV
>azup_achcy
VhMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG.AE.A-------FKSKINENYKVTFTA...PGVYGVKCTPHYGMGMVGVVEV



In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/combined.xmfa", "mauve")

In [None]:
alignments.metadata

{'FormatVersion': 'Mauve1',
 'BackboneFile': 'combined.xmfa.bbcols',
 'File': 'combined.fa'}

In [None]:
alignments.identifiers

['0', '1', '2']

In [None]:
for alignment in alignments:
    print([record.id for record in alignment.sequences])
    print(alignment)
    print("******")


['0', '1', '2']
0                49 AAGCCCTCCTAGCACACACCCGGAGTGG-CCGGGCCGTACTTTCCTTTT  1
1                 0 -------------------------------------------------  0
2                 1 AAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGCTTTCCTTTT 48

******
['0']
0                 0 G 1

******
['0']
0                49 A 50

******
['1']
1                 0 GAAGAGGAAAAGTAGATCCCTGGCGTCCGGAGCTGGGACGT 41

******
['2']
2                 0 C 1

******
['2']
2                48 C 49

******


In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/separate.xmfa", "mauve")

In [None]:
alignments.metadata

{'FormatVersion': 'Mauve1', 'BackboneFile': 'separate.xmfa.bbcols'}

In [None]:
alignments.identifiers

['equCab1.fa', 'canFam2.fa', 'mm9.fa']

In [None]:
for alignment in alignments:
    print([record.id for record in alignment.sequences])
    print(alignment)
    print("******")


['equCab1.fa', 'canFam2.fa', 'mm9.fa']
equCab1.f        50 TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC  0
canFam2.f         0 CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC 49
mm9.fa           19 ---------------------------------GGATCTACTTTTCCTCTTC  0

******
['mm9.fa']
mm9.fa           19 CTGGCGTCCGGAGCTGGGACGT 41

******


In [None]:
from io import StringIO
stream = StringIO()
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/separate.xmfa", "mauve")
Align.write(alignments, stream, "mauve")

2

In [None]:
print(stream.getvalue())

#FormatVersion Mauve1
#Sequence1File	equCab1.fa
#Sequence1Format	FastA
#Sequence2File	canFam2.fa
#Sequence2Format	FastA
#Sequence3File	mm9.fa
#Sequence3Format	FastA
#BackboneFile	separate.xmfa.bbcols
> 1:1-50 - equCab1.fa
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
> 2:1-49 + canFam2.fa
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
> 3:1-19 - mm9.fa
---------------------------------GGATCTACTTTTCCTCTTC
=
> 3:20-41 + mm9.fa
CTGGCGTCCGGAGCTGGGACGT
=



In [None]:
stream = StringIO()
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/separate.xmfa", "mauve")
metadata = alignments.metadata
identifiers = alignments.identifiers
alignments = list(alignments)  # this drops the attributes

In [None]:
Align.write(alignments, stream, "mauve", metadata=metadata, identifiers=identifiers)

2

In [None]:
print(stream.getvalue())

#FormatVersion Mauve1
#Sequence1File	equCab1.fa
#Sequence1Format	FastA
#Sequence2File	canFam2.fa
#Sequence2Format	FastA
#Sequence3File	mm9.fa
#Sequence3Format	FastA
#BackboneFile	separate.xmfa.bbcols
> 1:1-50 - equCab1.fa
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
> 2:1-49 + canFam2.fa
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
> 3:1-19 - mm9.fa
---------------------------------GGATCTACTTTTCCTCTTC
=
> 3:20-41 + mm9.fa
CTGGCGTCCGGAGCTGGGACGT
=



In [None]:
alignments = Align.Alignments(alignments)
alignments.metadata = metadata
alignments.identifiers = identifiers
stream = StringIO()
Align.write(alignments, stream, "mauve", metadata=metadata, identifiers=identifiers)

2

In [None]:
print(stream.getvalue())

#FormatVersion Mauve1
#Sequence1File	equCab1.fa
#Sequence1Format	FastA
#Sequence2File	canFam2.fa
#Sequence2Format	FastA
#Sequence3File	mm9.fa
#Sequence3Format	FastA
#BackboneFile	separate.xmfa.bbcols
> 1:1-50 - equCab1.fa
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
> 2:1-49 + canFam2.fa
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
> 3:1-19 - mm9.fa
---------------------------------GGATCTACTTTTCCTCTTC
=
> 3:20-41 + mm9.fa
CTGGCGTCCGGAGCTGGGACGT
=



In [None]:
alignment = alignments[0]
print(alignment.format("mauve", metadata=metadata, identifiers=identifiers))

> 1:1-50 - equCab1.fa
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
> 2:1-49 + canFam2.fa
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
> 3:1-19 - mm9.fa
---------------------------------GGATCTACTTTTCCTCTTC
=



In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/ex1.sam", "sam")
alignment = next(alignments)

In [None]:
alignment.flag

69

In [None]:
bin(69)

'0b1000101'

In [None]:
bin(4)

'0b100'

In [None]:
if alignment.flag & 4:
    print("unmapped")
else:
    print("mapped")

unmapped


In [None]:
alignment.sequences

[None,
 SeqRecord(seq=Seq('CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA'), id='EAS56_57:6:190:289:82', name='<unknown name>', description='', dbxrefs=[])]

In [None]:
alignment.target is None

True

In [None]:
alignment = next(alignments)
if alignment.flag & 4:
    print("unmapped")
else:
    print("mapped")


mapped


In [None]:
alignment.target

SeqRecord(seq=None, id='chr1', name='<unknown name>', description='', dbxrefs=[])

In [None]:
format(alignment, "sam")

'EAS56_57:6:190:289:82\t137\tchr1\t100\t73\t35M\t=\t100\t0\tAGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCAC\t<<<<<<;<<<<<<<<<<;<<;<<<<;8<6;9;;2;\tMF:i:64\tAq:i:0\tNM:i:0\tUQ:i:0\tH0:i:1\tH1:i:0\n'

In [None]:
format(alignment, "bed")

'chr1\t99\t134\tEAS56_57:6:190:289:82\t0\t+\t99\t134\t0\t1\t35,\t0,\n'

In [None]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
target = SeqRecord(Seq(None, length=1575), id="chr1")
alignment.target = target
format(alignment, "psl")  

'35\t0\t0\t0\t0\t0\t0\t0\t+\tEAS56_57:6:190:289:82\t35\t0\t35\tchr1\t1575\t99\t134\t1\t35,\t0,\t99,\n'

In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/ex1_header.sam", "sam")
len(alignments.targets)

2

In [None]:
alignments.targets[0]

SeqRecord(seq=Seq(None, length=1575), id='chr1', name='<unknown name>', description='', dbxrefs=[])

In [None]:
alignments.targets[1]

SeqRecord(seq=Seq(None, length=1584), id='chr2', name='<unknown name>', description='', dbxrefs=[])

In [None]:
alignments.metadata

{'HD': {'VN': '1.3', 'SO': 'coordinate'}}

In [None]:
alignment = next(alignments)  # the unmapped sequence; skip it
alignment = next(alignments)
format(alignment, "psl")

'35\t0\t0\t0\t0\t0\t0\t0\t+\tEAS56_57:6:190:289:82\t35\t0\t35\tchr1\t1575\t99\t134\t1\t35,\t0,\t99,\n'

In [None]:
print(alignment)

chr1             99 ??????????????????????????????????? 134
                  0 ...................................  35
EAS56_57:         0 AGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCAC  35



In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/sam1.sam", "sam")
for alignment in alignments:
    if not alignment.flag & 4:  # Skip the unmapped lines
        break

alignment 

<Alignment object (2 rows x 101 columns) at 0x10da5ce30>

In [None]:
print(alignment)

1            136185 TCACGGTGGCCTGTTGAGGCAGGGGGTCACGCTGACCTCTGTCCGCGTGGGAGGGGCCGG
                  0 |||||||||||||||||||||||||.||||||||||||||.||.||||||||||||||||
HWI-1KL12         0 TCACGGTGGCCTGTTGAGGCAGGGGCTCACGCTGACCTCTCTCGGCGTGGGAGGGGCCGG

1            136245 TGTGAGGCAAGGGCTCACACTGACCTCTCTCAGCGTGGGAG 136286
                 60 ||||||||||||||||||.||||||||||||.|||||||||    101
HWI-1KL12        60 TGTGAGGCAAGGGCTCACGCTGACCTCTCTCGGCGTGGGAG    101



In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/dna_rna.sam", "sam")
alignment = next(alignments)
alignment = next(alignments)
alignment = next(alignments)
print(format(alignment, "SAM"))

NR_111921.1	0	chr3	48663768	0	46M1827N82M3376N76M12H	*	0	0	CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCGCTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGCTGCTGGGCGGCAGATGGAGCGATCAGAAGACCAGGAGACGGGAGCTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTATATTAAAAAA	*	AS:i:1000	NM:i:0



In [None]:
print(alignment.coordinates)

[[48663767 48663813 48665640 48665722 48669098 48669174]
 [       0       46       46      128      128      204]]


In [None]:
alignment.operations

bytearray(b'MNMNM')

In [None]:
alignment.query.annotations["hard_clip_right"]

12

In [None]:
from io import StringIO
import numpy as np

from Bio import Align
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

alignments = Align.Alignments()

seq1 = Seq(None, length=10000)
target1 = SeqRecord(seq1, id="chr1")
seq2 = Seq(None, length=15000)
target2 = SeqRecord(seq2, id="chr2")
alignments.targets = [target1, target2]
alignments.metadata = {"HD": {"VN": "1.3", "SO": "coordinate"}}

seqA = Seq(None, length=20)
queryA = SeqRecord(seqA, id="readA")
sequences = [target1, queryA]
coordinates = np.array([[4300, 4320], [0, 20]])
alignment = Align.Alignment(sequences, coordinates)
alignments.append(alignment)

seqB = Seq(None, length=25)
queryB = SeqRecord(seqB, id="readB")
sequences = [target1, queryB]
coordinates = np.array([[5900, 5925], [25, 0]])
alignment = Align.Alignment(sequences, coordinates)
alignments.append(alignment)

seqC = Seq(None, length=40)
queryC = SeqRecord(seqC, id="readC")
sequences = [target2, queryC]
coordinates = np.array([[12300, 12318], [0, 18]])
alignment = Align.Alignment(sequences, coordinates)
alignments.append(alignment)

stream = StringIO()
Align.write(alignments, stream, "sam")

3

In [None]:
print(stream.getvalue())

@HD	VN:1.3	SO:coordinate
@SQ	SN:chr1	LN:10000
@SQ	SN:chr2	LN:15000
readA	0	chr1	4301	255	20M	*	0	0	*	*
readB	16	chr1	5901	255	25M	*	0	0	*	*
readC	0	chr2	12301	255	18M22S	*	0	0	*	*



In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/bed12.bed", "bed")
len(alignments)

2

In [None]:
for alignment in alignments:
    print(alignment.coordinates)


[[1000 1567 4512 5000]
 [   0  567  567 1055]]
[[2000 2433 5601 6000]
 [ 832  399  399    0]]


In [None]:
alignment.target

SeqRecord(seq=Seq(None, length=9223372036854775807), id='chr22', name='<unknown name>', description='', dbxrefs=[])

In [None]:
alignment.query

SeqRecord(seq=Seq(None, length=832), id='mRNA2', name='<unknown name>', description='', dbxrefs=[])

In [None]:
alignment.score

900.0

In [None]:
alignment.thickStart

2300

In [None]:
alignment.thickEnd

5960

In [None]:
alignment.itemRgb

'0,255,0'

In [None]:
print(format(alignment, "bed"))

chr22	2000	6000	mRNA2	900	-	2300	5960	0,255,0	2	433,399,	0,3601,



In [None]:
print(alignment.format("bed")) 

chr22	2000	6000	mRNA2	900	-	2300	5960	0,255,0	2	433,399,	0,3601,



In [None]:
print(alignment.format("bed", 3))

chr22	2000	6000



In [None]:
print(alignment.format("bed", 6))

chr22	2000	6000	mRNA2	900	-



In [None]:
Align.write(alignments, "mybed3file.bed", "bed", bedN=3)

2

In [None]:
Align.write(alignments, "mybed6file.bed", "bed", bedN=6)

2

In [None]:
Align.write(alignments, "mybed12file.bed", "bed")

2

In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/dna_rna.bb", "bigbed")
len(alignments)

4

In [None]:
print(alignments.declaration) 

table bed
"Browser Extensible Data"
(
   string          chrom;          "Reference sequence chromosome or scaffold"
   uint            chromStart;     "Start position in chromosome"
   uint            chromEnd;       "End position in chromosome"
   string          name;           "Name of item."
   uint            score;          "Score (0-1000)"
   char[1]         strand;         "+ or - for strand"
   uint            thickStart;     "Start of where display should be thick (start codon)"
   uint            thickEnd;       "End of where display should be thick (stop codon)"
   uint            reserved;       "Used as itemRgb as of 2004-11-22"
   int             blockCount;     "Number of blocks"
   int[blockCount] blockSizes;     "Comma separated list of block sizes"
   int[blockCount] chromStarts;    "Start positions relative to chromStart"
)



In [None]:
alignments.targets

[SeqRecord(seq=Seq(None, length=198295559), id='chr3', name='<unknown name>', description='<unknown description>', dbxrefs=[])]

In [None]:
alignment = next(alignments)
alignment.target.id

'chr3'

In [None]:
alignment.query.id

'NR_046654.1'

In [None]:
alignment.coordinates

array([[42530895, 42530958, 42532020, 42532095, 42532563, 42532606],
       [     181,      118,      118,       43,       43,        0]])

In [None]:
alignment.thickStart

42530895

In [None]:
alignment.thickEnd

42532606

In [None]:
print(alignment)

chr3       42530895 ????????????????????????????????????????????????????????????
                  0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NR_046654       181 ????????????????????????????????????????????????????????????

chr3       42530955 ????????????????????????????????????????????????????????????
                 60 |||---------------------------------------------------------
NR_046654       121 ???---------------------------------------------------------

chr3       42531015 ????????????????????????????????????????????????????????????
                120 ------------------------------------------------------------
NR_046654       118 ------------------------------------------------------------

chr3       42531075 ????????????????????????????????????????????????????????????
                180 ------------------------------------------------------------
NR_046654       118 ------------------------------------------------------------

chr3       42531135 ????

In [None]:
for alignment in alignments:
    print(format(alignment, "bed"))  


chr3	42530895	42532606	NR_046654.1	1000	-	42530895	42532606	0	3	63,75,43,	0,1125,1668,

chr3	42530895	42532606	NR_046654.1_modified	978	-	42530895	42532606	0	5	27,36,17,56,43,	0,27,1125,1144,1668,

chr3	48663767	48669174	NR_111921.1	1000	+	48663767	48669174	0	3	46,82,76,	0,1873,5331,

chr3	48663767	48669174	NR_111921.1_modified	972	+	48663767	48669174	0	5	28,17,76,6,76,	0,29,1873,1949,5331,



In [None]:
selected_alignments = alignments.search("chr3", 48000000, 49000000)
for alignment in selected_alignments:
    print(alignment.query.id)


NR_111921.1
NR_111921.1_modified


In [None]:
Align.write(alignments, "output.bb", "bigbed")

In [None]:
Align.write(alignments, "output.bb", "bigbed", bedN=6)

In [None]:
from Bio.Align import bigbed
with open("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/bedExample2.as") as stream:
    autosql_data = stream.read()
declaration = bigbed.AutoSQLTable.from_string(autosql_data)
type(declaration)

Bio.Align.bigbed.AutoSQLTable

In [None]:
print(declaration)

table hg18KGchr7
"UCSC Genes for chr7 with color plus GeneSymbol and SwissProtID"
(
   string  chrom;         "Reference sequence chromosome or scaffold"
   uint    chromStart;    "Start position of feature on chromosome"
   uint    chromEnd;      "End position of feature on chromosome"
   string  name;          "Name of gene"
   uint    score;         "Score"
   char[1] strand;        "+ or - for strand"
   uint    thickStart;    "Coding region start"
   uint    thickEnd;      "Coding region end"
   uint    reserved;      "Green on + strand, Red on - strand"
   string  geneSymbol;    "Gene Symbol"
   string  spID;          "SWISS-PROT protein Accession number"
)



In [None]:
#Align.write(
#   alignments,
#  "output.bb",
# "bigbed",
# bedN=9,
# declaration=declaration,
# extraIndex=["name", "geneSymbol"],
#)

In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/dna_rna.psl", "psl")
alignments.metadata

{'psLayout version': '3'}

In [None]:
for alignment in alignments:
    print(alignment.target.id, alignment.query.id)

chr3 NR_046654.1
chr3 NR_046654.1_modified
chr3 NR_111921.1
chr3 NR_111921.1_modified


In [None]:
alignment.matches

162

In [None]:
alignment.misMatches

2

In [None]:
alignment.repMatches

39

In [None]:
alignment.nCount

0

In [None]:
alignment.target

SeqRecord(seq=Seq(None, length=198295559), id='chr3', name='<unknown name>', description='', dbxrefs=[])

In [None]:
alignment.query

SeqRecord(seq=Seq(None, length=220), id='NR_111921.1_modified', name='<unknown name>', description='', dbxrefs=[])

In [None]:
print(format(alignment, "bed"))

chr3	48663767	48669174	NR_111921.1_modified	0	+	48663767	48669174	0	5	28,17,76,6,76,	0,29,1873,1949,5331,



In [None]:
print(format(alignment, "psl")) 

162	2	39	0	1	2	3	5204	+	NR_111921.1_modified	220	3	208	chr3	198295559	48663767	48669174	5	28,17,76,6,76,	3,31,48,126,132,	48663767,48663796,48665640,48665716,48669098,



In [None]:
import numpy as np
from Bio import Align
query = "GGTGGGGG"
target = "AAAAAAAggggGGNGAAAAA"
coordinates = np.array([[0, 7, 15, 20], [0, 0, 8, 8]])
alignment = Align.Alignment([target, query], coordinates)
print(alignment)

target            0 AAAAAAAggggGGNGAAAAA 20
                  0 -------....||.|----- 20
query             0 -------GGTGGGGG-----  8



In [None]:
line = alignment.format("psl")
print(line)

6	1	0	1	0	0	0	0	+	query	8	0	8	target	20	7	15	1	8,	0,	7,



In [None]:
line = alignment.format("psl", mask="lower")
print(line) 

3	1	3	1	0	0	0	0	+	query	8	0	8	target	20	7	15	1	8,	0,	7,



In [None]:
line = alignment.format("psl", mask="lower", wildcard=None)
print(line)

3	2	3	0	0	0	0	0	+	query	8	0	8	target	20	7	15	1	8,	0,	7,



In [None]:
print(format(alignment, "psl")) 

6	1	0	1	0	0	0	0	+	query	8	0	8	target	20	7	15	1	8,	0,	7,



In [None]:
line = f"The alignment in PSL format is '{alignment:psl}'."
print(line) 

The alignment in PSL format is '6	1	0	1	0	0	0	0	+	query	8	0	8	target	20	7	15	1	8,	0,	7,
'.


In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/dna_rna.psl.bb", "bigpsl")
len(alignments)

4

In [None]:
print(alignments.declaration) 

table bigPsl
"bigPsl pairwise alignment"
(
   string          chrom;           "Reference sequence chromosome or scaffold"
   uint            chromStart;      "Start position in chromosome"
   uint            chromEnd;        "End position in chromosome"
   string          name;            "Name or ID of item, ideally both human readable and unique"
   uint            score;           "Score (0-1000)"
   char[1]         strand;          "+ or - indicates whether the query aligns to the + or - strand on the reference"
   uint            thickStart;      "Start of where display should be thick (start codon)"
   uint            thickEnd;        "End of where display should be thick (stop codon)"
   uint            reserved;        "RGB value (use R,G,B string in input file)"
   int             blockCount;      "Number of blocks"
   int[blockCount] blockSizes;      "Comma separated list of block sizes"
   int[blockCount] chromStarts;     "Start positions relative to chromStart"
   uint    

In [None]:
alignments.targets

[SeqRecord(seq=Seq(None, length=198295559), id='chr3', name='<unknown name>', description='<unknown description>', dbxrefs=[])]

In [None]:
for alignment in alignments:
    print(alignment.target.id, alignment.query.id)

chr3 NR_046654.1
chr3 NR_046654.1_modified
chr3 NR_111921.1
chr3 NR_111921.1_modified


In [None]:
alignment.coordinates

array([[48663767, 48663795, 48663796, 48663813, 48665640, 48665716,
        48665716, 48665722, 48669098, 48669174],
       [       3,       31,       31,       48,       48,      124,
             126,      132,      132,      208]])

In [None]:
alignment.thickStart

48663767

In [None]:
alignment.thickEnd

48669174

In [None]:
alignment.matches

162

In [None]:
alignment.misMatches

2

In [None]:
alignment.repMatches

39

In [None]:
alignment.nCount

0

In [None]:
print(format(alignment, "bed"))

chr3	48663767	48669174	NR_111921.1_modified	1000	+	48663767	48669174	0	5	28,17,76,6,76,	0,29,1873,1949,5331,



In [None]:
print(format(alignment, "psl"))

162	2	39	0	1	2	3	5204	+	NR_111921.1_modified	220	3	208	chr3	198295559	48663767	48669174	5	28,17,76,6,76,	3,31,48,126,132,	48663767,48663796,48665640,48665716,48669098,



In [None]:
selected_alignments = alignments.search("chr3", 48000000, 49000000)
for alignment in selected_alignments:
    print(alignment.query.id)

NR_111921.1
NR_111921.1_modified


In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/ucsc_test.maf", "maf")

In [None]:
alignments.metadata 

{'name': 'euArc',
 'visibility': 'pack',
 'mafDot': 'off',
 'frames': 'multiz28wayFrames',
 'speciesOrder': ['hg16', 'panTro1', 'baboon', 'mm4', 'rn3'],
 'description': 'A sample alignment',
 'MAF Version': '1',
 'Scoring': 'tba.v8',
 'Comments': ['tba.v8 (((human chimp) baboon) (mouse rat))',
  'multiz.v7',
  'maf_project.v5 _tba_right.maf3 mouse _tba_C',
  'single_cov2.v4 single_cov2 /dev/stdin']}

In [None]:
alignment = next(alignments)
alignment.score

23262.0

In [None]:
{seq.id: len(seq) for seq in alignment.sequences} 

{'hg16.chr7': 158545518,
 'panTro1.chr6': 161576975,
 'baboon': 4622798,
 'mm4.chr6': 151104725,
 'rn3.chr4': 187371129}

In [None]:
print(alignment.coordinates)

[[27578828 27578829 27578831 27578831 27578850 27578850 27578866]
 [28741140 28741141 28741143 28741143 28741162 28741162 28741178]
 [  116834   116835   116837   116837   116856   116856   116872]
 [53215344 53215344 53215346 53215347 53215366 53215366 53215382]
 [81344243 81344243 81344245 81344245 81344264 81344267 81344283]]


In [None]:
print(alignment)

hg16.chr7  27578828 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG 27578866
panTro1.c  28741140 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG 28741178
baboon       116834 AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG   116872
mm4.chr6   53215344 -AATGGGAATGTTAAGCAAACGA---ATTGTCTCTCAGTGTG 53215382
rn3.chr4   81344243 -AA-GGGGATGCTAAGCCAATGAGTTGTTGTCTCTCAATGTG 81344283



In [None]:
print(format(alignment, "phylip"))

5 42
hg16.chr7 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG
panTro1.chAAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG
baboon    AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG
mm4.chr6  -AATGGGAATGTTAAGCAAACGA---ATTGTCTCTCAGTGTG
rn3.chr4  -AA-GGGGATGCTAAGCCAATGAGTTGTTGTCTCTCAATGTG



In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/ucsc_mm9_chr10.maf", "maf")
for i in range(10):
    alignment = next(alignments)

alignment.score

19159.0

In [None]:
print(alignment)

mm9.chr10   3014644 CCTGTACC---CTTTGGTGAGAATTTTTGTTTCAGTGTTAAAAGTTTG   3014689
hg18.chr6 155029206 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT 155029160
panTro2.c 157519257 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT 157519211
calJac1.C      6182 CCTATACCTTTCTTTCATGAGAA-TTTTGTTTGAATCCTAAAC-TTTT      6228
loxAfr1.s      9407 ------------TTTGGTTAGAA-TTATGCTTTAATTCAAAAC-TTCC      9373



In [None]:
alignment.sequences[0].annotations

{}

In [None]:
alignment.sequences[1].annotations

{'leftStatus': 'I', 'leftCount': 9085, 'rightStatus': 'C', 'rightCount': 0}

In [None]:
alignment.sequences[2].annotations["quality"]

'9999999999999999999999999999999999999999999999'

In [None]:
alignment.sequences[4].annotations["quality"]

'9999969989999999999999998699989997'

In [None]:
alignment.annotations["empty"]

[(SeqRecord(seq=Seq(None, length=498454), id='tupBel1.scaffold_114895.1-498454', name='', description='', dbxrefs=[]),
  (331078, 326933),
  'I'),
 (SeqRecord(seq=Seq(None, length=100002), id='echTel1.scaffold_288249', name='', description='', dbxrefs=[]),
  (87661, 95225),
  'I'),
 (SeqRecord(seq=Seq(None, length=359464), id='otoGar1.scaffold_334.1-359464', name='', description='', dbxrefs=[]),
  (178247, 175316),
  'I'),
 (SeqRecord(seq=Seq(None, length=174210431), id='ponAbe2.chr6', name='', description='', dbxrefs=[]),
  (158048983, 158040939),
  'I')]

In [None]:
print(format(alignment, "MAF"))

a score=19159.000000
s mm9.chr10                         3014644   45 + 129993255 CCTGTACC---CTTTGGTGAGAATTTTTGTTTCAGTGTTAAAAGTTTG
s hg18.chr6                        15870786   46 - 170899992 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
i hg18.chr6                        I 9085 C 0
s panTro2.chr6                     16389355   46 - 173908612 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
q panTro2.chr6                                               99999999999999999999999-9999999999999999999-9999
i panTro2.chr6                     I 9106 C 0
s calJac1.Contig6394                   6182   46 +    133105 CCTATACCTTTCTTTCATGAGAA-TTTTGTTTGAATCCTAAAC-TTTT
i calJac1.Contig6394               N 0 C 0
s loxAfr1.scaffold_75566               1167   34 -     10574 ------------TTTGGTTAGAA-TTATGCTTTAATTCAAAAC-TTCC
q loxAfr1.scaffold_75566                                     ------------99999699899-9999999999999869998-9997
i loxAfr1.scaffold_75566           N 0 C 0
e tupBel1.scaffold_114895.1-498

In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/ucsc_test.bb", "bigmaf")
len(alignments)

3

In [None]:
print(alignments.declaration)

table bedMaf
"Bed3 with MAF block"
(
   string  chrom;         "Reference sequence chromosome or scaffold"
   uint    chromStart;    "Start position in chromosome"
   uint    chromEnd;      "End position in chromosome"
   lstring mafBlock;      "MAF block"
)



In [None]:
alignments.reference

'hg16'

In [None]:
alignments.targets

[SeqRecord(seq=Seq(None, length=158545518), id='hg16.chr7', name='<unknown name>', description='<unknown description>', dbxrefs=[])]

In [None]:
alignment = next(alignments)
alignment.score

23262.0

In [None]:
{seq.id: len(seq) for seq in alignment.sequences}

{'hg16.chr7': 158545518,
 'panTro1.chr6': 161576975,
 'baboon': 4622798,
 'mm4.chr6': 151104725,
 'rn3.chr4': 187371129}

In [None]:
print(alignment.coordinates)

[[27578828 27578829 27578831 27578831 27578850 27578850 27578866]
 [28741140 28741141 28741143 28741143 28741162 28741162 28741178]
 [  116834   116835   116837   116837   116856   116856   116872]
 [53215344 53215344 53215346 53215347 53215366 53215366 53215382]
 [81344243 81344243 81344245 81344245 81344264 81344267 81344283]]


In [None]:
print(alignment)

hg16.chr7  27578828 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG 27578866
panTro1.c  28741140 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG 28741178
baboon       116834 AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG   116872
mm4.chr6   53215344 -AATGGGAATGTTAAGCAAACGA---ATTGTCTCTCAGTGTG 53215382
rn3.chr4   81344243 -AA-GGGGATGCTAAGCCAATGAGTTGTTGTCTCTCAATGTG 81344283



In [None]:
print(format(alignment, "phylip"))

5 42
hg16.chr7 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG
panTro1.chAAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG
baboon    AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG
mm4.chr6  -AATGGGAATGTTAAGCAAACGA---ATTGTCTCTCAGTGTG
rn3.chr4  -AA-GGGGATGCTAAGCCAATGAGTTGTTGTCTCTCAATGTG



In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/ucsc_mm9_chr10.bb", "bigmaf")
for i in range(10):
    alignment = next(alignments)

alignment.score

19159.0

In [None]:
print(alignment)

mm9.chr10   3014644 CCTGTACC---CTTTGGTGAGAATTTTTGTTTCAGTGTTAAAAGTTTG   3014689
hg18.chr6 155029206 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT 155029160
panTro2.c 157519257 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT 157519211
calJac1.C      6182 CCTATACCTTTCTTTCATGAGAA-TTTTGTTTGAATCCTAAAC-TTTT      6228
loxAfr1.s      9407 ------------TTTGGTTAGAA-TTATGCTTTAATTCAAAAC-TTCC      9373



In [None]:
print(format(alignment, "MAF"))

a score=19159.000000
s mm9.chr10                         3014644   45 + 129993255 CCTGTACC---CTTTGGTGAGAATTTTTGTTTCAGTGTTAAAAGTTTG
s hg18.chr6                        15870786   46 - 170899992 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
i hg18.chr6                        I 9085 C 0
s panTro2.chr6                     16389355   46 - 173908612 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
q panTro2.chr6                                               99999999999999999999999-9999999999999999999-9999
i panTro2.chr6                     I 9106 C 0
s calJac1.Contig6394                   6182   46 +    133105 CCTATACCTTTCTTTCATGAGAA-TTTTGTTTGAATCCTAAAC-TTTT
i calJac1.Contig6394               N 0 C 0
s loxAfr1.scaffold_75566               1167   34 -     10574 ------------TTTGGTTAGAA-TTATGCTTTAATTCAAAAC-TTCC
q loxAfr1.scaffold_75566                                     ------------99999699899-9999999999999869998-9997
i loxAfr1.scaffold_75566           N 0 C 0
e tupBel1.scaffold_114895.1-498

In [None]:
alignment.sequences[1].annotations

{'leftStatus': 'I', 'leftCount': 9085, 'rightStatus': 'C', 'rightCount': 0}

In [None]:
alignment.sequences[2].annotations["quality"]

'9999999999999999999999999999999999999999999999'

In [None]:
alignment.sequences[4].annotations["quality"]

'9999969989999999999999998699989997'

In [None]:
alignment.annotations["empty"]  

[(SeqRecord(seq=Seq(None, length=498454), id='tupBel1.scaffold_114895.1-498454', name='', description='', dbxrefs=[]),
  (331078, 326933),
  'I'),
 (SeqRecord(seq=Seq(None, length=100002), id='echTel1.scaffold_288249', name='', description='', dbxrefs=[]),
  (87661, 95225),
  'I'),
 (SeqRecord(seq=Seq(None, length=359464), id='otoGar1.scaffold_334.1-359464', name='', description='', dbxrefs=[]),
  (178247, 175316),
  'I'),
 (SeqRecord(seq=Seq(None, length=174210431), id='ponAbe2.chr6', name='', description='', dbxrefs=[]),
  (158048983, 158040939),
  'I')]

In [None]:
selected_alignments = alignments.search("mm9.chr10", 3018000, 3019000)
for alignment in selected_alignments:
    start, end = alignment.coordinates[0, 0], alignment.coordinates[0, -1]
    print(start, end)


3017743 3018161
3018161 3018230
3018230 3018359
3018359 3018482
3018482 3018644
3018644 3018822
3018822 3018932
3018932 3019271


In [None]:
from Bio import Align
alignments = Align.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/psl_34_001.chain", "chain")

In [None]:
for alignment in alignments:
    print(alignment.target.id, alignment.query.id)

chr4 hg18_dna
chr1 hg18_dna
chr2 hg18_dna
chr9 hg19_dna
chr8 hg19_dna
chr22 hg19_dna
chr2 hg19_dna
chr19 hg19_dna
chr18 hg19_dna
chr18 hg19_dna
chr13 hg19_dna
chr1 hg19_dna
chr1 hg19_dna
chr4 hg19_dna
chr22 hg19_dna
chr2 hg19_dna
chr19 hg19_dna
chr19 hg19_dna
chr10 hg19_dna
chr10 hg19_dna
chr1 hg19_dna
chr1 hg19_dna


In [None]:
alignments = iter(alignments)
for i in range(7):
    alignment = next(alignments)


In [None]:
alignment.score

41.0

In [None]:
alignment.annotations["id"]

'7'

In [None]:
print(format(alignment, "chain"))  

chain 41 chr2 243199373 + 183925984 183926028 hg19_dna 50 + 1 49 7
6	0	4
38




In [None]:
alignment.coordinates

array([[183925984, 183925990, 183925990, 183926028],
       [        1,         7,        11,        49]])

In [None]:
print(alignment)

chr2      183925984 ??????----?????????????????????????????????????? 183926028
                  0 ||||||----||||||||||||||||||||||||||||||||||||||        48
hg19_dna          1 ????????????????????????????????????????????????        49



In [None]:
print(format(alignment, "BED"))

chr2	183925984	183926028	hg19_dna	41	+	183925984	183926028	0	2	6,38,	0,6,



In [None]:
print(format(alignment, "PSL"))

44	0	0	0	1	4	0	0	+	hg19_dna	50	1	49	chr2	243199373	183925984	183926028	2	6,38,	1,11,	183925984,183925990,



In [None]:
print(format(alignment, "exonerate"))

vulgar: hg19_dna 1 49 + chr2 183925984 183926028 + 41 M 6 6 G 4 0 M 38 38



In [None]:
print(alignment.format("exonerate", "cigar"))

cigar: hg19_dna 1 49 + chr2 183925984 183926028 + 41 M 6 I 4 M 38



In [None]:
print(format(alignment, "sam")) 

hg19_dna	0	chr2	183925985	255	1S6M4I38M1S	*	0	0	*	*	AS:i:41	id:A:7

