In [10]:
# Pairwise alignment is done with the PairwiseAligner class from Bio.Align.

from Bio import SeqIO,Align
aligner = Align.PairwiseAligner()  # single aligner object; later cells change its settings and reuse it
type(aligner)  # last expression in the cell, so the object's type is displayed

Bio.Align.PairwiseAligner

In [17]:
# Read one record from each of the two pseudogene FASTA files.

seq_1, seq_2 = (
    SeqIO.read(fasta_path, "fasta")
    for fasta_path in (
        "./homosapiens_pseudogene_3.fasta",
        "./homosapiens_pseudogene_4.fasta",
    )
)

# Align the two sequences using the aligner's current settings.

seqAligner = aligner.align(seq_1.seq, seq_2.seq)

In [20]:
# Print only the first alignment produced by the result object.

alignment = next(iter(seqAligner), None)
if alignment is not None:
    print(alignment)

AGGTCCT-GAGC-AAA-G-G-AGGACTTG--GTATGAA-C-AAG-TGGGTTTG-G----TGC-CG-CCATTGCTTCTGGC-TTG---G-GT----C
----||--|-|--|---|-|-|||--|||--||--||--|-|||-|---|--|-|----|||-|--|||--||--||||--|-|---|-||----|
----CC-CG-G-GA--TGTGGAGG--TTGCAGT--GA-GCCAAGAT---T--GCGCCACTGCAC-TCCA--GC--CTGG-GT-GACAGAGTAAGAC



In [22]:
# Score the same pair of sequences with the aligner and report the value.

dna_score = aligner.score(seq_1.seq, seq_2.seq)
print(f"SCORE : {dna_score} ")

SCORE : 44.0 


## LOCAL ALIGNMENT

In [24]:
# Switch the shared aligner to local mode and realign the same two sequences.
aligner.mode = "local"
seqAligner_LOCAL = aligner.align(seq_1.seq, seq_2.seq)

# Show only the first alignment from the result.

alignment = next(iter(seqAligner_LOCAL), None)
if alignment is not None:
    print(alignment)

# Report the score stored on the alignment result; calling aligner.score()
# here would score the same pair of sequences a second time.

print(f"SCORE : {seqAligner_LOCAL.score} ")

# Name of the algorithm the aligner reports for its current settings.

print(f"ALGORITHM USED : {aligner.algorithm}")

AGGTCCT-GAGC-AAA-G-G-AGGACTTG--GTATGAA-C-AAG-TGGGTTTG-G----TGC-CG-CCATTGCTTCTGGC-TTG---G-GT----C
    ||--|-|--|---|-|-|||--|||--||--||--|-|||-|---|--|-|----|||-|--|||--||--||||--|-|---|-||----|
    CC-CG-G-GA--TGTGGAGG--TTGCAGT--GA-GCCAAGAT---T--GCGCCACTGCAC-TCCA--GC--CTGG-GT-GACAGAGTAAGAC

SCORE : 44.0 
ALGORITHM USED : Smith-Waterman


## GLOBAL ALIGNMENT

In [25]:
# Switch the shared aligner back to global mode and realign the same two sequences.
# The result gets its own name so it neither masquerades as a local alignment
# nor overwrites the local-mode result held in seqAligner_LOCAL.
aligner.mode = "global"
seqAligner_GLOBAL = aligner.align(seq_1.seq, seq_2.seq)

# Show only the first alignment from the result.

alignment = next(iter(seqAligner_GLOBAL), None)
if alignment is not None:
    print(alignment)

# Report the score stored on the alignment result; calling aligner.score()
# here would score the same pair of sequences a second time.

print(f"SCORE : {seqAligner_GLOBAL.score} ")

# Name of the algorithm the aligner reports for its current settings.

print(f"ALGORITHM USED : {aligner.algorithm}")

AGGTCCT-GAGC-AAA-G-G-AGGACTTG--GTATGAA-C-AAG-TGGGTTTG-G----TGC-CG-CCATTGCTTCTGGC-TTG---G-GT----C
----||--|-|--|---|-|-|||--|||--||--||--|-|||-|---|--|-|----|||-|--|||--||--||||--|-|---|-||----|
----CC-CG-G-GA--TGTGGAGG--TTGCAGT--GA-GCCAAGAT---T--GCGCCACTGCAC-TCCA--GC--CTGG-GT-GACAGAGTAAGAC

SCORE : 44.0 
ALGORITHM USED : Needleman-Wunsch


## SUBSTITUTION MATRIX + SCORING

In [26]:
# Custom scoring on the shared aligner: reward matches, penalise mismatches and gaps.

aligner.match_score, aligner.mismatch_score, aligner.gap_score = 5, -1, -0.5

# Realign with the new scoring; the last expression displays the resulting score.

seqAligner = aligner.align(seq_1.seq, seq_2.seq)
seqAligner.score


194.0

In [3]:
# import the substitution-matrix helpers
from Bio.Align import substitution_matrices

# load() is called without a matrix name, so no specific matrix is loaded here.
# The result is used by the next cell to test whether a requested matrix name is available.
# NOTE(review): presumably this is the list of available matrix names — confirm against the Biopython docs.

subst_matrix = substitution_matrices.load()

In [8]:
# Ask for a substitution matrix name (e.g. BLOSUM62) and check that it is available.
# Nothing is loaded in this cell.

# Strip surrounding whitespace so a stray space in the typed name does not
# make an otherwise valid matrix name fail the membership test.
matrixName = input("Enter Matrix Name : ").strip()

# flag is True when the requested name is among the entries of subst_matrix.
flag = matrixName in subst_matrix
print("PROCEED FORWARD" if flag else "NOT AVAILABLE")

PROCEED FORWARD


In [9]:
# Load the requested matrix, but only when the availability check passed.
# Fail early with a clear message rather than passing an unknown name to load().

if not flag:
    raise ValueError(f"Substitution matrix {matrixName!r} is not available")

matrixLoad = substitution_matrices.load(matrixName)
print(matrixLoad)

#  Matrix made by matblas from blosum62.iij
#  * column uses minimum score
#  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
#  Blocks Database = /data/blocks_5.0/blocks.dat
#  Cluster Percentage: >= 62
#  Entropy =   0.6979, Expected =  -0.5209
     A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V    B    Z    X    *
A  4.0 -1.0 -2.0 -2.0  0.0 -1.0 -1.0  0.0 -2.0 -1.0 -1.0 -1.0 -1.0 -2.0 -1.0  1.0  0.0 -3.0 -2.0  0.0 -2.0 -1.0  0.0 -4.0
R -1.0  5.0  0.0 -2.0 -3.0  1.0  0.0 -2.0  0.0 -3.0 -2.0  2.0 -1.0 -3.0 -2.0 -1.0 -1.0 -3.0 -2.0 -3.0 -1.0  0.0 -1.0 -4.0
N -2.0  0.0  6.0  1.0 -3.0  0.0  0.0  0.0  1.0 -3.0 -3.0  0.0 -2.0 -3.0 -2.0  1.0  0.0 -4.0 -2.0 -3.0  3.0  0.0 -1.0 -4.0
D -2.0 -2.0  1.0  6.0 -3.0  0.0  2.0 -1.0 -1.0 -3.0 -4.0 -1.0 -3.0 -3.0 -1.0  0.0 -1.0 -4.0 -3.0 -3.0  4.0  1.0 -1.0 -4.0
C  0.0 -3.0 -3.0 -3.0  9.0 -3.0 -4.0 -3.0 -3.0 -1.0 -1.0 -3.0 -1.0 -2.0 -3.0 -1.0 -1.0 -2.0 -2.0 -1.0 -3.0 -3.0 -2.0 -4.0
Q -1.0  1.0  0.0  0.

In [11]:
# Read one record from each of the two protein FASTA files.

protSeq_1, protSeq_2 = (
    SeqIO.read(fasta_path, "fasta")
    for fasta_path in (
        "./prot_homo_sapien_1.fasta",
        "./prot_homo_sapien_2.fasta",
    )
)

In [18]:
# Reuse the shared aligner for proteins by attaching the loaded substitution matrix.
# NOTE(review): the aligner still carries the mode and gap score set in earlier
# cells — confirm those settings are intended for the protein alignment.

aligner.substitution_matrix = matrixLoad

# Align the two protein sequences and show only the first alignment.

protAlignment = aligner.align(protSeq_1.seq, protSeq_2.seq)
print("Sequence Alignment : ")

alignmentShow = next(iter(protAlignment), None)
if alignmentShow is not None:
    print(alignmentShow)

# Take the score from the alignment result; calling aligner.score() here
# would score the same pair of sequences a second time.

protScore = protAlignment.score

print(f"Sequence Alignment Score : \n {protScore}")

Sequence Alignment : 
MEPGAA--A-R--AWS-L----LW---LL----L---P--LLGPVC--A-------S-GPRTLVLL-D-NLN---LRE-T--HSLFFRSLKDRAFELTFKTADDPSLSLI
|---||--|-|--|---|----|----||----|---|--|--|----|-------.-||-------|-|.----|-.-.--|-|---|---|-|----.----------
M---AAVLALRVVA--GLAAAAL-VAMLLEHYGLAGQPSPL--P--RPAPPRRPHPAPGP------GDSNI-FWGL-QISDIH-L---S---R-F----R----------

Sequence Alignment Score : 
 132.0
