In [8]:
# Solving of the sequence alingment problem using divide and conquer 
## This function only calculates the best score, not the alignments 
### Define the costs for match, mismatch, insertion, and deletion
const MATCH_SCORE = 1
const MISMATCH_PENALTY = -1
const GAP_PENALTY = -1

### divide and conquer sequence alignment function
function sequence_alignment(seq1::String, seq2::String, m::Int, n::Int)
    # Base cases
    if m == 0
        return n * GAP_PENALTY
    elseif n == 0
        return m * GAP_PENALTY
    end

    # Recurrence relation
    if seq1[m] == seq2[n]
        match_mismatch = sequence_alignment(seq1, seq2, m-1, n-1) + MATCH_SCORE
    else
        match_mismatch = sequence_alignment(seq1, seq2, m-1, n-1) + MISMATCH_PENALTY
    end

    # Calculate the minimum score considering all operations
    insertion = sequence_alignment(seq1, seq2, m, n-1) + GAP_PENALTY
    deletion = sequence_alignment(seq1, seq2, m-1, n) + GAP_PENALTY

    return max(match_mismatch, insertion, deletion)
end

sequence_alignment (generic function with 1 method)

In [1]:
# Solving sequence alignment using dynamic programming approach
### implementation of needleman_wunsch algorithm
function needleman_wunsch(seq1::String, seq2::String; match_score=1, mismatch_penalty=-1, gap_penalty=-1)
    len1 = length(seq1)
    len2 = length(seq2)

    # Initialize scoring matrix
    score_matrix = zeros(Int, len1+1, len2+1)

    # Initialize gap penalties in scoring matrix
    for i in 1:len1+1
        score_matrix[i, 1] = gap_penalty * (i - 1)
    end
    for j in 1:len2+1
        score_matrix[1, j] = gap_penalty * (j - 1)
    end

    # Fill the scoring matrix
    for i in 2:len1+1
        for j in 2:len2+1
            match = score_matrix[i-1, j-1] + (seq1[i-1] == seq2[j-1] ? match_score : mismatch_penalty)
            delete = score_matrix[i-1, j] + gap_penalty
            insert = score_matrix[i, j-1] + gap_penalty
            score_matrix[i, j] = max(match, delete, insert)
        end
    end

    # Traceback to find the optimal alignment
    align1 = ""
    align2 = ""
    i, j = len1+1, len2+1

    while i > 1 || j > 1
        current_score = score_matrix[i, j]
        if i > 1 && j > 1 && current_score == score_matrix[i-1, j-1] + (seq1[i-1] == seq2[j-1] ? match_score : mismatch_penalty)
            align1 = string(seq1[i-1], align1)
            align2 = string(seq2[j-1], align2)
            i -= 1
            j -= 1
        elseif i > 1 && current_score == score_matrix[i-1, j] + gap_penalty
            align1 = string(seq1[i-1], align1)
            align2 = string("-", align2)
            i -= 1
        else
            align1 = string("-", align1)
            align2 = string(seq2[j-1], align2)
            j -= 1
        end
    end

    return (align1, align2, score_matrix[len1+1, len2+1])
end

needleman_wunsch (generic function with 1 method)

In [2]:
### Special print function to colorcode the match, mismatch and gaps
using Crayons
function print_alignment(align1::String, align2::String)
    red = Crayon(foreground=:light_red)
    magenta = Crayon(foreground=:magenta)
    cyan=Crayon(foreground=:light_cyan)
    for i in 1:length(align1)
        if align1[i] == align2[i]
            print(cyan(string(align1[i])), "  ")
        elseif align1[i] == '-' || align2[i] == '-'
            print(red(string(align1[i])), "  ")
        else
            print(magenta(string(align1[i])), "  ")
        end
        #sleep(0.2)
    end
    println()
    for i in 1:length(align2)
        if align1[i] == align2[i] 
            print(cyan(string(align1[i])), "  ")
        elseif align1[i] == '-' || align2[i] == '-'
            print(red(string(align2[i])), "  ")
        else
            print(magenta(string(align2[i])), "  ")
        end
        sleep(0.1)
    end
    println()
end


print_alignment (generic function with 1 method)

In [12]:
# an example of divide and conquer function
seq1 = "GATAATTAATAA"
seq2 = "GCATGUATGCTTAA"
m = length(seq1)
n = length(seq2)

@time alignment_score = sequence_alignment(seq1, seq2, m, n)
println("Alignment Score: $alignment_score")

  6.962225 seconds
Alignment Score: 2


In [10]:
# an example of Danamic programming function
seq1 = "GATTACAAAAGGGGATTACGATTTACGATTAGGGCCTAAATT"
seq2 = "GCATGCUATGGAAACGGCATGCTTAAGCCCCAAATGTGCCGGT"

@time alignment = needleman_wunsch(seq1, seq2, match_score=1, mismatch_penalty=-1, gap_penalty=-1)
print_alignment(alignment[1], alignment[2])
println("Score: ", alignment[3])

  0.000040 seconds (102 allocations: 19.469 KiB)
[96mG[39m  [91m-[39m  [96mA[39m  [91mT[39m  [96mT[39m  [35mA[39m  [96mC[39m  [91mA[39m  [91mA[39m  [35mA[39m  [96mA[39m  [91mG[39m  [35mG[39m  [96mG[39m  [96mG[39m  [96mA[39m  [91mT[39m  [35mT[39m  [96mA[39m  [96mC[39m  [96mG[39m  [91mA[39m  [91mT[39m  [35mT[39m  [35mT[39m  [96mA[39m  [35mC[39m  [96mG[39m  [35mA[39m  [96mT[39m  [96mT[39m  [96mA[39m  [35mG[39m  [96mG[39m  [35mG[39m  [96mC[39m  [96mC[39m  [35mT[39m  [96mA[39m  [96mA[39m  [96mA[39m  [91m-[39m  [91m-[39m  [96mT[39m  [91m-[39m  [91m-[39m  [91m-[39m  [91m-[39m  [91m-[39m  [96mT[39m  
[96mG[39m  [91mC[39m  [96mA[39m  [91m-[39m  [96mT[39m  [35mG[39m  [96mC[39m  [91m-[39m  [91m-[39m  [35mU[39m  [96mA[39m  [91m-[39m  [35mT[39m  [96mG[39m  [96mG[39m  [96mA[39m  [91m-[39m  [35mA[39m  [96mA[39m  [96mC[39m  [96mG[39m  [91m-[39m  [91m-[39m  