In [1]:
# Install ClustalW
!apt-get -y install clustalw

# Install MAFFT
!apt-get -y install mafft

# Install Muscle
!apt-get -y install muscle


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Suggested packages:
  clustalx seaview
The following NEW packages will be installed:
  clustalw
0 upgraded, 1 newly installed, 0 to remove and 49 not upgraded.
Need to get 275 kB of archives.
After this operation, 818 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 clustalw amd64 2.1+lgpl-7 [275 kB]
Fetched 275 kB in 0s (1,992 kB/s)
Selecting previously unselected package clustalw.
(Reading database ... 123632 files and directories currently installed.)
Preparing to unpack .../clustalw_2.1+lgpl-7_amd64.deb ...
Unpacking clustalw (2.1+lgpl-7) ...
Setting up clustalw (2.1+lgpl-7) ...
Processing triggers for man-db (2.10.2-1) ...
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  fonts-lato libauthen-sasl-perl libclone-perl libdata-dump-perl

In [4]:
!pip install biopython


Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m3.2/3.2 MB[0m [31m123.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m59.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.84


In [10]:
from io import StringIO

from Bio import AlignIO
from Bio.Align.Applications import ClustalwCommandline, MafftCommandline, MuscleCommandline

# Path to the uploaded FASTA file that holds the unaligned input sequences
sequences_file = "/content/msa.fasta"

# 1. Running ClustalW
# ClustalW writes its alignment to a file. Name that file explicitly and read
# the very same path back, so the cell does not depend on the kernel's current
# working directory.
clustalw_outfile = "/content/msa.aln"
clustalw_cline = ClustalwCommandline("clustalw", infile=sequences_file, outfile=clustalw_outfile)
stdout, stderr = clustalw_cline()
alignment_clustalw = AlignIO.read(clustalw_outfile, "clustal")
print("ClustalW Alignment:")
print(alignment_clustalw)

# 2. Running MAFFT
# MAFFT prints the alignment (FASTA format) on stdout, so parse the captured
# stdout. Re-reading the input file would only show the unaligned sequences.
mafft_cline = MafftCommandline("mafft", input=sequences_file)
stdout, stderr = mafft_cline()
alignment_mafft = AlignIO.read(StringIO(stdout), "fasta")
print("MAFFT Alignment:")
print(alignment_mafft)

# 3. Running Muscle
# No output file is requested, so Muscle also emits the FASTA alignment on
# stdout; parse it from there instead of re-reading the input file.
muscle_cline = MuscleCommandline(input=sequences_file)
stdout, stderr = muscle_cline()
alignment_muscle = AlignIO.read(StringIO(stdout), "fasta")
print("Muscle Alignment:")
print(alignment_muscle)


ClustalW Alignment:
Alignment with 3 rows and 42 columns
MHTIQTLKVSFYNQTGYSFPKTIRNKYLRGDNLRVAYFVEKA Human_Protein_X
MHTIQTCKVSFYNQTGYSFPKTIRNKYLRGDNLRVAYFVEKA Mouse_Protein_X
MHTIQTCKVSFYNQTGYSFPKTIRNKYLKGNNRRVAYFVEKA ZebraFish_Protein_X
MAFFT Alignment:
Alignment with 3 rows and 42 columns
MHTIQTLKVSFYNQTGYSFPKTIRNKYLRGDNLRVAYFVEKA Human_Protein_X
MHTIQTCKVSFYNQTGYSFPKTIRNKYLRGDNLRVAYFVEKA Mouse_Protein_X
MHTIQTCKVSFYNQTGYSFPKTIRNKYLKGNNRRVAYFVEKA ZebraFish_Protein_X
Muscle Alignment:
Alignment with 3 rows and 42 columns
MHTIQTLKVSFYNQTGYSFPKTIRNKYLRGDNLRVAYFVEKA Human_Protein_X
MHTIQTCKVSFYNQTGYSFPKTIRNKYLRGDNLRVAYFVEKA Mouse_Protein_X
MHTIQTCKVSFYNQTGYSFPKTIRNKYLKGNNRRVAYFVEKA ZebraFish_Protein_X


Comparing the accuracy, time and memory usage.

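To compare the speed and memory usage of these methods, we measure how long each tool takes to run and how much memory it consumes. Accuracy is not captured by these measurements; it has to be judged from the alignments themselves, for example by comparing them with a trusted reference alignment.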



In [11]:
import time

def time_call(func):
    """Return the elapsed seconds for a single call to ``func()``.

    Uses time.perf_counter(), the clock intended for measuring short
    intervals; time.time() is the wall clock and can be adjusted while the
    measurement is running.
    """
    start_time = time.perf_counter()
    func()
    return time.perf_counter() - start_time


# One run per tool. Each call launches the external aligner, so the figure
# includes process start-up cost; a single run on a tiny input is only a
# rough indication.
clustalw_time = time_call(clustalw_cline)
mafft_time = time_call(mafft_cline)
muscle_time = time_call(muscle_cline)

print(f"ClustalW Time: {clustalw_time} seconds")
print(f"MAFFT Time: {mafft_time} seconds")
print(f"Muscle Time: {muscle_time} seconds")


ClustalW Time: 0.007634401321411133 seconds
MAFFT Time: 0.14915752410888672 seconds
Muscle Time: 0.0061724185943603516 seconds


In [None]:
!pip install memory-profiler


In [None]:
from memory_profiler import memory_usage

def run_clustalw():
    """Run the ClustalW command line once."""
    clustalw_cline()


def run_muscle():
    """Run the Muscle command line once."""
    muscle_cline()


def run_mafft():
    """Run the MAFFT command line once."""
    mafft_cline()


# The aligners run as child processes of the notebook kernel. By default
# memory_usage() samples only the kernel process itself, so child processes
# must be included explicitly. The sampling interval is shortened because the
# tools finish within milliseconds on this input.
# NOTE(review): the reported value is the kernel plus its children, and a very
# short-lived child can still fall between two samples - treat these numbers as
# a rough comparison, not an exact per-tool peak.
SAMPLE_INTERVAL = 0.01  # seconds between memory samples

mem_usage_clustalw = memory_usage(run_clustalw, interval=SAMPLE_INTERVAL, include_children=True)
mem_usage_muscle = memory_usage(run_muscle, interval=SAMPLE_INTERVAL, include_children=True)
mem_usage_mafft = memory_usage(run_mafft, interval=SAMPLE_INTERVAL, include_children=True)

print(f"ClustalW memory usage: {max(mem_usage_clustalw)} MiB")
print(f"Muscle memory usage: {max(mem_usage_muscle)} MiB")
print(f"MAFFT  memory usage: {max(mem_usage_mafft)} MiB")
