In [1]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.84


In [2]:

from Bio.Blast import NCBIWWW, NCBIXML
# Demo code (originally generated with ChatGPT) showing how to use Biopython to connect to the
# NCBI database and run a BLAST search. The constraint is an optional check: it only makes sure
# that the given motif is present in the input sequence before the search is run.
# We can replace this with our own alignment algorithm later on.
def blast_search(sequence, constraint, max_hits=3, preview_width=60):
    """Run an NCBI BLASTP search for ``sequence`` and print the top matches.

    The search is submitted only when ``constraint`` occurs in ``sequence`` and
    the sequence is not blank; otherwise a message is printed and no request is
    made. Everything (progress, results, errors) is reported with ``print``.

    Args:
        sequence: Query sequence. It is passed unchanged to ``NCBIWWW.qblast``
            with the "blastp" program and the "nr" database.
        constraint: Substring that must be present in ``sequence`` for the
            search to run.
        max_hits: Maximum number of alignments printed per BLAST record.
        preview_width: Maximum number of characters shown for each of the
            query / match / subject alignment strings.

    Returns:
        None.
    """
    if constraint not in sequence:
        print(f"The constraint '{constraint}' is not found in the provided sequence. No BLAST search performed.")
        return

    # An empty constraint is contained in every string, so without this guard a
    # blank sequence would still be submitted to the BLAST service.
    if not str(sequence).strip():
        print("The provided sequence is empty. No BLAST search performed.")
        return

    print(f"The constraint '{constraint}' is present in the sequence. Proceeding with BLAST search...\n")

    try:
        print("Performing BLAST search on the input sequence. This may take a few moments...")
        result_handle = NCBIWWW.qblast("blastp", "nr", sequence)
        try:
            print("BLAST search complete. Parsing results...")

            results_found = False
            # NCBIXML.parse reads lazily from the handle, so all records must be
            # consumed before the handle is closed in the ``finally`` below.
            for record in NCBIXML.parse(result_handle):
                if not record.alignments:
                    print("No significant alignments found.")
                    return

                results_found = True
                print("\n--- Top BLAST Matches ---")
                for i, alignment in enumerate(record.alignments[:max_hits], start=1):
                    print(f"\nAlignment {i} Title: {alignment.title}")
                    for hsp in alignment.hsps:
                        print("Score:", hsp.score)
                        print("E-value:", hsp.expect)
                        print("Query Alignment:", hsp.query[:preview_width])
                        print("Match Alignment:", hsp.match[:preview_width])
                        print("Subject Alignment:", hsp.sbjct[:preview_width])
                        print("\n---\n")

            if not results_found:
                print("No significant matches found.")
        finally:
            # Release the result handle on every exit path, including the early
            # return above and any parsing error.
            result_handle.close()
    except Exception as e:
        # Deliberately broad: request and parsing failures are reported to the
        # user instead of interrupting the notebook with a traceback.
        print("Error during BLAST search:", e)

# Strip leading/trailing whitespace from the typed values: a stray space would otherwise make
# the substring check fail or be sent along as part of the query sequence.
sequence_input = input("Enter the sequence to search for (example: 'MVLSPADKTNVKAAW'): ").strip()
motif_input = input("Enter the constraint that should be present (example: 'PADK'): ").strip()

blast_search(sequence_input, motif_input)

Enter the sequence to search for (example: 'MVLSPADKTNVKAAW'): GIVEQCCTSICSLYQLENYCN
Enter the constraint that should be present (example: 'PADK'): CCTS
The constraint 'CCTS' is present in the sequence. Proceeding with BLAST search...

Performing BLAST search on the input sequence. This may take a few moments...
BLAST search complete. Parsing results...

--- Top BLAST Matches ---

Alignment 1 Title: ref|XP_026338704.1| insulin [Ursus arctos] >ref|XP_040495583.1| insulin [Ursus maritimus]
Score: 114.0
E-value: 1.33462e-05
Query Alignment: GIVEQCCTSICSLYQLENYCN
Match Alignment: GIVEQCCTSICSLYQLENYCN
Subject Alignment: GIVEQCCTSICSLYQLENYCN

---


Alignment 2 Title: ref|XP_039081000.1| insulin [Hyaena hyaena]
Score: 114.0
E-value: 1.39052e-05
Query Alignment: GIVEQCCTSICSLYQLENYCN
Match Alignment: GIVEQCCTSICSLYQLENYCN
Subject Alignment: GIVEQCCTSICSLYQLENYCN

---


Alignment 3 Title: ref|XP_045626705.1| insulin [Ursus americanus]
Score: 113.0
E-value: 1.56948e-05
Query Alignment: GIVEQCC