# Scoring benchmark


## Params

In [1]:
# Fractions of SEQUENCE_LENGTH that get mutated, one benchmark case per entry.
MUTATIONS = [0.4, 0.2, 0.1, 0.05, 0.01]
# Fractions of SEQUENCE_LENGTH that get removed, one benchmark case per entry.
REMOVALS = [0.4, 0.2, 0.1, 0.05, 0.01]
# Number of bases in the randomly generated reference sequence.
SEQUENCE_LENGTH = 1_000

## Imports

In [2]:
import os
import random
import sys

import numpy as np

# Make the repository root (the parent of the current working directory)
# importable so the project-local packages can be imported from this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append so re-running this cell does not pile up duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)
from src.mutations import mutate_seq, random_base

In [3]:
!pip install numba



## Base sequences

In [4]:
# Seed the global RNGs so Restart & Run All regenerates the same sequences.
# np.random is used directly below; random_base and mutate_seq come from
# src.mutations and their RNG is not visible here, so the stdlib generator is
# seeded as well -- TODO confirm which generator they actually draw from.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Reference sequence: SEQUENCE_LENGTH elements, each produced by random_base().
base_sequence = [random_base() for _ in range(SEQUENCE_LENGTH)]

# One variant per entry of MUTATIONS, asking mutate_seq for
# int(SEQUENCE_LENGTH * rate) mutations and keeping element [0] of its result.
# NOTE(review): the recorded scores for these variants sit near -7000 at every
# mutation rate, while removing 1% of the positions scores 970. That gap
# suggests the value built here may not be a SEQUENCE_LENGTH-long list of
# bases -- verify what mutate_seq returns and whether `[0]` selects the sequence.
mutated_sequences = []
for mutation_rate in MUTATIONS:
    amount_mutations = int(SEQUENCE_LENGTH * mutation_rate)
    mutated_sequences.append(list(mutate_seq(base_sequence, amount_mutations)[0]))

# One variant per entry of REMOVALS: a copy of the reference sequence with
# int(SEQUENCE_LENGTH * rate) randomly chosen positions deleted one at a time.
shortened_sequences = []
for removal_rate in REMOVALS:
    amount_removals = int(SEQUENCE_LENGTH * removal_rate)
    shortened_seq = base_sequence.copy()
    for _ in range(amount_removals):
        # The upper bound shrinks with the list, so the index is always valid.
        pos_to_remove = np.random.randint(0, len(shortened_seq))
        shortened_seq.pop(pos_to_remove)
    shortened_sequences.append(shortened_seq)



In [5]:
# Sanity check: show the container type of each generated collection
# (and of one mutated sequence) before handing them to the algorithms.
for inspected in (
    base_sequence,
    mutated_sequences,
    mutated_sequences[0],
    shortened_sequences,
):
    print(type(inspected))

<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>


## Import algorithms

In [6]:
from tests.algorithms_all import needleman_wunsch, needleman_wunsch_diag_numpy, needleman_wunsch_numba, needleman_wunsch_diagbounded_numpy

In [7]:
# Implementations under comparison; each is called as algorithm(base, other)
# and element [0] of its return value is recorded as the score.
algorithims = [
    needleman_wunsch,
    needleman_wunsch_diag_numpy,
    needleman_wunsch_numba,
    needleman_wunsch_diagbounded_numpy,
]

# Maps algorithm name -> list of scores, mutation cases first, then removal cases.
results = {}

for algorithm in algorithims:
    for banner, sequences in (
        (f"Benchmarking {algorithm.__name__} with mutations:", mutated_sequences),
        (f"Benchmarking {algorithm.__name__} with removals:", shortened_sequences),
    ):
        print(banner)
        for seq in sequences:
            score = algorithm(base_sequence, seq)
            print(f"Score: {score[0]}")
            results.setdefault(algorithm.__name__, []).append(score[0])

Benchmarking needleman_wunsch with mutations:
Score: -7090
Score: -7048
Score: -7036
Score: -7014
Score: -6994
Benchmarking needleman_wunsch with removals:
Score: -200
Score: 400
Score: 700
Score: 850
Score: 970
Benchmarking needleman_wunsch_diag_numpy with mutations:
Score: -7090.0
Score: -7048.0
Score: -7036.0
Score: -7014.0
Score: -6994.0
Benchmarking needleman_wunsch_diag_numpy with removals:
Score: -200.0
Score: 400.0
Score: 700.0
Score: 850.0
Score: 970.0
Benchmarking needleman_wunsch_numba with mutations:
Score: -7090
Score: -7048
Score: -7036
Score: -7014
Score: -6994
Benchmarking needleman_wunsch_numba with removals:
Score: -200
Score: 400
Score: 700
Score: 850
Score: 970
Benchmarking needleman_wunsch_diagbounded_numpy with mutations:
Score: -7683.0
Score: -7687.0
Score: -7697.0
Score: -7681.0
Score: -7667.0
Benchmarking needleman_wunsch_diagbounded_numpy with removals:
Score: -200.0
Score: 400.0
Score: 700.0
Score: 850.0
Score: 970.0


## Compare results

In [8]:
!pip install pandas



In [9]:
import pandas as pd

# One row per algorithm, one column per benchmark case, in recording order.
df = pd.DataFrame.from_dict(results, orient='index')

# Column labels follow the recording order: mutation cases, then removal cases.
mut_cols = [f"mut-{v}" for v in MUTATIONS]
rem_cols = [f"rem-{v}" for v in REMOVALS]
new_cols = mut_cols + rem_cols

# If the number of labels and columns disagree, trim surplus labels or pad
# the missing ones with generic "case_N" names so the assignment cannot fail.
n_columns = df.shape[1]
if len(new_cols) > n_columns:
    new_cols = new_cols[:n_columns]
elif len(new_cols) < n_columns:
    new_cols = new_cols + [f"case_{i+1}" for i in range(len(new_cols), n_columns)]

df.columns = new_cols

# Render the comparison table.
display(df)


Unnamed: 0,mut-0.4,mut-0.2,mut-0.1,mut-0.05,mut-0.01,rem-0.4,rem-0.2,rem-0.1,rem-0.05,rem-0.01
needleman_wunsch,-7090.0,-7048.0,-7036.0,-7014.0,-6994.0,-200.0,400.0,700.0,850.0,970.0
needleman_wunsch_diag_numpy,-7090.0,-7048.0,-7036.0,-7014.0,-6994.0,-200.0,400.0,700.0,850.0,970.0
needleman_wunsch_numba,-7090.0,-7048.0,-7036.0,-7014.0,-6994.0,-200.0,400.0,700.0,850.0,970.0
needleman_wunsch_diagbounded_numpy,-7683.0,-7687.0,-7697.0,-7681.0,-7667.0,-200.0,400.0,700.0,850.0,970.0
