# Testing Find_Best_Dataset

This notebook is a sandbox for testing improvements to the `_find_best_dataset` function. Make local changes to the function in this notebook, add more `TestPair`s of sequences (good, bad, mismatched, etc.), and run the tests to check whether the returned alignment is actually the best one.

## Imports and setup

In [1]:
# Allow import of module from directory above
import sys
sys.path.append('..')
from pyllelic import quma

# Other imports
from typing import Tuple, NamedTuple

In [2]:
# Build a throwaway Quma instance from two small FASTA-formatted strings.
# The instance is only used further down for its helper methods
# (_fasta_make, _rev_comp, _align_seq_and_generate_stats); the sequences
# passed here are placeholders.
# NOTE(review): presumably the first argument is the genomic reference and the
# second the query reads -- confirm against quma.Quma's signature.
quma_obj = quma.Quma(">name\nGTGCTGTCGTC\n", ">query\nGTGTCTGTGCCACACA\n")

In [3]:
class TestPair(NamedTuple):
    """A genomic/query sequence pair used as one alignment test case."""

    # Sequence used as the genomic side of the alignment
    gseq: str
    # Sequence used as the query side of the alignment
    qseq: str

## Function to change and test

In [4]:
# TODO: Method to change/fix
def _find_best_dataset(ffres: quma.Result, frres: quma.Result) -> Tuple[quma.Result, int]:
        """Helper to find best data returned.

        Args:
            ffres (Result): quma result from forward alignment
            frres (Result): quma result from reverse alignment

        Returns:
            Tuple[Result, int]: best quma result and direction
        """

        # Find best dataset: FIXME

        fres: quma.Result
        fdir: int
        if ffres.aliLen > frres.aliLen:
            fres = ffres
            fdir = 1
        else:
            fres = frres
            fdir = -1
        # print(f"Forward:\n{ffres}\n\nRevese:\n{frres}\n")
        # if ffres["aliMis"] > frres["aliMis"]:
        #     print("Cond 1")
        #     fres = frres
        #     fdir = -1
        # elif ffres["aliMis"] < frres["aliMis"]:
        #     print("Cond 2")
        #     fres = ffres
        #     fdir = 1
        # elif ffres["perc"] > frres["perc"]:
        #     print("Cond 3")
        #     fres = ffres
        #     fdir = 1
        # elif ffres["perc"] < frres["perc"]:
        #     print("Cond 4")
        #     fres = frres
        #     fdir = -1
        # elif ffres["unconv"] > frres["unconv"]:
        #     print("Cond 5")
        #     fres = frres
        #     fdir = -1
        # elif ffres["unconv"] < frres["unconv"]:
        #     print("Cond 6")
        #     fres = ffres
        #     fdir = 1
        # elif ffres["pconv"] < frres["pconv"]:
        #     print("Cond 7")
        #     fres = frres
        #     fdir = -1
        # elif ffres["pconv"] > frres["pconv"]:
        #     print("Cond 8")
        #     fres = ffres
        #     fdir = 1
        # else:
        #     print("Cond 9")
        #     fres = ffres
        #     fdir = 1

        # print(f"fres: {fres},\nfdir: {fdir}")
        return fres, fdir


## Test Data

In [5]:
# TODO: Extend with a variety of tests with various mismatch conditions, etc

# Each row is (test name, genomic sequence, query sequence).
tests = {
    label: TestPair(gseq=genomic, qseq=query)
    for label, genomic, query in (
        ("good-fwd", "GTGCTGTCGTC", "GTGTCTGTGCCACACA"),
        ("good-rev", "GTGCTGTCGTC", "TGTGTGGCACAGACAC"),
        ("good-fwd-converted", "GCGCGCGTGCTGTCGTC", "GCGCGTGTGTCTGTGCCACACA"),
    )
}

## Function to perform test of modified _find_best_dataset function

In [6]:
def test_sequence_pair(g_sequence, q_sequence, quma_obj):
    qfilepF = quma_obj._fasta_make(q_sequence, "queryF")
    qfilepR = quma_obj._fasta_make(quma_obj._rev_comp(q_sequence), "queryR")
    gfilepF = quma_obj._fasta_make(g_sequence, "genomeF")

    fwd_result = quma_obj._align_seq_and_generate_stats(qfilepF, gfilepF)
    rev_result = quma_obj._align_seq_and_generate_stats(qfilepR, gfilepF)

    # Use local _find_best_dataset from above, not quma_obj method
    result, final_direction = _find_best_dataset(fwd_result, rev_result)

    if final_direction == 1:
        return f"Forward is best\n{repr(result)}"
    return f"Reverse is best\n{repr(result)}"

## Run Tests

In [7]:
# Run every registered pair through the notebook-local picker and print the verdict.
for label, (genomic, query) in tests.items():
    print(f"Testing: {label}")
    print(test_sequence_pair(genomic, query, quma_obj))

Testing: good-fwd
Forward is best
Result(qAli='GTG-CTGTCGTC', gAli='GTGTCTGT-GCC', val='-', perc=83.3, pconv=0, gap=1, menum=0, unconv=0, conv=0, match=10, aliMis=2, aliLen=12)
Testing: good-rev
Reverse is best
Result(qAli='GTG-CTGTCGTC', gAli='GTGTCTGT-GCC', val='-', perc=83.3, pconv=0, gap=1, menum=0, unconv=0, conv=0, match=10, aliMis=2, aliLen=12)
Testing: good-fwd-converted
Forward is best
Result(qAli='GCGCGCGTG-CTGTCGTC', gAli='GCGCGTGTGTCTGT-GCC', val='11', perc=83.3, pconv=100.0, gap=1, menum=2, unconv=0, conv=2, match=15, aliMis=3, aliLen=18)
