In [1]:
%%writefile preprocessing.h

#pragma once
#ifndef PREPROCESS_H
#define PREPROCESS_H

/*
 * Map one nucleotide character to its 4-bit code.
 *
 *   A/a -> 0x1, C/c -> 0x2, G/g -> 0x3, T/t -> 0x4, N/n -> 0x5,
 *   anything else -> 0x0.
 *
 * Hex constants are used instead of 0b... literals, which are a compiler
 * extension before C23.
 */
static inline unsigned char preprocess_encode_base(char base)
{
    switch (base) {
    case 'A': case 'a': return 0x1;
    case 'C': case 'c': return 0x2;
    case 'G': case 'g': return 0x3;
    case 'T': case 't': return 0x4;
    case 'N': case 'n': return 0x5;
    default:            return 0x0;
    }
}

/*
 * Pack RefSeq and ReadSeq in place, two bases per byte.
 *
 * For every pair of input characters (i, i + 1) the encoded first base goes
 * into the high nibble and the encoded second base into the low nibble of
 * output byte i / 2. When ReadLength is odd, the low nibble of the last
 * output byte is set to 0xF as padding. Only the first (ReadLength + 1) / 2
 * bytes of each buffer are rewritten; the remaining bytes are left untouched.
 *
 * Parameters:
 *   RefSeq     - reference sequence, at least ReadLength characters; overwritten.
 *   ReadSeq    - read sequence, at least ReadLength characters; overwritten.
 *   ReadLength - number of bases in each sequence.
 *
 * The function is `static inline` because it is defined in a header: with
 * external linkage, including this header from more than one translation
 * unit would produce duplicate-definition link errors.
 * Null pointers and non-positive lengths are ignored.
 */
static inline void preprocess(char* RefSeq, char* ReadSeq, int ReadLength)
{
    int i, index = 0;

    if (!RefSeq || !ReadSeq || ReadLength <= 0) {
        return;
    }

    for (i = 0; i < ReadLength; i += 2) {
        /* Read both inputs of the pair before writing: the output byte at
         * `index` (= i / 2) may alias input position i when i == 0. */
        unsigned char base1_r = preprocess_encode_base(ReadSeq[i]);
        unsigned char base1_f = preprocess_encode_base(RefSeq[i]);

        unsigned char base2_r = 0x0F; /* padding nibble for odd length */
        unsigned char base2_f = 0x0F; /* padding nibble for odd length */

        if (i + 1 < ReadLength) {
            base2_r = preprocess_encode_base(ReadSeq[i + 1]);
            base2_f = preprocess_encode_base(RefSeq[i + 1]);
        }

        /* Pack two 4-bit bases into one byte */
        ReadSeq[index] = (char)((base1_r << 4) | base2_r);
        RefSeq[index] = (char)((base1_f << 4) | base2_f);
        index++;
    }
}

#endif // PREPROCESS_H

Overwriting preprocessing.h


In [2]:
%%writefile checkpointfull.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/time.h>
#include "preprocessing.h"

// External assembly function with new signature
extern uint64_t SneakySnake(uint64_t ReadLength, uint8_t* RefSeq, 
                             uint8_t* ReadSeq, uint64_t EditThreshold,
                             uint64_t IterationNo);

// External counters
extern uint64_t global_counter;
extern uint64_t checkpoint_base;
extern uint64_t processed_counter;
extern uint64_t best_diagonal_score;
extern uint64_t main_diagonal_length;
extern uint64_t safety_counter;
extern uint64_t best_edit_distance;
extern uint64_t current_batch_edits;

// Count actual mismatches
/*
 * Return the number of positions in [0, length) at which seq1 and seq2 hold
 * different characters. A non-positive length yields 0.
 */
int count_mismatches(const char* seq1, const char* seq2, int length) {
    const char *lhs = seq1;
    const char *rhs = seq2;
    int differing = 0;

    for (int remaining = length; remaining > 0; --remaining) {
        differing += (*lhs != *rhs) ? 1 : 0;
        ++lhs;
        ++rhs;
    }
    return differing;
}

/*
 * Optional per-sequence diagnostic dump.
 *
 * By default this function prints nothing (the diagnostics used to be
 * commented out, leaving only an unused local behind). Build with
 * -DSNEAKY_TRACE to re-enable the trace, which reports the sequence pair,
 * the actual mismatch count versus the edit distance reported by the
 * assembly routine, the exported checkpoint counters, and the two
 * consistency checks applied by process_dataset_debug().
 *
 * Parameters:
 *   read_seq, ref_seq  - the original (unpacked) sequences.
 *   result             - value returned by SneakySnake (1 = accepted).
 *   seq_num            - 1-based index of the pair within the run.
 *   actual_mismatches  - position-wise mismatch count of the pair.
 *   edit_threshold     - threshold the pair was filtered with.
 */
void print_detailed_trace(const char* read_seq, const char* ref_seq,
                         uint64_t result, int seq_num, int actual_mismatches,
                         int edit_threshold) {
#ifdef SNEAKY_TRACE
    const unsigned long long read_len = (unsigned long long)strlen(read_seq);
    const int perfect = (actual_mismatches == 0);

    printf("\n========== Sequence #%d ==========\n", seq_num);
    printf("Read: %.50s...\n", read_seq);
    printf("Ref:  %.50s...\n", ref_seq);
    printf("Length: %llu\n", read_len);

    printf("\n=== ACTUAL vs REPORTED ===\n");
    printf("Actual mismatches: %d\n", actual_mismatches);
    printf("Actual perfect match: %s\n", perfect ? "YES" : "NO");
    printf("Reported edit distance: %llu\n",
           (unsigned long long)best_edit_distance);
    printf("Match: %s\n",
           ((uint64_t)actual_mismatches == best_edit_distance) ? "✓ CORRECT" : "❌ WRONG!");

    printf("\nCheckpoint State:\n");
    printf("  checkpoint_base: %llu\n", (unsigned long long)checkpoint_base);
    printf("  best_diagonal_score: %llu\n", (unsigned long long)best_diagonal_score);
    printf("  current_batch_edits: %llu\n", (unsigned long long)current_batch_edits);
    printf("  best_edit_distance: %llu\n", (unsigned long long)best_edit_distance);
    printf("  processed_counter: %llu (checkpoints passed)\n",
           (unsigned long long)processed_counter);
    printf("  main_diagonal_length: %llu\n", (unsigned long long)main_diagonal_length);
    printf("  global_counter: %llu\n", (unsigned long long)global_counter);
    printf("  safety_counter: %llu\n", (unsigned long long)safety_counter);

    printf("\nResult: %s\n", result ? "✓ ACCEPT" : "✗ REJECT");

    if (best_diagonal_score < read_len) {
        printf("  ⚠ Did not reach end (stopped at %llu/%llu)\n",
               (unsigned long long)best_diagonal_score, read_len);
    }

    /* Same two consistency checks the caller uses to count bugs. */
    if (result == 1 && actual_mismatches > edit_threshold) {
        printf("\n❌ BUG DETECTED: Accepted sequence with %d mismatches (threshold: %d)!\n",
               actual_mismatches, edit_threshold);
    }
    if (result == 1 && best_edit_distance != (uint64_t)actual_mismatches) {
        printf("\n❌ BUG: Edit distance mismatch! Reported %llu but actual is %d\n",
               (unsigned long long)best_edit_distance, actual_mismatches);
    }
#else
    /* Tracing disabled: intentionally a no-op. */
    (void)read_seq;
    (void)ref_seq;
    (void)result;
    (void)seq_num;
    (void)actual_mismatches;
    (void)edit_threshold;
#endif
}

/*
 * Run the SneakySnake filter over the first `limit` valid sequence pairs of
 * a dataset file and print a summary.
 *
 * File format: one pair per line, "<read><TAB or space><reference>". Empty
 * lines and lines starting with '#' are skipped, as are pairs whose two
 * sequences differ in length, are empty, or are longer than 128 bases.
 *
 * For each pair the position-wise mismatch count is computed on the original
 * text, both sequences are packed with preprocess(), the exported assembly
 * counters are reset, and SneakySnake() is called. An accepted pair is
 * counted as a bug when its mismatch count exceeds EditThreshold and,
 * independently, when the reported best_edit_distance differs from the
 * mismatch count (so a single pair can add 2 to the bug count).
 *
 * Parameters:
 *   filename      - dataset path.
 *   EditThreshold - threshold passed to SneakySnake.
 *   IterationNo   - main-loop iteration cap passed to SneakySnake.
 *   limit         - maximum number of valid pairs to test.
 */
void process_dataset_debug(const char* filename, int EditThreshold, 
                          int IterationNo, int limit) {
    /* MAX_READ_LEN: longest pair accepted by this harness.
     * VECTOR_PAD:   extra bytes per buffer, because the assembly routine
     *               issues 64-byte vector loads starting inside the data. */
    enum { MAX_READ_LEN = 128, VECTOR_PAD = 64 };

    FILE *file = fopen(filename, "r");
    if (!file) {
        printf("Error: Cannot open file %s\n", filename);
        return;
    }
    
    char line[512];
    int total_pairs = 0;
    int accepted = 0;
    int rejected = 0;
    int bug_count = 0;
    
    printf("========================================\n");
    printf("DEBUG MODE: Checking first %d sequences\n", limit);
    printf("Edit Threshold: %d\n", EditThreshold);
    printf("Max Iterations: %d\n", IterationNo);
    printf("========================================\n");
    
    /* The limit is checked before a pair is counted, so "Sequences tested"
     * never exceeds `limit` (it used to report limit + 1 on early exit). */
    while (total_pairs < limit && fgets(line, sizeof(line), file)) {
        line[strcspn(line, "\r\n")] = 0;
        
        if (line[0] == '\0' || line[0] == '#') {
            continue;
        }
        
        /* Split at the first tab; fall back to the first space. */
        char *separator = strchr(line, '\t');
        if (!separator) {
            separator = strchr(line, ' ');
        }
        if (!separator) continue;
        *separator = '\0';
        
        const char *read_seq = line;
        const char *ref_seq = separator + 1;
        
        size_t read_len = strlen(read_seq);
        if (read_len == 0 || read_len > MAX_READ_LEN ||
            strlen(ref_seq) != read_len) {
            continue;
        }
        int len = (int)read_len;
        
        /* Zero-filled working copies: the bytes past the sequence text are
         * read by the vector loads and must not be uninitialised heap data.
         * Bytes between the packed length and `len` still hold the original
         * ASCII text after preprocess(). */
        char* read_copy = calloc(1, read_len + VECTOR_PAD);
        char* ref_copy = calloc(1, read_len + VECTOR_PAD);
        if (!read_copy || !ref_copy) {
            printf("Error: Out of memory while processing %s\n", filename);
            free(read_copy);
            free(ref_copy);
            break;
        }
        memcpy(read_copy, read_seq, read_len);
        memcpy(ref_copy, ref_seq, read_len);
        
        total_pairs++;
        
        // Count actual mismatches BEFORE preprocessing
        int actual_mismatches = count_mismatches(read_seq, ref_seq, len);
        
        // Preprocess (packs both copies in place)
        preprocess(ref_copy, read_copy, len);
        
        // Reset counters
        global_counter = 0;
        checkpoint_base = 0;
        processed_counter = 0;
        best_diagonal_score = 0;
        main_diagonal_length = 0;
        safety_counter = 0;
        best_edit_distance = 999;
        current_batch_edits = 0;
        
        // SneakySnake(ReadLength, RefSeq, ReadSeq, EditThreshold, IterationNo)
        uint64_t result = SneakySnake(len,
                                      (uint8_t*)ref_copy, 
                                      (uint8_t*)read_copy,
                                      EditThreshold,
                                      IterationNo);
        
        if (result == 1) {
            accepted++;
        } else {
            rejected++;
        }
        
        // Consistency checks on accepted pairs; each failed check counts once.
        int has_bug = 0;
        if (result == 1 && actual_mismatches > EditThreshold) {
            has_bug = 1;
            bug_count++;
        }
        if (result == 1 && best_edit_distance != (uint64_t)actual_mismatches) {
            has_bug = 1;
            bug_count++;
        }
        
        // Show ALL accepted or sequences with bugs. The line buffer still
        // holds the unmodified text, so no extra copies are needed.
        if (result == 1 || has_bug) {
            print_detailed_trace(read_seq, ref_seq, result, total_pairs, 
                               actual_mismatches, EditThreshold);
        }
        
        free(read_copy);
        free(ref_copy);
    }
    
    fclose(file);
    
    // Summary
    printf("\n\n========================================\n");
    printf("DEBUG SUMMARY\n");
    printf("========================================\n");
    printf("Sequences tested: %d\n", total_pairs);
    printf("Accepted: %d\n", accepted);
    printf("Rejected: %d\n", rejected);
    printf("Bugs detected: %d\n", bug_count);
    
    if (bug_count > 0) {
        printf("\n❌ BUGS FOUND! Check output above.\n");
    } else {
        printf("\n✓ No bugs detected in tested sequences.\n");
    }
}

/*
 * Command-line entry point.
 *
 *   argv[1]  dataset file (required)
 *   argv[2]  edit threshold   (optional, default 0)
 *   argv[3]  iteration cap    (optional, default 200)
 *   argv[4]  pair limit       (optional, default 100)
 *
 * Returns 1 after printing usage when no dataset is given, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    /* Optional numeric settings, in command-line order:
     * edit threshold, iteration cap, pair limit. */
    int settings[3] = { 0, 200, 100 };

    if (argc < 2) {
        printf("Usage: %s <dataset_file> [edit_threshold] [iteration_no] [limit]\n", argv[0]);
        printf("Example: %s dataset.txt 0 200 100\n", argv[0]);
        printf("  Tests first 100 sequences with threshold=0, max 200 iterations\n");
        return 1;
    }

    /* Override each default for which an argument was supplied. */
    for (int slot = 0; slot < 3 && slot + 2 < argc; slot++) {
        settings[slot] = atoi(argv[slot + 2]);
    }

    process_dataset_debug(argv[1], settings[0], settings[1], settings[2]);

    return 0;
}

Overwriting checkpointfull.c


In [3]:
%%writefile checkpointavxfull.asm

; SneakySnake.asm with IterationNo parameter
; New signature: SneakySnake(ReadLength, RefSeq, ReadSeq, EditThreshold, IterationNo)

default rel
bits 64

; Declare that this object does not need an executable stack. Without this
; note GNU ld falls back to an executable stack for the whole program (and
; recent versions warn about it).
section .note.GNU-stack noalloc noexec nowrite progbits

section .data
; Diagnostic/state counters, exported so the C harness can reset and read
; them. SneakySnake re-initialises all of them on entry.
global global_counter
global checkpoint_base
global processed_counter
global best_diagonal_score
global main_diagonal_length
global safety_counter
global best_edit_distance
global current_batch_edits

align 8
global_counter        dq 0      ; bases to advance for the current window
checkpoint_base       dq 0      ; base index of the current window start
processed_counter     dq 0      ; number of position updates performed
best_diagonal_score   dq 0      ; furthest base index recorded so far
main_diagonal_length  dq 0      ; matching run length found on the main diagonal
safety_counter        dq 0      ; main-loop iterations, compared with IterationNo
best_edit_distance    dq 999    ; 999 = no candidate recorded yet
current_batch_edits   dq 0      ; edits committed at the last position update

section .rodata
align 64
four_mask:  times 64 db 0x0F    ; selects the low nibble of every byte
f0_mask:    times 64 db 0xF0    ; selects the high nibble of every byte

section .text
global SneakySnake

;-----------------------------------------------------------------------
; uint64_t SneakySnake(uint64_t ReadLength, uint8_t *RefSeq,
;                      uint8_t *ReadSeq, uint64_t EditThreshold,
;                      uint64_t IterationNo)
;
; ABI:   System V AMD64 (NASM / Intel syntax).
; In:    rdi = ReadLength     number of bases
;        rsi = RefSeq         packed reference, two 4-bit bases per byte
;        rdx = ReadSeq        packed read, same packing
;        rcx = EditThreshold  maximum number of edits tolerated
;        r8  = IterationNo    cap on main-loop iterations
; Out:   rax = 1 when the pair is accepted, 0 when it is rejected or the
;        iteration cap is exceeded.
; Side effects: rewrites every exported counter (global_counter,
;        checkpoint_base, processed_counter, best_diagonal_score,
;        main_diagonal_length, safety_counter, best_edit_distance,
;        current_batch_edits).
; Memory: every vector load reads 64 bytes starting at the current byte
;        offset, so both buffers must be readable for 64 bytes past the
;        last byte offset visited (offsets are < ReadLength / 2).
; Clobbers: rax, rcx, rdx, rsi, rdi, r8-r11, zmm0-zmm8, zmm10, zmm11,
;        k3-k7, flags. rbx and r12-r15 are saved and restored.
;
; Register roles inside the routine:
;        r13 = ReadLength            r12 = RefSeq       r11 = ReadSeq
;        r10 = EditThreshold         r15 = ReadLength / 2 (whole bytes)
;        r9  = byte offset of the current window
;        r14 = edit counter (scan loop) / edit budget (edit loop)
;        rdi = edit index inside .edit_loop
;        rbx = byte shift (edit index / 2), r8d = nibble parity (index & 1)
;
; Changes relative to the previous revision:
;   * IterationNo used to be stored at [rbp-8], the slot holding the
;     caller's saved rbx, so the epilogue returned with rbx = IterationNo.
;     It now lives in the local area at [rbp - ITERATION_SLOT].
;   * rbx / r8d are recomputed at .left_diagonal because the right-diagonal
;     code uses both as scratch.
;   * vzeroupper is executed before returning to the (non-AVX) caller.
;
; NOTE(review): zmm10/zmm11 are copies of the READ nibble vectors and the
;   shifted vectors are also built from ReadSeq, so the off-diagonals
;   compare the read against a shifted copy of itself. Confirm whether the
;   reference nibbles (zmm5/zmm3) were intended.
; NOTE(review): .right_first_odd_data and .left_odd_shift fill only the
;   first 63 bytes of the 64-byte stack buffer before it is loaded; the
;   last byte is whatever was on the stack. Confirm the intended padding.
;-----------------------------------------------------------------------

EDIT_DISTANCE_UNSET equ 999     ; best_edit_distance before any candidate
SEGMENT_BASES       equ 128     ; bases covered by one 64-byte vector
SCAN_EDIT_CAP       equ 200     ; upper bound for r14 in the scan loop
VECTOR_BYTES        equ 64      ; size of a zmm register / stack buffer
ITERATION_SLOT      equ 48      ; [rbp-48]: IterationNo (inside the locals)

SneakySnake:
    push    rbp
    mov     rbp, rsp
    push    rbx                   ; saved at [rbp-8]
    push    r12                   ; saved at [rbp-16]
    push    r13                   ; saved at [rbp-24]
    push    r14                   ; saved at [rbp-32]
    push    r15                   ; saved at [rbp-40]
    sub     rsp, 32               ; locals occupy [rbp-72, rbp-41]

    mov     r13, rdi              ; ReadLength
    mov     r12, rsi              ; RefSeq
    mov     r11, rdx              ; ReadSeq
    mov     r10, rcx              ; EditThreshold
    mov     [rbp - ITERATION_SLOT], r8  ; IterationNo (must not alias a saved register)

    ; Reset all exported state.
    xor     rax, rax
    mov     [checkpoint_base], rax
    mov     [processed_counter], rax
    mov     [global_counter], rax
    mov     [best_diagonal_score], rax
    mov     [main_diagonal_length], rax
    mov     [safety_counter], rax
    mov     [current_batch_edits], rax

    mov     rax, EDIT_DISTANCE_UNSET
    mov     [best_edit_distance], rax

    mov     r15, r13
    shr     r15, 1                ; r15 = number of whole packed bytes

    xor     r9, r9                ; start at byte offset 0

.mainloop:
    ; Give up once the iteration cap is exceeded.
    inc     qword [safety_counter]
    mov     rax, [safety_counter]
    mov     rbx, [rbp - ITERATION_SLOT]
    cmp     rax, rbx
    jg      .safety_exit

    ; All whole bytes consumed -> final decision.
    cmp     r9, r15
    jae     .handle_tail

    xor     rax, rax
    mov     [main_diagonal_length], rax

    vmovdqu8 zmm0, [r11 + r9]     ; 64 bytes of read
    vmovdqu8 zmm1, [r12 + r9]     ; 64 bytes of reference

    ; ========== MAIN DIAGONAL ==========
    ; k4 = bytes whose high nibbles differ, k3 = bytes whose low nibbles differ.
    vpandd   zmm4, zmm0, [f0_mask]
    vpandd   zmm5, zmm1, [f0_mask]
    vpcmpeqb k4, zmm4, zmm5

    vpandd   zmm2, zmm0, [four_mask]
    vpandd   zmm3, zmm1, [four_mask]
    vpcmpeqb k3, zmm2, zmm3

    knotq    k4, k4
    knotq    k3, k3

    korq     k5, k4, k3
    ktestq   k5, k5
    jz       .all_matched_main

    ; First mismatching base within the window:
    ; high nibble of byte b is base 2b, low nibble is base 2b + 1.
    kmovq    rbx, k4
    tzcnt    rbx, rbx
    shl      rbx, 1

    kmovq    r8, k3
    tzcnt    r8, r8
    lea      r8, [r8*2+1]

    cmp      rbx, r8
    cmovnb   rbx, r8              ; rbx = min(rbx, r8)

    mov      [global_counter], rbx
    mov      [main_diagonal_length], rbx

    ; rax = absolute base index reached, clamped to ReadLength.
    mov      rax, r9
    shl      rax, 1
    add      rax, rbx

    cmp      rax, r13
    jbe      .main_pos_ok
    mov      rax, r13
.main_pos_ok:

    ; Record it when it reaches further, or equally far with fewer edits.
    mov      rcx, [best_diagonal_score]
    cmp      rax, rcx
    ja       .main_better
    jb       .skip_main_update

    mov      rcx, [best_edit_distance]
    mov      rdx, [current_batch_edits]
    cmp      rdx, rcx
    jae      .skip_main_update

.main_better:
    mov      [best_diagonal_score], rax
    mov      rdx, [current_batch_edits]
    mov      [best_edit_distance], rdx

.skip_main_update:
    jmp      .check_edits

.all_matched_main:
    ; Whole window matched: advance by the bases that remain, capped at one
    ; vector's worth.
    mov      rax, r9
    shl      rax, 1
    mov      rcx, r13
    sub      rcx, rax             ; bases left in the read

    mov      rbx, r15
    sub      rbx, r9
    shl      rbx, 1               ; bases left in whole bytes

    cmp      rcx, rbx
    cmova    rcx, rbx
    cmp      rcx, SEGMENT_BASES
    jbe      .cap_main
    mov      rcx, SEGMENT_BASES
.cap_main:

    test     rcx, rcx
    jz       .min_advance

    mov      [global_counter], rcx
    mov      [main_diagonal_length], rcx

    mov      rax, r9
    shl      rax, 1
    add      rax, rcx

    cmp      rax, r13
    jbe      .pos_ok_main
    mov      rax, r13
.pos_ok_main:

    mov      rbx, [best_diagonal_score]
    cmp      rax, rbx
    ja       .main_all_better
    jb       .skip_main_all

    mov      rbx, [best_edit_distance]
    mov      rdx, [current_batch_edits]
    cmp      rdx, rbx
    jae      .skip_main_all

.main_all_better:
    mov      [best_diagonal_score], rax
    mov      rdx, [current_batch_edits]
    mov      [best_edit_distance], rdx

.skip_main_all:
    jmp      .update_position

.check_edits:
    ; Scalar scan along the main diagonal past the first mismatch.
    ; r14 starts at 1 (the mismatch that ended the vector run);
    ; rsi = base index just after that mismatch.
    mov      r14, 1

    mov      rax, [main_diagonal_length]
    mov      rsi, r9
    shl      rsi, 1
    add      rsi, rax
    inc      rsi

.scan_loop:
    cmp      rsi, r13
    jae      .scan_complete

    cmp      r14, SCAN_EDIT_CAP
    jge      .scan_stopped

    mov      rdi, rsi
    shr      rdi, 1               ; byte holding base rsi
    cmp      rdi, r15
    jae      .scan_stopped

    movzx    eax, byte [r11 + rdi]
    movzx    ecx, byte [r12 + rdi]

    test     sil, 1
    jz       .scan_even

    and      al, 0x0F             ; odd base -> low nibble
    and      cl, 0x0F
    jmp      .scan_compare

.scan_even:
    shr      al, 4                ; even base -> high nibble
    shr      cl, 4

.scan_compare:
    cmp      al, cl
    je       .scan_match

    inc      r14

    ; Stop as soon as committed + scanned edits exceed the threshold.
    mov      rax, [current_batch_edits]
    add      rax, r14
    cmp      rax, r10
    jg       .scan_stopped

.scan_match:
    inc      rsi
    jmp      .scan_loop

.scan_complete:
    mov      rax, [current_batch_edits]
    add      rax, r14

    cmp      rax, r10
    jg       .scan_stopped

    mov      [best_diagonal_score], r13
    mov      [best_edit_distance], rax

    ; Redundant with the comparison just above; kept as a guard.
    cmp      rax, r10
    jg       .rejected

    mov      rcx, r13
    mov      rdx, r9
    shl      rdx, 1
    sub      rcx, rdx
    mov      [global_counter], rcx

    jmp      .try_standard_diagonals

.scan_stopped:
    cmp      rsi, r13
    jbe      .stopped_pos_ok
    mov      rsi, r13
.stopped_pos_ok:

    mov      rax, [current_batch_edits]
    add      rax, r14

    cmp      rax, r10
    jg       .rejected

    mov      [best_edit_distance], rax

    mov      rcx, [best_diagonal_score]
    cmp      rsi, rcx
    ja       .stopped_better
    jmp      .try_standard_diagonals

.stopped_better:
    mov      [best_diagonal_score], rsi

    mov      rcx, rsi
    mov      rdx, r9
    shl      rdx, 1
    sub      rcx, rdx
    cmp      rcx, 0
    jge      .stopped_gc_ok
    xor      rcx, rcx
.stopped_gc_ok:
    mov      [global_counter], rcx

.try_standard_diagonals:
    ; Nothing to do when the end was already reached or no edit budget is left.
    mov      rax, [best_diagonal_score]
    cmp      rax, r13
    jae      .update_position

    mov      rdi, [current_batch_edits]
    mov      rax, r10
    sub      rax, rdi
    jle      .update_position

    mov      r14, rax             ; remaining edit budget
    vmovdqa64 zmm10, zmm4         ; high nibbles compared against each diagonal
    vmovdqa64 zmm11, zmm2         ; low nibbles compared against each diagonal

    xor      rdi, rdi

.edit_loop:
    ; rdi = edit index (shift in nibbles), 1 .. r14.
    inc      rdi
    cmp      rdi, r14
    jg       .update_position

    mov      rax, [current_batch_edits]
    add      rax, rdi
    cmp      rax, r10
    jg       .update_position

    mov      rbx, rdi
    shr      rbx, 1               ; byte shift
    mov      r8d, edi
    and      r8d, 1               ; 1 when the shift is an odd number of nibbles

    ; RIGHT DIAGONAL
    ; Builds zmm6 = ReadSeq shifted right by rdi nibbles. Shifts that are
    ; not whole bytes are assembled byte by byte in a 64-byte stack buffer.
    cmp      r9, 0
    jne      .right_not_first

    cmp      r8d, 1
    je       .right_first_odd

    ; First window, even shift: rbx bytes of 0xFF padding, then the data.
    sub      rsp, VECTOR_BYTES
    xor      rsi, rsi

.right_first_even_pad:
    cmp      rsi, rbx
    jae      .right_first_even_data
    mov      byte [rsp + rsi], 0xFF
    inc      rsi
    jmp      .right_first_even_pad

.right_first_even_data:
    cmp      rsi, VECTOR_BYTES
    jae      .right_first_even_done
    mov      rax, rsi
    sub      rax, rbx
    movzx    ecx, byte [r11 + rax]
    mov      byte [rsp + rsi], cl
    inc      rsi
    jmp      .right_first_even_data

.right_first_even_done:
    vmovdqu8 zmm6, [rsp]
    add      rsp, VECTOR_BYTES
    jmp      .right_compare

.right_first_odd:
    cmp      rbx, 0
    je       .right_odd_zero

    ; First window, odd shift with at least one whole byte:
    ; rbx bytes of 0xFF, one byte 0xF|first base, then nibble-shifted data.
    sub      rsp, VECTOR_BYTES
    xor      rsi, rsi

.right_first_odd_pad:
    cmp      rsi, rbx
    jae      .right_first_odd_nibble
    mov      byte [rsp + rsi], 0xFF
    inc      rsi
    jmp      .right_first_odd_pad

.right_first_odd_nibble:
    movzx    eax, byte [r11]
    shr      eax, 4
    or       al, 0xF0
    mov      byte [rsp + rsi], al
    inc      rsi

.right_first_odd_data:
    cmp      rsi, VECTOR_BYTES - 1
    jae      .right_first_odd_done

    mov      rax, rsi
    sub      rax, rbx
    dec      rax

    movzx    ecx, byte [r11 + rax]
    movzx    r8d, byte [r11 + rax + 1]   ; r8d is scratch from here on

    and      ecx, 0x0F
    shr      r8d, 4
    and      r8d, 0x0F
    shl      ecx, 4
    or       ecx, r8d
    mov      byte [rsp + rsi], cl
    inc      rsi
    jmp      .right_first_odd_data

.right_first_odd_done:
    vmovdqu8 zmm6, [rsp]
    add      rsp, VECTOR_BYTES
    jmp      .right_compare

.right_odd_zero:
    ; Shift of exactly one nibble, relative to byte offset r9.
    sub      rsp, VECTOR_BYTES
    movzx    eax, byte [r11 + r9]
    shr      eax, 4
    or       al, 0xF0
    mov      byte [rsp], al
    mov      rsi, 1

.right_odd_zero_loop:
    cmp      rsi, VECTOR_BYTES
    jae      .right_odd_zero_done

    mov      rax, r9
    add      rax, rsi
    dec      rax

    lea      r8, [rax + 1]        ; r8 is scratch from here on
    cmp      r8, r15
    jae      .right_odd_zero_last

    movzx    ecx, byte [r11 + rax]
    movzx    r8d, byte [r11 + rax + 1]

    and      ecx, 0x0F
    shr      r8d, 4
    and      r8d, 0x0F
    shl      ecx, 4
    or       ecx, r8d
    mov      byte [rsp + rsi], cl
    inc      rsi
    jmp      .right_odd_zero_loop

.right_odd_zero_last:
    ; Next byte is past the whole-byte range: use 0xF for the missing nibble.
    movzx    ecx, byte [r11 + rax]
    and      ecx, 0x0F
    or       cl, 0xF0
    mov      byte [rsp + rsi], cl
    inc      rsi
    jmp      .right_odd_zero_loop

.right_odd_zero_done:
    vmovdqu8 zmm6, [rsp]
    add      rsp, VECTOR_BYTES
    jmp      .right_compare

.right_not_first:
    ; Not enough bytes before the window for this shift -> all-zero vector.
    cmp      r9, rbx
    jb       .right_zero_fill

    cmp      r8d, 1
    jne      .right_even_load
    cmp      rbx, 0
    je       .right_odd_zero

    sub      rsp, VECTOR_BYTES
    xor      rsi, rsi

.right_odd_shift:
    cmp      rsi, VECTOR_BYTES
    jae      .right_odd_done

    mov      rax, r9
    sub      rax, rbx
    add      rax, rsi

    cmp      rax, r15
    jae      .right_odd_last

    movzx    ecx, byte [r11 + rax]
    movzx    r8d, byte [r11 + rax + 1]   ; r8d is scratch from here on

    shr      ecx, 4
    and      ecx, 0x0F
    and      r8d, 0x0F
    shl      ecx, 4
    or       ecx, r8d
    mov      byte [rsp + rsi], cl
    inc      rsi
    jmp      .right_odd_shift

.right_odd_last:
    movzx    ecx, byte [r11 + rax]
    shr      ecx, 4
    or       cl, 0xF0
    mov      byte [rsp + rsi], cl
    inc      rsi
    jmp      .right_odd_shift

.right_odd_done:
    vmovdqu8 zmm6, [rsp]
    add      rsp, VECTOR_BYTES
    jmp      .right_compare

.right_even_load:
    ; Whole-byte shift: load directly from rbx bytes earlier.
    mov      rax, r9
    sub      rax, rbx
    vmovdqu8 zmm6, [r11 + rax]
    jmp      .right_compare

.right_zero_fill:
    vpxord   zmm6, zmm6, zmm6

.right_compare:
    ; k6 / k7 = bytes whose high / low nibble differs from zmm10 / zmm11.
    vpandd   zmm7, zmm6, [f0_mask]
    vpandd   zmm8, zmm6, [four_mask]

    vpcmpeqb k6, zmm7, zmm10
    vpcmpeqb k7, zmm8, zmm11

    knotq    k6, k6
    knotq    k7, k7

    korq     k5, k6, k7
    ktestq   k5, k5
    jz       .right_all_matched

    kmovq    rax, k6
    tzcnt    rax, rax
    shl      rax, 1

    kmovq    rcx, k7
    tzcnt    rcx, rcx
    lea      rcx, [rcx*2+1]

    cmp      rax, rcx
    cmovnb   rax, rcx             ; rax = matching run length on this diagonal

    ; A shorter run lowers global_counter.
    mov      rcx, [global_counter]
    cmp      rax, rcx
    jae      .skip_right_gc
    mov      [global_counter], rax
.skip_right_gc:

    mov      rcx, r9
    shl      rcx, 1
    add      rcx, rax
    cmp      rcx, r13
    jbe      .right_pos_ok
    mov      rcx, r13
.right_pos_ok:

    mov      rax, [current_batch_edits]
    add      rax, rdi             ; edits if this diagonal is taken

    mov      rdx, [best_diagonal_score]
    cmp      rcx, rdx
    ja       .right_better
    jb       .left_diagonal

    mov      rdx, [best_edit_distance]
    cmp      rax, rdx
    jae      .left_diagonal

.right_better:
    mov      [best_diagonal_score], rcx
    mov      [best_edit_distance], rax

    ; Reject immediately when the new best exceeds the threshold.
    cmp      rax, r10
    jg       .rejected

    jmp      .left_diagonal

.right_all_matched:
    mov      rax, r9
    shl      rax, 1
    mov      rcx, r13
    sub      rcx, rax

    mov      rbx, r15             ; rbx is scratch here; restored at .left_diagonal
    sub      rbx, r9
    shl      rbx, 1

    cmp      rcx, rbx
    cmova    rcx, rbx
    cmp      rcx, SEGMENT_BASES
    jbe      .right_cap
    mov      rcx, SEGMENT_BASES
.right_cap:

    test     rcx, rcx
    jz       .left_diagonal

    ; A longer run raises global_counter.
    mov      rax, [global_counter]
    cmp      rcx, rax
    jbe      .skip_right_all_gc
    mov      [global_counter], rcx
.skip_right_all_gc:

    mov      rax, r9
    shl      rax, 1
    add      rax, rcx
    cmp      rax, r13
    jbe      .right_all_pos_ok
    mov      rax, r13
.right_all_pos_ok:

    mov      rcx, [current_batch_edits]
    add      rcx, rdi

    mov      rdx, [best_diagonal_score]
    cmp      rax, rdx
    ja       .right_all_better
    jb       .left_diagonal

    mov      rdx, [best_edit_distance]
    cmp      rcx, rdx
    jae      .left_diagonal

.right_all_better:
    mov      [best_diagonal_score], rax
    mov      [best_edit_distance], rcx

    ; Reject immediately when the new best exceeds the threshold.
    cmp      rcx, r10
    jg       .rejected

.left_diagonal:
    ; LEFT DIAGONAL
    ; The right-diagonal code above uses rbx and r8 as scratch, so rebuild
    ; the byte shift and nibble parity from the edit index before use.
    mov      rbx, rdi
    shr      rbx, 1
    mov      r8d, edi
    and      r8d, 1

    lea      rax, [r9 + rbx]
    cmp      rax, r15
    jae      .left_zero_fill

    cmp      r8d, 0
    je       .left_even_load

    sub      rsp, VECTOR_BYTES
    xor      rsi, rsi

.left_odd_shift:
    cmp      rsi, VECTOR_BYTES - 1
    jae      .left_odd_done

    lea      rax, [r9 + rbx]
    add      rax, rsi

    movzx    ecx, byte [r11 + rax]
    movzx    r8d, byte [r11 + rax + 1]

    and      ecx, 0x0F
    shr      r8d, 4
    and      r8d, 0x0F
    shl      ecx, 4
    or       ecx, r8d
    mov      byte [rsp + rsi], cl
    inc      rsi
    jmp      .left_odd_shift

.left_odd_done:
    vmovdqu8 zmm6, [rsp]
    add      rsp, VECTOR_BYTES
    jmp      .left_compare

.left_even_load:
    ; Whole-byte shift: load directly from rbx bytes later (rax = r9 + rbx).
    vmovdqu8 zmm6, [r11 + rax]
    jmp      .left_compare

.left_zero_fill:
    vpxord   zmm6, zmm6, zmm6

.left_compare:
    vpandd   zmm7, zmm6, [f0_mask]
    vpandd   zmm8, zmm6, [four_mask]

    vpcmpeqb k6, zmm7, zmm10
    vpcmpeqb k7, zmm8, zmm11

    knotq    k6, k6
    knotq    k7, k7

    korq     k5, k6, k7
    ktestq   k5, k5
    jz       .left_all_matched

    kmovq    rax, k6
    tzcnt    rax, rax
    shl      rax, 1

    kmovq    rcx, k7
    tzcnt    rcx, rcx
    lea      rcx, [rcx*2+1]

    cmp      rax, rcx
    cmovnb   rax, rcx             ; rax = matching run length on this diagonal

    mov      rcx, [global_counter]
    cmp      rax, rcx
    jae      .skip_left_gc
    mov      [global_counter], rax
.skip_left_gc:

    mov      rcx, r9
    shl      rcx, 1
    add      rcx, rax
    cmp      rcx, r13
    jbe      .left_pos_ok
    mov      rcx, r13
.left_pos_ok:

    mov      rax, [current_batch_edits]
    add      rax, rdi

    mov      rdx, [best_diagonal_score]
    cmp      rcx, rdx
    ja       .left_better
    jb       .edit_loop

    mov      rdx, [best_edit_distance]
    cmp      rax, rdx
    jae      .edit_loop

.left_better:
    mov      [best_diagonal_score], rcx
    mov      [best_edit_distance], rax

    ; Reject immediately when the new best exceeds the threshold.
    cmp      rax, r10
    jg       .rejected

    jmp      .edit_loop

.left_all_matched:
    mov      rax, r9
    shl      rax, 1
    mov      rcx, r13
    sub      rcx, rax

    mov      rbx, r15             ; scratch; .edit_loop recomputes rbx
    sub      rbx, r9
    shl      rbx, 1

    cmp      rcx, rbx
    cmova    rcx, rbx
    cmp      rcx, SEGMENT_BASES
    jbe      .left_cap
    mov      rcx, SEGMENT_BASES
.left_cap:

    test     rcx, rcx
    jz       .edit_loop

    mov      rax, [global_counter]
    cmp      rcx, rax
    jbe      .skip_left_all_gc
    mov      [global_counter], rcx
.skip_left_all_gc:

    mov      rax, r9
    shl      rax, 1
    add      rax, rcx
    cmp      rax, r13
    jbe      .left_all_pos_ok
    mov      rax, r13
.left_all_pos_ok:

    mov      rcx, [current_batch_edits]
    add      rcx, rdi

    mov      rdx, [best_diagonal_score]
    cmp      rax, rdx
    ja       .left_all_better
    jb       .edit_loop

    mov      rdx, [best_edit_distance]
    cmp      rcx, rdx
    jae      .edit_loop

.left_all_better:
    mov      [best_diagonal_score], rax
    mov      [best_edit_distance], rcx

    ; Reject immediately when the new best exceeds the threshold.
    cmp      rcx, r10
    jg       .rejected

    jmp      .edit_loop

.update_position:
    inc      qword [processed_counter]

    mov      rax, [global_counter]
    test     rax, rax
    jz       .min_advance

    ; An out-of-range advance is replaced by 2 bases (one byte).
    cmp      rax, SEGMENT_BASES
    jbe      .advance_ok
    mov      rax, 2
.advance_ok:

    inc      rax
    shr      rax, 1               ; bases -> bytes, rounding up
    add      r9, rax

    mov      rax, r9
    shl      rax, 1
    cmp      rax, r13
    cmova    rax, r13
    mov      [checkpoint_base], rax

    ; Commit the best edit distance once the window start has caught up
    ; with the furthest position recorded.
    mov      rax, [checkpoint_base]
    mov      rbx, [best_diagonal_score]
    cmp      rax, rbx
    jbe      .update_batch_edits

    jmp      .mainloop

.update_batch_edits:
    mov      rax, [best_edit_distance]
    cmp      rax, EDIT_DISTANCE_UNSET
    je       .mainloop

    cmp      rax, r10
    jg       .rejected

    mov      [current_batch_edits], rax
    jmp      .mainloop

.min_advance:
    ; Advance by exactly one byte.
    inc      r9
    mov      rax, r9
    shl      rax, 1
    cmp      rax, r13
    cmova    rax, r13
    mov      [checkpoint_base], rax

    mov      rax, [checkpoint_base]
    mov      rbx, [best_diagonal_score]
    cmp      rax, rbx
    jbe      .min_update_batch_edits
    jmp      .mainloop

.min_update_batch_edits:
    mov      rax, [best_edit_distance]
    cmp      rax, EDIT_DISTANCE_UNSET
    je       .mainloop

    cmp      rax, r10
    jg       .rejected

    mov      [current_batch_edits], rax
    jmp      .mainloop

.handle_tail:
    ; Accept only when the end of the read was reached ...
    mov      rax, [best_diagonal_score]
    cmp      rax, r13
    jb       .rejected

    ; ... with an edit distance within the threshold.
    mov      rbx, [best_edit_distance]
    cmp      rbx, r10
    jle      .accepted

.rejected:
    xor      rax, rax
    jmp      .end

.safety_exit:
    xor      rax, rax
    jmp      .end

.accepted:
    mov      rax, 1

.end:
    vzeroupper                    ; leave no dirty upper vector state for the caller
    add      rsp, 32
    pop      r15
    pop      r14
    pop      r13
    pop      r12
    pop      rbx
    leave
    ret

Overwriting checkpointavxfull.asm


In [4]:
!nasm -f elf64 checkpointavxfull.asm -o checkpointavxfull.o && echo "Assembly OK"
!gcc -c checkpointfull.c -o checkpointfull.o -mavx512f -mavx512bw && echo "C compilation OK"
!gcc checkpointavxfull.o checkpointfull.o -o checkpoint1 -mavx512f -mavx512bw && echo "Linking OK"
!./checkpoint1 ERR240727_1_E2_30000Pairs.txt 0 200 30000
!./checkpoint1 ERR240727_1_E2_30000Pairs.txt 1 200 30000

Assembly OK
C compilation OK
Linking OK
DEBUG MODE: Checking first 30000 sequences
Edit Threshold: 0
Max Iterations: 200


DEBUG SUMMARY
Sequences tested: 30000
Accepted: 243
Rejected: 29757
Bugs detected: 0

✓ No bugs detected in tested sequences.
DEBUG MODE: Checking first 30000 sequences
Edit Threshold: 1
Max Iterations: 200


DEBUG SUMMARY
Sequences tested: 30000
Accepted: 674
Rejected: 29326
Bugs detected: 0

✓ No bugs detected in tested sequences.
