In [69]:
import pysam

def compute_bam_accuracy(bam_path):
  """Print and return the mean per-read alignment accuracy of a BAM file.

  For every mapped read that carries an NM tag and has a non-zero
  ``query_alignment_length``, the per-read accuracy is
  ``1 - NM / query_alignment_length``. The result is the arithmetic mean of
  those per-read values.

  NOTE(review): NM is an edit distance that also counts deleted reference
  bases, whereas ``query_alignment_length`` only measures the aligned part of
  the query, so deletion-heavy reads are divided by a shorter denominator.
  Confirm this is the intended metric.

  Args:
    bam_path: Path of the BAM file; opened in binary read mode ("rb").

  Returns:
    The mean accuracy as a float, or 0.0 when no read contributed a value.
  """
  accuracies = []

  # The context manager closes the file even when iteration raises (for
  # example on a truncated file), which a trailing bam.close() would not.
  with pysam.AlignmentFile(bam_path, "rb") as bam:
    for read in bam:
      if read.is_unmapped:
        continue
      # NM is an optional tag and get_tag() raises KeyError when it is
      # absent, so reads without it are skipped instead of aborting the run.
      if not read.has_tag("NM"):
        continue

      nm = read.get_tag("NM")  # edit distance
      aln_len = read.query_alignment_length  # aligned length (excludes soft clips)

      # Guard the division below.
      if aln_len == 0:
        continue

      accuracies.append(1 - nm / aln_len)

  if accuracies:
    avg_acc = sum(accuracies) / len(accuracies)
    print(f"Average alignment accuracy: {avg_acc:.6f}")
    return avg_acc
  else:
    print("No aligned reads found.")
    return 0.0


In [72]:
# Run the accuracy computation on one BAM file; the function prints the mean
# and the returned float is echoed as the cell's output.
# NOTE(review): this cell's execution count (In [72]) is higher than that of
# the later redefinition of compute_bam_accuracy (In [71]), so the output shown
# below may have been produced by that later definition - confirm with a
# Restart & Run All.
compute_bam_accuracy("/vol/bitbucket/bl1821/new1/hac.bam")

Average alignment accuracy: 0.920452


0.9204516111431507

In [71]:
import pysam

def compute_bam_accuracy(bam_path):
  """Print and return the mean per-read alignment accuracy of a BAM file.

  For every mapped read that has both an NM tag and CIGAR information, the
  per-read accuracy is ``1 - NM / aln_len``, where ``aln_len`` is the summed
  length of the M, I, D, = and X CIGAR operations. Reads whose ``aln_len`` is
  zero are skipped. The result is the arithmetic mean of the per-read values.

  Args:
    bam_path: Path of the BAM file; opened in binary read mode ("rb").

  Returns:
    The mean accuracy as a float, or 0.0 when no read contributed a value.
  """
  # CIGAR operation codes that count towards the alignment length:
  # 0: M (alignment match)
  # 1: I (insertion to reference)
  # 2: D (deletion from reference)
  # 7: = (sequence match)
  # 8: X (sequence mismatch)
  aligned_ops = {0, 1, 2, 7, 8}

  def get_cigar_aln_length(cigar_tuples):
    """Sum the lengths of the (op, length) pairs whose op is in aligned_ops."""
    return sum(length for op, length in cigar_tuples if op in aligned_ops)

  accuracies = []

  # The context manager closes the file even when iteration raises (for
  # example on a truncated file), which a trailing bam.close() would not.
  with pysam.AlignmentFile(bam_path, "rb") as bam:
    for read in bam:
      if read.is_unmapped:
        continue
      # Both inputs of the metric are needed; skip reads missing either one.
      if not read.has_tag("NM") or read.cigartuples is None:
        continue

      nm = read.get_tag("NM")
      aln_len = get_cigar_aln_length(read.cigartuples)

      # Guard the division below.
      if aln_len == 0:
        continue

      accuracies.append(1 - nm / aln_len)

  if accuracies:
    avg_acc = sum(accuracies) / len(accuracies)
    print(f"Average alignment accuracy: {avg_acc:.6f}")
    return avg_acc
  else:
    print("No aligned reads with valid CIGAR/NM tags.")
    return 0.0
