# Script to create the tables needed to load the sfigures

**Author:** Mercedes Dalman

# sfigure 6 

In [1]:
import pysam
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

In [2]:
import pysam
import numpy as np
import pickle

# Extract one (start, span) record per read pair aligned to `ref` and cache the
# result as a pickle so later cells can skip the BAM scan.

# --- Configuration ---
bam = "mapped_to_CONC_PE/SAMPLE_100261_1_CONC.PE.sorted.bam"
ref = "CON_C"
n_bins = 10  # not read in this cell; kept because other cells may rely on it

# Inclusive window on the absolute template length; spans outside it are dropped.
MIN_FRAG_SPAN = 20
MAX_FRAG_SPAN = 1000

out_path = "../tables/CON_C_fragments.pkl"

# Make sure the output directory exists *before* the BAM scan, so a missing
# ../tables cannot throw away the work at the final write step.
# (`os` is imported in the notebook's first cell.)
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# --- Collect fragments ---
frag_starts = []
frag_spans = []

with pysam.AlignmentFile(bam, "rb") as bf:
    ref_len = bf.get_reference_length(ref)

    for r in bf.fetch(ref):
        # Count each pair once: only read 1, both mates mapped, primary alignment.
        if not r.is_read1:
            continue
        if r.is_unmapped or r.mate_is_unmapped:
            continue
        if r.is_secondary or r.is_supplementary:
            continue

        span = abs(r.template_length)
        # A template length of 0 carries no usable span; checked explicitly so
        # it stays excluded even if MIN_FRAG_SPAN is lowered to 0.
        if span == 0:
            continue
        if not MIN_FRAG_SPAN <= span <= MAX_FRAG_SPAN:
            continue

        # Leftmost of the read's own start and the start recorded for its mate.
        frag_start = min(r.reference_start, r.next_reference_start)
        frag_starts.append(frag_start)
        frag_spans.append(span)

frag_starts = np.array(frag_starts, dtype=np.int32)
frag_spans  = np.array(frag_spans, dtype=np.int32)

print(f"Fragments used: {len(frag_spans):,}")

# --- Save ---
with open(out_path, "wb") as f:
    pickle.dump(
        {
            "frag_starts": frag_starts,
            "frag_spans": frag_spans,
            "ref_len": ref_len,
            "ref": ref,
        },
        f,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

print("Saved to CON_C_fragments.pkl")

Fragments used: 464,039
Saved to CON_C_fragments.pkl


In [2]:
import pickle

# Reload the fragment table written by the extraction cell, so the BAM scan
# does not have to be repeated after a kernel restart.
# NOTE: pickle.load executes arbitrary code from the file; only use it on a
# pickle produced by this notebook.
with open("../tables/CON_C_fragments.pkl", "rb") as pkl_file:
    data = pickle.load(pkl_file)

# Unpack the four stored entries into the names used elsewhere in the notebook.
frag_starts, frag_spans, ref_len, ref = (
    data[key] for key in ("frag_starts", "frag_spans", "ref_len", "ref")
)

print(f"Loaded {len(frag_spans):,} fragments from pickle")

Loaded 464,039 fragments from pickle
