In [13]:
import pandas as pd
import pod5 as p5

def load_and_merge_data(tsv_file, pod_file):
    """Read a TSV table and a pod5 file and return them merged on ``read_id``.

    Parameters
    ----------
    tsv_file : str or path-like
        Tab-separated file; it must contain a ``read_id`` column for the merge.
    pod_file : str or path-like
        pod5 file whose reads supply the ``signal`` column.

    Returns
    -------
    pandas.DataFrame
        The TSV columns plus a ``signal`` column. The merge uses pandas'
        default join type (inner), so only ``read_id`` values found in both
        inputs are kept.
    """
    position_table = pd.read_csv(tsv_file, sep='\t')

    signal_records = []
    with p5.Reader(pod_file) as pod_reader:
        for record in pod_reader.reads():
            # read_id is converted to str so it can be matched against the TSV column.
            signal_records.append((str(record.read_id), record.signal))

    signal_table = pd.DataFrame(signal_records, columns=["read_id", "signal"])
    return pd.merge(position_table, signal_table, on="read_id")

In [14]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

def plot_polyA_signals_to_pdf(df, output_filename, max_plots, max_length=None):
    """Plot the ``signal[start:end]`` segment of each row into a multi-page PDF.

    The first ``max_plots`` rows of ``df`` (by position) are drawn one per
    panel, 15 panels (5 x 3) per page. Every panel uses the same fixed axis
    limits so the panels can be compared directly.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``signal`` (a sliceable sequence per row) and
        ``start`` / ``end`` (slice bounds into that sequence).
    output_filename : str or path-like
        Destination PDF; passed straight to ``PdfPages``.
    max_plots : int
        Upper bound on the number of rows plotted.
    max_length : int, optional
        Reference length for the x-axis, which spans ``-100`` to
        ``max_length + 100``. When omitted (``None``), the length of the
        longest plotted slice is used.

    Side effects
    ------------
    Writes the PDF and prints the output file name and the longest slice length.
    """
    rows, cols = 5, 3
    per_page = rows * cols

    # Clamp at zero so a negative max_plots cannot select rows from the end.
    num_plots = max(0, min(len(df), max_plots))
    pages = (num_plots + per_page - 1) // per_page

    # Select rows by position. Label lookups such as df['signal'][idx] raise
    # KeyError (or pick a different row) once the frame has been filtered or
    # sorted and its index is no longer 0..n-1.
    selected = df.iloc[:num_plots]
    signal_slices = [
        signal[start:end]
        for signal, start, end in zip(selected['signal'], selected['start'], selected['end'])
    ]

    global_max_length = max((len(segment) for segment in signal_slices), default=0)
    x_reference = global_max_length if max_length is None else max_length

    with PdfPages(output_filename) as pdf:
        for page in range(pages):
            fig, axes = plt.subplots(rows, cols, figsize=(15, 9))
            try:
                fig.suptitle(f'PolyA Signals (Page {page+1})', fontsize=16)

                for panel, ax in enumerate(axes.flatten()):
                    idx = page * per_page + panel
                    if idx >= num_plots:
                        # Hide the unused panels on the last page.
                        ax.axis('off')
                        continue

                    ax.plot(signal_slices[idx])
                    ax.set_title(f'Row {idx+1}')
                    ax.set_xlim(-100, x_reference + 100)
                    ax.set_ylim(-10, 1000)

                # Leave the top 4% of the figure free for the suptitle.
                fig.tight_layout(rect=[0, 0, 1, 0.96])
                pdf.savefig(fig)
            finally:
                # Always release the figure, even if drawing or saving failed.
                plt.close(fig)

    print(f"PDF saved as '{output_filename}'")
    print(f"Max signal length: {global_max_length}")

In [15]:
# a120_2mod: merge the TSV table with the pod5 reads, then export the plots.
data_a120_2mod = load_and_merge_data(
    tsv_file="a120_2mod_polyA_position.tsv",
    pod_file="egfp_a120_2mod_polya_reads.pod5",
)
plot_polyA_signals_to_pdf(
    df=data_a120_2mod,
    output_filename="raw_polyA_signals_a120_2mod.pdf",
    max_plots=60,
    max_length=11508,  # same value as the "Max signal length" this cell prints
)

PDF saved as 'raw_polyA_signals_a120_2mod.pdf'
Max signal length: 11508


In [16]:
# a120_1mod: merge the TSV table with the pod5 reads, then export the plots.
data_a120_1mod = load_and_merge_data(
    tsv_file="a120_1mod_polyA_position.tsv",
    pod_file="egfp_a120_1mod_polya_reads.pod5",
)
plot_polyA_signals_to_pdf(
    df=data_a120_1mod,
    output_filename="raw_polyA_signals_a120_1mod.pdf",
    max_plots=60,
    max_length=3824,  # same value as the "Max signal length" this cell prints
)

PDF saved as 'raw_polyA_signals_a120_1mod.pdf'
Max signal length: 3824
