In [None]:
import pandas as pd
import pod5 as p5

# Inputs: the per-read poly(A) coordinate table (TSV) and the raw-signal store (POD5).
tsv_file = "a120_2mod_polyA_position.tsv"
pod_file = "egfp_a120_2mod_polya_reads.pod5"

# Tab-separated table; it is joined on its "read_id" column further down.
polyA_df = pd.read_csv(tsv_file, sep='\t')

# Collect one (read_id as str, signal) pair for every read in the POD5 file.
with p5.Reader(pod_file) as reader:
    pod_data = [
        (str(record.read_id), record.signal)
        for record in reader.reads()
    ]

pod_df = pd.DataFrame(pod_data, columns=["read_id","signal"])

# Default (inner) merge: only read_ids present in both tables are kept.
df = pd.merge(polyA_df, pod_df, on="read_id")

# Optional export of the merged table, currently disabled.
# NOTE(review): the file name below does not match this dataset's naming — confirm before enabling.
#df.to_csv("a60_30.tsv", sep='\t', index=False)

In [None]:
# Sanity check of the merged table: (rows, columns) first, then the first five rows.
print(df.shape, df.head(), sep="\n")

# Number of pages if there are 15 plots on each page (fractional, not rounded up).
print(len(df) / 15)

(69373, 4)
                                read_id  start   end  \
0  0155d34b-a8b7-45c2-bc2b-31e2804401b1   2646  3942   
1  017d4f44-e7bc-41db-8b0d-adf556515b92   2418  5402   
2  040ee389-76f4-4836-938a-36152049fe48   2617  4456   
3  0506befe-be43-4083-8a7c-f19335a7d11e   2852  4092   
4  082d2704-d59d-4870-b7cf-35bd08dc5ee4   3099  4552   

                                              signal  
0  [265, 267, 284, 281, 282, 273, 259, 261, 272, ...  
1  [360, 329, 332, 327, 309, 322, 324, 315, 321, ...  
2  [334, 300, 307, 287, 306, 315, 305, 304, 306, ...  
3  [625, 289, 279, 304, 289, 304, 294, 290, 292, ...  
4  [603, 314, 310, 324, 314, 278, 282, 276, 275, ...  
4624.866666666667


In [40]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

def plot_polyA_signals_to_pdf(test_data, output_filename, max_plots,
                              x_max=None, y_limits=(-10, 1000)):
    """Write the signal[start:end] slice of each row to a multi-page PDF.

    The first ``max_plots`` rows of ``test_data`` are drawn one per subplot,
    15 subplots (5 rows x 3 columns) per page. Every subplot uses the same
    axis limits so the traces can be compared by eye.

    Parameters
    ----------
    test_data : pandas.DataFrame
        Must provide the columns ``'signal'`` (a sliceable sequence per row)
        and ``'start'`` / ``'end'`` (slice bounds into ``signal``). Rows are
        taken in positional order, so the frame's index may hold any labels.
    output_filename : str
        Path of the PDF file to create.
    max_plots : int
        Upper bound on the number of rows plotted; values below zero are
        treated as zero.
    x_max : int, optional
        Right end of the shared x-axis before the 100-sample margin is added.
        ``None`` (the default) uses the length of the longest plotted slice,
        so no trace is cut off.
    y_limits : tuple, optional
        ``(bottom, top)`` of the shared y-axis. Defaults to ``(-10, 1000)``.

    Returns
    -------
    None
        The PDF is written to ``output_filename``; the file name and the
        length of the longest plotted slice are printed.
    """
    rows, cols = 5, 3  # subplot grid on each page
    per_page = rows * cols

    # Clamp at zero so a negative max_plots simply produces no plots.
    num_plots = max(0, min(len(test_data), max_plots))
    pages = (num_plots + per_page - 1) // per_page  # ceiling division

    # Walk the columns in positional order. A lookup like
    # test_data['signal'][idx] is label-based and raises (or silently picks
    # another row) when the index is not 0..n-1, e.g. after filtering.
    # range(num_plots) comes first so zip stops after num_plots rows.
    columns = (test_data['signal'], test_data['start'], test_data['end'])
    slices = [
        signal[start:end]
        for _, signal, start, end in zip(range(num_plots), *columns)
    ]

    # The longest slice is known before plotting, so it can size the x-axis.
    global_max_length = max((len(piece) for piece in slices), default=0)
    x_upper = global_max_length if x_max is None else x_max

    with PdfPages(output_filename) as pdf:
        for page in range(pages):
            fig, axes = plt.subplots(rows, cols, figsize=(15, 9))
            fig.suptitle(f'PolyA Signals (Page {page+1})', fontsize=16)
            axes = axes.flatten()  # 1D view of the grid, row by row

            for i in range(per_page):
                idx = page * per_page + i
                if idx >= num_plots:
                    axes[i].axis('off')  # hide the unused cells of the last page
                    continue

                axes[i].plot(slices[idx])
                axes[i].set_title(f'Row {idx+1}')
                axes[i].set_xlim(-100, x_upper + 100)  # shared x-range with margin
                axes[i].set_ylim(*y_limits)  # shared y-range

            fig.tight_layout(rect=[0, 0, 1, 0.96])  # leave room for the page title
            pdf.savefig(fig)
            plt.close(fig)  # release the figure before building the next page

    print(f"PDF saved as '{output_filename}'")
    print(f"Max signal length: {global_max_length}")

# Render the first 480 rows of the merged table into the PDF named below.
plot_polyA_signals_to_pdf(
    test_data=df,
    output_filename="raw_polyA_signals_a120_2mod.pdf",
    max_plots=480,
)

PDF saved as 'raw_polyA_signals_a120_2mod.pdf'
Max signal length: 11508
