# Supplementary Figure 4

---

<br>

## Import libraries

In [1]:
import pandas as pd
import numpy as np

import pysam

import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

## Settings for saving figures as pdf

In [2]:
# Use font type 42 for both PDF and PostScript output
# (type 42 = TrueType according to the matplotlib rcParams documentation)
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

## Set environmental constants

In [3]:
# Root directory where the sequencing files are stored.
# The plotting loop reads '<path>/<ID>/<ID>-transcriptome_sorted.bam' for each run ID.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
path = '/Volumes/elegans/rna_sequencing'

---

<br>

# Functions

In [4]:
# Function for averaging values position by position across a nested list
# Used to get the mean base quality at any given position

def avgNestedLists(nested_vals):
    """Average a list of lists position by position.

    The inner lists may have different lengths: for every index up to the
    length of the longest inner list, the mean is taken over the lists that
    are long enough to have a value at that index. NaN values are ignored
    (``np.nanmean``).

    Parameters
    ----------
    nested_vals : list of list of numbers
        One inner list per read / series.

    Returns
    -------
    list
        One mean value per index; empty if ``nested_vals`` is empty.
    """
    # length of the longest inner list (0 when there is no list at all)
    longest = max((len(values) for values in nested_vals), default=0)

    # for each position, average over the lists that reach that position
    return [
        np.nanmean([values[pos] for values in nested_vals if pos < len(values)])
        for pos in range(longest)
    ]

In [5]:
# Function for: 
# 1) extracting base quality values 
# 2) computing the average base quality value per position (relative to alignment start)

def extract_base_quality(alignments, min_softclip=80):
    """Compute the mean base quality per position relative to the alignment start.

    Only primary, mapped, reverse-strand (antisense) reads that carry a
    sequence and base qualities are used, and only when the soft-clipped
    region preceding the aligned part of the query is longer than
    ``min_softclip`` bases.

    Parameters
    ----------
    alignments : iterable
        Iterable of reads exposing the pysam ``AlignedSegment`` attributes
        used below (e.g. an open ``pysam.AlignmentFile``).
    min_softclip : int, optional
        A read is kept only if ``query_alignment_start > min_softclip``.
        Defaults to 80.

    Returns
    -------
    total : int
        Number of reads that passed all filters.
    xUnaligned : list of int
        Positions of the soft-clipped region, from ``-len(soft-clip)`` up to
        and including 0 (the first aligned base, so both curves connect).
    xAligned : list of int
        Positions of the aligned region, starting at 0.
    yUnaligned : list
        Mean base quality for every position in ``xUnaligned``.
    yAligned : list
        Mean base quality for every position in ``xAligned``.

    When no read passes the filters, all four lists are empty.
    """
    total = 0
    aligned = []
    unaligned = []

    # loop over alignments
    for read in alignments:

        # only look at primary alignments that carry a sequence
        if read.is_unmapped or read.is_secondary or read.is_supplementary or read.seq is None:
            continue

        quali = read.query_qualities

        # filter for antisense reads with base qualities
        if quali is None or not read.is_reverse:
            continue

        start = read.query_alignment_start
        end = read.query_alignment_end

        # filter for long soft-clips
        if start <= min_softclip:
            continue

        # aligned region
        aligned.append(list(quali[start:end]))

        # unaligned (= softclip) region, reversed so that index 0 is the base
        # right next to the alignment start (reads have soft-clips of different lengths)
        sc_qual = list(quali[:start])
        sc_qual.reverse()
        unaligned.append(sc_qual)

        # count reads
        total += 1

    # averaged values
    avgAligned = avgNestedLists(aligned)

    avgUnaligned = avgNestedLists(unaligned)
    avgUnaligned.reverse()  # back to left-to-right order

    # base quality on Y axis
    avg = avgUnaligned + avgAligned

    # Y axis values (= average value); the unaligned curve also takes the
    # first aligned value so that it reaches position 0
    yUnaligned = avg[:len(avgUnaligned) + 1]
    yAligned = avg[len(avgUnaligned):]

    # X axis values (= base position relative to alignment start)
    # derived from the Y values so that x and y always have the same length,
    # even when no read passed the filters
    xUnaligned = list(range(-len(avgUnaligned), len(yUnaligned) - len(avgUnaligned)))
    xAligned = list(range(len(avgAligned)))

    return total, xUnaligned, xAligned, yUnaligned, yAligned


In [6]:
def plotting_base_quality(xUnaligned, yUnaligned, xAligned, yAligned, total, ID, output):
    """Plot the average base quality around the alignment start and save it.

    The figure has three panels sharing the Y axis: the soft-clipped region
    (x from -1000 to -50), the transition around the alignment start
    (x from -50 to 50) and the aligned region (x from 50 to 1000).
    The figure is written to ``output`` and then closed; it is never shown,
    so no interactive backend is selected here.

    Parameters
    ----------
    xUnaligned, yUnaligned : sequences
        Positions and mean base qualities of the soft-clipped region.
    xAligned, yAligned : sequences
        Positions and mean base qualities of the aligned region.
    total : int
        Number of reads used, shown in the title.
    ID : str
        Experiment identifier, shown in the title.
    output : str
        Path of the file the figure is saved to.
    """
    sns.set_style('whitegrid', {'axes.edgecolor': '.15', 'axes.linewidth': '1.5', "xtick.major.size": 8, "ytick.major.size": 8, "xtick.minor.size": 5, "ytick.minor.size": 5})

    fig, ax = plt.subplots(figsize=(9.5,3), nrows=1, ncols=3, sharey=True)

    try:
        # unaligned region
        ax[0].plot(xUnaligned, yUnaligned, color='#658cbb', alpha=1, label='Unaligned region',linewidth=2)
        ax[0].set_xlim(-1000,-50)
        ax[0].tick_params(axis='y', which='both', left=True)
        ax[0].yaxis.set_minor_locator(MultipleLocator(1))
        ax[0].tick_params(axis='x', which='major', bottom=True)
        ax[0].set_ylabel('Base quality score (PHRED)', weight='bold', size=12)

        # transition region (last 51 unaligned points = positions -50..0,
        # first 51 aligned points = positions 0..50)
        ax[1].plot(xUnaligned[-51:], yUnaligned[-51:], color='#658cbb', alpha=1, label='Unaligned region',linewidth=2)
        ax[1].plot(xAligned[:51], yAligned[:51], color='#856798', alpha=1, label='Aligned region', linewidth=2)
        ax[1].vlines(0, 0, 60, 'black', alpha=1, linewidth=1.5, linestyle='dotted')
        ax[1].set_xlim(-50, 50)
        ax[1].tick_params(axis='x', which='major', bottom=True)
        ax[1].set_xlabel('Base position relative to alignment start', weight='bold', size=12)

        # aligned region; starts at position 50 (not 51) so the curve begins
        # exactly where the transition panel ends
        ax[2].plot(xAligned[50:], yAligned[50:], color='#856798', alpha=1, label='Aligned region',linewidth=2)
        ax[2].set_xlim(50, 1000)
        ax[2].tick_params(axis='x', which='major', bottom=True)

        # plot settings (Y axis is shared, so the limits apply to all panels)
        ax[2].set_ylim(0, 45)
        # NOTE(review): tight_layout() runs after subplots_adjust() and presumably
        # overrides the manual spacing; order kept to leave the rendered layout unchanged
        fig.subplots_adjust(left=0.1, bottom=None, right=0.98, top=None, wspace=0.01, hspace=None)
        fig.suptitle(f'{ID} (n = {total} reads)', y=0.95)
        fig.tight_layout()

        # save plot
        fig.savefig(output, dpi=800, bbox_inches='tight')

    finally:
        # always release the figure, even if plotting or saving failed
        plt.close(fig)

<br>

# Generating plots

The following code generates a base quality plot for each experiment ID listed in **runs**. \
Plots are not displayed below because generating them is very computationally heavy.

**SSP experiments:** ['SSP_1', 'SSP_2', 'SSP_3', 'SSP_4', 'SSP_5', 'SSP_6'] \
**SL1 experiment:** ['SL1_1'] \
**NP experiments:** ['NP_1', 'NP_2', 'NP_3', 'NP_4', 'NP_5']

In [None]:
# experiment IDs to process (one sub-folder of `path` per ID)
runs = ['SSP_1','SSP_2', 'SSP_3', 'SSP_4', 'SSP_5', 'SSP_6', 'SL1_1','NP_1', 'NP_2', 'NP_3', 'NP_4', 'NP_5']

for ID in runs:
    
    # corresponding alignment file
    file = f'{path}/{ID}/{ID}-transcriptome_sorted.bam'
    
    # measure base quality across all reads
    # (the context manager closes the BAM file once it has been read)
    with pysam.AlignmentFile(file, 'rb') as alignment:
        total, xUnaligned, xAligned, yUnaligned, yAligned = extract_base_quality(alignment)
    
    # generate and save plots
    output = f'SupFig4-average_base_quality_({ID}).pdf'
    plotting_base_quality(xUnaligned, yUnaligned, xAligned, yAligned, total, ID, output)

<font size="4">**Sup. Fig. 4:**<br></font>
We measured the average base quality value over all the Nanopore reads obtained in each sequencing experiment. The 5’ soft-clip region is shown in blue and the aligned region (primary alignment) is shown in purple.