# subsamplingReadsVsProteinCoding.ipynb
## Marcus Viscardi      June 21, 2022

### Abstract:
Goal is to produce a plot to assess sequencing saturation of the polyA and totalRNA libraries. The plot will have "total number of reads" on the X axis and "total number of protein coding reads" on the Y axis.
Initially the plot will only have 4 points for the four libraries, but from there I can subsample each library (fractionally?) to produce curves coming off each point and going towards the origin.

### Data Structure:
1. Table with all reads, with columns for:
    - Read_id, because it would be good to be sure reads aren't duplicated!
    - If read mapped to a protein coding gene
        - This won't have to come from featureCounts; in this analysis we don't care about non-protein-coding things... eventually we will, though...
        - This could initially just be gene_id, which I can convert to protein_coding or not with a GTF file
2. From Table 1, subsample (over and over) to get counts of protein_coding reads and overall reads

The easy source for Table 1 is going to be the mergedOnReads dataframe (the version that didn't drop unmapped reads), the RIGHT source will probably be featureCounts output... I COULD try it with both, and compare? Honestly, probably the move.

In [4]:
# --- Core numerics / dataframes ---
import numpy as np
import pandas as pd
# Widen the printed dataframe output and never elide columns when a frame is displayed.
pd.set_option('display.width', 200)
pd.set_option('display.max_columns', None)

import scipy.stats as stats

# Project-local helper module; supplies load_and_merge_lib_parquets() used in the loading cell.
import nanoporePipelineCommon as npCommon

# Notebook-flavoured progress bars and pretty-printing for the subsampling loop.
from tqdm.notebook import tqdm
from pprint import pprint

# --- Plotting libraries ---
import seaborn as sea
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
print("imports done!")

imports done!


In [22]:
# Libraries whose sequencing saturation is assessed in this notebook.
libs_to_get = ["polyA2", "polyA3", "totalRNA2", "totalRNA3"]

# Load and merge the per-library parquet files through the shared pipeline helper.
# Returns a per-read frame and a per-gene ("compressed") frame; only the per-read
# frame is used by the cells that follow.
reads_df_genes_raw, compressed_df_genes_raw = npCommon.load_and_merge_lib_parquets(
    libs_to_get,
    drop_sub_n=1,
    add_tail_groupings=False,
    drop_failed_polya=False,
    group_by_t5=False,
    # NOTE(review): presumably keeps reads without a gene assignment, which the
    # total-read counts need -- confirm against nanoporePipelineCommon.
    drop_unassigned=False,
)
print("library load done.")


# lib_path_dict = npCommon.pick_libs_return_paths_dict(["polyA2", "polyA3", "totalRNA2", "totalRNA3"])
# lib_dict_raw = {lib: pd.read_parquet(path) for lib, path in lib_path_dict.items()}
# print("loading done!")

Loading readAssignments file from: /data16/marcus/genomes/elegansRelease100/Caenorhabditis_elegans.WBcel235.100.allChrs.parquet... Done.
Looking for files for libraries: ['polyA2', 'polyA3', 'totalRNA2', 'totalRNA3']
Looking for file for totalRNA2, at /data16/marcus/working/210720_nanoporeRun_totalRNA_0639_L3_replicate/output_dir/merge_files/*_mergedOnReads.parquet... File Found.
Looking for file for polyA2, at /data16/marcus/working/210719_nanoporeRun_polyA_0639_L3_replicate/output_dir/merge_files/*_mergedOnReads.parquet... File Found.
Looking for file for polyA3, at /data16/marcus/working/220131_nanoporeRun_polyA_0639_L3_third/output_dir/merge_files/*_mergedOnReads.parquet... File Found.
Looking for file for totalRNA3, at /data16/marcus/working/220131_nanoporeRun_totalRNA_0639_L3_third/output_dir/merge_files/*_mergedOnReads.parquet... File Found.
Loading parquet for totalRNA2 lib... Done.
'original_chr_pos' column already found in dataframe, skipping adjustment for 5'ends!
Loading pa

Counting reads per gene: 100%|██████████| 53363/53363 [00:00<00:00, 67057.32it/s] 


Gene counts pre sub-1 gene_hits drop:  53363
Gene counts post sub-1 gene_hits drop:  53363
library load done.


### First rough attempt:
Taking advantage of the idea that the joshMethod ONLY assigns reads to protein coding genes, so if it's assigned: it's protein coding!

In [24]:
# Work on a copy so the frame returned by the loader is never modified.
reads_df = reads_df_genes_raw.copy()
# Group the reads by their library label once, then pull out one sub-frame per
# requested library (an alternative to masking with `reads_df.lib == lib`).
group_df = reads_df.groupby(reads_df.lib)
lib_dict_raw = {lib_name: group_df.get_group(lib_name) for lib_name in libs_to_get}

In [25]:
# Independent copies per library, so the columns added below never reach lib_dict_raw.
lib_dict = {lib: df.copy() for lib, df in lib_dict_raw.items()}
for lib, df in lib_dict.items():
    # A read counts as "assigned" when it carries a gene name.
    df['assigned_by_joshMethod'] = df.gene_name.notna()
    # The featureCounts-derived column is optional: when it is missing, report the
    # library together with the columns it does have instead of failing.
    if 'gene_name_fromFeatureCounts' in df.columns:
        df['assigned_by_featureCounts'] = df['gene_name_fromFeatureCounts'].notna()
    else:
        print(lib, df.columns)
    # First rough attempt: any read assigned by the joshMethod is treated as protein coding.
    df['protein_coding'] = df['assigned_by_joshMethod']

polyA2 Index(['lib', 'read_id', 'chr_id', 'chr_pos', 'original_chr_pos', 'gene_id', 'gene_name', 'cigar', 'sequence', 'polya_length', 'strand', 'read_length', 'assigned_by_joshMethod'], dtype='object')
polyA3 Index(['lib', 'read_id', 'chr_id', 'chr_pos', 'original_chr_pos', 'gene_id', 'gene_name', 'cigar', 'sequence', 'polya_length', 'strand', 'read_length', 'assigned_by_joshMethod'], dtype='object')
totalRNA2 Index(['lib', 'read_id', 'chr_id', 'chr_pos', 'original_chr_pos', 'gene_id', 'gene_name', 'cigar', 'sequence', 'polya_length', 'strand', 'read_length', 'assigned_by_joshMethod'], dtype='object')
totalRNA3 Index(['lib', 'read_id', 'chr_id', 'chr_pos', 'original_chr_pos', 'gene_id', 'gene_name', 'cigar', 'sequence', 'polya_length', 'strand', 'read_length', 'assigned_by_joshMethod'], dtype='object')


In [40]:
# Subsample every library at evenly spaced fractions of its reads and record, for
# each subsample, the number of reads drawn and the number of distinct genes seen.
#
# Result: output_array_dict[lib] = (total_reads_array, protein_coding_array), both
# of shape [number_of_subsamples_per_step, number_of_steps]. Column j holds the
# subsamples drawn at fraction (j + 1) / number_of_steps, so the last column is
# the full library.
number_of_subsamples_per_step = 100
number_of_steps = 100
# Fixed seed so that re-running the cell reproduces the same curves.
subsample_seed = 0

if number_of_steps < 1 or number_of_subsamples_per_step < 1:
    raise ValueError("number_of_steps and number_of_subsamples_per_step must both be >= 1")

output_array_dict = {}
# One generator shared by every draw: subsamples differ from each other, but the
# whole sequence of draws is repeatable from subsample_seed.
subsample_rng = np.random.RandomState(subsample_seed)

for lib, df in lib_dict.items():
    simple_df = df[["read_id", "protein_coding", "gene_id"]]

    total_reads_array = np.zeros([number_of_subsamples_per_step, number_of_steps], np.uint32)
    protein_coding_array = np.zeros([number_of_subsamples_per_step, number_of_steps], np.uint32)

    # Walk from the full library down to the smallest fraction. Deriving the fraction
    # from the column index (rather than from a percentage stepped by
    # 100 // number_of_steps) keeps the indexing valid for any positive number_of_steps.
    library_iterator = tqdm(range(number_of_steps - 1, -1, -1), desc=f"Subsampling for {lib}")
    for y_index in library_iterator:
        fraction = (y_index + 1) / number_of_steps
        subsample_iterator = tqdm(range(number_of_subsamples_per_step), desc=f"Calculating subsamples for fractional size: {fraction}", leave=False)
        for x_index in subsample_iterator:
            # Named separately from `df` so the library frame is not shadowed.
            subsample_df = simple_df.sample(frac=fraction, random_state=subsample_rng)
            total_reads_array[x_index, y_index] = subsample_df.shape[0]
            # NOTE: despite the array name, this is the number of DISTINCT gene_ids in
            # the subsample (missing gene_ids are not counted), not the number of
            # protein-coding reads. Use `subsample_df.protein_coding.sum()` for a read count.
            protein_coding_array[x_index, y_index] = subsample_df.gene_id.nunique()
    output_array_dict[lib] = (total_reads_array, protein_coding_array)
pprint(output_array_dict)

Subsampling for polyA2:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 1.0:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.99:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.98:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.97:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.96:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.95:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.94:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.93:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.92:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.91:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.9:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.89:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.88:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.87:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.86:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.85:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.84:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.83:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.82:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.81:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.8:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.79:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.78:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.77:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.76:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.75:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.74:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.73:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.72:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.71:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.7:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.69:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.68:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.67:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.66:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.65:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.64:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.63:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.62:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.61:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.6:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.59:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.58:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.57:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.56:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.55:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.54:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.53:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.52:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.51:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.5:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.49:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.48:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.47:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.46:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.45:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.44:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.43:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.42:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.41:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.4:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.39:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.38:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.37:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.36:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.35:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.34:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.33:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.32:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.31:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.3:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.29:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.28:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.27:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.26:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.25:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.24:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.23:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.22:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.21:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.2:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.19:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.18:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.17:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.16:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.15:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.14:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.13:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.12:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.11:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.1:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.09:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.08:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.07:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.06:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.05:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.04:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.03:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.02:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.01:   0%|          | 0/100 [00:00<?, ?it/s]

Subsampling for polyA3:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 1.0:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.99:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.98:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.97:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.96:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.95:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.94:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.93:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.92:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.91:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.9:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.89:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.88:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.87:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.86:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.85:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.84:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.83:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.82:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.81:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.8:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.79:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.78:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.77:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.76:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.75:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.74:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.73:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.72:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.71:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.7:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.69:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.68:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.67:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.66:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.65:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.64:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.63:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.62:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.61:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.6:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.59:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.58:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.57:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.56:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.55:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.54:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.53:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.52:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.51:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.5:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.49:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.48:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.47:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.46:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.45:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.44:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.43:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.42:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.41:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.4:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.39:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.38:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.37:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.36:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.35:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.34:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.33:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.32:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.31:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.3:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.29:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.28:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.27:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.26:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.25:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.24:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.23:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.22:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.21:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.2:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.19:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.18:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.17:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.16:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.15:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.14:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.13:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.12:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.11:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.1:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.09:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.08:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.07:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.06:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.05:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.04:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.03:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.02:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.01:   0%|          | 0/100 [00:00<?, ?it/s]

Subsampling for totalRNA2:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 1.0:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.99:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.98:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.97:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.96:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.95:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.94:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.93:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.92:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.91:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.9:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.89:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.88:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.87:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.86:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.85:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.84:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.83:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.82:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.81:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.8:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.79:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.78:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.77:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.76:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.75:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.74:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.73:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.72:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.71:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.7:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.69:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.68:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.67:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.66:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.65:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.64:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.63:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.62:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.61:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.6:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.59:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.58:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.57:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.56:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.55:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.54:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.53:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.52:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.51:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.5:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.49:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.48:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.47:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.46:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.45:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.44:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.43:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.42:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.41:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.4:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.39:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.38:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.37:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.36:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.35:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.34:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.33:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.32:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.31:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.3:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.29:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.28:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.27:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.26:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.25:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.24:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.23:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.22:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.21:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.2:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.19:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.18:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.17:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.16:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.15:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.14:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.13:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.12:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.11:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.1:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.09:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.08:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.07:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.06:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.05:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.04:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.03:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.02:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.01:   0%|          | 0/100 [00:00<?, ?it/s]

Subsampling for totalRNA3:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 1.0:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.99:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.98:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.97:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.96:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.95:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.94:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.93:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.92:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.91:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.9:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.89:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.88:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.87:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.86:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.85:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.84:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.83:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.82:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.81:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.8:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.79:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.78:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.77:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.76:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.75:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.74:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.73:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.72:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.71:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.7:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.69:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.68:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.67:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.66:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.65:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.64:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.63:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.62:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.61:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.6:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.59:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.58:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.57:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.56:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.55:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.54:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.53:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.52:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.51:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.5:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.49:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.48:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.47:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.46:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.45:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.44:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.43:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.42:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.41:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.4:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.39:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.38:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.37:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.36:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.35:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.34:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.33:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.32:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.31:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.3:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.29:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.28:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.27:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.26:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.25:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.24:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.23:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.22:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.21:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.2:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.19:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.18:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.17:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.16:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.15:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.14:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.13:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.12:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.11:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.1:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.09:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.08:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.07:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.06:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.05:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.04:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.03:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.02:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating subsamples for fractional size: 0.01:   0%|          | 0/100 [00:00<?, ?it/s]

{'polyA2': (array([[  13380,   26760,   40140, ..., 1311244, 1324624, 1338004],
       [  13380,   26760,   40140, ..., 1311244, 1324624, 1338004],
       [  13380,   26760,   40140, ..., 1311244, 1324624, 1338004],
       ...,
       [  13380,   26760,   40140, ..., 1311244, 1324624, 1338004],
       [  13380,   26760,   40140, ..., 1311244, 1324624, 1338004],
       [  13380,   26760,   40140, ..., 1311244, 1324624, 1338004]],
      dtype=uint32),
            array([[ 2755,  4174,  4979, ..., 13653, 13669, 13678],
       [ 2646,  4032,  5048, ..., 13647, 13661, 13678],
       [ 2662,  4128,  5063, ..., 13649, 13666, 13678],
       ...,
       [ 2706,  4068,  5021, ..., 13644, 13666, 13678],
       [ 2674,  4112,  5045, ..., 13631, 13664, 13678],
       [ 2811,  4104,  5081, ..., 13643, 13665, 13678]], dtype=uint32)),
 'polyA3': (array([[  17482,   34964,   52447, ..., 1713260, 1730742, 1748224],
       [  17482,   34964,   52447, ..., 1713260, 1730742, 1748224],
       [  17482,   34

In [34]:
# Collapse the per-library subsampling replicates (axis 0) into one curve per library:
# the x positions, the mean y value at each position, and the spread around that mean.
variability_function_dict = dict(SEM=stats.sem, STD=np.std)
variability_function_to_use = "SEM"
selected_variability_function = variability_function_dict[variability_function_to_use]

# Each value is a (total_reads_counts, protein_coding_mean, protein_coding_variability) tuple.
plotting_dict = {
    lib: (
        np.mean(total_reads, axis=0),  # This just collapses the total reads, since they're consistent
        np.mean(protein_coding_reads, axis=0),
        selected_variability_function(protein_coding_reads, axis=0),
    )
    for lib, (total_reads, protein_coding_reads) in output_array_dict.items()
}

In [76]:
# Build and export the saturation plot: one curve per library (x = total reads,
# y = mean of the subsampled counts), plus a square marker on the final point of each curve.

# Internal library identifiers -> display names used in the legend.
library_name_conversion = {
    "polyA3": "Selected-1",
    "polyA2": "Selected-2",
    "totalRNA3": "Unselected-1",
    "totalRNA2": "Unselected-2"}

# Set to True to draw a shaded "mean +/- variability" band around every curve.
# Off by default, which reproduces the plain-line figure.
show_variability_band = False

color_palette = px.colors.qualitative.G10

plot_list = []
for index, (lib, (x, y, sem_or_std)) in enumerate(plotting_dict.items()):
    # Fall back to the raw key so a library missing from the map does not raise KeyError.
    lib = library_name_conversion.get(lib, lib)
    # Wrap around the palette so having more libraries than colours does not raise IndexError.
    lib_color = color_palette[index % len(color_palette)]

    # Main curve for this library.
    plot_list.append(
        go.Scatter(
            name=f"{lib}",
            legendgroup=lib,  # keeps every trace of this library toggling together in the legend
            x=x,
            y=y,
            mode="lines+markers",
            marker=dict(size=2, symbol="circle"),
            line=dict(color=lib_color)
        )
    )

    if show_variability_band:
        # Invisible upper bound; the lower bound that follows fills up to it ('tonexty').
        plot_list.append(
            go.Scatter(
                name=f"{lib} Mean + {variability_function_to_use}",
                legendgroup=lib,
                x=x,
                y=y + sem_or_std,
                mode="lines",
                marker=dict(color="#444"),
                line=dict(width=0),
                showlegend=False,
            )
        )
        plot_list.append(
            go.Scatter(
                name=f"{lib} Mean - {variability_function_to_use}",
                legendgroup=lib,
                x=x,
                y=y - sem_or_std,
                mode="lines",
                marker=dict(color="#444"),
                line=dict(width=0),
                fillcolor='rgba(68, 68, 68, 0.3)',
                fill='tonexty',
                showlegend=False,
            )
        )

    # Report the last point of the curve (the largest x value plotted for this library).
    print(lib, x[-1], y[-1])
    # Highlight that last point with a larger square; hidden from the legend but grouped
    # with its curve so it disappears when the curve is toggled off.
    plot_list.append(
        go.Scatter(
            name=f"{lib}",
            legendgroup=lib,
            x=x[-1:],
            y=y[-1:],
            mode="markers",
            marker=dict(size=7, symbol="square", color=lib_color),
            showlegend=False
        )
    )


fig = go.Figure(plot_list)

fig.update_layout(
    yaxis_title='Number of Unique<br>Genes Sequenced',
    xaxis_title='Total Number of Reads',
    title=f'<b>Read depth vs. Protein Coding</b><br>Subsamples per point: {number_of_subsamples_per_step}',
    hovermode="x",
    template="plotly_white",
    width=500, height=375,
    legend=dict(orientation='v',
                yanchor="bottom",
                y=0.02,
                xanchor="right",
                x=1,
                tracegroupgap=0,  # legend groups add vertical gaps by default; keep entries tightly stacked
                ),
)

# NOTE(review): hard-coded, machine-specific absolute path; write_image will fail on any
# machine where this directory does not exist. Consider a configurable output directory.
output_svg_path = f"/home/marcus/Insync/mviscard@ucsc.edu/Google Drive/insync_folder/polyAvsTotalRNA_ReviewEditsAndPlots/final_SVGs/{npCommon.get_dt(for_file=True)}_subsampling.svg"
fig.write_image(output_svg_path)
fig.show()

Selected-2 1338004.0 13678.0
Selected-1 1748224.0 13812.0
Unselected-2 699091.0 12577.0
Unselected-1 1230670.0 13296.0


0 polyA2             lib                               read_id chr_id   chr_pos  original_chr_pos         gene_id  gene_name                                              cigar  \
1259949  polyA2  f1877eb9-abed-4b95-af5f-736668b84278      I     10172              4110  WBGene00022277     homt-1  5S6M1D27M1D7M1D12M1D38M1D35M2I25M1D30M1D18M1I1...   
1259950  polyA2  990236cc-9f2e-478c-acd6-6efa900d4023      I     10230              4115  WBGene00022277     homt-1  8S50M2D9M1D56M2D2M1I28M2D24M1I17M1I23M1I17M2D9...   
1259951  polyA2  89829750-b0f2-4ebf-b669-6d0be010703e      I     10227              4115  WBGene00022277     homt-1  4S13M2D14M1D4M3D14M1I10M1I58M1I10M1D36M3D6M2D4...   
1259952  polyA2  289f00a3-cdd4-4586-a7ac-d0e84e44f01a      I     10178              4115  WBGene00022277     homt-1  72S21M1D5M2D10M3D5M2D6M1I16M1I18M1D19M3D13M2I1...   
1259953  polyA2  d276a9e2-817f-400e-a908-8e3ddf2c4d18      I     10233              4118             NaN        NaN  67S34M1I6M2D5M3D15M2I12M