# Short-read barcode extraction and analysis

In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
import seaborn as sns
import os

# Custom colour cycle used for every seaborn/matplotlib figure in this notebook.
pioneer_colors = [
    '#FF8633',
    '#423759',
    '#314942',
    '#FFA632',
    '#F7F3ED',
]

# seaborn: dark grid background, drawing with the palette defined above.
sns.set_style('darkgrid')
sns.set_palette(sns.color_palette(pioneer_colors))

# plotly: swap the 'seaborn' template's colorway for the qualitative "Bold"
# sequence, then make that template the default for all figures.
pio.templates['seaborn'].layout.colorway = px.colors.qualitative.Bold
pio.templates.default = 'seaborn'


## Setup and load results

### Load results

In [6]:
# Per-sample summary statistics.
#   * If barcode correction was run, this table already holds the number of
#     unique barcodes per sample.
#   * Without correction, the number of unique barcodes can be loaded from the
#     "all_uniq_barcodes" file instead.
#   * Sample metadata can be added by joining on the 'Sample' column.
stats = pd.read_csv(filepath_or_buffer='all_stats.csv')

# Barcode frequencies; this is the filtered version (counts > 5).
counts = pd.read_csv(filepath_or_buffer='all_barcodes_freq_5.csv')

### Summary  

In [None]:
# Show the summary table with two-digit precision and "," as the thousands
# separator.
stats.style.format(
    precision=2,
    thousands=',',
)

In [14]:
# Names of the `stats` columns that the bar charts below pick out of the
# melted table, split into two groups so each group gets its own figure.
total_columns = [
    'Total reads',
    'Barcodes extracted',
    'Barcodes passed size filters',
]
barcode_columns = [
    'Unique Barcodes',
    'Num True Barcodes',
    'Barcodes Corrected',
]

In [None]:
# Grouped bar chart: one cluster per sample, one bar per `total_columns` entry.
# `stats` is reshaped to long form (Sample / variable / value) and only the
# rows whose variable is listed in `total_columns` are kept.
px.bar(
    stats.melt(id_vars='Sample').loc[
        lambda long_stats: long_stats['variable'].isin(total_columns)
    ],
    x='Sample',
    y='value',
    color='variable',
    barmode='group',
    labels={'value': 'Count', 'variable': ''},
)

In [None]:
# Grouped bar chart: one cluster per sample, one bar per `barcode_columns`
# entry. `stats` is reshaped to long form (Sample / variable / value) and only
# the rows whose variable is listed in `barcode_columns` are kept.
px.bar(
    stats.melt(id_vars='Sample').loc[
        lambda long_stats: long_stats['variable'].isin(barcode_columns)
    ],
    x='Sample',
    y='value',
    color='variable',
    barmode='group',
    labels={'value': 'Count', 'variable': ''},
)

## Barcode distribution

In [None]:
# Wrap the facets after half the number of distinct samples (rounded up), so
# the panels occupy at most two rows.
ncols = int(np.ceil(counts['sample'].unique().size / 2))

# Histogram of the `n` column (presumably the per-barcode count; confirm
# against the input file), one facet per sample, with a log-scaled y axis.
px.histogram(
    counts,
    x='n',
    facet_col='sample',
    facet_col_wrap=ncols,
    log_y=True,
)