# DBS-Pro Analysis Report

## Data processing
### Load data

In [None]:
%matplotlib inline
# Imports
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def _load_counts(path):
    """Read a tab-separated count table and index it by its "BC" column."""
    return pd.read_csv(path, sep="\t").set_index("BC")


# Count tables: one row per "BC" value, one column per target
df_umi = _load_counts("umi_counts.tsv")
df_reads = _load_counts("read_counts.tsv")

# Target names are the column labels of the UMI table
target_names = df_umi.columns.values

### Merge data

In [None]:
# Reshape each wide table (one column per target) into long form with one row
# per (BC, Target) pair, then join the two on that pair so every row carries
# both its UMI and its read count.
df_umi_melt = df_umi.reset_index().melt(
    id_vars="BC", var_name="Target", value_name="UMI"
)
df_reads_melt = df_reads.reset_index().melt(
    id_vars="BC", var_name="Target", value_name="Reads"
)
df = df_umi_melt.merge(df_reads_melt, on=["BC", "Target"])
df.head()

### Filter data

In [None]:
# Rows are kept only when their read count is strictly above this value
FILTER_THRESHOLD = 1

has_enough_reads = df["Reads"] > FILTER_THRESHOLD
df_filt = df.loc[has_enough_reads]
df_filt.head()

## Results

### Box plots

In [None]:
# Keep the Axes returned by seaborn in `ax`; chaining .set_title() onto the
# call would bind `ax` to the title's Text object instead.
ax = sns.boxplot(data=df, x="Target", y="UMI")
ax.set_title("UMI count (Unfiltered)");

In [None]:
# Keep the Axes returned by seaborn in `ax`; chaining .set_title() onto the
# call would bind `ax` to the title's Text object instead.
ax = sns.boxplot(data=df_filt, x="Target", y="UMI")
ax.set_title("UMI count (Filtered)");

In [None]:
# Keep the Axes returned by seaborn in `ax`; chaining .set_title() onto the
# call would bind `ax` to the title's Text object instead.
ax = sns.boxplot(data=df, x="Target", y="Reads")
ax.set_title("Read count (Unfiltered)");

In [None]:
# Keep the Axes returned by seaborn in `ax`; chaining .set_title() onto the
# call would bind `ax` to the title's Text object instead.
ax = sns.boxplot(data=df_filt, x="Target", y="Reads")
ax.set_title("Read count (Filtered)");

### Paired plots

In [None]:
def pair_heatmap(df, title=None):
    """Draw a pair grid of histograms for every column of ``df``.

    Diagonal panels show a 20-bin histogram of a single column. Off-diagonal
    panels show a 20-bin 2D histogram of a column pair, coloured on a
    logarithmic scale.

    Based on https://stackoverflow.com/questions/43924280/pair-plot-with-heat-maps-possibly-logarithmic

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are plotted against each other; it is passed
        unchanged to ``sns.PairGrid``.
    title : str, optional
        Figure title. When omitted, no title is drawn and no headroom is
        reserved for it.

    Returns
    -------
    None
        The figure is produced as a side effect. The seaborn style is also
        switched to "white" globally via ``sns.set``.
    """
    from matplotlib.colors import LogNorm

    def pairgrid_heatmap(x, y, **kws):
        # The grid is expected to pass a `color` keyword; hist2d takes a
        # colormap instead, so build a light palette from that colour.
        cmap = sns.light_palette(kws.pop("color"), as_cmap=True)
        # cmin=1 blanks out empty bins so the log scale never sees a zero.
        plt.hist2d(x, y, cmap=cmap, cmin=1, **kws)

    sns.set(style="white")
    g = sns.PairGrid(df)
    g.map_diag(plt.hist, bins=20)
    # NOTE(review): one LogNorm instance is shared by every off-diagonal
    # panel; presumably the first panel drawn fixes the colour range for the
    # rest -- confirm that a shared scale is what is wanted.
    g.map_offdiag(pairgrid_heatmap, bins=20, norm=LogNorm())

    # Only make room for, and draw, a title when one was actually given.
    if title is not None:
        # Adjust the grid's own figure rather than whatever pyplot considers
        # the current figure.
        g.fig.subplots_adjust(top=0.9)
        g.fig.suptitle(title)

In [None]:
# Pairwise UMI heatmaps drawn straight from the unfiltered count table
pair_heatmap(df=df_umi, title="UMI count (Unfiltered)")

In [None]:
# Pairwise UMI heatmaps after filtering: reshape the long filtered table back
# to one row per BC and one column per target, with 0 where the (BC, Target)
# pair is absent from the filtered table.
df_filt_umi = (
    df_filt
    .pivot(index="BC", columns="Target", values="UMI")
    .fillna(0)
)
pair_heatmap(df_filt_umi, title="UMI count (Filtered)")

In [None]:
# Pairwise read-count heatmaps drawn straight from the unfiltered count table
pair_heatmap(df=df_reads, title="Read count (Unfiltered)")

In [None]:
# Pairwise read-count heatmaps after filtering: reshape the long filtered
# table back to one row per BC and one column per target, with 0 where the
# (BC, Target) pair is absent from the filtered table.
df_filt_reads = (
    df_filt
    .pivot(index="BC", columns="Target", values="Reads")
    .fillna(0)
)
pair_heatmap(df_filt_reads, title="Read count (Filtered)")