# MRSA workflow notebook

In this notebook we will run a Snakemake workflow and explore its results.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
%config InlineBackend.figure_format = 'svg'
%matplotlib inline

In [None]:
def read_cov_tables(files):
    """Read per-sample coverage tables into a single DataFrame.

    Each file is parsed as a headerless, tab-separated table (the format
    written by ``bedtools genomecov``). Of its columns, the second is used
    as the ``position`` index and the third as the value column, which is
    named after the sample.

    The sample name is the file's basename with a trailing
    ``.sorted.bam.cov.gz`` removed; basenames without that suffix are used
    unchanged.

    Args:
        files: Iterable of paths to coverage files.

    Returns:
        A DataFrame indexed by position with one column per input file.
        Tables are combined with an outer join, so positions missing from
        a file are NaN in that file's column. An empty DataFrame is
        returned when ``files`` is empty.
    """
    import os

    suffix = ".sorted.bam.cov.gz"
    df = pd.DataFrame()
    for i, f in enumerate(files):
        sample = os.path.basename(f)
        # NOTE: str.rstrip() takes a *set of characters*, not a suffix, so it
        # would also eat trailing sample-name characters such as "a", "s" or
        # "t" (e.g. "sample_a" -> "sample_"). Slice the literal suffix off.
        if sample.endswith(suffix):
            sample = sample[: -len(suffix)]
        _df = pd.read_csv(
            f,
            sep="\t",
            header=None,
            index_col=0,  # relative to usecols, i.e. the "position" column
            usecols=[1, 2],
            names=["position", sample],
        )
        if i == 0:
            df = _df
        else:
            # Outer join keeps positions that are present in only some files.
            df = pd.merge(df, _df, left_index=True, right_index=True, how="outer")
    return df

def sliding_window(df, window=10000):
    """Sum the rows of ``df`` in consecutive, non-overlapping windows.

    The index range ``[df.index[0], df.index[-1]]`` is cut at
    ``start + window``, ``start + 2 * window``, ... and each window is
    labelled by its last position. The remaining rows up to and including
    the last index value form a final window labelled by that value.

    The first window covers ``[start, start + window]``; every later window
    starts one position after the previous window's end, so each row is
    counted exactly once and the window sums add up to ``df.sum()``.

    Args:
        df: DataFrame whose index holds integer positions. The index is
            sliced by label, so it is expected to be sorted.
        window: Window size in positions; must be a positive integer.

    Returns:
        A DataFrame indexed by window end position with the same columns as
        ``df``, holding the per-window column sums. An empty DataFrame (with
        ``df``'s columns) is returned when ``df`` has no rows.

    Raises:
        ValueError: If ``window`` is not positive.
    """
    if window <= 0:
        raise ValueError("window must be a positive integer")
    if len(df.index) == 0:
        return pd.DataFrame(columns=df.columns)

    start, end = df.index[0], df.index[-1]
    windows = {}
    lower = start
    # Interior window ends; the stop value keeps them strictly below `end`.
    for upper in range(start + window, end, window):
        windows[upper] = df.loc[lower:upper].sum().to_dict()
        # .loc label slices include both endpoints, so begin the next window
        # one position later to avoid counting the boundary row twice.
        lower = upper + 1
    # Final (possibly shorter) window. It always exists because every
    # interior window end is strictly smaller than `end`; for a single-row
    # frame it is the only window.
    windows[end] = df.loc[lower:end].sum().to_dict()
    return pd.DataFrame(windows).T