In [None]:
# %load ../snippets/basic_settings.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from pathlib import Path
import seaborn as sns
import sys
import plotly.express as px
import yaml

# Notebook-wide defaults for seaborn, pandas display and matplotlib.
sns.set_context("notebook", font_scale=1.1)

# pandas: show up to 100 columns/rows; format floats with thousands
# separators and two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)
pd.set_option('display.float_format', '{:,.2f}'.format)

# matplotlib: large figures, high-resolution exports, serif fonts.
plt.rcParams.update({
    "figure.figsize": (16, 12),
    'savefig.dpi': 200,
    'figure.autolayout': False,
    'axes.labelsize': 18,
    'axes.titlesize': 20,
    'font.size': 16,
    'lines.linewidth': 2.0,
    'lines.markersize': 8,
    'legend.fontsize': 14,
    'text.usetex': False,  # True activates latex output in fonts!
    'font.family': "serif",
    'font.serif': "cm",
})

In [None]:
import subprocess
import shlex

In [None]:
# Path to the project configuration, relative to the notebook's working directory.
config_file = "../nguyenb_config.yaml"
with open(config_file) as file:
    # FullLoader converts the YAML document into native Python objects;
    # the cells below index the result like a nested dictionary.
    # NOTE(review): FullLoader can construct more than plain data types. If the
    # config only holds scalars, lists and mappings, yaml.safe_load(file) is the
    # safer choice -- confirm before switching.
    configs = yaml.load(file, Loader=yaml.FullLoader)

In [None]:
# Run locally:
# Take the 'local' entries of the config for the project root (as a Path)
# and for the scratch directory (kept as loaded from the config).
root = Path(configs['root']['local'])
scratchDir = configs['scratchDir']['local']

In [None]:
# Run on server: uncomment the two assignments below to use the server paths instead of the local ones.
#root = Path(configs['root']['server'])
#scratchDir = configs['scratchDir']['server']

# Set up data files

In [None]:
# Resolve the working directories named in the config against the project root.
mapDir = root / configs['mapDir']
countDir = root / configs['countDir']
resultDir = root / configs['resultDir']

# The original sample sheet has no header row: name its columns in file order,
# then rearrange them so that sampleID comes first.
sampleData = (
    pd.read_table(root / configs['sampleDataOriginal'], header=None)
    .set_axis(
        ['demux_code', 'library', 'experiment', 'mouse', 'day', 'tissue', 'dnaid', 'sampleID'],
        axis=1,
    )
    .loc[:, ['sampleID', 'mouse', 'day', 'tissue', 'experiment', 'dnaid', 'library', 'demux_code']]
)

In [None]:
# Save the reordered sample sheet as CSV under `root`, without the index column.
sampleData.to_csv(root/"14-04-22-sample-data.csv", index=False)

In [None]:
# Show the sample table as the cell output.
sampleData

# Merge counts by library

In [None]:
# Merge the per-sample count tables of each library into one table with `mbarq merge`.
failed_libraries = []
for library, df in sampleData.groupby('library'):
    print(library)
    # All count files of the library go into one comma-separated argument.
    files = ",".join(str(countDir/f"{f}_mbarq_counts.csv") for f in df.sampleID.values)
    # Pass the arguments as a list instead of formatting a string and splitting it
    # with shlex: paths or library names containing spaces, quotes or backslashes
    # then reach mbarq unchanged.
    cmd = ["mbarq", "merge", "-i", files, "-a", "Name",
           "-o", str(root/'counts_by_library'), "-n", str(library)]
    returncode = subprocess.call(cmd)
    if returncode != 0:
        # Keep going so the remaining libraries are still merged, but record the failure.
        print(f"mbarq merge failed for library {library} (exit code {returncode})")
        failed_libraries.append(library)

# Surface failed merges instead of silently discarding the exit codes.
if failed_libraries:
    raise RuntimeError(f"mbarq merge failed for libraries: {failed_libraries}")

# Merge all library maps

In [None]:
# Collect the annotated map files in mapDir (file name contains 'annotated.csv').
# Path.iterdir() yields entries in arbitrary order, so sort the paths to keep the
# row order of the concatenated map reproducible between runs and machines.
maps = sorted(f for f in mapDir.iterdir() if 'annotated.csv' in f.name)

In [None]:
# Load every annotated map, tag its rows with the library it came from
# (the part of the file name before the first '.'), and stack them into one frame.
fmap = pd.concat(
    [
        pd.read_csv(map_path).assign(library=map_path.stem.split('.')[0])
        for map_path in maps
    ]
)

In [None]:
# Write the combined map into mapDir, next to the per-library maps, without the index column.
fmap.to_csv(mapDir/"14-04-22-concatenated_map.csv", index=False)