# Introduction

We want to see how the pseudo-mappers work on our C1 data. There are enough runs that it would be annoying to generate the submit files by hand.

In [1]:
import pandas
from pathlib import Path
import os
import sys
from glob import glob
import re

In [2]:
# Put the local long-rna-seq-condor checkout on sys.path (only once, so
# re-running the cell does not keep appending it). The woldrnaseq imports
# below come after the path is added; presumably the package is found
# through this checkout -- verify if it is also installed elsewhere.
LRSC = str(Path('~/proj/long-rna-seq-condor').expanduser())
if LRSC not in sys.path:
    sys.path.append(LRSC)
from woldrnaseq.models import load_experiments, load_library_tables
from woldrnaseq.common import find_fastqs

In [3]:
# Fail early when Sean's home directory is not visible from this runtime
# (for example when the singularity bind mount was forgotten); later cells
# read fastq files from below it.
sau_home = '/woldlab/castor/home/sau'
# isdir() is checked first because os.listdir() raises FileNotFoundError on a
# missing path, which would hide the hint below.
if not os.path.isdir(sau_home) or not os.listdir(sau_home):
    # `raise('...')` with a plain string is itself a TypeError, so raise a
    # real exception type that carries the message.
    raise RuntimeError('Runtime makes sure you have Seans home directory mounted in singularity -B <src>:<dest>')

In [4]:
# Root directory of the combined C1 mouse limb analysis. The leading "~" is
# not expanded at this point.
c1_remap_root = Path('~', 'proj', 'C1_mouse_limb_combined', 'all_analysis_M21')

# Calculate kallisto fragment and sd parameters

From C1_Fluidigm_BioAnalyzer_avgcDNA_and_CV_October14_2020.xlsx

In [5]:
# One row per measurement: the run number, the average cDNA length and its
# coefficient of variation in percent (columns named at the end of the call).
sampled = pandas.DataFrame([[3,770,78.9],
[3,1164,59.6],
[3,1214,59.5],
[3,1132,62.4],
[3,1144,63.3],
[4,2099,75.9],
[4,1819,81],
[4,1700,92.1],
[4,1835,82.7],
[4,1699,100],
[4,1883,95.6],
[4,1955,84.6],
[4,1723,93.8],
[4,1900,84.7],
[4,1919,81.7],
[5,1947,81.3],
[5,2052,81.2],
[5,1812,81.7],
[5,1622,88.6],
[5,1927,81.6],
[5,1846,85.4],
[5,1806,85.3],
[5,1911,84.2],
[6,1856,79.1],
[6,1770,76.7],
[6,1558,85.7],
[6,1115,91.5],
[6,1853,78.3],
[6,1838,79.9],
[6,1794,79.2],
[6,1767,80.6],
[6,1782,77.6],
[6,1791,77.9],
[7,1870,71.6],
[7,1860,74.4],
[7,1900,71.2],
[7,1883,72.3],
[7,1764,76.6],
[7,1885,74.3],
[7,1641,82.2],
[7,1800,74.6],
[7,1882,74.1],
[7,1480,87.5],
[8,1270,80.4],
[8,1808,71.9],
[8,1867,70.5],
[8,1864,68.3],
[8,1902,68.2],
[8,1822,76.7],
[8,1375,83.7],
[8,1932,70.8],
[8,1954,71.3],
[9,1486,69.3],
[9,1475,71.5],
[9,1555,69.8],
[9,1622,66.3],
[9,1496,54.3],
[9,1495,65.5],
[9,1298,78.1],
[9,916,82],
[9,1360,79.2],
[9,1573,78.3],
[10,1814,60.9],
[10,1736,62.8],
[10,1776,63.9],
[10,1798,62.8],
[10,1774,62],
[10,1795,64.8],
[10,1710,65.7],
[10,1671,59],
[11,1658,62.1],
[11,1777,64.1],
[11,1726,64.3],
[11,1798,62],
[11,1711,61.2],
[11,1761,61.2],
[11,1696,58.3],
[11,1863,62.5],
[12,1804,61.6],
[12,1720,65.6],
[12,1657,64.9],
[12,1752,63.6],
[12,1788,62.9],
[12,1549,63.9],
[12,1800,61.8],
[12,1736,62.4],
[12,1441,64.7],
[13,1729,63.5],
[13,1796,61.4],
[13,1758,63.5],
[13,1783,64.7],
[13,1660,63.5],
[13,1841,65.8],
[14,1760,63.6],
[14,1762,64.5],
[14,1912,64.6],
[14,1832,62.1],
[14,1792,65.4],
[14,1755,62.4],
[14,1820,62.4],
[14,1819,66.9],
[14,1280,70.7],
[14,1686,65.9],
[15,1261,73.6],
[15,1768,67.1],
[15,1921,63.9],
[15,1977,64],
[15,1457,85.3],
[15,1812,67.6],
[15,845,50.4],
[15,931,69.5],
[16,1760,66.7],
[16,1785,66.5],
[16,1907,65],
[16,1885,67.8],
[16,1904,62.6],
[17,1753,64.9],
[17,1705,67.3],
[17,1620,67.8],
[17,1941,67.3],
[17,1848,68.9],
[17,1581,68.7],
[17,1914,67.2],], columns=['Run','avg_cDNA','CV%'])
# Convert the percentage coefficient of variation into a standard deviation
# in the same units as avg_cDNA: sd = (CV% / 100) * mean.
sampled['sd'] = sampled['CV%']/100 * sampled['avg_cDNA']
sampled.head()

Unnamed: 0,Run,avg_cDNA,CV%,sd
0,3,770,78.9,607.53
1,3,1164,59.6,693.744
2,3,1214,59.5,722.33
3,3,1132,62.4,706.368
4,3,1144,63.3,724.152


In [6]:
# Average every measurement column (avg_cDNA, CV%, sd) within each run so
# there is exactly one row per run, indexed by the run number.
sampled_mean = sampled.groupby(by='Run').mean()
sampled_mean.head()

Unnamed: 0_level_0,avg_cDNA,CV%,sd
Run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,1084.8,64.74,690.8248
4,1853.2,87.21,1609.6151
5,1865.375,83.6625,1558.140875
6,1712.4,80.65,1372.3649
7,1796.5,75.88,1357.0012


# Load all Libraries

In [7]:
# Read the table of passing libraries from the remap root; the helper takes a
# list of table paths. The shape is displayed as a quick sanity check.
libraries = load_library_tables([c1_remap_root / 'libraries-passing.tsv'])
libraries.shape

(1037, 8)

In [8]:
# Load the per-run experiment table and derive an integer run number from
# each experiment name (names contain "run<N>", e.g. "..._clean_run4").
experiments = load_experiments([c1_remap_root / 'experiments-by-run-passing.tsv'])
runs = []
for experiment in experiments.index:
    # Raw string: '\d' in a normal string literal is an invalid escape
    # sequence that newer Python versions warn about.
    match = re.search(r'run(?P<run>\d+)', experiment)
    if match is None:
        # Report the offending name instead of failing with
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(f'Unable to find a run number in experiment name {experiment!r}')
    runs.append(int(match.group('run')))

experiments['run'] = runs
experiments

Unnamed: 0_level_0,replicates,analysis_dir,run
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C1_mouse_e13.5_limb_mesenchyme_mm10_clean_run4,"[18251_A1, 18251_A10, 18251_A11, 18251_A12, 18...",~/proj/C1_mouse_limb_combined/all_analysis_M21,4
C1_mouse_e11.0_limb_mesenchyme_mm10_clean_run5,"[18258_A1, 18258_A10, 18258_A11, 18258_A12, 18...",~/proj/C1_mouse_limb_combined/all_analysis_M21,5
C1_mouse_e11.5_limb_mesenchyme_mm10_clean_run6,"[18263_A1, 18263_A10, 18263_A11, 18263_A12, 18...",~/proj/C1_mouse_limb_combined/all_analysis_M21,6
C1_mouse_e12.5_limb_mesenchyme_mm10_clean_run7,"[18270_A1, 18270_A10, 18270_A11, 18270_A12, 18...",~/proj/C1_mouse_limb_combined/all_analysis_M21,7
C1_mouse_e13.5_limb_mesenchyme_mm10_clean_run8,"[18311_A1, 18311_A10, 18311_A11, 18311_A12, 18...",~/proj/C1_mouse_limb_combined/all_analysis_M21,8
C1_mouse_e10.5_limb_mm10_clean_run1,"[18042_A1, 18042_A10, 18042_A11, 18042_A12, 18...",~/proj/C1_mouse_limb_combined/all_analysis_M21,1
C1_mouse_e10.5_limb_mm10_clean_run2,"[17327_A1, 17327_A10, 17327_A11, 17327_A12, 17...",~/proj/C1_mouse_limb_combined/all_analysis_M21,2
C1_mouse_e10.5_limb_mm10_clean_run3,"[18087_F1, 18087_F10, 18087_F11, 18087_F12, 18...",~/proj/C1_mouse_limb_combined/all_analysis_M21,3
C1_mouse_e14.0_forelimb_run13_December14_2017,"[20039_A1, 20039_A10, 20039_A11, 20039_A12, 20...",~/proj/C1_mouse_limb_combined/all_analysis_M21,13
C1_mouse_e12.5_forelimb_run9_March8_2017_rearray_March152018,"[20090_C1, 20090_C10, 20090_C11, 20090_C12, 20...",~/proj/C1_mouse_limb_combined/all_analysis_M21,9


In [9]:
# Collect the library ids of every experiment except the two skipped runs,
# and remember which run each of those libraries belongs to.
experiments_to_skip = {'C1_mouse_e10.5_limb_mm10_clean_run1', 'C1_mouse_e10.5_limb_mm10_clean_run2'}
libraries_with_good_spikes = []
library_runs = {}
for experiment_name, row in experiments.iterrows():
    if experiment_name in experiments_to_skip:
        continue
    libraries_with_good_spikes.extend(row.replicates)
    # Every replicate of this experiment maps to the experiment's run number.
    library_runs.update(dict.fromkeys(row.replicates, row.run))

In [10]:
# Restrict the library table to the libraries selected above, in that order.
libraries_to_run = libraries.loc[libraries_with_good_spikes, :]

In [11]:
# Turn the library id -> run mapping into a Series named 'run' so it can be
# merged onto the library table by index.
library_runs = pandas.Series(data=library_runs, name='run')

In [12]:
# Attach the run number to every library (joined on the library id index),
# then attach that run's mean cDNA length, CV% and sd (joined on run number).
libraries_with_cdna = (
    libraries_to_run
    .merge(library_runs, left_index=True, right_index=True)
    .merge(sampled_mean, left_on='run', right_index=True)
)
libraries_with_cdna.head()

Unnamed: 0,analysis_dir,genome,annotation,sex,read_1,reference_prefix,analysis_name,stranded,run,avg_cDNA,CV%,sd
18251_A1,~/proj/C1_mouse_limb_combined/all_analysis_M21...,mm10,M21,male,[/woldlab/castor/home/sau/flowcells/C1_mouse_l...,-,18251_A1,unstranded,4,1853.2,87.21,1609.6151
18251_A10,~/proj/C1_mouse_limb_combined/all_analysis_M21...,mm10,M21,male,[/woldlab/castor/home/sau/flowcells/C1_mouse_l...,-,18251_A10,unstranded,4,1853.2,87.21,1609.6151
18251_A11,~/proj/C1_mouse_limb_combined/all_analysis_M21...,mm10,M21,male,[/woldlab/castor/home/sau/flowcells/C1_mouse_l...,-,18251_A11,unstranded,4,1853.2,87.21,1609.6151
18251_A12,~/proj/C1_mouse_limb_combined/all_analysis_M21...,mm10,M21,male,[/woldlab/castor/home/sau/flowcells/C1_mouse_l...,-,18251_A12,unstranded,4,1853.2,87.21,1609.6151
18251_A2,~/proj/C1_mouse_limb_combined/all_analysis_M21...,mm10,M21,male,[/woldlab/castor/home/sau/flowcells/C1_mouse_l...,-,18251_A2,unstranded,4,1853.2,87.21,1609.6151


In [13]:
# Rewrite fastq paths that refer to ../all_analysis_vdir so that they point
# at the all_analysis_vdir directory next to the expanded c1_remap_root.
vdir_relative = '../all_analysis_vdir'
vdir_resolved = str(c1_remap_root.expanduser() / '..' / 'all_analysis_vdir')

normalized_fastqs = [
    [
        # vdir_resolved itself contains vdir_relative, so a path that was
        # already rewritten must be left alone; otherwise re-running this
        # cell would substitute again and corrupt the path.
        fastq if vdir_resolved in fastq else fastq.replace(vdir_relative, vdir_resolved)
        for fastq in read_1
    ]
    for read_1 in libraries_with_cdna['read_1']
]
libraries_with_cdna['read_1'] = normalized_fastqs

Make sure we can find all the fastqs. There should be no warnings from this call.

In [14]:
# Exhaust find_fastqs over the read_1 column so that any problem it reports
# for a library shows up here rather than while writing submit files.
fastqs = list(find_fastqs(libraries_with_cdna, 'read_1'))

In [15]:
# Top-level output directory for all pseudo-mapper runs; it is a relative
# path, so it resolves against the notebook's working directory.
c1_pseudo = Path('c1_pseudo')

In [16]:
# One output directory per quantification variant, all below c1_pseudo.
# Runs named without "minimal":
c1_kallisto = c1_pseudo.joinpath('kallisto')
c1_salmon = c1_pseudo.joinpath('salmon')
c1_salmon_decoy = c1_pseudo.joinpath('salmon_decoy')
# Runs named "minimal":
c1_kallisto_minimal = c1_pseudo.joinpath('kallisto_minimal')
c1_salmon_minimal = c1_pseudo.joinpath('salmon_minimal')
# kallisto minimal with a fixed fragment length (the number after "f"):
c1_kallisto_minimal_f50 = c1_pseudo.joinpath('kallisto_minimal_f50')
c1_kallisto_minimal_f100 = c1_pseudo.joinpath('kallisto_minimal_f100')
c1_kallisto_minimal_f200 = c1_pseudo.joinpath('kallisto_minimal_f200')
c1_kallisto_minimal_f300 = c1_pseudo.joinpath('kallisto_minimal_f300')
c1_kallisto_minimal_f400 = c1_pseudo.joinpath('kallisto_minimal_f400')
c1_kallisto_minimal_f600 = c1_pseudo.joinpath('kallisto_minimal_f600')
c1_kallisto_minimal_f800 = c1_pseudo.joinpath('kallisto_minimal_f800')
# kallisto minimal in "long" mode:
c1_kallisto_minimal_long = c1_pseudo.joinpath('kallisto_minimal_long')

In [17]:
# Every output directory that needs a logs/ folder and one sub-directory per
# library, in the same order as they were defined.
analysis_dirs = (
    [c1_kallisto, c1_salmon, c1_salmon_decoy]
    + [c1_kallisto_minimal, c1_salmon_minimal]
    # fixed fragment-length sweep
    + [
        c1_kallisto_minimal_f50,
        c1_kallisto_minimal_f100,
        c1_kallisto_minimal_f200,
        c1_kallisto_minimal_f300,
        c1_kallisto_minimal_f400,
        c1_kallisto_minimal_f600,
        c1_kallisto_minimal_f800,
    ]
    + [c1_kallisto_minimal_long]
)

In [18]:
# Create every output directory. parents=True also creates the shared parent
# directory when it does not exist yet (a plain mkdir() would raise
# FileNotFoundError on a fresh checkout), and exist_ok=True makes re-running
# the cell harmless.
for p in analysis_dirs:
    p.mkdir(parents=True, exist_ok=True)

In [19]:
# Inside every output directory create a logs/ folder (the submit files write
# their logs to logs/...) and one sub-directory per library (each job writes
# its results to "-o <library_id>").
for result_dir in analysis_dirs:
    log = result_dir / 'logs'
    # parents/exist_ok: works even if result_dir has not been created yet and
    # is safe to re-run.
    log.mkdir(parents=True, exist_ok=True)

    for library_id in libraries_with_cdna.index:
        analysis_dir = result_dir / library_id
        analysis_dir.mkdir(parents=True, exist_ok=True)

In [20]:
def kallisto_condor(libraries):
    """Return the text of a condor submit file with one kallisto quant job per library.

    Parameters
    ----------
    libraries : pandas.DataFrame
        Library table indexed by library id. It must contain the columns
        'avg_cDNA' and 'sd' (used as --fragment-length and --sd) and the
        'read_1' column that is handed to find_fastqs.

    Returns
    -------
    str
        Submit header followed by an ``arguments=...`` / ``queue`` pair for
        every row of ``libraries``.

    Raises
    ------
    AssertionError
        If ``libraries`` has no 'avg_cDNA' column.
    """
    assert 'avg_cDNA' in libraries.columns, "Didn't receive updated library table"
    # The final '' makes the header end with a newline, which leaves an empty
    # line between the header and the first job.
    preamble = '\n'.join([
        '#!/usr/bin/condor_submit',
        '',
        'universe=vanilla',
        'log=logs/kallisto_c1_quant.log',
        'output=logs/kallisto_c1_quant.$(process).out',
        'error=logs/kallisto_c1_quant.$(process).out',
        '',
        'THREADS=16',
        'HOME=/woldlab/loxcyc/home/diane',
        'ENCODE_GENOME=$(HOME)/proj/encode-202006-jamboree-detrout-rna-sc-pipeline/genome/mm10-M21-male',
        '',
        'request_cpus=$(THREADS)',
        # The space after the executable path is part of the existing output.
        'executable=/woldlab/loxcyc/home/diane/proj/kallisto/build/src/kallisto ',
        '',
    ])
    # Only referenced by the disabled "--genomebam --gtf {gtf}" option.
    gtf = Path('~/proj/encode-202006-jamboree-detrout-rna-sc-pipeline/genome/mm10-M21_minimal-male/mm10-M21_minimal_det-male.gtf').expanduser()
    # find_fastqs yields pairs that dict() turns into library id -> fastq names.
    fastqs_by_library = dict(find_fastqs(libraries, 'read_1'))
    submit_lines = [preamble]
    for library_id, library in libraries.iterrows():
        mean_length = library['avg_cDNA']
        length_sd = library['sd']
        reads = ' '.join(fastqs_by_library[library_id])
        submit_lines += [
            f'arguments="quant -t $(THREADS) -i $(ENCODE_GENOME)/mm10-M21-male-kallisto-0.46.2.idx -o {library_id} --single --fragment-length={mean_length} --sd={length_sd} {reads}"',
            'queue',
        ]

    return '\n'.join(submit_lines)

In [21]:
# Save the kallisto submit file inside the kallisto output directory.
with (c1_kallisto / 'kallisto-quant.condor').open('wt') as outstream:
    outstream.write(kallisto_condor(libraries_with_cdna))

In [22]:
def salmon_condor(libraries):
    """Return the text of a condor submit file with one salmon quant job per library.

    Each job runs salmon through singularity with the flowcells directory
    bind-mounted, against ``salmon_index`` of the mm10-M21-male genome.

    Parameters
    ----------
    libraries : pandas.DataFrame
        Library table indexed by library id. It must contain an 'avg_cDNA'
        column (only checked, not used) and the 'read_1' column that is
        handed to find_fastqs.

    Returns
    -------
    str
        Submit header followed by an ``arguments=...`` / ``queue`` pair for
        every row of ``libraries``.

    Raises
    ------
    AssertionError
        If ``libraries`` has no 'avg_cDNA' column.
    """
    assert 'avg_cDNA' in libraries.columns, "Didn't receive updated library table"
    # The final '' makes the header end with a newline, which leaves an empty
    # line between the header and the first job.
    preamble = '\n'.join([
        '#!/usr/bin/condor_submit',
        '',
        'universe=vanilla',
        'log=logs/salmon_c1_quant.log',
        'output=logs/salmon_c1_quant.$(process).out',
        'error=logs/salmon_c1_quant.$(process).out',
        '',
        'THREADS=16',
        'HOME=/woldlab/loxcyc/home/diane',
        'PROJECT=$(HOME)/proj/encode-202006-jamboree-detrout-rna-sc-pipeline',
        # The space after the image path is part of the existing output.
        'SINGULARITY_IMAGE=$(PROJECT)/salmon-container/salmon-unstable.simg ',
        'ENCODE_GENOME=$(PROJECT)/genome/mm10-M21-male/',
        '',
        'request_cpus=$(THREADS)',
        'executable=/usr/bin/singularity',
        '',
    ])
    binds = "-B /woldlab/castor/home/sau/flowcells:/woldlab/castor/home/sau/flowcells"
    # find_fastqs yields pairs that dict() turns into library id -> fastq names.
    fastqs_by_library = dict(find_fastqs(libraries, 'read_1'))
    submit_lines = [preamble]
    for library_id, _library in libraries.iterrows():
        reads = ' '.join(fastqs_by_library[library_id])
        submit_lines += [
            f'arguments="run {binds} $(SINGULARITY_IMAGE) quant -p $(THREADS) -i $(ENCODE_GENOME)/salmon_index -o {library_id} --libType U -r {reads}"',
            'queue',
        ]

    return '\n'.join(submit_lines)

In [23]:
# Save the salmon submit file inside the salmon output directory.
with (c1_salmon / 'salmon-quant.condor').open('wt') as outstream:
    outstream.write(salmon_condor(libraries_with_cdna))

In [24]:
def salmon_decoy_condor(libraries):
    """Return the text of a condor submit file with one salmon (decoy index) job per library.

    Same layout as the plain salmon submit file, but the jobs use
    ``salmon_index_decoy`` of the mm10-M21-male genome and separate log names.

    Parameters
    ----------
    libraries : pandas.DataFrame
        Library table indexed by library id. It must contain an 'avg_cDNA'
        column (only checked, not used) and the 'read_1' column that is
        handed to find_fastqs.

    Returns
    -------
    str
        Submit header followed by an ``arguments=...`` / ``queue`` pair for
        every row of ``libraries``.

    Raises
    ------
    AssertionError
        If ``libraries`` has no 'avg_cDNA' column.
    """
    assert 'avg_cDNA' in libraries.columns, "Didn't receive updated library table"
    # The final '' makes the header end with a newline, which leaves an empty
    # line between the header and the first job.
    preamble = '\n'.join([
        '#!/usr/bin/condor_submit',
        '',
        'universe=vanilla',
        'log=logs/salmon_decoy_c1_quant.log',
        'output=logs/salmon_decoy_c1_quant.$(process).out',
        'error=logs/salmon_decoy_c1_quant.$(process).out',
        '',
        'THREADS=16',
        'HOME=/woldlab/loxcyc/home/diane',
        'PROJECT=$(HOME)/proj/encode-202006-jamboree-detrout-rna-sc-pipeline',
        # The space after the image path is part of the existing output.
        'SINGULARITY_IMAGE=$(PROJECT)/salmon-container/salmon-unstable.simg ',
        'ENCODE_GENOME=$(PROJECT)/genome/mm10-M21-male/',
        '',
        'request_cpus=$(THREADS)',
        'executable=/usr/bin/singularity',
        '',
    ])
    binds = "-B /woldlab/castor/home/sau/flowcells:/woldlab/castor/home/sau/flowcells"
    # find_fastqs yields pairs that dict() turns into library id -> fastq names.
    fastqs_by_library = dict(find_fastqs(libraries, 'read_1'))
    submit_lines = [preamble]
    for library_id, _library in libraries.iterrows():
        reads = ' '.join(fastqs_by_library[library_id])
        submit_lines += [
            f'arguments="run {binds} $(SINGULARITY_IMAGE) quant -p $(THREADS) -i $(ENCODE_GENOME)/salmon_index_decoy -o {library_id} --libType U -r {reads}"',
            'queue',
        ]

    return '\n'.join(submit_lines)

In [25]:
# Save the salmon decoy submit file inside the salmon_decoy output directory.
with (c1_salmon_decoy / 'salmon-decoy-quant.condor').open('wt') as outstream:
    outstream.write(salmon_decoy_condor(libraries_with_cdna))

# Run with minimal annotation set

In [26]:
def kallisto_minimal_condor(libraries):
    """Return the text of a condor submit file running kallisto quant on the minimal index.

    Same layout as the full-annotation kallisto submit file, but the jobs
    use the mm10-M21_minimal-male genome directory and index.

    Parameters
    ----------
    libraries : pandas.DataFrame
        Library table indexed by library id. It must contain the columns
        'avg_cDNA' and 'sd' (used as --fragment-length and --sd) and the
        'read_1' column that is handed to find_fastqs.

    Returns
    -------
    str
        Submit header followed by an ``arguments=...`` / ``queue`` pair for
        every row of ``libraries``.

    Raises
    ------
    AssertionError
        If ``libraries`` has no 'avg_cDNA' column.
    """
    assert 'avg_cDNA' in libraries.columns, "Didn't receive updated library table"
    # The final '' makes the header end with a newline, which leaves an empty
    # line between the header and the first job.
    preamble = '\n'.join([
        '#!/usr/bin/condor_submit',
        '',
        'universe=vanilla',
        'log=logs/kallisto_c1_minimal_quant.log',
        'output=logs/kallisto_c1_minimal_quant.$(process).out',
        'error=logs/kallisto_c1_minimal_quant.$(process).out',
        '',
        'THREADS=16',
        'HOME=/woldlab/loxcyc/home/diane',
        'ENCODE_GENOME=$(HOME)/proj/encode-202006-jamboree-detrout-rna-sc-pipeline/genome/mm10-M21_minimal-male',
        '',
        'request_cpus=$(THREADS)',
        # The space after the executable path is part of the existing output.
        'executable=/woldlab/loxcyc/home/diane/proj/kallisto/build/src/kallisto ',
        '',
    ])
    # Only referenced by the disabled "--genomebam --gtf {gtf}" option.
    gtf = Path('~/proj/encode-202006-jamboree-detrout-rna-sc-pipeline/genome/mm10-M21_minimal-male/mm10-M21_minimal_det-male.gtf').expanduser()
    # find_fastqs yields pairs that dict() turns into library id -> fastq names.
    fastqs_by_library = dict(find_fastqs(libraries, 'read_1'))
    submit_lines = [preamble]
    for library_id, library in libraries.iterrows():
        mean_length = library['avg_cDNA']
        length_sd = library['sd']
        reads = ' '.join(fastqs_by_library[library_id])
        submit_lines += [
            f'arguments="quant -t $(THREADS) -i $(ENCODE_GENOME)/mm10-M21_minimal-male-kallisto-0.46.2.idx -o {library_id} --single --fragment-length={mean_length} --sd={length_sd} {reads}"',
            'queue',
        ]

    return '\n'.join(submit_lines)

In [27]:
# Save the kallisto minimal submit file inside its output directory.
with (c1_kallisto_minimal / 'kallisto-minimal-quant.condor').open('wt') as outstream:
    outstream.write(kallisto_minimal_condor(libraries_with_cdna))

In [28]:
def salmon_minimal_condor(libraries):
    """Return the text of a condor submit file running salmon quant on the minimal index.

    Same layout as the plain salmon submit file, but the jobs use
    ``salmon_index`` of the mm10-M21_minimal-male genome and separate log names.

    Parameters
    ----------
    libraries : pandas.DataFrame
        Library table indexed by library id. It must contain an 'avg_cDNA'
        column (only checked, not used) and the 'read_1' column that is
        handed to find_fastqs.

    Returns
    -------
    str
        Submit header followed by an ``arguments=...`` / ``queue`` pair for
        every row of ``libraries``.

    Raises
    ------
    AssertionError
        If ``libraries`` has no 'avg_cDNA' column.
    """
    assert 'avg_cDNA' in libraries.columns, "Didn't receive updated library table"
    # The final '' makes the header end with a newline, which leaves an empty
    # line between the header and the first job.
    preamble = '\n'.join([
        '#!/usr/bin/condor_submit',
        '',
        'universe=vanilla',
        'log=logs/salmon_c1_minimal_quant.log',
        'output=logs/salmon_c1_minimal_quant.$(process).out',
        'error=logs/salmon_c1_minimal_quant.$(process).out',
        '',
        'THREADS=16',
        'HOME=/woldlab/loxcyc/home/diane',
        'PROJECT=$(HOME)/proj/encode-202006-jamboree-detrout-rna-sc-pipeline',
        # The space after the image path is part of the existing output.
        'SINGULARITY_IMAGE=$(PROJECT)/salmon-container/salmon-unstable.simg ',
        'ENCODE_GENOME=$(PROJECT)/genome/mm10-M21_minimal-male/',
        '',
        'request_cpus=$(THREADS)',
        'executable=/usr/bin/singularity',
        '',
    ])
    binds = "-B /woldlab/castor/home/sau/flowcells:/woldlab/castor/home/sau/flowcells"
    # find_fastqs yields pairs that dict() turns into library id -> fastq names.
    fastqs_by_library = dict(find_fastqs(libraries, 'read_1'))
    submit_lines = [preamble]
    for library_id, _library in libraries.iterrows():
        reads = ' '.join(fastqs_by_library[library_id])
        submit_lines += [
            f'arguments="run {binds} $(SINGULARITY_IMAGE) quant -p $(THREADS) -i $(ENCODE_GENOME)/salmon_index -o {library_id} --libType U -r {reads}"',
            'queue',
        ]

    return '\n'.join(submit_lines)

In [29]:
# Save the salmon minimal submit file inside its output directory.
with (c1_salmon_minimal / 'salmon-quant.condor').open('wt') as outstream:
    outstream.write(salmon_minimal_condor(libraries_with_cdna))

# Parameter sweep of fragment length for kallisto minimal 

In [30]:
# ever get that feeling you should've been less lazy and stopped copying and pasting?
# Then after you refactor everything do you think it was worth it?

def kallisto_minimal_fragment_condor(libraries, length):
    """Return a condor submit file running kallisto minimal with a fixed fragment length.

    Every library gets the same ``--fragment-length`` (``length``) and the
    same ``--sd`` (20% of ``length``) instead of the per-run values.

    The jobs use the mm10-M21_minimal-male index. The previous version
    pointed at the full mm10-M21-male index, which contradicted the function
    name, the "kallisto minimal" sweep it belongs to and the
    kallisto_minimal_f* output directories.

    Parameters
    ----------
    libraries : pandas.DataFrame
        Library table indexed by library id. It must contain an 'avg_cDNA'
        column (only checked, not used) and the 'read_1' column that is
        handed to find_fastqs.
    length : int or float
        Fragment length passed to kallisto; also used in the log file names.

    Returns
    -------
    str
        Submit header followed by an ``arguments=...`` / ``queue`` pair for
        every row of ``libraries``.

    Raises
    ------
    AssertionError
        If ``libraries`` has no 'avg_cDNA' column.
    """
    assert 'avg_cDNA' in libraries.columns, "Didn't receive updated library table"
    header = f"""#!/usr/bin/condor_submit

universe=vanilla
log=logs/kallisto_c1_quant_f{length}.log
output=logs/kallisto_c1_quant_f{length}.$(process).out
error=logs/kallisto_c1_quant_f{length}.$(process).out

THREADS=16
HOME=/woldlab/loxcyc/home/diane
ENCODE_GENOME=$(HOME)/proj/encode-202006-jamboree-detrout-rna-sc-pipeline/genome/mm10-M21_minimal-male

request_cpus=$(THREADS)
executable=/woldlab/loxcyc/home/diane/proj/kallisto/build/src/kallisto
"""
    # Only referenced by the disabled "--genomebam --gtf {gtf}" option.
    gtf = Path('~/proj/encode-202006-jamboree-detrout-rna-sc-pipeline/genome/mm10-M21_minimal-male/mm10-M21_minimal_det-male.gtf').expanduser()
    # find_fastqs yields pairs that dict() turns into library id -> fastq names.
    read_1 = dict(find_fastqs(libraries, 'read_1'))
    # The standard deviation depends only on the requested length, so it is
    # computed once instead of once per library.
    sd = length * .2
    lines = [header]
    for library_id in libraries.index:
        fastqs = ' '.join(read_1[library_id])
        # No genome bams now. --genomebam --gtf {gtf}
        lines.append(f'arguments="quant -t $(THREADS) -i $(ENCODE_GENOME)/mm10-M21_minimal-male-kallisto-0.46.2.idx -o {library_id} --single --fragment-length={length} --sd={sd} {fastqs}"')
        lines.append('queue')

    return "\n".join(lines)

def write_kallisto_minimal_fragment_condor(target_dir, libraries, length):
    """Write the fixed-fragment-length kallisto submit file into ``target_dir``.

    The file is named ``kallisto-minimal-f<length>-quant.condor`` and its
    content comes from kallisto_minimal_fragment_condor(libraries, length).
    """
    submit_file = target_dir / f'kallisto-minimal-f{length}-quant.condor'
    with open(submit_file, 'wt') as outstream:
        outstream.write(kallisto_minimal_fragment_condor(libraries, length))
    

In [31]:
# (output directory, fragment length) for every point of the sweep.
length_sweep = list(zip(
    [
        c1_kallisto_minimal_f50,
        c1_kallisto_minimal_f100,
        c1_kallisto_minimal_f200,
        c1_kallisto_minimal_f300,
        c1_kallisto_minimal_f400,
        c1_kallisto_minimal_f600,
        c1_kallisto_minimal_f800,
    ],
    [50, 100, 200, 300, 400, 600, 800],
))

# Write one submit file per sweep point into its own directory.
for target_dir, length in length_sweep:
    write_kallisto_minimal_fragment_condor(target_dir=target_dir, libraries=libraries_with_cdna, length=length)

# Prepare kallisto long

In [32]:
def kallisto_minimal_long_condor(libraries):
    """Return a condor submit file running the kallisto-long build in ``--long`` mode.

    The jobs use the mm10-M21_minimal-male index. The previous version
    pointed at the full mm10-M21-male index, which contradicted the function
    name and the kallisto_minimal_long output directory.
    NOTE(review): confirm that the minimal index is the intended one for the
    kallisto-long build.

    Parameters
    ----------
    libraries : pandas.DataFrame
        Library table indexed by library id with the 'read_1' column that is
        handed to find_fastqs.

    Returns
    -------
    str
        Submit header followed by an ``arguments=...`` / ``queue`` pair for
        every row of ``libraries``.
    """
    header = """#!/usr/bin/condor_submit

universe=vanilla
log=logs/kallisto_c1_quant_long.log
output=logs/kallisto_c1_quant_long.$(process).out
error=logs/kallisto_c1_quant_long.$(process).out

THREADS=16
HOME=/woldlab/loxcyc/home/diane
ENCODE_GENOME=$(HOME)/proj/encode-202006-jamboree-detrout-rna-sc-pipeline/genome/mm10-M21_minimal-male

request_cpus=$(THREADS)
executable=/woldlab/loxcyc/home/diane/proj/kallisto-long/build/src/kallisto
"""
    # Only referenced by the disabled "--genomebam --gtf {gtf}" option.
    gtf = Path('~/proj/encode-202006-jamboree-detrout-rna-sc-pipeline/genome/mm10-M21_minimal-male/mm10-M21_minimal_det-male.gtf').expanduser()
    # find_fastqs yields pairs that dict() turns into library id -> fastq names.
    read_1 = dict(find_fastqs(libraries, 'read_1'))
    lines = [header]
    for library_id in libraries.index:
        fastqs = ' '.join(read_1[library_id])
        # No genome bams now. --genomebam --gtf {gtf}
        lines.append(f'arguments="quant -t $(THREADS) -i $(ENCODE_GENOME)/mm10-M21_minimal-male-kallisto-0.46.2.idx -o {library_id} --long {fastqs}"')
        lines.append('queue')

    return "\n".join(lines)


In [33]:
# Save the kallisto long submit file inside its output directory.
with (c1_kallisto_minimal_long / 'kallisto-minimal-long-quant.condor').open('wt') as outstream:
    outstream.write(kallisto_minimal_long_condor(libraries_with_cdna))


# Prepare standard ENCODE pipeline run with minimal index

In [34]:
# Show which columns the library table has before deriving the table for the
# minimal-annotation run from it.
libraries.columns

Index(['analysis_dir', 'genome', 'annotation', 'sex', 'read_1',
       'reference_prefix', 'analysis_name', 'stranded'],
      dtype='object')

In [35]:
# Derive the library table for the minimal-annotation pipeline run: drop
# analysis_name, use the library id as analysis_dir, flatten each read_1 list
# into a comma-separated string and switch the annotation to M21_minimal.
libraries_minimal = libraries.drop(columns='analysis_name').copy()
libraries_minimal['analysis_dir'] = libraries.index
libraries_minimal['read_1'] = libraries_minimal['read_1'].apply(','.join)
libraries_minimal['annotation'] = 'M21_minimal'

In [36]:
# rsem_minimal is not one of the directories created earlier, so make sure
# it exists before writing; without this to_csv fails on a fresh checkout.
rsem_minimal_dir = c1_pseudo / 'rsem_minimal'
rsem_minimal_dir.mkdir(parents=True, exist_ok=True)
libraries_minimal.to_csv(rsem_minimal_dir / 'libraries.tsv', sep='\t')

In [37]:
# rsem_minimal is not one of the directories created earlier, so make sure
# it exists before writing; without this to_csv fails on a fresh checkout.
rsem_minimal_dir = c1_pseudo / 'rsem_minimal'
rsem_minimal_dir.mkdir(parents=True, exist_ok=True)
# NOTE(review): the replicates column appears to hold Python lists, which are
# written here in their repr form (read_1 was comma-joined for the library
# table) -- confirm the experiment loader can read this back.
experiments.to_csv(rsem_minimal_dir / 'experiments.tsv', sep='\t')