In [1]:
from snakemake.io import *
import snakemake
import pandas as pd

In [2]:
# Load the sample sheet. sample_name is read as str and is kept both as the
# (sorted) index and as a regular column.
samples = pd.read_csv("samples.tsv", sep="\t", dtype={"sample_name": str})
samples = samples.set_index("sample_name", drop=False).sort_index()

In [3]:
# Display the sample sheet (indexed by sample_name).
samples

Unnamed: 0_level_0,sample_name,treatment_1,treatment_2,jointly_handled
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,A,untreated,untreated,1
B,B,untreated,treated,1
C,C,treated,untreated,1
D,D,treated,untreated,2
E,E,treated,treated,2


In [4]:
# Load the unit sheet. sample_name and unit_name are read as str and are kept
# both as a sorted two-level index and as regular columns.
units = pd.read_csv(
    "units.tsv",
    sep="\t",
    dtype={"sample_name": str, "unit_name": str},
)
units = units.set_index(["sample_name", "unit_name"], drop=False).sort_index()

In [5]:
# Display the unit sheet (indexed by sample_name, unit_name).
units

Unnamed: 0_level_0,Unnamed: 1_level_0,sample_name,unit_name,fq1,fq2,sra,adapters,strandedness
sample_name,unit_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,lane1,A,lane1,A.1.fq.gz,A.2.fq.gz,,,
A,lane2,A,lane2,A2.1.fq.gz,A2.2.fq.gz,,,
B,lane1,B,lane1,B.1.fq.gz,B.2.fq.gz,,,
C,lane1,C,lane1,C.1.fq.gz,C.2.fq.gz,,,
D,lane1,D,lane1,D.1.fq.gz,D.2.fq.gz,,,
E,lane1,E,lane1,E.1.fq.gz,E.2.fq.gz,,,


In [9]:
# Build every "{sample}_{unit}" combination of the two lists.
# NOTE(review): this rebinds `samples`, replacing the DataFrame loaded from
# samples.tsv with a plain list of strings; cells that need the sample sheet
# after this point must reload it.
samples = expand(
    "{sample}_{unit}",
    sample=["A", "B"],
    unit=["L001", "L002"],
)

In [10]:
# Show the expanded "{sample}_{unit}" strings (at this point `samples` is the
# list produced by expand, not the sample sheet).
samples

['A_L001', 'A_L002', 'B_L001', 'B_L002']

In [13]:
# Instantiate Wildcards so that the attributes assigned below live on this one
# object. Binding the class itself (no parentheses) would turn those
# assignments into class attributes shared by every Wildcards object created
# in this session.
wildcards = Wildcards()

In [45]:
# Select the (sample, unit) pair used by the lookups in the following cells.
# "lane3" is not a unit of sample A in the unit table (only lane1 and lane2
# are listed), so lookups with these values raise KeyError.
# NOTE(review): the saved outputs of the get_cutadapt_input / get_fq cells list
# lane1 files, so they were presumably produced with unit = "lane1" - confirm
# before relying on a top-to-bottom re-run.
wildcards.sample = "A"
wildcards.unit = "lane3"

In [46]:
# Look up the unit row for the current wildcards by chaining .loc over the two
# index levels (sample first, then unit). With unit "lane3" this raises
# KeyError, because sample A only has lane1 and lane2 in the unit table.
units.loc[wildcards.sample].loc[wildcards.unit]

KeyError: 'lane3'

In [33]:

def get_cutadapt_input(wildcards):
    """Return the cutadapt input path(s) for the unit selected by ``wildcards``.

    The unit row is looked up in the module-level ``units`` table using
    ``wildcards.sample`` and ``wildcards.unit``.

    Returns:
        * a list of two ``sra/{accession}_{read}.fastq`` paths (read 1 and 2)
          when the row has no ``fq1`` (SRA sample),
        * a single ``pipe/cutadapt/...fq1.fastq[.gz]`` string when the row has
          no ``fq2`` (single-end local sample),
        * a list of the ``fq1`` and ``fq2`` ``pipe/cutadapt/...`` paths
          otherwise (paired-end local sample).

    Raises:
        KeyError: if the sample or the unit is not present in ``units``.
    """
    sample_rows = units.loc[wildcards.sample]
    row = sample_rows.loc[wildcards.unit]

    if pd.isna(row["fq1"]):
        # SRA sample (always paired-end for now)
        return expand(
            "sra/{accession}_{read}.fastq", accession=row["sra"], read=[1, 2]
        )

    # Carry the ".gz" suffix over to the piped file when the raw file has it.
    suffix = ".gz" if row["fq1"].endswith("gz") else ""

    if not pd.isna(row["fq2"]):
        # paired end local sample: keep {read} as a placeholder for expand
        template = "pipe/cutadapt/{S}/{U}.{{read}}.fastq{E}".format(
            S=row.sample_name, U=row.unit_name, E=suffix
        )
        return expand(template, read=["fq1", "fq2"])

    # single end local sample
    return "pipe/cutadapt/{S}/{U}.fq1.fastq{E}".format(
        S=row.sample_name, U=row.unit_name, E=suffix
    )


In [34]:
# Resolve the cutadapt input path(s) for the wildcards set earlier in the notebook.
get_cutadapt_input(wildcards)

['pipe/cutadapt/A/lane1.fq1.fastq.gz', 'pipe/cutadapt/A/lane1.fq2.fastq.gz']

In [39]:

def get_fq(wildcards):
    """Return the raw FASTQ path(s) for the unit selected by ``wildcards``.

    The unit row is looked up in the module-level ``units`` table using the
    ``(wildcards.sample, wildcards.unit)`` index key.

    Returns:
        dict: ``{"fq1": ..., "fq2": ...}`` for paired-end units, or
        ``{"fq1": ...}`` for single-end units (``fq2`` missing). For SRA
        units (``fq1`` missing) the two paths are built from the ``sra``
        accession as ``sra/{accession}_R1.fastq`` and ``..._R2.fastq``.

    Raises:
        KeyError: if the (sample, unit) pair is not present in ``units``.
    """
    # no trimming, use raw reads
    u = units.loc[(wildcards.sample, wildcards.unit)]
    if pd.isna(u["fq1"]):
        # SRA sample (always paired-end for now)
        # NOTE(review): get_cutadapt_input names SRA files with "_1"/"_2"
        # while this uses "_R1"/"_R2" - confirm which one matches the rule
        # that produces the sra/ files.
        accession = u["sra"]
        return dict(
            zip(
                ["fq1", "fq2"],
                expand(
                    "sra/{accession}_{group}.fastq",
                    accession=accession,
                    group=["R1", "R2"],
                ),
            )
        )

    if pd.isna(u["fq2"]):
        # Single-end local sample: leave fq2 out instead of formatting the
        # missing value into the literal string "nan".
        return {"fq1": f"{u.fq1}"}

    return {"fq1": f"{u.fq1}", "fq2": f"{u.fq2}"}

In [40]:
# Resolve the raw FASTQ path(s) for the wildcards set earlier in the notebook.
get_fq(wildcards)

{'fq1': 'A.1.fq.gz', 'fq2': 'A.2.fq.gz'}