Skip to content
This repository has been archived by the owner on Aug 22, 2023. It is now read-only.

Commit

Permalink
Merge 7c77fcd into 8c5a864
Browse files Browse the repository at this point in the history
  • Loading branch information
barrystokman committed Mar 4, 2021
2 parents 8c5a864 + 7c77fcd commit 23fd83a
Show file tree
Hide file tree
Showing 21 changed files with 1,093 additions and 137 deletions.
14 changes: 8 additions & 6 deletions demux/cli/basemask.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@ def create_basemask(sheet):
""" create the bcl2fastq basemask """
run_parameters_file = "runParameters.xml"
run_params_tree = parse_run_parameters(run_parameters_file)
read1_len = int(run_params_tree.findtext("Setup/IndexRead1"))
read2_len = int(run_params_tree.findtext("Setup/IndexRead2"))
read1 = int(run_params_tree.findtext("Setup/Read1"))
read2 = int(run_params_tree.findtext("Setup/Read2"))
indexread1 = int(run_params_tree.findtext("Setup/IndexRead1"))
indexread2 = int(run_params_tree.findtext("Setup/IndexRead2"))

lines = [line for line in sheet.lines_per_column("lane", lane)]

Expand All @@ -53,19 +55,19 @@ def create_basemask(sheet):
index2 = lines[0]["index2"] if "index2" in lines[0] else EMPTY_STRING

# index1 basemask
index1_n = "n" * (read1_len - len(index1))
index1_n = "n" * (indexread1 - len(index1))
basemask_index1 = "I" + str(len(index1)) + index1_n

# index2 basemask
if read2_len == 0:
if read2 == 0:
click.echo(f"Y151,{basemask_index1},Y151")
else:
index2_n = "n" * (read2_len - len(index2))
index2_n = "n" * (indexread2 - len(index2))
if len(index2) > 0:
basemask_index2 = "I" + str(len(index2)) + index2_n
else:
basemask_index2 = index2_n
click.echo(f"Y151,{basemask_index1},{basemask_index2},Y151")
click.echo(f"Y{read1},{basemask_index1},{basemask_index2},Y{read2}")

def create_novaseq_basemask():
""" create the bcl2fastq basemask for novaseq flowcells"""
Expand Down
30 changes: 8 additions & 22 deletions demux/cli/samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
HiSeq2500Samplesheet,
MiseqSamplesheet,
CreateNovaseqSamplesheet,
Create2500Samplesheet,
)

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -161,29 +162,14 @@ def get_project(project):

# ... fix some 2500 specifics
if application == "wes":
# this is how the data is keyed when it gets back from LIMS
lims_keys = [
"fcid",
"lane",
"sample_id",
"sample_ref",
"index",
"description",
"control",
"recipe",
"operator",
"project",
]
header = [HiSeq2500Samplesheet.header_map[head] for head in lims_keys]
demux_samplesheet = Create2500Samplesheet(
flowcell, index_length, raw_samplesheet
).construct_samplesheet()

if index_length:
raw_samplesheet = [
line
for line in raw_samplesheet
if len(line["index"].replace("-", "")) == int(index_length)
]
for line in raw_samplesheet:
line["description"] = line["sample_id"]
# add [section] header
click.echo("[Data]")
click.echo(demux_samplesheet)
return

# ... fix some X specifics
if application == "wgs":
Expand Down
19 changes: 19 additions & 0 deletions demux/constants/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
""" Constants for demultiplexing """

# Single-character separators used when parsing and writing sample sheets.
SPACE = " "
DASH = "-"
COMMA = ","

# Keys of a raw sample sheet line as it comes back from LIMS. The order of
# this list is the order in which the values are written out as columns.
LIMS_KEYS = [
    "fcid",
    "lane",
    "sample_id",
    "sample_ref",
    "index",
    "index2",
    "sample_name",
    "control",
    "recipe",
    "operator",
    "project",
]
1 change: 1 addition & 0 deletions demux/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
iseqSampleSheet,
)
from .novaseq_samplesheet import CreateNovaseqSamplesheet
from .hiseq2500_samplesheet import Create2500Samplesheet
55 changes: 55 additions & 0 deletions demux/utils/hiseq2500_samplesheet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
""" Create a samplesheet for 2500 flowcells """

from ..constants.constants import DASH, COMMA, LIMS_KEYS
from .samplesheet import Samplesheet


class Create2500Samplesheet:
    """Create a raw sample sheet for 2500 flowcells.

    The sheet is built from the lines of a raw sample sheet (one dict per
    sample, keyed by the names in ``LIMS_KEYS``). Lines are optionally
    filtered on index length, dual indexes are split into ``index`` and
    ``index2``, and the result is rendered as delimited text.
    """

    def __init__(self, flowcell: str, index_length: int, raw_samplesheet: list):
        """Store the inputs; no processing happens until the sheet is constructed.

        Args:
            flowcell: flowcell identifier (stored, not used by the methods below).
            index_length: total index length to keep; a falsy value (``None``,
                ``0``) disables filtering. Numeric strings are accepted.
            raw_samplesheet: list of dicts, one per sample sheet line.
        """
        self.flowcell = flowcell
        self.index_length = index_length
        self.raw_samplesheet = raw_samplesheet

    @property
    def header(self) -> list:
        """Return the column titles, taken from ``Samplesheet.header_map``.

        NOTE(review): the titles are emitted in ``header_map`` order while the
        values are emitted in ``LIMS_KEYS`` order -- confirm the two agree.
        """
        return list(Samplesheet.header_map.values())

    @staticmethod
    def is_dual_index(index: str, delimiter=DASH) -> bool:
        """Return True when ``index`` contains ``delimiter``, i.e. holds two indexes."""
        return delimiter in index

    def remove_unwanted_indexes(self, raw_samplesheet: list) -> list:
        """Return the lines whose index length (dashes excluded) equals ``index_length``.

        When no index length was given, all lines are kept. This mirrors the
        ``if index_length:`` guard of the CLI code this class replaces;
        without it every sample would be dropped.
        """
        if not self.index_length:
            return list(raw_samplesheet)

        # The value may arrive as a string from the command line; comparing an
        # int length with a str would never match.
        wanted_length = int(self.index_length)
        return [
            line
            for line in raw_samplesheet
            if len(line["index"].replace(DASH, "")) == wanted_length
        ]

    def split_dual_indexes(self, raw_samplesheet: list) -> list:
        """Split dual indexes into ``index`` and ``index2``, modifying lines in place.

        Single-index lines get an empty ``index2`` when they have none, so
        every line carries all keys read by ``construct_samplesheet``.

        Raises:
            ValueError: if an index contains more than one dash.
        """
        for line in raw_samplesheet:
            if self.is_dual_index(line["index"]):
                index1, index2 = line["index"].split(DASH)
                line["index"], line["index2"] = index1, index2
            else:
                line.setdefault("index2", "")
        return raw_samplesheet

    def construct_samplesheet(self, end="\n", delimiter=COMMA) -> str:
        """Construct the sample sheet and return it as text.

        Args:
            end: separator placed between sheet lines.
            delimiter: separator placed between the columns of a line.

        Returns:
            The header line followed by one line per remaining sample, with
            the values in ``LIMS_KEYS`` order.

        The dicts in ``raw_samplesheet`` are updated in place (``index``,
        ``index2`` and ``sample_name``).
        """
        demux_samplesheet = [delimiter.join(self.header)]
        raw_samplesheet = self.remove_unwanted_indexes(self.raw_samplesheet)
        raw_samplesheet = self.split_dual_indexes(raw_samplesheet)
        for line in raw_samplesheet:
            line["sample_name"] = line["project"]
            demux_samplesheet.append(
                delimiter.join(str(line[lims_key]) for lims_key in LIMS_KEYS)
            )

        return end.join(demux_samplesheet)
6 changes: 3 additions & 3 deletions demux/utils/samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,13 +594,13 @@ class HiSeq2500Samplesheet(Samplesheet):
"lane": "Lane",
"sample_id": "SampleID",
"sample_ref": "SampleRef",
"index": "Index",
"index": "index",
"index2": "index2",
"sample_name": "SampleName",
"control": "Control",
"recipe": "Recipe",
"operator": "Operator",
"description": "Description",
"project": "SampleProject",
"project": "Project",
}


Expand Down
104 changes: 104 additions & 0 deletions scripts/2500/NIPT/sendnipt.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/bin/bash
# Send the results of finished NIPT runs: mail them and upload them via SFTP.

# abort on unset variables and on failing commands
set -ue

VERSION=5.4.2
echo "Version $VERSION"

##########
# PARAMS #
##########

# directory holding the NIPT run folders, and the directory with the analysis output
NIPTRUNS="/home/proj/production/flowcells/2500/nipt/"
NIPTOUT="/srv/nipt_analysis_output/"

# recipients: regular results, results of older (rerun) flowcells, error reports
MAILTO="clinical-demux@scilifelab.se,nipt.karolinska@sll.se"
MAILTO_RERUN="agne.lieden@ki.se,clinical-demux@scilifelab.se"
MAILTO_ERR="clinical-demux@scilifelab.se"

# extra settings are sourced from this file when it is readable
NIPTCONF="/home/proj/production/servers/config/hasta.scilifelab.se/.niptrc"

if [[ ! -r $NIPTCONF ]]; then
    # report the missing config, then carry on with the defaults above
    echo "NIPT config not found!" | mail -s "NIPT config not found on $(hostname)" ${MAILTO_ERR}
else
    source $NIPTCONF
fi

#############
# FUNCTIONS #
#############

# Error handler: mail which run failed, plus the output of `caller`.
# RUN is only assigned inside the main loop; the script runs under `set -u`,
# so fall back to a placeholder when the handler fires before that. Otherwise
# the handler itself would abort on the unbound variable and no mail would
# be sent.
failed() {
    echo "Fail to send ${RUN:-unknown run}. Error on line nr: $(caller)" | mail -s "ERROR sending NIPT $(hostname):${RUN:-unknown run}" ${MAILTO_ERR}
}
# run the handler whenever a command fails
trap failed ERR

#######
# RUN #
#######

# Go over every run folder and deliver the results of each run that is not a
# test run, has not been delivered yet and has a finished analysis.
for RUN_PATH in "${NIPTRUNS}"/*; do
    # an empty directory leaves the unexpanded pattern behind; skip it
    [[ -e ${RUN_PATH} ]] || continue
    RUN=$(basename "${RUN_PATH}")

    NOW=$(date +"%Y%m%d%H%M%S")
    if [[ ${RUN} =~ 'TEST' ]]; then
        echo "[${NOW}] [${RUN}] TEST run, skipping ..."
        continue # skip test runs
    fi

    # messages are quoted: a bare [..] word is a glob pattern and could be
    # replaced by a matching file name in the working directory
    echo "[${NOW}] [${RUN}] Checking ..."
    if [[ -e ${NIPTRUNS}/${RUN}/delivery.txt ]]; then
        # delivery.txt holds the timestamp of an earlier delivery
        while read -r line; do echo "[${NOW}] [${RUN}] Delivered on $line"; done < "${NIPTRUNS}/${RUN}/delivery.txt"
        continue
    fi

    # the analysis output directory only exists once the analysis has finished
    OUTDIR=$(find "${NIPTOUT}" -name "${RUN}_*" -type d)
    if [[ ! -d ${OUTDIR} ]]; then
        echo "[${NOW}] [${RUN}] Not finished yet ..."
        continue
    fi

    echo "[${NOW}] [${RUN}] Mailing!"

    # Turn carriage returns into newlines before grepping, so sample sheets
    # saved with Mac/Windows line endings are read line by line and no CR ends
    # up in the extracted names.
    SAMPLESHEET="${NIPTRUNS}/${RUN}/SampleSheet.csv"
    INVESTIGATOR_NAME=$(tr '\r' '\n' < "${SAMPLESHEET}" | grep 'Investigator Name' | cut -d, -f2)
    EXPERIMENT_NAME=$(tr '\r' '\n' < "${SAMPLESHEET}" | grep 'Experiment Name' | cut -d, -f2)
    INVESTIGATOR_NAME=${INVESTIGATOR_NAME%"$EXPERIMENT_NAME"} # remove the experiment name suffix
    INVESTIGATOR_NAME=${INVESTIGATOR_NAME%_} # remove possible ending _

    RESULTS_FILE_NAME="${INVESTIGATOR_NAME}_NIPT_RESULTS.csv"

    # gather following files in a dir
    # tar them
    # mail!

    TMP_OUTDIR=$(mktemp -d)

    cp -R "${NIPTRUNS}/${RUN}/InterOp" "${TMP_OUTDIR}"
    cp "${NIPTRUNS}/${RUN}/runParameters.xml" "${TMP_OUTDIR}"
    cp "${NIPTRUNS}/${RUN}/SampleSheet.csv" "${TMP_OUTDIR}"
    cp "${NIPTRUNS}/${RUN}/RunInfo.xml" "${TMP_OUTDIR}"
    cp "${OUTDIR}"/*_MISINDEXED_RESULTS.csv "${TMP_OUTDIR}"
    cp "${OUTDIR}"/*_NIPT_RESULTS.csv "${TMP_OUTDIR}/${RESULTS_FILE_NAME}"
    cp "${OUTDIR}/REPORT.Complete.txt" "${TMP_OUTDIR}"

    SUBJECT="${INVESTIGATOR_NAME}_${EXPERIMENT_NAME}"
    RESULTS_FILE="results_${SUBJECT}.tgz"

    cd "${TMP_OUTDIR}"
    tar -czf "${RESULTS_FILE}" *
    cd -

    # the run name starts with the run date, up to the first underscore
    DATE=${RUN%%_*}
    # string comparison; relies on the date being a fixed-width number
    if [[ $DATE > 161121 ]]; then
        mail -s "Results ${SUBJECT}" -a "${TMP_OUTDIR}/${RESULTS_FILE}" "${MAILTO}" < "${OUTDIR}/REPORT.Complete.txt"
        # FTP the results file
        # NOTE(review): the NIPTSFTP_* variables are not set in this script,
        # presumably they come from NIPTCONF -- confirm. The password is
        # passed on the command line and is visible in the process list.
        lftp "sftp://$NIPTSFTP_USER:$NIPTSFTP_PASSWORD@$NIPTSFTP_HOST" -e "cd SciLife_Till_StarLims; put ${TMP_OUTDIR}/${RESULTS_FILE_NAME}; bye"
    else
        mail -s "Results ${SUBJECT}" -a "${TMP_OUTDIR}/${RESULTS_FILE}" "${MAILTO_RERUN}" < "${OUTDIR}/REPORT.Complete.txt"
    fi

    # clean up
    echo "rm -Rf ${TMP_OUTDIR}"
    rm -Rf "${TMP_OUTDIR}"

    # mark the run as delivered so it is skipped next time
    date +'%Y%m%d%H%M%S' > "${NIPTRUNS}/${RUN}/delivery.txt"
done
92 changes: 92 additions & 0 deletions scripts/2500/NIPT/syncnipt.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/bin/bash
# Sync finished NIPT runs to the NIPT server with rsync.

# abort on failing commands and on unset variables
set -eu
# a glob without matches expands to nothing
shopt -s nullglob

VERSION=5.4.2
echo "Version $VERSION"

##########
# PARAMS #
##########

# where the sequencer runs are found
RUNBASE="/home/proj/production/flowcells/2500/runs/"
# where NIPT runs are linked to before syncing
NIPTBASE="/home/proj/production/flowcells/2500/nipt/"
# rsync destination
NIPTOUTPATH="/srv/nipt_runs/"
# recipient of error reports
MAILTO_ERR="clinical-demux@scilifelab.se"

#############
# FUNCTIONS #
#############

# Error handler: mail which run failed to sync.
# RUN is only assigned inside the main loop; the script runs under `set -u`,
# so fall back to a placeholder when the handler fires before that. Otherwise
# the handler itself would abort on the unbound variable and no mail would
# be sent.
failed() {
    echo "Fail to sync ${RUN:-unknown run}" | mail -s "ERROR syncing NIPT $(hostname):${RUN:-unknown run}" ${MAILTO_ERR}
}
# run the handler whenever a command fails
trap failed ERR

#######
# RUN #
#######

# Go over every run: prepare the sample sheet of each NIPT run that has not
# been synced yet and, once sequencing has finished, link and rsync it.
for RUN in ${RUNBASE}/*; do
    RUN=$(basename "${RUN}")
    NOW=$(date +"%Y%m%d%H%M%S")

    # messages are quoted: a bare [..] word is a glob pattern and, with
    # nullglob set, could even vanish from the output
    if [[ -e ${NIPTBASE}${RUN} ]]; then
        echo "[${NOW}] ${RUN} has already synced"
        continue
    fi

    RUNDIR="${RUNBASE}${RUN}"
    SAMPLESHEET="${RUNDIR}/SampleSheet.csv"

    # simple NIPT detection
    if ! grep -qs Description,cfDNAHiSeqv1.0 "${SAMPLESHEET}"; then
        echo "[${NOW}] ${RUN} is not a NIPT run!"
        continue
    fi

    # keep a copy of the sample sheet as it was delivered
    if [[ ! -e ${RUNDIR}/SampleSheet.ori ]]; then
        cp "${SAMPLESHEET}" "${RUNDIR}/SampleSheet.ori"
    fi

    # transform SampleSheet from Mac/Windows to Unix
    # (\r is written as an escape so the expression stays on one line)
    if grep -qs $'\r' "${SAMPLESHEET}"; then
        sed -i 's/\r/\n/g' "${SAMPLESHEET}"
    fi
    # drop empty lines, including those the CR conversion leaves behind
    sed -i '/^$/d' "${SAMPLESHEET}"

    # validate
    if ! demux sheet validate -a nipt "${SAMPLESHEET}"; then
        NOW=$(date +"%Y%m%d%H%M%S")
        echo "[${NOW}] ${RUN} has badly formatted SampleSheet!"
        mail -s "NIPT ${RUN} has a badly formatted SampleSheet!" ${MAILTO_ERR} < "${SAMPLESHEET}"

        continue
    fi

    # make SampleSheet NIPT ready
    demux sheet massage "${SAMPLESHEET}" > "${RUNDIR}/SampleSheet.mas"
    mv "${RUNDIR}/SampleSheet.mas" "${SAMPLESHEET}"
    cp "${SAMPLESHEET}" "${RUNDIR}/Data/Intensities/BaseCalls/"

    # sync run to NIPT-TT server
    if [[ ! -f ${RUNDIR}/RTAComplete.txt ]]; then
        echo "[${NOW}] ${RUN} is not finished yet"
        continue
    fi

    echo "[${NOW}] ${RUN} is finished, linking"
    cp -al "${RUNDIR}" "${NIPTBASE}"
    NOW=$(date +"%Y%m%d%H%M%S")
    echo "[${NOW}] ${RUN} linking is finished, starting sync"

    # All four steps are chained inside the `if`, so a failure of any of them
    # (rsync included) reaches the error branch. A command on the left of &&
    # is exempt from `set -e`, and $? would only reflect the last copy.
    # RTAComplete.txt and the sample sheets are excluded from the rsync and
    # copied afterwards, RTAComplete.txt last.
    if rsync -r --exclude RTAComplete.txt --exclude SampleSheet.csv --exclude Data/Intensities/BaseCalls/SampleSheet.csv "${NIPTBASE}${RUN}" "${NIPTOUTPATH}" \
        && cp "${NIPTBASE}/${RUN}/SampleSheet.csv" "${NIPTOUTPATH}/${RUN}/" \
        && cp "${NIPTBASE}/${RUN}/SampleSheet.csv" "${NIPTOUTPATH}/${RUN}/Data/Intensities/BaseCalls/" \
        && cp "${NIPTBASE}/${RUN}/RTAComplete.txt" "${NIPTOUTPATH}/${RUN}/"; then
        NOW=$(date +"%Y%m%d%H%M%S")
        echo "[${NOW}] ${RUN} has finished syncing"
    else
        NOW=$(date +"%Y%m%d%H%M%S")
        echo "[${NOW}] ${RUN} has FAILED syncing"
        # commands tested by `if` do not fire the ERR trap; report explicitly
        failed
    fi
done

0 comments on commit 23fd83a

Please sign in to comment.