Merge 7c77fcd into 8c5a864

Clinical-Genomics · Mar 4, 2021 · 23fd83a · 23fd83a
2 parents 8c5a864 + 7c77fcd
commit 23fd83a
Show file tree

Hide file tree

Showing 21 changed files with 1,093 additions and 137 deletions.
diff --git a/demux/cli/basemask.py b/demux/cli/basemask.py
@@ -43,8 +43,10 @@ def create_basemask(sheet):
         """ create the bcl2fastq basemask """
         run_parameters_file = "runParameters.xml"
         run_params_tree = parse_run_parameters(run_parameters_file)
-        read1_len = int(run_params_tree.findtext("Setup/IndexRead1"))
-        read2_len = int(run_params_tree.findtext("Setup/IndexRead2"))
+        read1 = int(run_params_tree.findtext("Setup/Read1"))
+        read2 = int(run_params_tree.findtext("Setup/Read2"))
+        indexread1 = int(run_params_tree.findtext("Setup/IndexRead1"))
+        indexread2 = int(run_params_tree.findtext("Setup/IndexRead2"))
 
         lines = [line for line in sheet.lines_per_column("lane", lane)]
 
@@ -53,19 +55,19 @@ def create_basemask(sheet):
         index2 = lines[0]["index2"] if "index2" in lines[0] else EMPTY_STRING
 
         # index1 basemask
-        index1_n = "n" * (read1_len - len(index1))
+        index1_n = "n" * (indexread1 - len(index1))
         basemask_index1 = "I" + str(len(index1)) + index1_n
 
         # index2 basemask
-        if read2_len == 0:
+        if read2 == 0:
             click.echo(f"Y151,{basemask_index1},Y151")
         else:
-            index2_n = "n" * (read2_len - len(index2))
+            index2_n = "n" * (indexread2 - len(index2))
             if len(index2) > 0:
                 basemask_index2 = "I" + str(len(index2)) + index2_n
             else:
                 basemask_index2 = index2_n
-            click.echo(f"Y151,{basemask_index1},{basemask_index2},Y151")
+            click.echo(f"Y{read1},{basemask_index1},{basemask_index2},Y{read2}")
 
     def create_novaseq_basemask():
         """ create the bcl2fastq basemask for novaseq flowcells"""

diff --git a/demux/cli/samplesheet.py b/demux/cli/samplesheet.py
@@ -14,6 +14,7 @@
     HiSeq2500Samplesheet,
     MiseqSamplesheet,
     CreateNovaseqSamplesheet,
+    Create2500Samplesheet,
 )
 
 LOG = logging.getLogger(__name__)
@@ -161,29 +162,14 @@ def get_project(project):
 
     # ... fix some 2500 specifics
     if application == "wes":
-        # this is how the data is keyed when it gets back from LIMS
-        lims_keys = [
-            "fcid",
-            "lane",
-            "sample_id",
-            "sample_ref",
-            "index",
-            "description",
-            "control",
-            "recipe",
-            "operator",
-            "project",
-        ]
-        header = [HiSeq2500Samplesheet.header_map[head] for head in lims_keys]
+        demux_samplesheet = Create2500Samplesheet(
+            flowcell, index_length, raw_samplesheet
+        ).construct_samplesheet()
 
-        if index_length:
-            raw_samplesheet = [
-                line
-                for line in raw_samplesheet
-                if len(line["index"].replace("-", "")) == int(index_length)
-            ]
-        for line in raw_samplesheet:
-            line["description"] = line["sample_id"]
+        # add [section] header
+        click.echo("[Data]")
+        click.echo(demux_samplesheet)
+        return
 
     # ... fix some X specifics
     if application == "wgs":

diff --git a/demux/constants/constants.py b/demux/constants/constants.py
@@ -0,0 +1,19 @@
+""" Constants for demultiplexing """
+
+SPACE = " "
+DASH = "-"  # default delimiter used to detect a dual index in a raw sample sheet row
+COMMA = ","  # default column delimiter when the sample sheet is rendered as text
+
+LIMS_KEYS = [  # row keys written to the sample sheet, in output column order
+    "fcid",
+    "lane",
+    "sample_id",
+    "sample_ref",
+    "index",
+    "index2",
+    "sample_name",
+    "control",
+    "recipe",
+    "operator",
+    "project",
+]
diff --git a/demux/utils/__init__.py b/demux/utils/__init__.py
@@ -10,3 +10,4 @@
     iseqSampleSheet,
 )
 from .novaseq_samplesheet import CreateNovaseqSamplesheet
+from .hiseq2500_samplesheet import Create2500Samplesheet
diff --git a/demux/utils/hiseq2500_samplesheet.py b/demux/utils/hiseq2500_samplesheet.py
@@ -0,0 +1,78 @@
+""" Create a samplesheet for 2500 flowcells """
+
+from ..constants.constants import DASH, COMMA, LIMS_KEYS
+from .samplesheet import Samplesheet
+
+
+class Create2500Samplesheet:
+    """Create a raw sample sheet for 2500 flowcells.
+
+    Rows are dicts read by the keys in LIMS_KEYS; a dual index arrives as one
+    dash-separated string in the "index" field and is split into two fields.
+    """
+
+    def __init__(self, flowcell: str, index_length: int, raw_samplesheet: list):
+        """Store the flowcell id, the wanted index length and the raw rows.
+
+        A falsy index_length (None, 0, "") disables index-length filtering.
+        """
+        self.flowcell = flowcell
+        self.index_length = index_length
+        self.raw_samplesheet = raw_samplesheet
+
+    @property
+    def header(self) -> list:
+        """ Create the sample sheet header """
+        # NOTE(review): this reads the base-class header_map; confirm its values
+        # follow the same order as LIMS_KEYS, else titles and rows misalign.
+        return list(Samplesheet.header_map.values())
+
+    @staticmethod
+    def is_dual_index(index: str, delimiter=DASH) -> bool:
+        """ Determines if an index in the raw samplesheet is dual index or not """
+        return delimiter in index
+
+    def remove_unwanted_indexes(self, raw_samplesheet: list) -> list:
+        """Keep only rows whose index (dashes ignored) has length index_length.
+
+        Returns the rows unfiltered when no index_length was given.
+        """
+        if not self.index_length:
+            return raw_samplesheet
+        # the former inline CLI code cast the length with int(); keep doing so
+        wanted_length = int(self.index_length)
+        return [
+            line
+            for line in raw_samplesheet
+            if len(line["index"].replace(DASH, "")) == wanted_length
+        ]
+
+    def split_dual_indexes(self, raw_samplesheet: list) -> list:
+        """Split "index1-index2" into the "index" and "index2" fields, in place.
+
+        Single-index rows get an empty "index2" unless they already carry one,
+        so every row can be rendered with the same columns.
+        """
+        for line in raw_samplesheet:
+            if self.is_dual_index(line["index"]):
+                line["index"], line["index2"] = line["index"].split(DASH)
+            else:
+                line.setdefault("index2", "")
+        return raw_samplesheet
+
+    def construct_samplesheet(self, end="\n", delimiter=COMMA) -> str:
+        """Return the sample sheet as text: a header line plus one line per row.
+
+        The row dicts in raw_samplesheet are updated in place ("index" and
+        "index2" may change, "sample_name" is set to the row's "project").
+        """
+        demux_samplesheet = [delimiter.join(self.header)]
+        raw_samplesheet = self.remove_unwanted_indexes(self.raw_samplesheet)
+        raw_samplesheet = self.split_dual_indexes(raw_samplesheet)
+        for line in raw_samplesheet:
+            line["sample_name"] = line["project"]
+            demux_samplesheet.append(
+                delimiter.join(str(line[lims_key]) for lims_key in LIMS_KEYS)
+            )
+
+        return end.join(demux_samplesheet)
diff --git a/demux/utils/samplesheet.py b/demux/utils/samplesheet.py
@@ -594,13 +594,13 @@ class HiSeq2500Samplesheet(Samplesheet):
         "lane": "Lane",
         "sample_id": "SampleID",
         "sample_ref": "SampleRef",
-        "index": "Index",
+        "index": "index",
+        "index2": "index2",
         "sample_name": "SampleName",
         "control": "Control",
         "recipe": "Recipe",
         "operator": "Operator",
-        "description": "Description",
-        "project": "SampleProject",
+        "project": "Project",
     }
 
 

diff --git a/scripts/2500/NIPT/sendnipt.bash b/scripts/2500/NIPT/sendnipt.bash
@@ -0,0 +1,112 @@
+#!/bin/bash
+# script to send run results
+#
+# For every run in NIPTRUNS without a delivery.txt whose output dir exists in
+# NIPTOUT: bundle run files and results into a tarball, mail it, upload the
+# results CSV over SFTP (runs dated after 161121 only) and write delivery.txt.
+
+set -ue
+
+VERSION=5.4.2
+echo "Version $VERSION"
+
+##########
+# PARAMS #
+##########
+
+NIPTRUNS=/home/proj/production/flowcells/2500/nipt/
+NIPTOUT=/srv/nipt_analysis_output/
+MAILTO=clinical-demux@scilifelab.se,nipt.karolinska@sll.se
+MAILTO_RERUN=agne.lieden@ki.se,clinical-demux@scilifelab.se
+MAILTO_ERR=clinical-demux@scilifelab.se
+NIPTCONF=/home/proj/production/servers/config/hasta.scilifelab.se/.niptrc
+
+# NOTE(review): NIPTCONF presumably defines the NIPTSFTP_* variables used below - confirm
+if [[ -r $NIPTCONF ]]; then
+    . $NIPTCONF
+else
+    echo "NIPT config not found!" | mail -s "NIPT config not found on $(hostname)" ${MAILTO_ERR}
+fi
+
+#############
+# FUNCTIONS #
+#############
+
+# ERR trap: mail which run failed. RUN is still unset when an error happens
+# before the loop starts, hence ${RUN:-} (a plain ${RUN} would abort under set -u).
+failed() {
+    echo "Fail to send ${RUN:-}. Error on line nr: $(caller)" | mail -s "ERROR sending NIPT $(hostname):${RUN:-}" ${MAILTO_ERR}
+}
+trap failed ERR
+
+#######
+# RUN #
+#######
+
+for RUN in $(ls ${NIPTRUNS}); do
+    NOW=$(date +"%Y%m%d%H%M%S")
+    if [[ ${RUN} =~ 'TEST' ]]; then
+        echo [${NOW}] [${RUN}] TEST run, skipping ...
+        continue # skip test runs
+    fi
+    echo [${NOW}] [${RUN}] Checking ...
+    if [[ -e ${NIPTRUNS}/${RUN}/delivery.txt ]]; then
+        while read line; do echo [${NOW}] [${RUN}] Delivered on $line; done < ${NIPTRUNS}/${RUN}/delivery.txt
+        continue
+    fi
+
+    OUTDIR=$(find ${NIPTOUT} -name "${RUN}_*" -type d)
+    if [[ ! -d ${OUTDIR} ]]; then
+        echo [${NOW}] [${RUN}] Not finished yet ...
+    else
+        echo [${NOW}] [${RUN}] Mailing!
+
+        # the sheet may carry carriage returns: turn them into newlines before grepping
+        INVESTIGATOR_NAME=$(tr '\r' '\n' < ${NIPTRUNS}/${RUN}/SampleSheet.csv | grep 'Investigator Name' - | cut -d, -f2)
+        EXPERIMENT_NAME=$(tr '\r' '\n' < ${NIPTRUNS}/${RUN}/SampleSheet.csv | grep 'Experiment Name' - | cut -d, -f2)
+        INVESTIGATOR_NAME=${INVESTIGATOR_NAME%$EXPERIMENT_NAME}
+        INVESTIGATOR_NAME=${INVESTIGATOR_NAME%_} # remove possible ending _
+
+        RESULTS_FILE_NAME="${INVESTIGATOR_NAME}_NIPT_RESULTS.csv"
+
+        # gather following files in a dir
+        # tar them
+        # mail!
+
+        TMP_OUTDIR=$(mktemp -d)
+
+        cp -R ${NIPTRUNS}/${RUN}/InterOp ${TMP_OUTDIR}
+        cp ${NIPTRUNS}/${RUN}/runParameters.xml ${TMP_OUTDIR}
+        cp ${NIPTRUNS}/${RUN}/SampleSheet.csv ${TMP_OUTDIR}
+        cp ${NIPTRUNS}/${RUN}/RunInfo.xml ${TMP_OUTDIR}
+        cp ${OUTDIR}/*_MISINDEXED_RESULTS.csv ${TMP_OUTDIR}
+        cp ${OUTDIR}/*_NIPT_RESULTS.csv ${TMP_OUTDIR}/${RESULTS_FILE_NAME}
+        cp ${OUTDIR}/REPORT.Complete.txt ${TMP_OUTDIR}
+
+        SUBJECT="${INVESTIGATOR_NAME}_${EXPERIMENT_NAME}"
+        RESULTS_FILE="results_${SUBJECT}.tgz"
+
+        cd ${TMP_OUTDIR}
+        tar -czf ${RESULTS_FILE} *
+        cd -
+
+        IFS=_ read -ra RUN_PARTS <<< "${RUN}"
+        unset IFS
+        DATE=${RUN_PARTS[0]}
+        # [[ > ]] compares as strings; assumes DATE is always a 6-digit date prefix
+        if [[ $DATE > 161121 ]]; then
+            mail -s "Results ${SUBJECT}" -a ${TMP_OUTDIR}/${RESULTS_FILE} ${MAILTO} < ${NIPTOUT}/${RUN}_*/REPORT.Complete.txt
+            # FTP the results file
+            NOW=$(date +"%Y%m%d%H%M%S")
+            lftp sftp://$NIPTSFTP_USER:$NIPTSFTP_PASSWORD@$NIPTSFTP_HOST -e "cd SciLife_Till_StarLims; put ${TMP_OUTDIR}/${RESULTS_FILE_NAME}; bye"
+        else
+            mail -s "Results ${SUBJECT}" -a ${TMP_OUTDIR}/${RESULTS_FILE} ${MAILTO_RERUN} < ${NIPTOUT}/${RUN}_*/REPORT.Complete.txt
+        fi
+
+        # clean up
+        echo "rm -Rf ${TMP_OUTDIR}"
+        rm -Rf ${TMP_OUTDIR}
+
+        date +'%Y%m%d%H%M%S' > ${NIPTRUNS}/${RUN}/delivery.txt
+    fi
+done
diff --git a/scripts/2500/NIPT/syncnipt.bash b/scripts/2500/NIPT/syncnipt.bash
@@ -0,0 +1,98 @@
+#!/bin/bash
+# script to rsync a run to the NIPT server
+#
+# For every run in RUNBASE that is not yet present in NIPTBASE: detect NIPT
+# runs by their SampleSheet, normalise and validate the sheet, hardlink the
+# finished run into NIPTBASE and rsync it to NIPTOUTPATH.
+
+shopt -s nullglob
+set -eu
+
+VERSION=5.4.2
+echo "Version $VERSION"
+
+##########
+# PARAMS #
+##########
+
+RUNBASE=/home/proj/production/flowcells/2500/runs/
+NIPTBASE=/home/proj/production/flowcells/2500/nipt/
+NIPTOUTPATH=/srv/nipt_runs/
+MAILTO_ERR=clinical-demux@scilifelab.se
+
+#############
+# FUNCTIONS #
+#############
+
+# Mail an error report. RUN is still unset when the ERR trap fires before the
+# loop starts, hence ${RUN:-} (a plain ${RUN} would abort under set -u).
+failed() {
+    echo "Fail to sync ${RUN:-}" | mail -s "ERROR syncing NIPT $(hostname):${RUN:-}" ${MAILTO_ERR}
+}
+trap failed ERR
+
+#######
+# RUN #
+#######
+
+for RUN in ${RUNBASE}/*; do
+  RUN=$(basename ${RUN})
+  NOW=$(date +"%Y%m%d%H%M%S")
+  if [[ ! -e ${NIPTBASE}${RUN} ]]; then
+    # simple NIPT detection
+    if grep -qs Description,cfDNAHiSeqv1.0 ${RUNBASE}${RUN}/SampleSheet.csv; then
+      if [[ ! -e ${RUNBASE}${RUN}/SampleSheet.ori ]]; then
+        cp ${RUNBASE}${RUN}/SampleSheet.csv ${RUNBASE}${RUN}/SampleSheet.ori
+      fi
+
+      # transform SampleSheet from Mac/Windows to Unix
+      if grep -qs $'\r' ${RUNBASE}${RUN}/SampleSheet.csv; then
+          sed -i 's/\r/\n/g' ${RUNBASE}${RUN}/SampleSheet.csv
+      fi
+      sed -i '/^$/d' ${RUNBASE}${RUN}/SampleSheet.csv
+
+      # validate
+      if ! demux sheet validate -a nipt ${RUNBASE}${RUN}/SampleSheet.csv; then
+          NOW=$(date +"%Y%m%d%H%M%S")
+          echo [${NOW}] ${RUN} has badly formatted SampleSheet!
+          cat ${RUNBASE}${RUN}/SampleSheet.csv | mail -s "NIPT ${RUN} has a badly formatted SampleSheet!" ${MAILTO_ERR}
+
+          continue
+      fi
+
+      # make SampleSheet NIPT ready
+      demux sheet massage ${RUNBASE}${RUN}/SampleSheet.csv > ${RUNBASE}${RUN}/SampleSheet.mas
+      mv ${RUNBASE}${RUN}/SampleSheet.mas ${RUNBASE}${RUN}/SampleSheet.csv
+      cp ${RUNBASE}${RUN}/SampleSheet.csv ${RUNBASE}${RUN}/Data/Intensities/BaseCalls/
+
+      # sync run to NIPT-TT server
+      if [ -f ${RUNBASE}${RUN}/RTAComplete.txt ]; then
+        echo [${NOW}] ${RUN} is finished, linking
+        cp -al ${RUNBASE}${RUN} ${NIPTBASE}
+        NOW=$(date +"%Y%m%d%H%M%S")
+        echo [${NOW}] ${RUN} linking is finished, starting sync
+        # the whole chain is the if-condition so that any failing step reaches
+        # the FAILED branch; the excluded files are copied after the bulk rsync
+        if rsync -r --exclude RTAComplete.txt --exclude SampleSheet.csv --exclude Data/Intensities/BaseCalls/SampleSheet.csv ${NIPTBASE}${RUN} ${NIPTOUTPATH} && \
+           cp ${NIPTBASE}/${RUN}/SampleSheet.csv ${NIPTOUTPATH}/${RUN}/ && \
+           cp ${NIPTBASE}/${RUN}/SampleSheet.csv ${NIPTOUTPATH}/${RUN}/Data/Intensities/BaseCalls/ && \
+           cp ${NIPTBASE}/${RUN}/RTAComplete.txt ${NIPTOUTPATH}/${RUN}/; then
+          NOW=$(date +"%Y%m%d%H%M%S")
+          echo [${NOW}] ${RUN} has finished syncing
+        else
+          NOW=$(date +"%Y%m%d%H%M%S")
+          echo [${NOW}] ${RUN} has FAILED syncing
+          failed || true
+        fi
+      else
+        echo [${NOW}] ${RUN} is not finished yet
+      fi
+    else
+      NOW=$(date +"%Y%m%d%H%M%S")
+      echo [$NOW] ${RUN} is not a NIPT run!
+    fi
+  else
+    NOW=$(date +"%Y%m%d%H%M%S")
+    echo [$NOW] ${RUN} has already synced
+  fi
+done