From b81f61ba32ab6c8e54325ade61839c44b8fdfe1e Mon Sep 17 00:00:00 2001 From: Kevin Liao Date: Mon, 10 Jun 2024 13:52:19 -0400 Subject: [PATCH 1/3] use min(config.hwe_maf, sqrt(5/n)) for hwe computing --- .../workflow/sub_workflows/subject_qc.smk | 42 +++++++++++++++---- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/src/cgr_gwas_qc/workflow/sub_workflows/subject_qc.smk b/src/cgr_gwas_qc/workflow/sub_workflows/subject_qc.smk index 1c338f4b..6d73fb3b 100644 --- a/src/cgr_gwas_qc/workflow/sub_workflows/subject_qc.smk +++ b/src/cgr_gwas_qc/workflow/sub_workflows/subject_qc.smk @@ -5,6 +5,7 @@ from more_itertools import flatten from cgr_gwas_qc import load_config from cgr_gwas_qc.workflow.scripts import subject_qc_table import shutil +import math cfg = load_config() @@ -654,11 +655,12 @@ def _get_controls(wildcards): ] -rule population_controls_Subject_IDs: +checkpoint population_controls_Subject_IDs: input: rules.subject_qc_table.output[0], output: "subject_level/{population}/controls.txt", + "subject_level/{population}/hwe_maf_thres.txt", params: "subject_level/{population}/test_controls.txt", "subject_level/{population}/test_case.txt", @@ -722,6 +724,15 @@ rule population_controls_Subject_IDs: "threshold, using control and case samples for HWE", ) shutil.copyfile(params[2], output[0]) + with open(output[1], "w") as f: + f.write( + str( + round( + min(cfg.config.software_params.maf_for_hwe, math.sqrt(5 / num_lines2)), + 2, + ) + ) + ) else: print( num_lines0, @@ -730,6 +741,15 @@ rule population_controls_Subject_IDs: "threshold for HWE", ) shutil.copyfile(params[0], output[0]) + with open(output[1], "w") as f: + f.write( + str( + round( + min(cfg.config.software_params.maf_for_hwe, math.sqrt(5 / num_lines0)), + 2, + ) + ) + ) use rule keep_ids from plink as pull_population_unrelated_controls with: @@ -817,13 +837,21 @@ use rule hwe from plink as population_level_unrelated_controls_hwe with: "{population}_controls_filter" +def compute_maf(wildcards): + with open( + checkpoints.population_controls_Subject_IDs.get(population=wildcards.population).output[1] + ) as f: + value = f.read().strip() + return expand( + rules.population_level_unrelated_controls_hwe.output[0], + maf=value, + allow_missing=True, + ) + + rule plot_hwe: input: - expand( - rules.population_level_unrelated_controls_hwe.output[0], - maf=cfg.config.software_params.maf_for_hwe, - allow_missing=True, - ), + compute_maf, params: population="{population}", output: @@ -838,7 +866,7 @@ def _control_plots(wildcards): if not populations: return [] - return expand(rules.plot_hwe.output[0], population=populations) + return expand(rules.plot_hwe.output, population=populations) rule agg_control_plots: From 7c0a2b4c6224429f6d2cd552c6f8c75220dabec2 Mon Sep 17 00:00:00 2001 From: Kevin Liao Date: Mon, 10 Jun 2024 13:55:19 -0400 Subject: [PATCH 2/3] fixed index bug on rule output --- src/cgr_gwas_qc/workflow/sub_workflows/subject_qc.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cgr_gwas_qc/workflow/sub_workflows/subject_qc.smk b/src/cgr_gwas_qc/workflow/sub_workflows/subject_qc.smk index 6d73fb3b..a4a71143 100644 --- a/src/cgr_gwas_qc/workflow/sub_workflows/subject_qc.smk +++ b/src/cgr_gwas_qc/workflow/sub_workflows/subject_qc.smk @@ -866,7 +866,7 @@ def _control_plots(wildcards): if not populations: return [] - return expand(rules.plot_hwe.output, population=populations) + return expand(rules.plot_hwe.output[0], population=populations) rule agg_control_plots: From 14a7b74f612f276c0ebcf9a9099f918947f57829 Mon Sep 17 00:00:00 2001 From: Kevin Liao Date: Wed, 12 Jun 2024 14:56:54 -0400 Subject: [PATCH 3/3] docs: update subject QC workflow for new HWE handling --- docs/sub_workflows/subject_qc.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/sub_workflows/subject_qc.rst b/docs/sub_workflows/subject_qc.rst index 0275b5ad..def14c1a 100644 --- a/docs/sub_workflows/subject_qc.rst +++ b/docs/sub_workflows/subject_qc.rst @@ -133,9 +133,9 @@ Hardy Weinberg - ``subject_level//controls_unrelated_maf{maf_for_hwe}_snps_autosomes.hwe`` -Here we pull out only control subjects. -We then calculate Hardy Weinberg Equilibrium. -We use only controls, but cases may have SNPs that are out of HWE. +Here we calculate Hardy Weinberg Equilibrium (HWE) and produce plots for all populations (from graf-pop) that have >50 controls. Populations with <50 controls but > 50 cases + control are also run. + +MAF threshold for computing HWE is the minimum(software_params.maf_for_hwe, sqrt(5/n)) where n is the number of controls or cases + controls (if number of controls < 50). Subject QC Summary Tables -------------------------