In [1]:
# Import libraries
import os

# Import CAsMap package
import sigpatsearch as sg

In [2]:
# Directory the notebook is run from; the output directory is anchored to it.
current_dir = os.getcwd()

# Input data layout: one sub-directory per dataset under `data_path`.
data_path = '../data/high_order_epistasis'
dataset = 'avrB'
dataset_dir = os.path.join(data_path, dataset)

# Input files of the selected dataset: genotype (X), phenotype (Y) and
# categorical covariate (C).
genotype_file, phenotype_file, covariate_file = (
    os.path.join(dataset_dir, file_name)
    for file_name in ('X.dat', 'Y.dat', 'C.dat')
)

# Results for this dataset are written below the current working directory.
output_path = os.path.join(current_dir, '../output/high_order_epistasis', dataset)

# Make sure the output directory exists before any result file is written.
output_dir_missing = not os.path.isdir(output_path)
if output_dir_missing:
    os.makedirs(output_path)

In [3]:
# Hyperparameters of the search
target_fwer = 0.05         # Target family-wise error rate (FWER)
max_interaction_order = 0  # 0: include interactions of any order in the analysis

# Build the object that searches for high-order epistasis in a Genome-Wide
# Association Study (GWAS), then apply the hyperparameters chosen above.
high_order_epistasis = sg.createSigPatSearch(method='facs')
high_order_epistasis.set_alpha(target_fwer)
high_order_epistasis.set_lmax(max_interaction_order)

In [4]:
# Load the genotype, phenotype and covariate files (in that order) into the search object
input_files = (genotype_file, phenotype_file, covariate_file)
high_order_epistasis.read_eth_files(*input_files)

In [5]:
# Run the significant pattern mining algorithm on the data read in the previous cell, to
# retrieve statistically associated interactions between genomic variants.
# The output files are written from this same object in the next cell.
high_order_epistasis.execute()

In [None]:
# Destination of every result file inside the output directory
summary_file = os.path.join(output_path, 'summary.txt')
profiling_file = os.path.join(output_path, 'profiling.txt')
significant_file = os.path.join(output_path, 'significant_interactions.csv')
testable_file = os.path.join(output_path, 'testable_interactions_pvalues.csv')

# High-level summary and profiling info related to the execution of the algorithm
high_order_epistasis.write_summary(summary_file)
high_order_epistasis.write_profile(profiling_file)

# Raw list of (possibly redundant) significantly associated multiplicative
# interactions of genomic variants
high_order_epistasis.write_pvals_significant_itemsets(significant_file)

# Optional: P-values for all testable interactions (significantly associated or not)
# NOTE(review): the original author flagged this writer as possibly broken and
# suggested removing it for now. If this cell fails here, comment out the call
# below; the three files above have already been written at that point.
high_order_epistasis.write_pvals_testable_itemsets(testable_file)