Skip to content

Commit

Permalink
Merge pull request #145 from AlexandrovLab/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
marcos-diazg committed Aug 10, 2022
2 parents 77664ed + 4f3bd00 commit 71e389c
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37",
| | **out_put** | String | The name of the output folder. The output folder will be generated in the current working directory. |
| | **input_data** | String | Name of the input folder (in case of "vcf" type input) or the input file (in case of "table" type input). The project file or folder should be inside the current working directory. For the "vcf" type input, the project has to be a folder which will contain the vcf files in vcf format or text formats. The "text" type projects have to be a file. |
| | **reference_genome** | String | The name of the reference genome. The default reference genome is "GRCh37". This parameter is applicable only if the input_type is "vcf". |
| | **opportunity_genome** | String | The build or version of the reference signatures for the reference genome. The default opportunity genome is GRCh37. If the input_type is "vcf", the genome_build automatically matches the input reference genome value. |
| | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures. The default opportunity genome is GRCh37. If the input_type is "vcf", the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (GRCh37, GRCh38, mm9, mm10 and rn6). If a different opportunity genome is selected, the default genome GRCh37 will be used. |
| | **context_type** | String | A string of mutation context names separated by commas (","). The items in the list define the mutational contexts to be considered when extracting the signatures. The default value is "96,DINUC,ID", where "96" is the SBS96 context, "DINUC" is the DINUCLEOTIDE context and "ID" is the INDEL context. |
| | **exome** | Boolean | Defines if the exomes will be extracted. The default value is "False". |
| **NMF Replicates** | | | |
Expand All @@ -123,7 +123,7 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37",
| | **min_stability** | Float | Default is 0.2. The cutoff threshold of the minimum stability. Solutions with minimum stabilities below this threshold will not be considered. |
| | **combined_stability** | Float | Default is 1.0. The cutoff threshold of the combined stability (sum of average and minimum stability). Solutions with combined stabilities below this threshold will not be considered. |
| **Decomposition** | | | |
| | **cosmic_version** | Float | Takes a positive float among 1, 2, 3, 3.1, 3.2. Default is 3.1. Defines the version of COSMIC reference signatures. |
| | **cosmic_version** | Float | Takes a positive float among 1, 2, 3, 3.1, 3.2 and 3.3. Default is 3.3. Defines the version of the COSMIC reference signatures. |
| | **de_novo_fit_penalty** | Float | Takes any positive float. Default is 0.02. Defines the weak (remove) threshold cutoff to assign de novo signatures to a sample. |
| | **nnls_add_penalty** | Float | Takes any positive float. Default is 0.05. Defines the strong (add) threshold cutoff to assign COSMIC signatures to a sample. |
| | **nnls_remove_penalty** | Float | Takes any positive float. Default is 0.01. Defines the weak (remove) threshold cutoff to assign COSMIC signatures to a sample. |
Expand Down
20 changes: 16 additions & 4 deletions SigProfilerExtractor/sigpro.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def record_parameters(sysdata, execution_parameters, start_time):

sysdata.write("COSMIC MATCH\n")
sysdata.write("\topportunity_genome: {}\n".format(execution_parameters["opportunity_genome"]))
sysdata.write("\cosmic_version: {}\n".format(execution_parameters["cosmic_version"]))
sysdata.write("\tcosmic_version: {}\n".format(execution_parameters["cosmic_version"]))
sysdata.write("\tnnls_add_penalty: {}\n".format(execution_parameters["nnls_add_penalty"]))
sysdata.write("\tnnls_remove_penalty: {}\n".format(execution_parameters["nnls_remove_penalty"]))
sysdata.write("\tinitial_remove_penalty: {}\n".format(execution_parameters["initial_remove_penalty"]))
Expand All @@ -162,7 +162,7 @@ def sigProfilerExtractor(input_type,
input_data,
reference_genome="GRCh37",
opportunity_genome = "GRCh37",
cosmic_version=3.1,
cosmic_version=3.3,
context_type = "default",
exome = False,
minimum_signatures=1,
Expand Down Expand Up @@ -213,7 +213,7 @@ def sigProfilerExtractor(input_type,
reference_genome: A string, optional. The name of the reference genome. The default reference genome is "GRCh37". This parameter is applicable only if the input_type is "vcf".
opportunity_genome: The build or version of the reference signatures for the reference genome. The default opportunity genome is GRCh37. If the input_type is "vcf", the genome_build automatically matches the input reference genome value.
opportunity_genome: The build or version of the reference genome for the reference signatures. The default opportunity genome is GRCh37. If the input_type is "vcf", the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (GRCh37, GRCh38, mm9, mm10 and rn6). If a different opportunity genome is selected, the default genome GRCh37 will be used.
context_type: A list of strings, optional. The items in the list define the mutational contexts to be considered to extract the signatures. The default value is "SBS96,DBS78,ID83".
Expand Down Expand Up @@ -814,7 +814,19 @@ def sigProfilerExtractor(input_type,
devopts['processSTE']=processSTE
devopts['sequence']=sequence

decomp.spa_analyze( allgenomes, output, signatures=processAvg,genome_build=genome_build, verbose=False,decompose_fit_option= True,denovo_refit_option=True,cosmic_fit_option=False,devopts=devopts)

# Check if genome_build is available in COSMIC, if not reset to GRCh37
if genome_build == "GRCh37" or genome_build == "GRCh38" or genome_build == "mm9" or genome_build == "mm10" or genome_build == "rn6":
genome_build = genome_build
else:
sysdata = open(out_put+"/JOB_METADATA.txt", "a")
sysdata.write("\n[{}] The selected opportunity genome is {}. COSMIC signatures are available only for GRCh37/38, mm9/10 and rn6 genomes. So, the opportunity genome is reset to GRCh37.\n". \
format(str(datetime.datetime.now()).split(".")[0], str(genome_build)))
print("The selected opportunity genome is "+str(genome_build)+". COSMIC signatures are available only for GRCh37/38, mm9/10 and rn6 genomes. So, the opportunity genome is reset to GRCh37.")
sysdata.close()
genome_build = "GRCh37"

decomp.spa_analyze(allgenomes, output, signatures=processAvg, genome_build=genome_build, cosmic_version=cosmic_version, exome=exome, verbose=False,decompose_fit_option= True,denovo_refit_option=True,cosmic_fit_option=False,devopts=devopts)


sysdata = open(out_put+"/JOB_METADATA.txt", "a")
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
if os.path.exists("dist"):
shutil.rmtree("dist")

VERSION = '1.1.9'
VERSION = '1.1.10'


with open('README.md') as f:
Expand All @@ -20,7 +20,7 @@ def write_version_py(filename='SigProfilerExtractor/version.py'):
# THIS FILE IS GENERATED FROM SIGPROFILEREXTRACTOR SETUP.PY
short_version = '%(version)s'
version = '%(version)s'
Update = 'Fix typos, requirements, and remove legacy decomposition code'
Update = '1. Reset reference genome to GRCh37 if a not supported genome is selected. 2. Add support for COSMIC exome reference signatures'
"""
fh = open(filename, 'w')
Expand All @@ -33,9 +33,9 @@ def write_version_py(filename='SigProfilerExtractor/version.py'):
'numpy>=1.21.2',
'pandas>=1.2.4',
'nimfa>=1.1.0',
'SigProfilerMatrixGenerator>=1.2.8',
'SigProfilerMatrixGenerator>=1.2.9',
'sigProfilerPlotting>=1.2.2',
'SigProfilerAssignment>=0.0.8',
'SigProfilerAssignment>=0.0.10',
'pillow',
'statsmodels>=0.9.0',
'scikit-learn>=0.24.2',
Expand Down

0 comments on commit 71e389c

Please sign in to comment.