In [None]:
from SigProfilerExtractor import estimate_best_solution as ebs
from SigProfilerMatrixGenerator import install as genInstall
from SigProfilerExtractor import sigpro as sig
from SigProfilerExtractor import estimate_best_solution as ebs
import pandas as pd

# Install reference genome if necessary
genInstall.install('GRCh37')

In [1]:
def convert_csv_to_tsv(input_path, output_path):
    """
    Converts a CSV file to TSV format.
    """
    df = pd.read_csv(input_path)
    df.to_csv(output_path, sep='\t', index=False)


def run_sigprofiler(input_csv, output_dir, ref_genome, min_sig, max_sig, nmf_replicates, cpu_cores):
    """
    Executes the SigProfilerExtractor function with the specified parameters.
    """
    # Convert CSV to TSV
    tsv_path = input_csv.replace(".csv", ".tsv")
    convert_csv_to_tsv(input_csv, tsv_path)

    # Run SigProfilerExtractor
    sig.sigProfilerExtractor(
        input_type="matrix",
        output=output_dir,
        input_data=tsv_path,
        reference_genome=ref_genome,
        minimum_signatures=min_sig,
        maximum_signatures=max_sig,
        nmf_replicates=nmf_replicates,
        cpu=cpu_cores,
    )

Tool       | Installed 
-----------------------
curl       | True      
wget       | False     
rsync      | True      


INFO - GRCh37 is already installed.


All reference files have been created.
To proceed with matrix_generation, please provide the path to your vcf files and an appropriate output path.
Installation complete.


In [2]:
if __name__ == "__main__":
    # Input data
    input_csv = "simulated_data/s_25_n_0.06_GRCh37_17b_86_98_39_22a_43_17a_13_54_33_21_59_60_87_37_96_28_55_99_26_3_1_12_93_22b.csv"
    output_dir = "results_s_25_n_0.06_GRCh37_17b_86_98_39_22a_43_17a_13_54_33_21_59_60_87_37_96_28_55_99_26_3_1_12_93_22b"
    ref_genome = "GRCh37"
    min_sig = 4
    max_sig = 40
    nmf_replicates = 100
    cpu_cores = -1
    
    # Run the process
    run_sigprofiler(input_csv, output_dir, ref_genome, min_sig, max_sig, nmf_replicates, cpu_cores)


************** Reported Current Memory Use: 0.26 GB *****************

Extracting signature 4 for mutation type 96
The matrix normalizing cutoff is 27328


process 4 continues please wait... 
execution time: 3 seconds 

process 4 continues please wait... 
execution time: 3 seconds 

process 4 continues please wait... 
execution time: 3 seconds 

process 4 continues please wait... 
execution time: 3 seconds 

process 4 continues please wait... 
execution time: 5 seconds 

process 4 continues please wait... 
execution time: 2 seconds 

process 4 continues please wait... 
execution time: 3 seconds 

process 4 continues please wait... 
execution time: 6 seconds 

process 4 continues please wait... 
execution time: 3 seconds 

process 4 continues please wait... 
execution time: 3 seconds 

process 4 continues please wait... 
execution time: 8 seconds 

process 4 continues please wait... 
execution time: 8 seconds 

process 4 continues please wait... 
execution time: 3 seconds 

process 4 c

In [None]:
import os
from SigProfilerExtractor import estimate_best_solution as ebs

# Define the base parameters
output_dir = "results_s_25_n_0.06_GRCh37_17b_86_98_39_22a_43_17a_13_54_33_21_59_60_87_37_96_28_55_99_26_3_1_12_93_22b"
base_csvfile = os.path.join(output_dir, "SBS96/All_solutions_stat.csv")
all_solutions_folder = os.path.join(output_dir, "SBS96/All_solutions")
genomes = os.path.join(output_dir, "SBS96/Samples.txt")
title = "Selection_Plot"
cpu_cores = -1  # Use all available CPU cores
stability_threshold = [0.5]
min_stability_threshold = [-0.269]


# Loop through different combinations of thresholds
for stability_threshold in stability_threshold_values:
    for min_stability_threshold in min_stability_threshold_values:
        combined_stability_threshold = min_stability_threshold + stability_threshold
        # Define a unique subfolder for this run
        subfolder =  f"solution/stab_{stability_threshold}_minstab_{min_stability_threshold}_comb_{combined_stability_threshold}"
        subfolder_path = os.path.join(output_dir, subfolder)
        os.makedirs(subfolder_path, exist_ok=True)

        # Run the estimation
        print(f"Running for Stability: {stability_threshold}, Min Stability: {min_stability_threshold}, Combined: {combined_stability_threshold}")
        ebs.estimate_solution(
            base_csvfile,
            all_solutions_folder,
            genomes,
            subfolder_path,
            title,
            stability_threshold,
            min_stability_threshold,
            combined_stability_threshold
        )


Running for Stability: 0, Min Stability: -1, Combined: -1
Running for Stability: 0, Min Stability: -0.8, Combined: -0.8


KeyboardInterrupt: 

In [None]:
if __name__ == "__main__":
    # Input data
    input_csv = "simulated_data/s_8_n_0.02_GRCh37_17b_86_98_39_22a_43_17a_13.csv"
    output_dir = "results_s_8_n_0.02_GRCh37_17b_86_98_39_22a_43_17a_13.csv"
    ref_genome = "GRCh37"
    min_sig = 2
    max_sig = 20
    nmf_replicates = 100
    cpu_cores = -1

    # Run the process
    run_sigprofiler(input_csv, output_dir, ref_genome, min_sig, max_sig, nmf_replicates, cpu_cores)

