In [1]:
import cnvutils

In [2]:
import numpy as np
import pandas as pd  # needed for pd.read_csv below; previously missing, so a fresh kernel raised NameError

# TODO: replace with SeedSequence spawning
# See https://numpy.org/doc/stable/reference/random/parallel.html
# Deliberately unseeded: every run of this cell draws a fresh permutation.
rng = np.random.default_rng()

# TODO: Assemble this list from the metadata for the passed chromosome
cancer_types = ["brca", "coad", "hnscc", "lscc", "luad", "ov", "pdac"]

# Generate a permutation of the labels of which samples have and don't have the event.
# perm_events maps cancer type -> has-event table whose "event" column has been shuffled.
# NOTE(review): the tables are read only for source="cptac", chromosome 8, arm "p", "loss",
# while the later t-test cell also runs 8q gain and the "gistic" source -- confirm that
# reusing these permuted labels for those runs is intended.
perm_events = {}
for cancer_type in cancer_types:
    event_path = cnvutils.filenames.get_has_event_path(
        data_dir="../data",
        source="cptac",
        cancer_type=cancer_type,
        level=None,
        chromosome=8,
        arm="p",
        gain_or_loss="loss",
    )

    # Tab-separated table; the first column is used as the index
    event = pd.read_csv(event_path, sep='\t', index_col=0)

    # assign() returns a new frame, so the table as loaded is left untouched;
    # only the "event" column is replaced by a shuffled copy of itself
    perm_event = event.assign(event=rng.permutation(event["event"]))

    perm_events[cancer_type] = perm_event

In [3]:
# Repeat all the tumor vs. normal t-tests with the permuted has_event labels,
# separately within the samples labeled as having the event and those without.

# Extra parameters handed to multi_runner, as (name, vals) pairs. They are
# expanded below into the {"name": ..., "vals": ...} entries of more_dicts,
# in this order.
run_params = [
    ("save", [False]),
    ("permuted_event_data", [perm_events]),
    ("comparison", ["tumor"]),
    ("has_event", [True, False]),
    ("proteomics_or_transcriptomics", ["proteomics", "transcriptomics"]),
    ("cis_or_trans", ["cis", "trans"]),
]

# The runner's output of (filename, df) tuples is turned into a dictionary
# right away. The next function looks up each df by the filename it would
# have had if it had been saved to disk.
perm_res = dict(
    cnvutils.multi_runner(
        func=cnvutils.event_effects_ttest,
        sources=["cptac", "gistic"],
        levels=["gene"],
        chromosomes_events={8: {"p": ["loss"], "q": ["gain"]}},
        more_dicts=[{"name": name, "vals": vals} for name, vals in run_params],
    )
)

Running event_effects_ttest with {'chromosome': 8, 'arm': 'q', 'event_start': 47260878, 'event_end': 145052465, 'gain_or_loss': 'gain', 'cis_or_trans': 'cis', 'proteomics_or_transcriptomics': 'proteomics', 'cancer_types': ['brca', 'coad', 'hnscc', 'lscc', 'luad', 'ov'], 'source': 'cptac', 'comparison': 'tumor', 'tissue_type': None, 'has_event': True, 'level': None, 'data_dir': '/home/caleb/github/PayneLab/pancancerCNV/00_functions/chr08/../data', 'save': False, 'permuted_event_data': {'brca':            event  proportion
01BR001    False    0.978223
01BR008    False    0.000000
01BR009    False    0.238529
01BR010    False    0.000000
01BR015    False    0.969679
01BR017    False    0.021042
01BR018    False    0.967549
01BR020    False    0.981566
01BR023    False    0.871533
01BR025    False    0.000000
01BR026     True    1.000000
01BR027     True    0.981531
01BR030     True    0.872832
01BR031    False    0.237005
01BR032    False    0.061429
01BR033    False    1.000000
01BR040  

Running event_effects_ttest with {'chromosome': 8, 'arm': 'q', 'event_start': 47260878, 'event_end': 145052465, 'gain_or_loss': 'gain', 'cis_or_trans': 'cis', 'proteomics_or_transcriptomics': 'proteomics', 'cancer_types': ['brca', 'coad', 'hnscc', 'lscc', 'luad', 'ov'], 'source': 'cptac', 'comparison': 'tumor', 'tissue_type': None, 'has_event': False, 'level': None, 'data_dir': '/home/caleb/github/PayneLab/pancancerCNV/00_functions/chr08/../data', 'save': False, 'permuted_event_data': {'brca':            event  proportion
01BR001    False    0.978223
01BR008    False    0.000000
01BR009    False    0.238529
01BR010    False    0.000000
01BR015    False    0.969679
01BR017    False    0.021042
01BR018    False    0.967549
01BR020    False    0.981566
01BR023    False    0.871533
01BR025    False    0.000000
01BR026     True    1.000000
01BR027     True    0.981531
01BR030     True    0.872832
01BR031    False    0.237005
01BR032    False    0.061429
01BR033    False    1.000000
01BR040 


Running event_effects_ttest with {'chromosome': 8, 'arm': 'p', 'event_start': 406428, 'event_end': 38176533, 'gain_or_loss': 'loss', 'cis_or_trans': 'cis', 'proteomics_or_transcriptomics': 'proteomics', 'cancer_types': ['brca', 'coad', 'hnscc', 'lscc', 'luad', 'ov'], 'source': 'cptac', 'comparison': 'tumor', 'tissue_type': None, 'has_event': True, 'level': None, 'data_dir': '/home/caleb/github/PayneLab/pancancerCNV/00_functions/chr08/../data', 'save': False, 'permuted_event_data': {'brca':            event  proportion
01BR001    False    0.978223
01BR008    False    0.000000
01BR009    False    0.238529
01BR010    False    0.000000
01BR015    False    0.969679
01BR017    False    0.021042
01BR018    False    0.967549
01BR020    False    0.981566
01BR023    False    0.871533
01BR025    False    0.000000
01BR026     True    1.000000
01BR027     True    0.981531
01BR030     True    0.872832
01BR031    False    0.237005
01BR032    False    0.061429
01BR033    False    1.000000
01BR040    


Loading brca proteomics (1/6)...                              
Running event_effects_ttest with {'chromosome': 8, 'arm': 'p', 'event_start': 166085, 'event_end': 40153481, 'gain_or_loss': 'loss', 'cis_or_trans': 'cis', 'proteomics_or_transcriptomics': 'proteomics', 'cancer_types': ['brca', 'coad', 'hnscc', 'lscc', 'luad', 'ov', 'pdac'], 'source': 'gistic', 'comparison': 'tumor', 'tissue_type': None, 'has_event': True, 'level': 'gene', 'data_dir': '/home/caleb/github/PayneLab/pancancerCNV/00_functions/chr08/../data', 'save': False, 'permuted_event_data': {'brca':            event  proportion
01BR001    False    0.978223
01BR008    False    0.000000
01BR009    False    0.238529
01BR010    False    0.000000
01BR015    False    0.969679
01BR017    False    0.021042
01BR018    False    0.967549
01BR020    False    0.981566
01BR023    False    0.871533
01BR025    False    0.000000
01BR026     True    1.000000
01BR027     True    0.981531
01BR030     True    0.872832
01BR031    False    0.23

Running event_effects_ttest with {'chromosome': 8, 'arm': 'q', 'event_start': 36784373, 'event_end': 145052466, 'gain_or_loss': 'gain', 'cis_or_trans': 'cis', 'proteomics_or_transcriptomics': 'proteomics', 'cancer_types': ['brca', 'coad', 'hnscc', 'lscc', 'luad', 'ov', 'pdac'], 'source': 'gistic', 'comparison': 'tumor', 'tissue_type': None, 'has_event': True, 'level': 'gene', 'data_dir': '/home/caleb/github/PayneLab/pancancerCNV/00_functions/chr08/../data', 'save': False, 'permuted_event_data': {'brca':            event  proportion
01BR001    False    0.978223
01BR008    False    0.000000
01BR009    False    0.238529
01BR010    False    0.000000
01BR015    False    0.969679
01BR017    False    0.021042
01BR018    False    0.967549
01BR020    False    0.981566
01BR023    False    0.871533
01BR025    False    0.000000
01BR026     True    1.000000
01BR027     True    0.981531
01BR030     True    0.872832
01BR031    False    0.237005
01BR032    False    0.061429
01BR033    False    1.00000

Running event_effects_ttest with {'chromosome': 8, 'arm': 'p', 'event_start': 166085, 'event_end': 40153481, 'gain_or_loss': 'loss', 'cis_or_trans': 'cis', 'proteomics_or_transcriptomics': 'proteomics', 'cancer_types': ['brca', 'coad', 'hnscc', 'lscc', 'luad', 'ov', 'pdac'], 'source': 'gistic', 'comparison': 'tumor', 'tissue_type': None, 'has_event': False, 'level': 'gene', 'data_dir': '/home/caleb/github/PayneLab/pancancerCNV/00_functions/chr08/../data', 'save': False, 'permuted_event_data': {'brca':            event  proportion
01BR001    False    0.978223
01BR008    False    0.000000
01BR009    False    0.238529
01BR010    False    0.000000
01BR015    False    0.969679
01BR017    False    0.021042
01BR018    False    0.967549
01BR020    False    0.981566
01BR023    False    0.871533
01BR025    False    0.000000
01BR026     True    1.000000
01BR027     True    0.981531
01BR030     True    0.872832
01BR031    False    0.237005
01BR032    False    0.061429
01BR033    False    1.000000



Loading brca proteomics (1/6)...                              
Loading brca proteomics (1/7)...                              Loading brca proteomics (1/6)...                              Loading brca proteomics (1/7)...                              
Loading brca proteomics (1/7)...                              Running event_effects_ttest with {'chromosome': 8, 'arm': 'p', 'event_start': 406428, 'event_end': 38176533, 'gain_or_loss': 'loss', 'cis_or_trans': 'cis', 'proteomics_or_transcriptomics': 'proteomics', 'cancer_types': ['brca', 'coad', 'hnscc', 'lscc', 'luad', 'ov'], 'source': 'cptac', 'comparison': 'tumor', 'tissue_type': None, 'has_event': False, 'level': None, 'data_dir': '/home/caleb/github/PayneLab/pancancerCNV/00_functions/chr08/../data', 'save': False, 'permuted_event_data': {'brca':            event  proportion
01BR001    False    0.978223
01BR008    False    0.000000
01BR009    False    0.238529
01BR010    False    0.000000
01BR015    False    0.969679
01BR017    False 


Loading brca proteomics (1/6)...                              Running event_effects_ttest with {'chromosome': 8, 'arm': 'q', 'event_start': 36784373, 'event_end': 145052466, 'gain_or_loss': 'gain', 'cis_or_trans': 'cis', 'proteomics_or_transcriptomics': 'proteomics', 'cancer_types': ['brca', 'coad', 'hnscc', 'lscc', 'luad', 'ov', 'pdac'], 'source': 'gistic', 'comparison': 'tumor', 'tissue_type': None, 'has_event': False, 'level': 'gene', 'data_dir': '/home/caleb/github/PayneLab/pancancerCNV/00_functions/chr08/../data', 'save': False, 'permuted_event_data': {'brca':            event  proportion
01BR001    False    0.978223
01BR008    False    0.000000
01BR009    False    0.238529
01BR010    False    0.000000
01BR015    False    0.969679
01BR017    False    0.021042
01BR018    False    0.967549
01BR020    False    0.981566
01BR023    False    0.871533
01BR025    False    0.000000
01BR026     True    1.000000
01BR027     True    0.981531
01BR030     True    0.872832
01BR031    False    0


Loading pdac proteomics (7/7)...                                                omics (2/6)...                              Loading coad proteomics (2/6)...                              Loading coad proteomics (2/7)...                              



                                                                                



                                                                                



                                                                                



In [4]:
# Feed the permuted t-test results into the proportion p value function.
# The chromosome -> arm -> event types mapping is the same one used for the
# t-test run above: chromosome 8, p-arm loss and q-arm gain.
chr8_arm_events = {8: {"p": ["loss"], "q": ["gain"]}}

all_pvals = cnvutils.get_has_vs_not_has_tumor_normal_diff_props(
    chromosomes_events=chr8_arm_events,
    sources=["cptac", "gistic"],
    levels=["gene"],
    ttest_res=perm_res,
)

# Add those p values to the overall distribution