In [None]:
import pandas as pd, numpy as np, sqlite3
import os
from scripts.misc import *

# Path of the results database handed to the data-loading helpers
# (get_filtered_data / get_gene_regulation_counts).
db_path = "results.db"
# Passed as the third positional argument to the loaders and also used to name
# the export directory. Presumably a cutoff on the prediction difference —
# TODO confirm against scripts.misc.
THRESHOLD = 0.35
# Forwarded as apply_additional_filter; also decides whether the exported file
# name carries a "_filter..." suffix.
ADDITIONAL_FILTER = True
# Forwarded as low_threshold=FILTER_VAL and high_threshold=1-FILTER_VAL.
FILTER_VAL = 0.25

In [4]:
# Load the "real" records using the notebook-wide filter settings, then preview
# the first rows as a sanity check.
df = get_filtered_data(
    db_path,
    "real",
    THRESHOLD,
    apply_additional_filter=ADDITIONAL_FILTER,
    low_threshold=FILTER_VAL,
    high_threshold=1 - FILTER_VAL,
)

df.head()

Unnamed: 0,id,wt_prediction,mut_prediction,pred_difference,vcf_id,mirna_accession,gene_id,is_gene_upregulated,mutsig,gene_name,cancer_type
0,922,0.703395,0.200838,-0.503,PD10010a,MIMAT0018091,ENSG00000213967,True,SBS1,ZNF726,nnn
1,924,0.609351,0.248184,-0.361,PD10010a,MIMAT0018109,ENSG00000213967,True,SBS1,ZNF726,nnn
2,927,0.593382,0.23763,-0.356,PD10010a,MIMAT0019057,ENSG00000213967,True,SBS1,ZNF726,nnn
3,936,0.808837,0.20937,-0.599,PD10010a,MIMAT0026615,ENSG00000213967,True,SBS1,ZNF726,nnn
4,937,0.635327,0.121514,-0.514,PD10010a,MIMAT0027650,ENSG00000213967,True,SBS1,ZNF726,nnn


In [None]:
# Per-gene regulation counts for the real and the synthetic data, both pulled
# with exactly the same filter settings.
real_counts = get_gene_regulation_counts(
    db_path,
    "real",
    THRESHOLD,
    apply_additional_filter=ADDITIONAL_FILTER,
    low_threshold=FILTER_VAL,
    high_threshold=1 - FILTER_VAL,
)
synth_counts = get_gene_regulation_counts(
    db_path,
    "synth",
    THRESHOLD,
    apply_additional_filter=ADDITIONAL_FILTER,
    low_threshold=FILTER_VAL,
    high_threshold=1 - FILTER_VAL,
)

# Inner join: only genes present in both tables survive.
counts = pd.merge(
    real_counts,
    synth_counts,
    how="inner",
    on="gene_id",
    suffixes=["_real", "_synth"],
)

# Scale the synthetic counts down by a factor of 10.
# NOTE(review): presumably there are 10 synthetic replicates per real sample — confirm.
for synth_col in ("upregulated_synth", "downregulated_synth"):
    counts[synth_col] = counts[synth_col] / 10

# Row-wise log2 odds ratio of real vs. (scaled) synthetic up/down counts.
counts['log2_odds_ratio'] = counts.apply(
    lambda gene_row: calculate_log2_odds_ratio(
        gene_row['upregulated_real'],
        gene_row['downregulated_real'],
        gene_row['upregulated_synth'],
        gene_row['downregulated_synth'],
    ),
    axis=1,
)

# Derived statistics supplied by the scripts.misc helpers.
counts['shrunk_log2_odds'] = shrink_log2_odds(counts)
counts = add_z_score(counts)
counts = perform_fisher_test_vectorized(counts, bonf_holm=False)

# Significance flags at the 0.05 level, on raw and adjusted p-values.
counts["is_significant"] = counts['p_value'] < 0.05
counts["is_significant_adj"] = counts['p_adj'] < 0.05


In [None]:
# Export directory is named after the threshold with the decimal point dropped
# (e.g. 0.35 -> "results/last/035").
export_path = f"results/last/{THRESHOLD:.2f}".replace("0.", "0")
os.makedirs(export_path, exist_ok=True)

# Number of genes significant on raw / adjusted p-values; both go into the file name.
count_sign = int((counts["p_value"] < 0.05).sum())
count_sign_adj = int((counts["p_adj"] < 0.05).sum())

# Filter value with the decimal point removed (0.25 -> "025").
# Previously this was a plain string literal (missing the f-prefix and braces)
# that evaluated to "FILTER_VAL:2f" and was never used, so the file name
# contained a stray "." from the raw float formatting.
filter_string = f"{FILTER_VAL:.2f}".replace(".", "")

# Build the file name once; the filter suffix is only added when the
# additional filter was actually applied.
file_stem = f"counts_sig{count_sign}_adj{count_sign_adj}"
if ADDITIONAL_FILTER:
    file_stem += f"_filter{filter_string}"

counts.to_csv(f"{export_path}/{file_stem}.csv", index=False)
    
    

In [None]:
# Show the genes whose adjusted p-value is below 0.05.
counts.loc[counts["p_adj"] < 0.05]
