In [1]:
import os
import scipy
import pandas as pd
from pprint import pprint
from scipy.stats import wilcoxon
from itertools import combinations, product
from sanitize_ml_labels import sanitize_ml_labels

In [2]:
# Output directory for this notebook, created up front if it is missing.
# NOTE(review): none of the cells visible here write into it — confirm it is still needed.
RESULT_PATH = "./correlation/"
os.makedirs(name=RESULT_PATH, exist_ok=True)

In [3]:
def sanitize_df(df):
    """Run ``sanitize_ml_labels`` over the labels of ``df`` and return it.

    The frame is modified in place (and also returned):

    * column names are replaced by their sanitized form;
    * index level names are sanitized, and a name whose last character is
      a digit has that one trailing character removed;
    * the values of every object-dtype column are sanitized.
    """
    df.columns = sanitize_ml_labels(df.columns)

    # Sanitize the index level names, trimming a single trailing digit.
    cleaned_names = []
    for name in sanitize_ml_labels(df.index.names):
        cleaned_names.append(name[:-1] if name[-1].isdigit() else name)
    df.index.names = cleaned_names

    # Only object-dtype columns are rewritten; other dtypes are left as-is.
    object_columns = df.columns[df.dtypes == object]
    for column in object_columns:
        df[column] = sanitize_ml_labels(df[column])

    return df

In [4]:
# Load every report found in the reports directory into one frame.
REPORTS_PATH = "./reports/"

# os.listdir returns entries in arbitrary order, and `analyze` below pairs
# gap-filling and reconstruction rows by position, so the listing is sorted
# to make the row order (and therefore the results) reproducible.
# Non-file entries (e.g. a Jupyter `.ipynb_checkpoints` directory) are
# skipped because read_csv cannot open them.
report_files = sorted(
    name
    for name in os.listdir(REPORTS_PATH)
    if os.path.isfile(os.path.join(REPORTS_PATH, name))
)

df = pd.concat([
    pd.read_csv(os.path.join(REPORTS_PATH, name), index_col=0)
    for name in report_files
])
df = df.drop("dataset", axis=1)
# Later cells add columns to this frame; take an explicit copy of the
# filtered rows so those assignments do not hit a slice of the original
# (pandas' SettingWithCopyWarning).
df = df[df.run_type != "biological validation"].copy()

In [5]:
# The second "_"-separated token of the model name (e.g. "cnn_500" -> 500)
# is stored as an integer window size.
df["window_size"] = list(
    map(lambda model_name: int(model_name.split("_")[1]), df["model"].values)
)

In [6]:
# The first "_"-separated token of the model name (e.g. "cnn_500" -> "cnn")
# is stored as the model type.
df["model_type"] = list(
    map(lambda model_name: model_name.split("_")[0], df["model"].values)
)

In [7]:
# Preview the combined report table with the derived window_size and
# model_type columns (the rendering is truncated for long frames).
df

Unnamed: 0,model,trained_on,task,target,run_type,roc_auc_score,average_precision_score,accuracy_score,window_size,model_type
0,cnn_500,single_gap,gap_filling,all_nucleotides,multivariate gaps train,0.613213,0.343835,0.368223,500,cnn
1,cnn_500,single_gap,gap_filling,adenine,multivariate gaps train,0.593924,0.378336,0.671299,500,cnn
2,cnn_500,single_gap,gap_filling,cytosine,multivariate gaps train,0.619264,0.316755,0.775205,500,cnn
3,cnn_500,single_gap,gap_filling,thymine,multivariate gaps train,0.594738,0.370109,0.650293,500,cnn
4,cnn_500,single_gap,gap_filling,guanine,multivariate gaps train,0.614865,0.292196,0.736641,500,cnn
...,...,...,...,...,...,...,...,...,...,...
0,cnn_1000,single_gap,gap_filling,all_nucleotides,multivariate gaps test,0.636623,0.372396,0.373535,1000,cnn
1,cnn_1000,single_gap,gap_filling,adenine,multivariate gaps test,0.612027,0.374539,0.665840,1000,cnn
2,cnn_1000,single_gap,gap_filling,cytosine,multivariate gaps test,0.647463,0.359355,0.775117,1000,cnn
3,cnn_1000,single_gap,gap_filling,thymine,multivariate gaps test,0.617989,0.404111,0.708740,1000,cnn


# CAE model analysis

In [8]:
# Keep only the rows whose model_type is "cae".
df_cae = df[df["model_type"] == "cae"]

In [9]:
def analyze(df, target=None):
    """Print gap-filling vs. reconstruction correlations for three metrics.

    Rows of ``df`` are split by ``task`` into "gap_filling" and
    "reconstruction"; for accuracy, average precision and ROC AUC the
    Pearson and then the Spearman correlation between the two groups is
    printed. Nothing is returned.

    Parameters
    ----------
    df : DataFrame with ``task``, ``target`` and the three score columns.
    target : optional value of the ``target`` column to restrict to; a
        falsy value (the default ``None``) keeps every row.

    Notes
    -----
    The scores are handed to scipy as two plain sequences, so the two
    groups are paired by position.
    NOTE(review): this assumes both groups have the same length and a
    matching row order — confirm against how the reports are loaded.
    """
    subset = df[df.target == target] if target else df
    gap_rows = subset[subset.task == "gap_filling"]
    rec_rows = subset[subset.task == "reconstruction"]

    # (printed label, score column); the labels keep their trailing space.
    metrics = (
        ("ACC ", "accuracy_score"),
        ("PRC ", "average_precision_score"),
        ("ROC ", "roc_auc_score"),
    )
    tests = (
        ("Pearson", scipy.stats.pearsonr),
        ("Spearmanr", scipy.stats.spearmanr),
    )
    for heading, correlate in tests:
        print(heading)
        for label, column in metrics:
            print(label, correlate(gap_rows[column], rec_rows[column]))
    

### Gap filling and reconstruction correlation

In [11]:
# Correlations over every CAE row first, then one report per distinct target.
print("General")
analyze(df_cae)
for target in df_cae["target"].unique():
    print("\n\n" + target)
    analyze(df_cae, target=target)

General
Pearson
ACC  (0.482895338644168, 2.326397636045051e-08)
PRC  (-0.5134980133064615, 2.0039036190860594e-09)
ROC  (-0.20785939520592472, 0.02272097597082698)
Spearmanr
ACC  SpearmanrResult(correlation=0.19855545523994717, pvalue=0.02970560894212244)
PRC  SpearmanrResult(correlation=-0.4071463296062226, pvalue=3.931990884368521e-06)
ROC  SpearmanrResult(correlation=-0.09129800680602819, pvalue=0.3213319885539175)


all_nucleotides
Pearson
ACC  (-0.08501791437050782, 0.6928553591011505)
PRC  (-0.6566630954882765, 0.0004915147884109693)
ROC  (-0.15275313758830594, 0.47610295879266495)
Spearmanr
ACC  SpearmanrResult(correlation=-0.10695652173913042, pvalue=0.6188809974331805)
PRC  SpearmanrResult(correlation=-0.38956521739130434, pvalue=0.059875479374870595)
ROC  SpearmanrResult(correlation=0.07826086956521738, pvalue=0.7162392905010877)


adenine
Pearson
ACC  (-0.6960337936053477, 0.00015864120971077322)
PRC  (-0.8182573084594442, 1.034175641023154e-06)
ROC  (-0.7151842408931849, 8.

### Window size and performance correlation

In [12]:
# CAE reconstruction rows: correlation of window size with each metric,
# all three Pearson results first, then all three Spearman results.
rec = df_cae[df_cae["task"] == "reconstruction"]
for correlate in (scipy.stats.pearsonr, scipy.stats.spearmanr):
    for label, metric in (
        ("ACC ", "accuracy_score"),
        ("PRC ", "average_precision_score"),
        ("ROC ", "roc_auc_score"),
    ):
        print(label, correlate(rec["window_size"], rec[metric]))

ACC  (-0.7330026769680871, 1.7781647645197916e-21)
PRC  (-0.9842769769597799, 1.0617138059679294e-90)
ROC  (-0.9911752004941152, 2.055458957569254e-105)
ACC  SpearmanrResult(correlation=-0.8355935272699719, pvalue=1.7521187725128724e-32)
PRC  SpearmanrResult(correlation=-0.9428417797122391, pvalue=3.7639250348813904e-58)
ROC  SpearmanrResult(correlation=-0.9428417797122391, pvalue=3.7639250348813904e-58)


In [22]:
# CAE gap-filling rows: correlation of window size with each metric,
# all three Pearson results first, then all three Spearman results.
gaps = df_cae[df_cae["task"] == "gap_filling"]
for correlate in (scipy.stats.pearsonr, scipy.stats.spearmanr):
    for label, metric in (
        ("ACC ", "accuracy_score"),
        ("PRC ", "average_precision_score"),
        ("ROC ", "roc_auc_score"),
    ):
        print(label, correlate(gaps["window_size"], gaps[metric]))

ACC  (0.05559562305494252, 0.5464343780139739)
PRC  (0.6323842687836272, 9.410166136519479e-15)
ROC  (0.29261550365622535, 0.001182024110632261)
ACC  SpearmanrResult(correlation=0.25839757525238555, pvalue=0.004378300652845834)
PRC  SpearmanrResult(correlation=0.5636426014342228, pvalue=2.0484615366196184e-11)
ROC  SpearmanrResult(correlation=0.19740749762725004, pvalue=0.030682167816496086)


# CNN analysis

In [16]:
# Keep only the rows whose model_type is "cnn".
df_cnn = df[df["model_type"] == "cnn"]

### Window size and performance correlation

In [17]:
# CNN gap-filling rows: correlation of window size with each metric,
# all three Pearson results first, then all three Spearman results.
gaps = df_cnn[df_cnn["task"] == "gap_filling"]
for correlate in (scipy.stats.pearsonr, scipy.stats.spearmanr):
    for label, metric in (
        ("ACC ", "accuracy_score"),
        ("PRC ", "average_precision_score"),
        ("ROC ", "roc_auc_score"),
    ):
        print(label, correlate(gaps["window_size"], gaps[metric]))

ACC  (0.02621780038693372, 0.776223122135287)
PRC  (0.028982130532245944, 0.7533491853906168)
ROC  (0.04773408034671381, 0.6046517145993185)
ACC  SpearmanrResult(correlation=0.0026517563176666455, pvalue=0.9770683378676717)
PRC  SpearmanrResult(correlation=-0.11490884190242913, pvalue=0.21139742045523957)
ROC  SpearmanrResult(correlation=-0.02622278699824665, pvalue=0.7761816799226504)
