In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score

In [22]:
# Prediction tables (tab-separated) found under each benchmark set's
# veta_output directory. The order of the paths must match the order of
# the names they are unpacked into below.
preds_paths = (
    'final/deep_intronic/veta_output/tools_benchmark/all_types/results_tsv/preds_all_types_all_class.tsv',
    'final/deep_intronic_moderate/veta_output/tools_benchmark/all_types/results_tsv/preds_all_types_all_class.tsv',
    'final/periexonic/veta_output/tools_benchmark/all_types/results_tsv/preds_all_types_all_class.tsv',
    'final/periexonic_moderate/veta_output/tools_benchmark/all_types/results_tsv/preds_all_types_all_class.tsv',
)

deep_intronic, deep_intronic_moderate, periexonic, periexonic_moderate = (
    pd.read_csv(path, sep='\t') for path in preds_paths
)

In [23]:
# Preview the first five rows of the deep-intronic table to check the loaded columns.
deep_intronic.head(n=5)

Unnamed: 0,chr,pos,ref,alt,SYMBOL,SQUIRLS_prediction,SpliceAI_prediction,Pangolin_prediction,AlphaGenome_prediction,label
0,1,935279,G,T,SAMD11,False,False,False,,False
1,1,9953838,C,T,TMEM274P,False,False,,,False
2,1,9956947,G,T,NMNAT1,False,False,,,False
3,1,9964768,T,C,NMNAT1,False,False,,,False
4,1,9973961,C,G,NMNAT1,False,False,,,False


# Analyze metrics for all tools

Convert the prediction and label columns to numeric values

In [24]:
# Columns that the metric functions consume: one call per tool plus the reference label.
numeric_cols = ['SpliceAI_prediction', 'Pangolin_prediction', 'SQUIRLS_prediction', 'AlphaGenome_prediction', 'label']

dfs = [deep_intronic, deep_intronic_moderate, periexonic, periexonic_moderate]
for df in dfs:
    # Convert every column first, then write the results back into the same frame.
    converted = {col: pd.to_numeric(df[col]) for col in numeric_cols}
    for col, values in converted.items():
        df[col] = values


In [25]:
# Tool columns whose missing values are replaced with 0. After this step a
# variant without a call from a tool is scored as a negative (0) prediction.
prediction_cols = ['Pangolin_prediction', 'SpliceAI_prediction', 'SQUIRLS_prediction', 'AlphaGenome_prediction']

for df in [deep_intronic, deep_intronic_moderate, periexonic, periexonic_moderate]:
    for pred_col in prediction_cols:
        df[pred_col] = df[pred_col].fillna(0)


## Deep intronic

#### Precision

In [26]:
# Precision of each tool on the deep-intronic sets.
# All scores are computed first and reported afterwards.
y_true_p = deep_intronic['label']
pang_pr_score_p = precision_score(y_true_p, deep_intronic['Pangolin_prediction'])
sai_pr_score_p = precision_score(y_true_p, deep_intronic['SpliceAI_prediction'])
sq_pr_score_p = precision_score(y_true_p, deep_intronic['SQUIRLS_prediction'])
al_pr_score_p = precision_score(y_true_p, deep_intronic['AlphaGenome_prediction'])

y_true_m = deep_intronic_moderate['label']
pang_pr_score_m = precision_score(y_true_m, deep_intronic_moderate['Pangolin_prediction'])
sai_pr_score_m = precision_score(y_true_m, deep_intronic_moderate['SpliceAI_prediction'])
sq_pr_score_m = precision_score(y_true_m, deep_intronic_moderate['SQUIRLS_prediction'])
al_pr_score_m = precision_score(y_true_m, deep_intronic_moderate['AlphaGenome_prediction'])

print('Precision', '', 'Pathogenic variants', sep='\n')
print(
    f'Pangolin: {pang_pr_score_p}',
    f'SpliceAI: {sai_pr_score_p}',
    f'SQUIRLS: {sq_pr_score_p}',
    f'AlphaGenome: {al_pr_score_p}',
    sep='\n',
)

print('', 'Moderate variants', sep='\n')
print(
    f'Pangolin: {pang_pr_score_m}',
    f'SpliceAI: {sai_pr_score_m}',
    f'SQUIRLS: {sq_pr_score_m}',
    f'AlphaGenome: {al_pr_score_m}',
    sep='\n',
)

Precision

Pathogenic variants
Pangolin: 0.3132530120481928
SpliceAI: 0.2532467532467532
SQUIRLS: 0.2222222222222222
AlphaGenome: 0.0

Moderate variants
Pangolin: 0.08064516129032258
SpliceAI: 0.07258064516129033
SQUIRLS: 0.06666666666666667
AlphaGenome: 0.0


#### Recall

In [27]:
# Recall of each tool on the deep-intronic sets.
# All scores are computed first and reported afterwards.
y_true_p = deep_intronic['label']
pang_rec_score_p = recall_score(y_true_p, deep_intronic['Pangolin_prediction'])
sai_rec_score_p = recall_score(y_true_p, deep_intronic['SpliceAI_prediction'])
sq_rec_score_p = recall_score(y_true_p, deep_intronic['SQUIRLS_prediction'])
al_rec_score_p = recall_score(y_true_p, deep_intronic['AlphaGenome_prediction'])

y_true_m = deep_intronic_moderate['label']
pang_rec_score_m = recall_score(y_true_m, deep_intronic_moderate['Pangolin_prediction'])
sai_rec_score_m = recall_score(y_true_m, deep_intronic_moderate['SpliceAI_prediction'])
sq_rec_score_m = recall_score(y_true_m, deep_intronic_moderate['SQUIRLS_prediction'])
al_rec_score_m = recall_score(y_true_m, deep_intronic_moderate['AlphaGenome_prediction'])

print('Recall', '', 'Pathogenic variants', sep='\n')
print(
    f'Pangolin: {pang_rec_score_p}',
    f'SpliceAI: {sai_rec_score_p}',
    f'SQUIRLS: {sq_rec_score_p}',
    f'AlphaGenome: {al_rec_score_p}',
    sep='\n',
)

print('', 'Moderate variants', sep='\n')
print(
    f'Pangolin: {pang_rec_score_m}',
    f'SpliceAI: {sai_rec_score_m}',
    f'SQUIRLS: {sq_rec_score_m}',
    f'AlphaGenome: {al_rec_score_m}',
    sep='\n',
)

Recall

Pathogenic variants
Pangolin: 0.18571428571428572
SpliceAI: 0.2785714285714286
SQUIRLS: 0.05714285714285714
AlphaGenome: 0.0

Moderate variants
Pangolin: 0.11363636363636363
SpliceAI: 0.20454545454545456
SQUIRLS: 0.045454545454545456
AlphaGenome: 0.0


#### F1 score

In [28]:
# F1 score of each tool on the deep-intronic sets.
# All scores are computed first and reported afterwards.
y_true_p = deep_intronic['label']
pang_f1_score_p = f1_score(y_true_p, deep_intronic['Pangolin_prediction'])
sai_f1_score_p = f1_score(y_true_p, deep_intronic['SpliceAI_prediction'])
sq_f1_score_p = f1_score(y_true_p, deep_intronic['SQUIRLS_prediction'])
al_f1_score_p = f1_score(y_true_p, deep_intronic['AlphaGenome_prediction'])

y_true_m = deep_intronic_moderate['label']
pang_f1_score_m = f1_score(y_true_m, deep_intronic_moderate['Pangolin_prediction'])
sai_f1_score_m = f1_score(y_true_m, deep_intronic_moderate['SpliceAI_prediction'])
sq_f1_score_m = f1_score(y_true_m, deep_intronic_moderate['SQUIRLS_prediction'])
al_f1_score_m = f1_score(y_true_m, deep_intronic_moderate['AlphaGenome_prediction'])

# SQUIRLS scores use the `sq_` prefix, matching the deep-intronic precision
# and recall cells. The previous `squi_` names are kept as aliases so any
# existing reference to them still resolves to the same values.
squi_f1_score_p = sq_f1_score_p
squi_f1_score_m = sq_f1_score_m

print('F1 score', '', 'Pathogenic variants', sep='\n')
print(
    f'Pangolin: {pang_f1_score_p}',
    f'SpliceAI: {sai_f1_score_p}',
    f'SQUIRLS: {sq_f1_score_p}',
    f'AlphaGenome: {al_f1_score_p}',
    sep='\n',
)

print('', 'Moderate variants', sep='\n')
print(
    f'Pangolin: {pang_f1_score_m}',
    f'SpliceAI: {sai_f1_score_m}',
    f'SQUIRLS: {sq_f1_score_m}',
    f'AlphaGenome: {al_f1_score_m}',
    sep='\n',
)

F1 score

Pathogenic variants
Pangolin: 0.23318385650224216
SpliceAI: 0.2653061224489796
SQUIRLS: 0.09090909090909091
AlphaGenome: 0.0

Moderate variants
Pangolin: 0.09433962264150944
SpliceAI: 0.10714285714285714
SQUIRLS: 0.05405405405405406
AlphaGenome: 0.0


## Periexonic

#### Precision

In [29]:
# Precision of each tool on the periexonic sets.
# All scores are computed first and reported afterwards.
y_true_p = periexonic['label']
pang_pr_score_p = precision_score(y_true_p, periexonic['Pangolin_prediction'])
sai_pr_score_p = precision_score(y_true_p, periexonic['SpliceAI_prediction'])
sq_pr_score_p = precision_score(y_true_p, periexonic['SQUIRLS_prediction'])
al_pr_score_p = precision_score(y_true_p, periexonic['AlphaGenome_prediction'])

y_true_m = periexonic_moderate['label']
pang_pr_score_m = precision_score(y_true_m, periexonic_moderate['Pangolin_prediction'])
sai_pr_score_m = precision_score(y_true_m, periexonic_moderate['SpliceAI_prediction'])
# Stored under `sq_pr_score_m`, matching `sq_pr_score_p` above. The cell
# previously wrote this value only to `squi_pr_score_m`, which left
# `sq_pr_score_m` holding whatever an earlier cell had assigned to it.
sq_pr_score_m = precision_score(y_true_m, periexonic_moderate['SQUIRLS_prediction'])
al_pr_score_m = precision_score(y_true_m, periexonic_moderate['AlphaGenome_prediction'])

# Backward-compatible alias for the old variable name.
squi_pr_score_m = sq_pr_score_m

print('Precision', '', 'Pathogenic variants', sep='\n')
print(
    f'Pangolin: {pang_pr_score_p}',
    f'SpliceAI: {sai_pr_score_p}',
    f'SQUIRLS: {sq_pr_score_p}',
    f'AlphaGenome: {al_pr_score_p}',
    sep='\n',
)

print('', 'Moderate variants', sep='\n')
print(
    f'Pangolin: {pang_pr_score_m}',
    f'SpliceAI: {sai_pr_score_m}',
    f'SQUIRLS: {sq_pr_score_m}',
    f'AlphaGenome: {al_pr_score_m}',
    sep='\n',
)

Precision

Pathogenic variants
Pangolin: 0.2954070981210856
SpliceAI: 0.2712022367194781
SQUIRLS: 0.26547085201793724
AlphaGenome: 0.5379746835443038

Moderate variants
Pangolin: 0.1837968561064087
SpliceAI: 0.1689691817215728
SQUIRLS: 0.15305067218200621
AlphaGenome: 0.0


#### Recall

In [30]:
# Recall of each tool on the periexonic sets.
# All scores are computed first and reported afterwards.
y_true_p = periexonic['label']
pang_rec_score_p = recall_score(y_true_p, periexonic['Pangolin_prediction'])
sai_rec_score_p = recall_score(y_true_p, periexonic['SpliceAI_prediction'])
sq_rec_score_p = recall_score(y_true_p, periexonic['SQUIRLS_prediction'])
al_rec_score_p = recall_score(y_true_p, periexonic['AlphaGenome_prediction'])

y_true_m = periexonic_moderate['label']
pang_rec_score_m = recall_score(y_true_m, periexonic_moderate['Pangolin_prediction'])
sai_rec_score_m = recall_score(y_true_m, periexonic_moderate['SpliceAI_prediction'])
sq_rec_score_m = recall_score(y_true_m, periexonic_moderate['SQUIRLS_prediction'])
al_rec_score_m = recall_score(y_true_m, periexonic_moderate['AlphaGenome_prediction'])

# SQUIRLS scores use the `sq_` prefix so that every tool's `*_rec_score_*`
# variable is refreshed together by this cell. The previous `squi_` names
# are kept as aliases so existing references still resolve.
squi_rec_score_p = sq_rec_score_p
squi_rec_score_m = sq_rec_score_m

print('Recall', '', 'Pathogenic variants', sep='\n')
print(
    f'Pangolin: {pang_rec_score_p}',
    f'SpliceAI: {sai_rec_score_p}',
    f'SQUIRLS: {sq_rec_score_p}',
    f'AlphaGenome: {al_rec_score_p}',
    sep='\n',
)

print('', 'Moderate variants', sep='\n')
print(
    f'Pangolin: {pang_rec_score_m}',
    f'SpliceAI: {sai_rec_score_m}',
    f'SQUIRLS: {sq_rec_score_m}',
    f'AlphaGenome: {al_rec_score_m}',
    sep='\n',
)

Recall

Pathogenic variants
Pangolin: 0.8179190751445087
SpliceAI: 0.8410404624277457
SQUIRLS: 0.8554913294797688
AlphaGenome: 0.24566473988439305

Moderate variants
Pangolin: 0.5891472868217055
SpliceAI: 0.6162790697674418
SQUIRLS: 0.5736434108527132
AlphaGenome: 0.0


#### F1 score

In [31]:
# F1 score of each tool on the periexonic sets.
# All scores are computed first and reported afterwards.
y_true_p = periexonic['label']
pang_f1_score_p = f1_score(y_true_p, periexonic['Pangolin_prediction'])
sai_f1_score_p = f1_score(y_true_p, periexonic['SpliceAI_prediction'])
sq_f1_score_p = f1_score(y_true_p, periexonic['SQUIRLS_prediction'])
al_f1_score_p = f1_score(y_true_p, periexonic['AlphaGenome_prediction'])

y_true_m = periexonic_moderate['label']
pang_f1_score_m = f1_score(y_true_m, periexonic_moderate['Pangolin_prediction'])
sai_f1_score_m = f1_score(y_true_m, periexonic_moderate['SpliceAI_prediction'])
sq_f1_score_m = f1_score(y_true_m, periexonic_moderate['SQUIRLS_prediction'])
al_f1_score_m = f1_score(y_true_m, periexonic_moderate['AlphaGenome_prediction'])

print('F1 score', '', 'Pathogenic variants', sep='\n')
print(
    f'Pangolin: {pang_f1_score_p}',
    f'SpliceAI: {sai_f1_score_p}',
    f'SQUIRLS: {sq_f1_score_p}',
    f'AlphaGenome: {al_f1_score_p}',
    sep='\n',
)

print('', 'Moderate variants', sep='\n')
print(
    f'Pangolin: {pang_f1_score_m}',
    f'SpliceAI: {sai_f1_score_m}',
    f'SQUIRLS: {sq_f1_score_m}',
    f'AlphaGenome: {al_f1_score_m}',
    sep='\n',
)

F1 score

Pathogenic variants
Pangolin: 0.4340490797546012
SpliceAI: 0.41014799154334036
SQUIRLS: 0.405201916495551
AlphaGenome: 0.3373015873015873

Moderate variants
Pangolin: 0.280184331797235
SpliceAI: 0.2652210175145955
SQUIRLS: 0.2416326530612245
AlphaGenome: 0.0


# Result table for F1 Score

| Tool        | Deep intronic (pathogenic) | Deep intronic (moderate) | Periexonic (pathogenic) | Periexonic (moderate) |
|-------------|----------------------------|--------------------------|-------------------------|-----------------------|
| Pangolin    | 0.233                      | 0.094                    | 0.434                   | 0.280                 |
| SpliceAI    | 0.265                      | 0.107                    | 0.410                   | 0.265                 |
| SQUIRLS     | 0.091                      | 0.054                    | 0.405                   | 0.242                 |
| AlphaGenome | 0.000                      | 0.000                    | 0.337                   | 0.000                 |
