In [6]:
import pandas as pd
from SPARQL_parser import SPARQL
from multiprocesspandas import applyparallel
import re

def is_correct_SPARQL_query(query):
    """Tell whether ``query`` is accepted by the ``SPARQL`` parser.

    Every ``PREFIX <name>: ...`` line (up to and including its newline) is
    removed from the text before it is handed to ``SPARQL``.

    Args:
        query: The SPARQL query text. Anything that is not a ``str`` is
            reported as incorrect, which matches what
            ``is_correct_SPARQL_query_2`` returns for such input.

    Returns:
        bool: ``True`` if constructing ``SPARQL(query)`` raised no exception,
        ``False`` otherwise.
    """
    if not isinstance(query, str):
        # re.sub would raise TypeError on non-text input; treat it as "not a valid query".
        return False

    query = re.sub(r"PREFIX \w+:.*\n", "", query)

    try:
        SPARQL(query)
    except Exception:
        # Any parser failure means "incorrect". KeyboardInterrupt/SystemExit are
        # deliberately not caught so a long run can still be stopped.
        return False
    return True

def is_correct_SPARQL_query_2(x):
    """Row-oriented check: is ``x['query']`` accepted by the ``SPARQL`` parser?

    Every ``PREFIX <name>: ...`` line (up to and including its newline) is
    removed from the query text before parsing.

    Args:
        x: A mapping-like row exposing the query text under the ``'query'`` key.

    Returns:
        bool: ``True`` if parsing raised no exception. ``False`` if the parser
        raised, the ``'query'`` entry is missing, or its value is not text.
    """
    # Imported locally rather than relying on module globals — presumably so the
    # function is self-contained when run by apply_parallel workers; confirm.
    from SPARQL_parser import SPARQL
    import re

    try:
        SPARQL(re.sub(r"PREFIX \w+:.*\n", "", x['query']))
    except Exception:
        # Lookup, substitution and parsing failures all count as "incorrect".
        # KeyboardInterrupt/SystemExit are not caught so a run can be stopped.
        return False
    return True

def print_x(x):
    """Return the value stored under ``'query'`` in ``x``.

    Despite the name, nothing is printed; the value is only returned.
    """
    query_value = x['query']
    return query_value

def evaluate_SPARQL_classifier(df: pd.DataFrame) -> None:
    """Print confusion counts and metrics for the parser-based query check.

    The prediction for a row is the result of ``is_correct_SPARQL_query_2``
    (applied through ``apply_parallel``). The reference label is whether the
    row's ``execution`` value is a string starting with ``[{'``, ``[]`` or
    ``{``.

    Side effects:
        Adds (or overwrites) the boolean columns ``correct_SPARQL_query`` and
        ``correct_execution`` on ``df`` in place, and prints two lines: the
        confusion counts, then accuracy / precision / recall / F1.

    Args:
        df: Frame with an ``execution`` column; ``is_correct_SPARQL_query_2``
            reads the ``query`` entry of each row.
    """
    df['correct_SPARQL_query'] = df.apply_parallel(is_correct_SPARQL_query_2, axis=0, n_chunks=None)
    # The reference label depends on a single column, so map over that column
    # instead of a row-wise DataFrame.apply: it is cheaper and still returns a
    # Series when the frame has no rows.
    df['correct_execution'] = df['execution'].map(
        lambda value: isinstance(value, str) and value.startswith(("[{'", '[]', '{'))
    )

    predicted = df['correct_SPARQL_query']
    actual = df['correct_execution']

    # Count each confusion cell straight from the masks; no filtered copies needed.
    n_true_positives = int((predicted & actual).sum())
    n_false_positives = int((predicted & ~actual).sum())
    n_false_negatives = int((~predicted & actual).sum())
    n_true_negatives = int((~predicted & ~actual).sum())
    n_total = len(df)
    print(f"both true: {n_true_positives}, first true: {n_false_positives}, first false: {n_false_negatives}, both false: {n_true_negatives}, total: {n_total}")

    # Each ratio falls back to 0 when its denominator would be zero.
    n_predicted_positive = n_true_positives + n_false_positives
    n_actual_positive = n_true_positives + n_false_negatives

    acc = (n_true_negatives + n_true_positives) / n_total if n_total > 0 else 0
    prec = n_true_positives / n_predicted_positive if n_predicted_positive > 0 else 0
    recall = n_true_positives / n_actual_positive if n_actual_positive > 0 else 0
    f1score = 2*(prec*recall)/(prec + recall) if prec + recall > 0 else 0

    print(f"accuracy: {acc:.4%}, precision: {prec:.4%}, recall: {recall:.4%}, f1score: {f1score:.4%}")

In [7]:
# Load the executed-queries dataset and partition it by the content of `execution`.
path_to_executed_dataset = "../outputs/final_queries_v1.4_full_execution_limit_10.parquet.gzip"
df = pd.read_parquet(path_to_executed_dataset, engine="fastparquet")

execution = df['execution']

# Rows whose execution value is exactly 'timeout' (copied so columns can be added later).
df_timeout = df.loc[execution == 'timeout'].copy()
# Rows whose execution value starts with 'exception'.
df_fail = df.loc[execution.str.startswith('exception')]

# Everything that neither timed out nor failed.
df_completed = df.drop(df_timeout.index).drop(df_fail.index)

# Among those, rows whose execution value has length 0 ...
df_empty = df_completed.loc[execution.map(len) == 0]
# ... and whatever remains after removing them.
df_successful = df_completed.drop(df_empty.index)

In [8]:
# Evaluate the classifier on three subsets, in this order:
# successful rows, all rows except timeouts, and the timeout rows only.
for subset in (df_successful, df.drop(df_timeout.index), df_timeout):
    evaluate_SPARQL_classifier(subset)

100%|██████████| 2549/2549 [00:02<00:00, 868.06it/s] 


both true: 2335, first true: 0, first false: 214, both false: 0, total: 2549
accuracy: 91.6046%, precision: 100.0000%, recall: 91.6046%, f1score: 95.6183%


100%|██████████| 2612/2612 [00:04<00:00, 644.23it/s]


both true: 2335, first true: 55, first false: 214, both false: 8, total: 2612
accuracy: 89.7014%, precision: 97.6987%, recall: 91.6046%, f1score: 94.5536%


100%|██████████| 230/230 [00:00<00:00, 899.20it/s]


both true: 0, first true: 214, first false: 0, both false: 16, total: 230
accuracy: 6.9565%, precision: 0.0000%, recall: 0.0000%, f1score: 0.0000%
