In [1]:
import pandas as pd
import zipfile
import os
from tqdm import tqdm

In [2]:
def train(trace_id):
    train_prop = f'PATH_TRAINING = MSCallGraph_traces/train/{trace_id}_transductive_train.tsv\nPATH_OUTPUT   = rules\nSNAPSHOTS_AT = 10\nWORKER_THREADS = 7'
    f = open("train_prop.txt", "w")
    f.write(train_prop)
    f.close()
    !java -Xmx1G -cp AnyBURL-23-1.jar de.unima.ki.anyburl.Learn config-learn.properties train_prop.txt

In [3]:
def predict(trace_id):
    predict_prop = f'PATH_TRAINING = MSCallGraph_traces/train/{trace_id}_transductive_train.tsv\nPATH_VALID    = MSCallGraph_traces/test/{trace_id}_transductive_test.tsv\nPATH_TEST     = MSCallGraph_traces/test/{trace_id}_transductive_test.tsv\nPATH_RULES    = rules-10\nPATH_OUTPUT   = preds-10\nWORKER_THREADS = 7\nTOP_K_OUTPUT = 100'
    f = open("predict_prop.txt", "w")
    f.write(predict_prop)
    f.close()
    !java -Xmx1G -cp AnyBURL-23-1.jar de.unima.ki.anyburl.Apply predict_prop.txt

In [4]:
def test(trace_id):
    eval_prop = f'PATH_TRAINING = MSCallGraph_traces/train/{trace_id}_transductive_train.tsv\nPATH_VALID    = MSCallGraph_traces/test/{trace_id}_transductive_test.tsv\nPATH_TEST     = MSCallGraph_traces/test/{trace_id}_transductive_test.tsv\nPATH_PREDICTIONS = preds-10\nTOP_K = 100'
    f = open("eval_prop.txt", "w")
    f.write(eval_prop)
    f.close()
    to_return = !java -Xmx1G -cp AnyBURL-23-1.jar de.unima.ki.anyburl.Eval eval_prop.txt
    return to_return[:-1]

In [5]:
def test_metrics(traceid):
    """Run the train, predict and evaluation steps for a single trace.

    Args:
        traceid: Identifier of the trace, forwarded unchanged to each step.

    Returns:
        Whatever ``test`` returns for ``traceid``.
    """
    # Order matters: prediction reads the rules produced by training, and
    # evaluation reads the predictions.
    train(traceid)
    predict(traceid)
    evaluation = test(traceid)
    return evaluation

In [6]:
def _zip_folder(folder_path, output_path):
    """Write every file found under ``folder_path`` into a deflated zip at ``output_path``."""
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                zipf.write(os.path.join(root, file))


def test_on_traces(model_name, max_traces=10):
    """Evaluate a batch of traces and store the collected metrics.

    For each selected training file in ``MSCallGraph_traces/train/`` the
    trace id is derived from the file name and passed to ``test_metrics``.
    The results are gathered into a DataFrame, saved as
    ``<model_name>_testing_traces/mean_trace_test.csv``, displayed, and the
    output directory is zipped to ``<model_name>_testing_traces.zip``.

    Args:
        model_name: Label used for the output directory, the zip name and
            the progress-bar description.
        max_traces: Maximum number of traces to process (default 10);
            ``None`` processes every training file found.
    """
    directory = model_name + "_testing_traces"

    if not os.path.exists(directory):
        os.makedirs(directory)
        print(f'Directory {directory} created successfully!')
    else:
        print(f'Directory {directory} already exists.')

    # Folder whose file names identify the traces to evaluate.
    folder_path = "MSCallGraph_traces/train/"
    train_suffix = "_transductive_train.tsv"

    # os.listdir returns entries in arbitrary order, so sort to make the
    # selected subset reproducible. Files without the training suffix are
    # skipped instead of being sliced into a meaningless trace id.
    trace_ids = sorted(
        file_name[:-len(train_suffix)]
        for file_name in os.listdir(folder_path)
        if file_name.endswith(train_suffix)
    )[:max_traces]

    metric_names = ['hits_at_1', 'hits_at_3', 'hits_at_10', 'mrr']
    all_traces_list = []
    # Run the full pipeline once per trace, advancing the progress bar each time.
    with tqdm(desc=f'{model_name} testing traces', total=len(trace_ids)) as progress_bar:
        for trace_id in trace_ids:
            all_traces_list.append(test_metrics(trace_id))
            progress_bar.update(1)

    # NOTE(review): assumes each test_metrics result holds exactly one entry
    # per metric name, in this order — confirm against the Eval output.
    mean_trace_df = pd.DataFrame(all_traces_list, columns=metric_names)
    mean_trace_df.to_csv(os.path.join(directory, "mean_trace_test.csv"))

    display(mean_trace_df)

    _zip_folder(directory, f'{directory}.zip')

In [None]:
# Run the per-trace train/predict/evaluate pipeline, storing results under the "anyburl" label.
test_on_traces("anyburl")

Directory anyburl_testing_traces already exists.


anyburl testing traces:   0%|          | 0/10 [00:00<?, ?it/s]

* read 141449 triples
* indexed 10000 triples
* indexed 20000 triples
* indexed 40000 triples
* indexed 80000 triples
* set up index for 18 relations, 40505 head entities, and 40552 tail entities
* set up list structure for randomized access searches uring rule learning ...  done
* precomputing random starting points for each relation/direction for the beam search ... done.
* creating worker thread #0
* creating worker thread #1
* creating worker thread #2
* creating worker thread #3
* creating worker thread #4
* creating worker thread #5
* creating worker thread #6
THREAD-3 starts to work with L=3 C=Cyclic 
THREAD-2 starts to work with L=1 C=Cyclic 
THREAD-0 starts to work with L=3 C=Cyclic 
THREAD-4 starts to work with L=1 C=Cyclic 
THREAD-6 starts to work with L=3 C=Cyclic 
THREAD-5 starts to work with L=3 C=Cyclic 
THREAD-1 starts to work with L=0 C=Zero 
 000020 | 033120  > 99k 000063 |  > 99k

>>> CREATING SNAPSHOT 0 after 11 seconds

>>> storing rules in file rules-10
>>> stored

anyburl testing traces:  10%|█         | 1/10 [01:56<17:32, 116.92s/it]

* read 141449 triples
* indexed 10000 triples
* indexed 20000 triples
* indexed 40000 triples
* indexed 80000 triples
* set up index for 18 relations, 40505 head entities, and 40552 tail entities
* set up list structure for randomized access searches uring rule learning ...  done
* precomputing random starting points for each relation/direction for the beam search ... done.
* creating worker thread #0
* creating worker thread #1
* creating worker thread #2
* creating worker thread #3
* creating worker thread #4
* creating worker thread #5
* creating worker thread #6
THREAD-5 starts to work with L=1 C=Cyclic 
THREAD-6 starts to work with L=1 C=Cyclic 
THREAD-4 starts to work with L=1 C=Cyclic 
THREAD-2 starts to work with L=1 C=Cyclic 
THREAD-1 starts to work with L=2 C=Cyclic 
THREAD-0 starts to work with L=3 C=Cyclic 
THREAD-3 starts to work with L=1 C=Acyclic 
  > 99k | 021680 000083 000000 | 000045

>>> CREATING SNAPSHOT 0 after 11 seconds

>>> storing rules in file rules-10
>>> sto

anyburl testing traces:  20%|██        | 2/10 [03:53<15:34, 116.76s/it]

* read 141449 triples
* indexed 10000 triples
* indexed 20000 triples
* indexed 40000 triples
* indexed 80000 triples
* set up index for 18 relations, 40505 head entities, and 40552 tail entities
* set up list structure for randomized access searches uring rule learning ...  done
* precomputing random starting points for each relation/direction for the beam search ... done.
* creating worker thread #0
* creating worker thread #1
* creating worker thread #2
* creating worker thread #3
* creating worker thread #4
* creating worker thread #5
* creating worker thread #6
THREAD-4 starts to work with L=0 C=Zero 
THREAD-3 starts to work with L=1 C=Acyclic 
THREAD-5 starts to work with L=3 C=Cyclic 
THREAD-1 starts to work with L=3 C=Cyclic 
THREAD-6 starts to work with L=1 C=Acyclic 
THREAD-2 starts to work with L=3 C=Cyclic 
THREAD-0 starts to work with L=2 C=Cyclic 
 000020 |  > 99k 000008 000047 | 000038

>>> CREATING SNAPSHOT 0 after 11 seconds

>>> storing rules in file rules-10
>>> stor

anyburl testing traces:  30%|███       | 3/10 [05:51<13:41, 117.36s/it]

* read 141449 triples
* indexed 10000 triples
* indexed 20000 triples
* indexed 40000 triples
* indexed 80000 triples
* set up index for 18 relations, 40505 head entities, and 40552 tail entities
* set up list structure for randomized access searches uring rule learning ...  done
* precomputing random starting points for each relation/direction for the beam search ... done.
* creating worker thread #0
* creating worker thread #1
* creating worker thread #2
* creating worker thread #3
* creating worker thread #4
* creating worker thread #5
* creating worker thread #6
THREAD-4 starts to work with L=1 C=Acyclic 
THREAD-6 starts to work with L=3 C=Cyclic 
THREAD-3 starts to work with L=1 C=Cyclic 
THREAD-2 starts to work with L=0 C=Zero 
THREAD-1 starts to work with L=1 C=Acyclic 
THREAD-0 starts to work with L=1 C=Cyclic 
THREAD-5 starts to work with L=0 C=Zero 
 000010 | 035299  > 99k 000146 | 000206
 000000 | 008854 000097 000137 | 000094

>>> CREATING SNAPSHOT 0 after 11 seconds

>>> s

anyburl testing traces:  40%|████      | 4/10 [07:47<11:41, 116.84s/it]

* read 141449 triples
* indexed 10000 triples
* indexed 20000 triples
* indexed 40000 triples
* indexed 80000 triples
* set up index for 18 relations, 40505 head entities, and 40552 tail entities
* set up list structure for randomized access searches uring rule learning ...  done
* precomputing random starting points for each relation/direction for the beam search ... done.
* creating worker thread #0
* creating worker thread #1
* creating worker thread #2
* creating worker thread #3
* creating worker thread #4
* creating worker thread #5
* creating worker thread #6
THREAD-4 starts to work with L=2 C=Cyclic 
THREAD-5 starts to work with L=1 C=Cyclic 
THREAD-2 starts to work with L=0 C=Zero 
THREAD-6 starts to work with L=3 C=Cyclic 
THREAD-1 starts to work with L=2 C=Cyclic 
THREAD-0 starts to work with L=0 C=Zero 
THREAD-3 starts to work with L=1 C=Cyclic 
 000010 | 035631 000154 000153 |  > 99k
 000010 | 006726 000125 000208 | 000165

>>> CREATING SNAPSHOT 0 after 11 seconds

>>> sto

anyburl testing traces:  50%|█████     | 5/10 [09:43<09:41, 116.37s/it]

* read 141449 triples
* indexed 10000 triples
* indexed 20000 triples
* indexed 40000 triples
* indexed 80000 triples
* set up index for 18 relations, 40505 head entities, and 40552 tail entities
* set up list structure for randomized access searches uring rule learning ...  done
* precomputing random starting points for each relation/direction for the beam search ... done.
* creating worker thread #0
* creating worker thread #1
* creating worker thread #2
* creating worker thread #3
* creating worker thread #4
* creating worker thread #5
* creating worker thread #6
THREAD-1 starts to work with L=3 C=Cyclic 
THREAD-2 starts to work with L=3 C=Cyclic 
THREAD-4 starts to work with L=1 C=Acyclic 
THREAD-0 starts to work with L=2 C=Cyclic 
THREAD-5 starts to work with L=3 C=Cyclic 
THREAD-3 starts to work with L=0 C=Zero 
THREAD-6 starts to work with L=0 C=Zero 
 000010 |  > 99k 000150 000074 | 000160
 000000 | 024524 000150 000074 | 000074

>>> CREATING SNAPSHOT 0 after 11 seconds

>>> st

anyburl testing traces:  60%|██████    | 6/10 [11:44<07:51, 117.92s/it]

* read 141449 triples
* indexed 10000 triples
* indexed 20000 triples
* indexed 40000 triples
* indexed 80000 triples
* set up index for 18 relations, 40505 head entities, and 40552 tail entities
* set up list structure for randomized access searches uring rule learning ...  done
* precomputing random starting points for each relation/direction for the beam search ... done.
* creating worker thread #0
* creating worker thread #1
* creating worker thread #2
* creating worker thread #3
* creating worker thread #4
* creating worker thread #5
* creating worker thread #6
THREAD-6 starts to work with L=1 C=Acyclic 
THREAD-4 starts to work with L=3 C=Cyclic 
THREAD-5 starts to work with L=2 C=Cyclic 
THREAD-3 starts to work with L=2 C=Cyclic 
THREAD-1 starts to work with L=0 C=Zero 
THREAD-2 starts to work with L=0 C=Zero 
THREAD-0 starts to work with L=1 C=Cyclic 
 000010 | 049010 000076 000140 | 000017
 000010 | 015064 000085 000240 | 000087

>>> CREATING SNAPSHOT 0 after 12 seconds

>>> st

anyburl testing traces:  70%|███████   | 7/10 [13:42<05:54, 118.13s/it]