In [1]:
pip install pslpython

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
# -*- coding: utf-8 -*-
"""
Created on Thu Aug  4 09:49:29 2022

@author: ewanhilton
"""


from classes.DatasetGenerator import DatasetGenerator
from classes.EntityConverter import EntityConverter
from classes.PSLFileBuilder import PSLFileBuilder
from pykeen.datasets import CoDExSmall

#Setting this to True is required to get all files needed for PSL,
#but is very costly
CREATE_FILES = True 

def pre_main():
    dataset = CoDExSmall()
    
    train_triples = dataset.training.mapped_triples.numpy()
    val_triples = dataset.validation.mapped_triples.numpy()
    test_triples = dataset.testing.mapped_triples.numpy()   
    
    generator = DatasetGenerator()
    generator.generate_dataset_file('train.txt','CoDEx',train_triples,dataset)
    generator.generate_dataset_file('valid.txt','CoDEx',val_triples,dataset)
    generator.generate_dataset_file('test.txt','CoDEx',test_triples,dataset)
    
    if CREATE_FILES:
        dataset = CoDExSmall()
        
        train_triples = dataset.training.mapped_triples.numpy()
        val_triples = dataset.validation.mapped_triples.numpy()
        #test_triples = dataset.testing.mapped_triples.numpy()  
    
        entity_converter = EntityConverter(dataset)
        create_files(train_triples,val_triples,entity_converter)
    
    #Create files needed by PSL
def create_files(train_triples, val_triples,entity_converter):    
    filebuilder = PSLFileBuilder(train_triples, val_triples, entity_converter)
    filebuilder.build_map_files()
    filebuilder.build_obs_files()
    filebuilder.build_target_files()
    filebuilder.build_truth_files()

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
# -*- coding: utf-8 -*-
"""
Created on Thu Aug  4 08:04:34 2022

@author: ewanhilton
"""
import os

from pslpython.model import Model
from pslpython.partition import Partition
from pslpython.predicate import Predicate
from pslpython.rule import Rule
from pykeen.datasets import CoDExSmall
from classes.ANYBurlToPSLConverter import ANYBurlToPSLConverter
from classes.RuleImporter import RuleImporter
from classes.EntityConverter import EntityConverter
from classes.DatasetGenerator import DatasetGenerator

MODEL_NAME = 'ANYBurl and PSL Model'

DATA_DIR = os.path.join('data')

ADDITIONAL_PSL_OPTIONS = {
    'log4j.threshold': 'INFO'
}

ADDITIONAL_CLI_OPTIONS = [
    # '--postgres'
]

def main():
    importer = RuleImporter()
    importer.import_rules()
    
    model = Model(MODEL_NAME)
    dataset = CoDExSmall()
    entity_converter = EntityConverter(dataset)
   
    #train_triples = dataset.training.mapped_triples.numpy()
    #val_triples = dataset.validation.mapped_triples.numpy()
    #test_triples = dataset.testing.mapped_triples.numpy()  
    
    generator = DatasetGenerator()
    # Add Predicates
    add_predicates(model,entity_converter,generator)
    
    # Add Rules
    add_rules(model,importer.rules)

    # Inference
    results = infer(model,entity_converter,generator)

    write_results(results, model)
 
def add_predicates(model,entity_converter,generator):    
    for relindex,name in entity_converter.relindex_to_name.items():
        predicate = Predicate(generator.encode_text(name), closed = name == 'member of', size = 2)
        model.add_predicate(predicate)       
"""  
def add_rules(model, rules):
    converter = ANYBurlToPSLConverter(rules)
    for rule in converter.converted_rules:
        if rule.split('->')[1].split('(')[0].replace(' ','') == 'member%20of':
            model.add_rule(Rule(rule))
"""

def add_rules(model, rules):
    converter = ANYBurlToPSLConverter(rules)
    for rule in converter.converted_rules:
        print(rule)
    model.add_rule(Rule("100: employer(X,Y) -> genre(X,Y) ^2"))

def add_data(model,entity_converter,generator):
    for relindex,name in entity_converter.relindex_to_name.items():      
        if os.path.getsize(f"data/obs/{generator.encode_text(name)}_obs.txt") > 0: #Check file has content before adding 
            path = f'data/obs/{generator.encode_text(name)}_obs.txt'
            model.get_predicate(generator.encode_text(name)).add_data_file(Partition.OBSERVATIONS, path)
    
        if os.path.getsize(f"data/targets/{generator.encode_text(name)}_targets.txt") > 0:
            path = f'data/targets/{generator.encode_text(name)}_targets.txt'
            model.get_predicate(generator.encode_text(name)).add_data_file(Partition.TARGETS, path)
    
        if os.path.getsize(f"data/truth/{generator.encode_text(name)}_truth.txt") > 0:
            path = f'data/truth/{generator.encode_text(name)}_truth.txt'
            model.get_predicate(generator.encode_text(name)).add_data_file(Partition.TRUTH, path)
    
def infer(model,entity_converter,generator):
    add_data(model, entity_converter, generator)
    return model.infer(additional_cli_options = ADDITIONAL_CLI_OPTIONS, psl_config = ADDITIONAL_PSL_OPTIONS)
      
def write_results(results, model):
    out_dir = 'inferred-predicates'
    os.makedirs(out_dir, exist_ok = True)

    for predicate in model.get_predicates().values():
        if (predicate.closed()):
            continue

        out_path = os.path.join(out_dir, "%s.txt" % (predicate.name()))
        results[predicate].to_csv(out_path, sep = "\t", header = False, index = False)       

In [3]:
pre_main()

  0%|          | 0/42 [00:00<?, ?it/s]
100%|██████████| 32888/32888 [00:00<00:00, 978911.04it/s]

100%|██████████| 32888/32888 [00:00<00:00, 965157.71it/s]

100%|██████████| 32888/32888 [00:00<00:00, 966131.13it/s]
  7%|▋         | 3/42 [00:00<00:01, 27.63it/s]
100%|██████████| 32888/32888 [00:00<00:00, 935671.25it/s]

100%|██████████| 32888/32888 [00:00<00:00, 987135.18it/s]

100%|██████████| 32888/32888 [00:00<00:00, 947750.69it/s]
 14%|█▍        | 6/42 [00:00<00:01, 27.12it/s]
100%|██████████| 32888/32888 [00:00<00:00, 936840.51it/s]

100%|██████████| 32888/32888 [00:00<00:00, 945723.40it/s]

100%|██████████| 32888/32888 [00:00<00:00, 950185.78it/s]
 21%|██▏       | 9/42 [00:00<00:01, 26.97it/s]
100%|██████████| 32888/32888 [00:00<00:00, 940956.02it/s]

100%|██████████| 32888/32888 [00:00<00:00, 948584.92it/s]

100%|██████████| 32888/32888 [00:00<00:00, 948565.35it/s]
 29%|██▊       | 12/42 [00:00<00:01, 26.91it/s]
100%|██████████| 32888/32888 [00:00<00:00, 938599.88it/s]

100%|████

In [17]:
main()

0.04: occupation(X,A)
 & (X != dance_music) -> genre(X,dance_music) ^2
0.03409090909090909: occupation(X,musician)
 & (X != dance_music) -> genre(X,dance_music) ^2
0.13186813186813187: occupation(X,model)
 & (X != dance_music) -> genre(X,dance_music) ^2
0.03968253968253968: occupation(X,film_actor)
 & (X != dance_music) -> genre(X,dance_music) ^2
0.1111111111111111: occupation(X,rapper)
 & (X != dance_music) -> genre(X,dance_music) ^2
0.046: languages_spoken__written__or_signed(X,English)
 & (X != painter) -> occupation(X,painter) ^2
0.05: languages_spoken__written__or_signed(X,A)
 & (X != painter) -> occupation(X,painter) ^2
0.05714285714285714: languages_spoken__written__or_signed(X,Italian)
 & (X != painter) -> occupation(X,painter) ^2
0.16666666666666666: place_of_death(X,Saint_Petersburg)
 & (X != prose) -> genre(X,prose) ^2
0.02416918429003021: place_of_death(X,A)
 & (X != prose) -> genre(X,prose) ^2
0.08982035928143713: member_of(X,International_Telecommunication_Union)
 & (X !=

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)

890037 [pslpython.model PSL] INFO --- 2199 [main] INFO  org.linqs.psl.util.RandUtils  - Using random seed: 46594397
890045 [pslpython.model PSL] INFO --- 2206 [main] INFO  org.linqs.psl.application.inference.InferenceApplication  - Grounding out model.
890057 [pslpython.model PSL] ERROR --- (Unknown PSL logging level) -- Unexpected exception!
890058 [pslpython.model PSL] ERROR --- (Unknown PSL logging level) -- java.lang.RuntimeException: Error thrown while constructing org.linqs.psl.application.inference.mpe.ADMMInference
890059 [pslpython.model PSL] ERROR --- (Unknown PSL logging level) -- 	at org.linqs.psl.application.inference.InferenceApplication.getInferenceApplica

ModelError: PSL returned a non-zero exit status: 1.

In [None]:
print("hello")