In [1]:
import warnings
warnings.filterwarnings("ignore")
import Entities_Mapper as em
import json
import PredicateMapper as pm
import re

In [2]:
class Mapping:
    """Map input triples onto CSKG entities and mapped predicates.

    Entity labels are resolved through ``Entities_Mapper.EntitiesMapper`` and
    predicates through ``PredicateMapper.PredicateMapper``. A predicate that
    carries a ``-by`` marker is treated as passive and its triple is flipped to
    active voice. Records accumulate in ``self.mapping_result`` and can be
    written to ``output_path`` with :meth:`write_to_json`.
    """

    # Marker identifying a passive predicate (matches e.g. "used-by").
    # Compiled once and shared by is_passive() and convert_to_active().
    _PASSIVE_MARKER = re.compile(r"\b(?:-by)\b", re.IGNORECASE)

    def __init__(self, input_path, verbs_map_path, embd_verbs_path, output_path):
        """Store the file locations used by the mappers and the JSON writer.

        Args:
            input_path: passed to both ``EntitiesMapper`` and ``PredicateMapper``.
            verbs_map_path: passed to ``PredicateMapper``.
            embd_verbs_path: passed to ``PredicateMapper``.
            output_path: passed to ``EntitiesMapper`` and used as the target
                file of :meth:`write_to_json`.
        """
        self.input_path = input_path
        self.output_path = output_path
        self.verbs_map_path = verbs_map_path
        self.embd_verbs_path = embd_verbs_path
        self.mapping_result = []  # list of record dicts produced by apply()/apply_sim_pred()

    def is_passive(self, s, p, o):
        """Return True when predicate ``p`` contains the ``-by`` passive marker.

        ``s`` and ``o`` (subject and object) are accepted so the method takes a
        whole triple, but only the predicate is inspected.
        """
        return self._PASSIVE_MARKER.search(p) is not None

    def convert_to_active(self, s, p, o):
        """Flip a passive triple to active voice.

        The ``-by`` marker is removed from the predicate (surrounding
        whitespace stripped) and subject and object swap places.

        Returns:
            tuple: ``(o, predicate_without_by, s)``.
        """
        predicate_without_by = self._PASSIVE_MARKER.sub("", p).strip()
        return (o, predicate_without_by, s)

    @staticmethod
    def _lookup(table, label):
        """Return ``table[label]`` when ``label`` is a known key, else ``label`` unchanged."""
        return table[label] if label in table else label

    def _build_record(self, element, predicate, entity_table):
        """Build one output record from an input triple and its already-mapped predicate."""
        subject = self._lookup(entity_table, element["subject"])
        obj = self._lookup(entity_table, element["object"])

        # Normalise passive triples to active voice.
        if self.is_passive(subject, predicate, obj):
            subject, predicate, obj = self.convert_to_active(subject, predicate, obj)

        return {
            'sentence': element['sentence'],
            'subject': subject,
            'predicate': predicate,
            'object': obj,
            'confidence': element['confidence'],
        }

    def apply(self):
        """Map every input triple using ``PredicateMapper.direct_mapping``.

        Each record also carries the ``first_validation`` value of its input
        triple. Duplicate triples are merged once, after all records have been
        collected.
        """
        entities_mapper = em.EntitiesMapper(self.input_path, self.output_path)
        entities_mapper.run()
        predicate_mapper = pm.PredicateMapper(self.input_path, self.verbs_map_path, self.embd_verbs_path)

        for element in entities_mapper.input_triples:
            predicate = predicate_mapper.direct_mapping(element["predicate"])
            record = self._build_record(element, predicate, entities_mapper.label2cskg_entity)
            record['first_validation'] = element['first_validation']
            self.mapping_result.append(record)

        # De-duplicate once at the end: doing it per iteration rebuilt the whole
        # list every time without changing the final result.
        self.handle_duplicates()

    # Variant that relies on predicate similarity.
    def apply_sim_pred(self):
        """Map every input triple using the similarity-based predicate table.

        ``similarity_predicate_mapping(0.7)`` is run first; predicates found in
        ``predicate_mapper.mapped_predicate`` are replaced, all others are kept
        as they are. Duplicate triples are merged after the loop.
        """
        entities_mapper = em.EntitiesMapper(self.input_path, self.output_path)
        entities_mapper.run()
        predicate_mapper = pm.PredicateMapper(self.input_path, self.verbs_map_path, self.embd_verbs_path)
        # NOTE(review): 0.7 is presumably a similarity cut-off -- confirm against PredicateMapper.
        predicate_mapper.similarity_predicate_mapping(0.7)
        predicate_table = predicate_mapper.mapped_predicate

        for element in entities_mapper.input_triples:
            predicate = self._lookup(predicate_table, element["predicate"])
            self.mapping_result.append(
                self._build_record(element, predicate, entities_mapper.label2cskg_entity)
            )

        self.handle_duplicates()

    def handle_duplicates(self):
        """Collapse records that share the same (subject, predicate, object).

        The first record of each triple is kept, including its confidence and
        any additional fields such as ``first_validation``. Sentences of later
        duplicates are appended to the kept record, separated by ``;``, and a
        sentence that was already merged for that triple is not appended again.
        ``self.mapping_result`` is replaced by the de-duplicated list, in
        first-occurrence order.
        """
        core_fields = ('subject', 'predicate', 'object', 'sentence', 'confidence')
        merged = {}          # triple -> output record
        seen_sentences = {}  # triple -> sentences already folded into that record

        for item in self.mapping_result:
            triple = (item['subject'], item['predicate'], item['object'])
            sentence = item['sentence']

            if triple not in merged:
                record = {
                    'subject': triple[0],
                    'predicate': triple[1],
                    'object': triple[2],
                    'sentence': sentence,
                    'confidence': item['confidence'],
                }
                # Carry through any extra fields instead of silently dropping them.
                for field, value in item.items():
                    if field not in core_fields:
                        record[field] = value
                merged[triple] = record
                seen_sentences[triple] = [sentence]
            elif sentence not in seen_sentences[triple]:
                merged[triple]['sentence'] += ";" + sentence
                seen_sentences[triple].append(sentence)

        self.mapping_result = list(merged.values())

    def write_to_json(self):
        """Write ``self.mapping_result`` to ``self.output_path`` as indented UTF-8 JSON."""
        with open(self.output_path, 'w', encoding='utf-8') as jsonfile:
            json.dump(self.mapping_result, jsonfile, ensure_ascii=False, indent=2)

    def save_results(self):
        """Placeholder; currently does nothing."""
        pass

In [3]:
# Root of the local project checkout. Every path below is derived from it, so
# relocating the project only requires editing this one line.
PROJECT_ROOT = "C:/Users/admin-user/Desktop/my_phd/implementations_KG"

input_path = PROJECT_ROOT + "/src/post_processing/syntactic_cleaning/Bench_merged_triples.json"
output_path = PROJECT_ROOT + "/src/post_processing/mapping/Bench_mapped_triples.json"
verbs_map_path = PROJECT_ROOT + "/resources/CSKG_VerbNet_verb_map.csv"
embd_path = PROJECT_ROOT + "/resources/predicate_embeddings.json"


# Run the direct-mapping pipeline and write the mapped triples to output_path.
mapping = Mapping(input_path, verbs_map_path, embd_path, output_path)
mapping.apply()
mapping.write_to_json()

get Entities and pairs
	>> Entities to be mapped: 189
- 	 >> Mapping with wikidata started
	 >> Wikidata Processed 100 entities in 52.27 secs.
> Mapped to Wikidata: 46
-------------------------------------------


In [None]:
# Preview only the first few mapped records instead of dumping the whole list.
mapping.mapping_result[:5]

In [None]:
# Number of records held in mapping.mapping_result after mapping.apply().
len(mapping.mapping_result)

In [None]:
# Stand-alone PredicateMapper built from the same input, verb-map and embedding
# paths as the Mapping run, for ad-hoc checks.
predicate_mapper = pm.PredicateMapper(input_path, verbs_map_path,  embd_path)

In [None]:
# Spot-check what direct_mapping returns for the sample predicate "be use".
predicate_mapper.direct_mapping("be use")