## Prepare example SHACL profiles from JSON API

In [1]:
import requests
import pprint
import pandas as pd
import json
import re

### Retrieve the list of profile ids of a project

In [2]:
def replace_and_remove(input_string):
    """
    Turns a label into a dash-separated, title-cased identifier.

    The following rewrites are applied in this order:
    1. commas, dots and parentheses are removed;
    2. ' - ' (a dash surrounded by spaces) becomes '-';
    3. the text ' ongoing' is removed;
    4. every remaining whitespace character becomes '-';
    5. the result is title-cased with str.title().

    Parameters:
    input_string (str): The input string to be processed.

    Returns:
    str: The processed string.
    """
    # Ordered (pattern, replacement) rules; each rule works on the output of
    # the previous one, so the order matters (e.g. ' - ' must be collapsed
    # before single spaces are turned into dashes).
    rewrite_rules = (
        (r'[(),.]', ''),
        (r' - ', '-'),
        (r' ongoing', ''),
        (r'\s', '-'),
    )

    slug = input_string
    for pattern, replacement in rewrite_rules:
        slug = re.sub(pattern, replacement, slug)

    return slug.title()

In [3]:
# OntoME project whose selected profiles are requested
project_number = 201  #'6'
# JSON API endpoint listing the profiles selected by that project
project_url = "https://ontome.net/api/profiles.json?lang=en&selected-by-project={}".format(project_number)

In [4]:
# Fetch the profiles selected by the project. On failure the error is printed
# and an empty list is used, so the lines below neither raise a NameError nor
# silently reuse a `json_profiles` value left over from an earlier run.
try:
    # The timeout keeps a stalled connection from blocking the notebook
    response = requests.get(project_url, timeout=60)
    # Turn HTTP error statuses into exceptions instead of parsing an error body
    response.raise_for_status()
    json_profiles = response.json()
except (requests.RequestException, ValueError) as e:
    # ValueError covers a body that is not valid JSON
    print(e)
    json_profiles = []


# One [profileID, cleaned label] pair per profile
json_profiles_list = [[p['profileID'], replace_and_remove(p['profileLabel'])] for p in json_profiles]
print(len(json_profiles_list))
print(json_profiles_list[-10:])

21
[[214, 'Person-Social-Or-Professional-Position'], [216, 'Person-Religious-Affiliation'], [218, 'Person-Legal-Fact'], [220, 'Person-Legal-Quality-Acquisition'], [222, 'Person-Study'], [224, 'Person-Study-Title-Acquisition'], [226, 'Person-Academic-Position'], [228, 'Person-Teaching'], [252, 'Person-Origin'], [262, 'Person-Classification']]


In [5]:
def get_project_profiles_list(id_project):
    """
    Fetches the profiles selected by an OntoME project.

    Parameters:
    id_project (int or str): Identifier of the OntoME project; it is appended
        to the query string of the profiles endpoint.

    Returns:
    list: One [profileID, label] pair per profile, where label is the
    'profileLabel' value passed through replace_and_remove. The list is empty
    when the request fails, returns an HTTP error status or does not contain
    valid JSON; the error is printed in that case.
    """
    project_url = "https://ontome.net/api/profiles.json?lang=en&selected-by-project="+str(id_project)

    try:
        # The timeout keeps a stalled connection from blocking the caller
        response = requests.get(project_url, timeout=60)
        # Turn HTTP error statuses into exceptions instead of parsing an error body
        response.raise_for_status()
        json_profiles = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON. Returning here
        # avoids the UnboundLocalError that reading `json_profiles` would raise.
        print(e)
        return []

    # build the list of profiles
    return [[p['profileID'], replace_and_remove(p['profileLabel'])] for p in json_profiles]


In [6]:
# Project whose profiles are listed
id_project = 201  # 6
json_profiles_list = get_project_profiles_list(id_project)

# Number of profiles on the first line, the first three [id, label] pairs on the second
print(len(json_profiles_list), json_profiles_list[:3], sep='\n')

21
[[16, 'Interactions-Social-Relationships-And-Memberships-Of-Persons'], [30, 'Social-And-Legal-Qualities-Of-A-Person'], [172, 'Person-Gender-Light']]


## Single function: write the SHACL shapes for all profiles of a project

In [12]:
def write_profile_shapes_for_project(id_project, write_folder):
    """
    Writes one SHACL (Turtle) file per profile selected by an OntoME project.

    For each profile returned by get_project_profiles_list, the classes and
    properties of the profile are fetched from the OntoME JSON API, one
    sh:NodeShape is generated per class, and the text is written to
    "../<write_folder>/<profileID>-<profile label>.ttl".

    Every shape gets an rdfs:label and an rdfs:comment property, then one
    property per OntoME property whose domain is the class and whose range is
    also a class of the profile. Classes with entityBasicType 9 additionally
    get the sdh-short date shortcuts; other classes get 'sh:minCount 1' on
    label and comment, and crm:E21 gets the birth-date shortcut.

    A profile whose classes or properties cannot be fetched is reported with
    print and skipped, so data of the previous profile is never written under
    another profile's name.

    Parameters:
    id_project (int or str): Identifier of the OntoME project.
    write_folder (str): Name of the output folder, looked up in the parent of
        the current working directory. The folder is not created here.

    Returns:
    None
    """

    def fetch_json(url):
        # Returns the decoded JSON body, or None after printing the error.
        # The timeout keeps a stalled connection from blocking the whole run;
        # ValueError covers a body that is not valid JSON.
        try:
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            print(e)
            return None

    def escape_literal(text):
        # Escapes the characters that may not appear raw inside a
        # double-quoted Turtle string (backslash first, so that the
        # backslashes added for the other characters are not doubled).
        return (str(text).replace('\\', '\\\\').replace('"', '\\"')
                .replace('\n', '\\n').replace('\r', '\\r'))

    # The templates are loop-invariant, so they are defined once here.
    header_template = """### SHACL OntoME Profile {} \n\n

@prefix sdh-shacl: <https://sdhss.org/shacl/profiles/>. 
@prefix sh: <http://www.w3.org/ns/shacl#> .  
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .  
@prefix sdh-short: <https://sdhss.org/ontology/shortcuts/>.
        """

    # {0}: class prefix, {1}: class identifier, {2}: class label,
    # {3}: optional 'sh:minCount 1;'
    shape_template = """ \n
    sdh-shacl:{0}_{1}_Shape a sh:NodeShape ;
        sh:targetClass {0}:{1} ;
        sh:name "{2}" ;

        sh:property [
            sh:path rdfs:label ;
            sh:name "Label";
            sh:datatype xsd:string;
            sh:order 1;
            {3}
            sh:maxCount 1;
            ];

        sh:property [
            sh:path rdfs:comment ;
            sh:name "Description";
            sh:datatype rdf:HTML;
            sh:order 2;
            {3}
            ];  
        """

    # {0}:{1} property, {2} property label, {3}:{4} range class,
    # {5}: optional 'sh:maxCount 1 ;'
    property_template = """    
        sh:property [
                sh:path {0}:{1} ;
                sh:name "{2}";
                sh:class {3}:{4} ;
                {5}
                ];
                    """

    # Date shortcuts added to classes whose entityBasicType is 9
    temporal_properties = """
        sh:property [
            sh:path sdh-short:P1 ;
            sh:name "at some time within";
            sh:datatype xsd:string;
            sh:order 3;
            sh:maxCount 1;
            ];

        sh:property [
            sh:path sdh-short:P4 ;
            sh:name "begins on";
            sh:datatype xsd:string;
            sh:order 4;
            sh:maxCount 1;
            ];

        sh:property [
            sh:path sdh-short:P7 ;
            sh:name "ends on";
            sh:datatype xsd:string;
            sh:order 5;
            sh:maxCount 1;
            ];

        sh:property [
            sh:path sdh-short:P3 ;
            sh:name "begins after";
            sh:datatype xsd:string;
            sh:order 6;
            sh:maxCount 1;
            ];

        sh:property [
            sh:path sdh-short:P5 ;
            sh:name "begins before";
            sh:datatype xsd:string;
            sh:order 7;
            sh:maxCount 1;
            ];

        sh:property [
            sh:path sdh-short:P6 ;
            sh:name "ends after";
            sh:datatype xsd:string;
            sh:order 8;
            sh:maxCount 1;
            ];

        sh:property [
            sh:path sdh-short:P8 ;
            sh:name "ends before";
            sh:datatype xsd:string;
            sh:order 9;
            sh:maxCount 1;
            ];
            """

    # Birth-date shortcut added to crm:E21
    birth_date_property = """
            sh:property [
                sh:path sdh-short:P2 ;
                sh:name "has birth date";
                sh:datatype xsd:string;
                sh:maxCount 1;
                sh:order 3;
                ];
                        """

    for profile_id, profile_label in get_project_profiles_list(id_project):

        # JSON values for the classes and the properties of this profile
        json_classes = fetch_json(
            'https://ontome.net/api/classes-profile.json?lang=en&available-in-profile=' + str(profile_id))
        if json_classes is None:
            print('Profile {} skipped: classes could not be fetched'.format(profile_id))
            continue

        json_properties = fetch_json(
            'https://ontome.net/api/properties-profile.json?lang=en&available-in-profile=' + str(profile_id))
        if json_properties is None:
            print('Profile {} skipped: properties could not be fetched'.format(profile_id))
            continue

        # Namespace declarations of classes and properties, de-duplicated and
        # sorted so that two runs produce the same file
        ns_list = sorted({
            '@prefix ' + item["namespacePrefix"] + ': <' + item["namespaceURI"] + '>.'
            for item in json_classes + json_properties
        })
        ns_list_txt = '\n'.join(ns_list)

        # classes dataframe; passing `columns` to the constructor keeps the
        # frame valid when the profile has no class at all
        df_lc = pd.DataFrame(
            [[cla['namespacePrefix'], cla["namespaceURI"], cla["classIdentifierInNamespace"],
              cla['classID'], cla["classLabel"], cla["entityBasicType"], cla["ancestorClasses"]]
             for cla in json_classes],
            columns=['pref', 'ns_uri', 'uri', 'id', 'label', 'type', 'ancestors'])

        # properties dataframe (same remark for a profile without properties)
        df_lp = pd.DataFrame(
            [[prop["propertyDomain"], prop["domainInstancesMinQuantifier"], prop["domainInstancesMaxQuantifier"],
              prop['namespacePrefix'], prop['namespaceURI'],
              prop["propertyIdentifierInNamespace"], prop['propertyLabel'],
              prop["propertyRange"], prop["rangeInstancesMinQuantifier"], prop["rangeInstancesMaxQuantifier"]]
             for prop in json_properties],
            columns=['id_s', 'min_s', 'max_s', 'ns_prefix', 'ns_uri', 'uri', 'label', 'id_t', 'min_t', 'max_t'])

        ## enrich properties

        # get domain classes: the shared column 'uri' becomes 'uri_ps'
        # (property) and 'uri_so' (domain class)
        df_ms = df_lp.merge(df_lc[['pref', 'uri', 'id']], left_on='id_s', right_on='id', suffixes=('_ps', '_so'))
        # get range classes: 'pref' becomes 'pref_pt' (domain) and 'pref_ta'
        # (range), while the unsuffixed 'uri' is the range class identifier
        df_mt = df_ms.merge(df_lc[['pref', 'uri', 'id']], left_on='id_t', right_on='id', suffixes=('_pt', '_ta'))
        # new properties dataframe (copied so that renaming the columns does
        # not act on a view of df_mt)
        p_df = df_mt[['pref_pt', 'uri_so', 'min_s', 'max_s', 'ns_prefix', 'ns_uri',
                      'uri_ps', 'label', 'pref_ta', 'uri', 'min_t', 'max_t']].copy()
        p_df.columns = ['pref_sub', 'uri_sub', 'min_sub', 'max_sub',
                        'ns_prefix', 'ns_pp', 'uri_pp', 'pp_label',
                        'pref_ob', 'uri_ob', 'min_ob', 'max_ob']

        ### Build the text of the corresponding shacl profile

        parts = [header_template.format(profile_id) + ns_list_txt]

        ## for each class
        for _, row in df_lc.iterrows():

            is_temporal = row['type'] == 9

            # label and comment are mandatory except for type 9 classes
            min_count_label_comment = '' if is_temporal else 'sh:minCount 1;'

            parts.append(shape_template.format(row['pref'], row['uri'],
                                               escape_literal(row['label']),
                                               min_count_label_comment))

            ## outgoing properties
            for _, row_p in p_df.iterrows():

                # keep only the properties whose domain is the current class
                if row_p['pref_sub'] != row['pref'] or row_p['uri_sub'] != row['uri']:
                    continue

                max_count = 'sh:maxCount 1 ;' if row_p['max_ob'] == 1 else ''

                parts.append(property_template.format(
                    row_p['ns_prefix'], row_p['uri_pp'],
                    escape_literal(row_p['pp_label']),
                    row_p['pref_ob'], row_p['uri_ob'], max_count))

            ## dates of Temporal Entities and specific classes (Person)
            # Both values are assigned for every class, so a class never
            # inherits the birth-date block of the class processed before it.
            if is_temporal:
                temporal_properties_standard_time = temporal_properties
                person_birth_date = ''
            else:
                temporal_properties_standard_time = ''
                if row['pref'] + ':' + row['uri'] == 'crm:E21':
                    person_birth_date = birth_date_property
                else:
                    person_birth_date = ''

            parts.append(""" 
            {0}{1}   
            """.format(temporal_properties_standard_time,
                       person_birth_date))

            # closes the node shape statement
            parts.append('.')

        shacl_prof = ''.join(parts)

        # print(shacl_prof)

        ## write file (explicit UTF-8 so labels with non-ASCII characters do
        ## not depend on the platform's default encoding)
        file_address = "../{}/{}-{}.ttl".format(write_folder, profile_id, profile_label)
        with open(file_address, "w", encoding="utf-8") as text_file:
            text_file.write(shacl_prof)







In [13]:
# Name of the output folder passed to write_profile_shapes_for_project
write_folder = 'sdhss_shacl_profiles'  # 'geovistory_shacl_profiles'

In [14]:
# Generate and write the SHACL files for the profiles of the project
write_profile_shapes_for_project(id_project, write_folder)

In [None]:
def write_profile_shapes_for_project(id_project, write_folder='geovistory_shacl_profiles'):
    """
    Writes one SHACL (Turtle) file per profile selected by an OntoME project.

    For each profile returned by get_project_profiles_list, the classes and
    properties of the profile are fetched from the OntoME JSON API, one
    sh:NodeShape is generated per class, and the text is written to
    "../<write_folder>/<profileID>-<profile label>.ttl".

    Every shape gets an rdfs:label and an rdfs:comment property, then one
    property per OntoME property whose domain is the class and whose range is
    also a class of the profile. Classes with entityBasicType 9 additionally
    get the sdh-short date shortcuts; other classes get 'sh:minCount 1' on
    label and comment. crm:E21 gets the birth-date shortcut.

    A profile whose classes or properties cannot be fetched is reported with
    print and skipped, so data of the previous profile is never written under
    another profile's name.

    Parameters:
    id_project (int or str): Identifier of the OntoME project.
    write_folder (str): Name of the output folder, looked up in the parent of
        the current working directory. Defaults to
        'geovistory_shacl_profiles', the folder this function used to
        hard-code, so one-argument calls keep working. The folder is not
        created here.

    Returns:
    None
    """

    def fetch_json(url):
        # Returns the decoded JSON body, or None after printing the error.
        # The timeout keeps a stalled connection from blocking the whole run;
        # ValueError covers a body that is not valid JSON.
        try:
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            print(e)
            return None

    def escape_literal(text):
        # Escapes the characters that may not appear raw inside a
        # double-quoted Turtle string (backslash first, so that the
        # backslashes added for the other characters are not doubled).
        return (str(text).replace('\\', '\\\\').replace('"', '\\"')
                .replace('\n', '\\n').replace('\r', '\\r'))

    # The templates are loop-invariant, so they are defined once here.
    header_template = """### SHACL OntoME Profile {} \n\n

@prefix sdh-shacl: <https://sdhss.org/shacl/profiles/>. 
@prefix sh: <http://www.w3.org/ns/shacl#> .  
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .  
@prefix sdh-short: <https://sdhss.org/ontology/shortcuts/>.
        """

    # {0}: class prefix, {1}: class identifier, {2}: class label,
    # {3}: optional 'sh:minCount 1;'
    shape_template = """ \n
    sdh-shacl:{0}_{1}_Shape a sh:NodeShape ;
        sh:targetClass {0}:{1} ;
        sh:name "{2}" ;

        sh:property [
            sh:path rdfs:label ;
            sh:name "Label";
            sh:datatype xsd:string;
            sh:order 1;
            {3}
            sh:maxCount 1;
            ];

        sh:property [
            sh:path rdfs:comment ;
            sh:name "Description";
            sh:datatype rdf:HTML;
            sh:order 2;
            {3}
            ];  
        """

    # {0}:{1} property, {2} property label, {3}:{4} range class,
    # {5}: optional 'sh:maxCount 1 ;'
    property_template = """    
        sh:property [
                sh:path {0}:{1} ;
                sh:name "{2}";
                sh:class {3}:{4} ;
                {5}
                ];
                    """

    # {0}: sdh-short property identifier, {1}: display name, {2}: sh:order
    date_property_template = """
        sh:property [
            sh:path sdh-short:{0} ;
            sh:name "{1}";
            sh:datatype xsd:string;
            sh:order {2};
            sh:maxCount 1;
            ];
            """

    # Date shortcuts added, in this order, to classes whose entityBasicType is 9
    temporal_shortcuts = (
        ('P1', 'at some time within', 3),
        ('P4', 'begins on', 4),
        ('P7', 'ends on', 5),
        ('P3', 'begins after', 6),
        ('P5', 'begins before', 7),
        ('P6', 'ends after', 8),
        ('P8', 'ends before', 9),
    )
    temporal_properties = ''.join(
        date_property_template.format(*shortcut) for shortcut in temporal_shortcuts)

    # Birth-date shortcut added to crm:E21
    birth_date_property = """
        sh:property [
            sh:path sdh-short:P2 ;
            sh:name "has birth date";
            sh:datatype xsd:string;
            sh:maxCount 1;
            sh:order 3;
            ];
                    """

    for profile_id, profile_label in get_project_profiles_list(id_project):

        # JSON values for the classes and the properties of this profile
        json_classes = fetch_json(
            'https://ontome.net/api/classes-profile.json?lang=en&available-in-profile=' + str(profile_id))
        if json_classes is None:
            print('Profile {} skipped: classes could not be fetched'.format(profile_id))
            continue

        json_properties = fetch_json(
            'https://ontome.net/api/properties-profile.json?lang=en&available-in-profile=' + str(profile_id))
        if json_properties is None:
            print('Profile {} skipped: properties could not be fetched'.format(profile_id))
            continue

        # Namespace declarations of classes and properties, de-duplicated and
        # sorted so that two runs produce the same file
        ns_list = sorted({
            '@prefix ' + item["namespacePrefix"] + ': <' + item["namespaceURI"] + '>.'
            for item in json_classes + json_properties
        })
        ns_list_txt = '\n'.join(ns_list)

        # classes dataframe; passing `columns` to the constructor keeps the
        # frame valid when the profile has no class at all
        df_lc = pd.DataFrame(
            [[cla['namespacePrefix'], cla["namespaceURI"], cla["classIdentifierInNamespace"],
              cla['classID'], cla["classLabel"], cla["entityBasicType"], cla["ancestorClasses"]]
             for cla in json_classes],
            columns=['pref', 'ns_uri', 'uri', 'id', 'label', 'type', 'ancestors'])

        # properties dataframe (same remark for a profile without properties)
        df_lp = pd.DataFrame(
            [[prop["propertyDomain"], prop["domainInstancesMinQuantifier"], prop["domainInstancesMaxQuantifier"],
              prop['namespacePrefix'], prop['namespaceURI'],
              prop["propertyIdentifierInNamespace"], prop['propertyLabel'],
              prop["propertyRange"], prop["rangeInstancesMinQuantifier"], prop["rangeInstancesMaxQuantifier"]]
             for prop in json_properties],
            columns=['id_s', 'min_s', 'max_s', 'ns_prefix', 'ns_uri', 'uri', 'label', 'id_t', 'min_t', 'max_t'])

        ## enrich properties

        # get domain classes: the shared column 'uri' becomes 'uri_ps'
        # (property) and 'uri_so' (domain class)
        df_ms = df_lp.merge(df_lc[['pref', 'uri', 'id']], left_on='id_s', right_on='id', suffixes=('_ps', '_so'))
        # get range classes: 'pref' becomes 'pref_pt' (domain) and 'pref_ta'
        # (range), while the unsuffixed 'uri' is the range class identifier
        df_mt = df_ms.merge(df_lc[['pref', 'uri', 'id']], left_on='id_t', right_on='id', suffixes=('_pt', '_ta'))
        # new properties dataframe (copied so that renaming the columns does
        # not act on a view of df_mt)
        p_df = df_mt[['pref_pt', 'uri_so', 'min_s', 'max_s', 'ns_prefix', 'ns_uri',
                      'uri_ps', 'label', 'pref_ta', 'uri', 'min_t', 'max_t']].copy()
        p_df.columns = ['pref_sub', 'uri_sub', 'min_sub', 'max_sub',
                        'ns_prefix', 'ns_pp', 'uri_pp', 'pp_label',
                        'pref_ob', 'uri_ob', 'min_ob', 'max_ob']

        ### Build the text of the corresponding shacl profile

        parts = [header_template.format(profile_id) + ns_list_txt]

        ## for each class
        for _, row in df_lc.iterrows():

            is_temporal = row['type'] == 9

            # label and comment are mandatory except for type 9 classes
            min_count_label_comment = '' if is_temporal else 'sh:minCount 1;'

            parts.append(shape_template.format(row['pref'], row['uri'],
                                               escape_literal(row['label']),
                                               min_count_label_comment))

            ## outgoing properties
            for _, row_p in p_df.iterrows():

                # keep only the properties whose domain is the current class
                if row_p['pref_sub'] != row['pref'] or row_p['uri_sub'] != row['uri']:
                    continue

                max_count = 'sh:maxCount 1 ;' if row_p['max_ob'] == 1 else ''

                parts.append(property_template.format(
                    row_p['ns_prefix'], row_p['uri_pp'],
                    escape_literal(row_p['pp_label']),
                    row_p['pref_ob'], row_p['uri_ob'], max_count))

            ## dates of Temporal Entities and specific classes (Person)
            class_temporal_properties = temporal_properties if is_temporal else ''

            if row['pref'] + ':' + row['uri'] == 'crm:E21':
                person_birth_date = birth_date_property
            else:
                person_birth_date = ''

            # Placeholders are numbered from 0: the former {4}..{11} indices
            # exceeded the eight arguments and raised IndexError.
            parts.append(""" 
            {0}{1}   
            """.format(person_birth_date, class_temporal_properties))

            # closes the node shape statement
            parts.append('.')

        shacl_prof = ''.join(parts)

        # print(shacl_prof)

        ## write file (explicit UTF-8 so labels with non-ASCII characters do
        ## not depend on the platform's default encoding)
        file_address = "../{}/{}-{}.ttl".format(write_folder, profile_id, profile_label)
        with open(file_address, "w", encoding="utf-8") as text_file:
            text_file.write(shacl_prof)





