## Prepare example SHACL profiles from JSON API

In [1]:
import requests
import pprint
import pandas as pd
import json

### Récupérer la liste des id des profils d'un projet

In [2]:
# OntoME project whose profiles are listed; kept as a string because it is
# spliced directly into the query string below.
project_number = '6'
project_url = f"https://ontome.net/api/profiles.json?lang=en&selected-by-project={project_number}"

In [5]:
# Fetch the profiles selected by the project. On any HTTP/network/JSON
# failure the error is printed and an empty list is used, so the statements
# below never run against an undefined (or stale) `json_profiles`.
try:
    response = requests.get(project_url, timeout=30)
    response.raise_for_status()  # treat 4xx/5xx answers as failures
    json_profiles = response.json()
except (requests.RequestException, ValueError) as e:
    print(e)
    json_profiles = []

# Keep only the profile ids, then show how many there are and the first three.
json_profiles_list = [p['profileID'] for p in json_profiles]
print(len(json_profiles_list))
print(json_profiles_list[:3])

92
[5, 8, 12]


In [10]:
def get_project_profiles_list(id_project):
    """Return the ids of the OntoME profiles selected by a project.

    Queries the OntoME ``profiles.json`` endpoint with
    ``selected-by-project=<id_project>`` and collects the ``profileID`` of
    every entry of the returned JSON list.

    Args:
        id_project: OntoME project identifier (int or str); it is converted
            with ``str()`` before being appended to the URL.

    Returns:
        list: the ``profileID`` values in the order returned by the API, or
        an empty list when the request fails, the server answers with an
        HTTP error status, or the body is not valid JSON (the error is
        printed in those cases).
    """
    project_url = "https://ontome.net/api/profiles.json?lang=en&selected-by-project=" + str(id_project)

    try:
        # A timeout keeps the call from blocking forever on a stalled server.
        response = requests.get(project_url, timeout=30)
        response.raise_for_status()
        json_profiles = response.json()
    except (requests.RequestException, ValueError) as e:
        # Previously the error was printed and the code then crashed on the
        # undefined `json_profiles`; report it and return an empty result.
        print(e)
        return []

    # Build the list of profile ids.
    return [p['profileID'] for p in json_profiles]


In [42]:
# Project whose profiles are fetched through the helper function.
id_project = 6
json_profiles_list = get_project_profiles_list(id_project)

# Sanity check: number of profiles returned and the first three ids.
profile_count = len(json_profiles_list)
first_profile_ids = json_profiles_list[:3]
print(profile_count)
print(first_profile_ids)

92
[5, 8, 12]


In [40]:
# Display the complete list of profile ids held in `json_profiles_list`.
print(json_profiles_list)

[5, 8, 12, 15, 16, 17, 20, 21, 28, 30, 31, 32, 34, 35, 36, 73, 75, 79, 85, 89, 95, 103, 105, 109, 113, 115, 117, 121, 126, 128, 138, 140, 142, 144, 146, 148, 150, 158, 166, 168, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 258, 260, 262, 274, 276, 278, 280, 282, 284, 286, 488]


### Créer un fichier SHACL par profil

In [41]:
def escape_ttl(value):
    """Escape a value so it can sit inside a double-quoted Turtle string."""
    return (str(value)
            .replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\n', '\\n')
            .replace('\r', '\\r'))


# Build one SHACL (Turtle) file per profile; only the first five profiles of
# `json_profiles_list` are processed by this cell.
for profile_id in json_profiles_list[:5]:

    # URLs of the classes and properties available in this profile
    profile_classes_url = 'https://ontome.net/api/classes-profile.json?lang=en&available-in-profile=' + str(profile_id)
    profile_properties_url = 'https://ontome.net/api/properties-profile.json?lang=en&available-in-profile=' + str(profile_id)

    # Get the JSON values for the classes and properties of this profile.
    try:
        response = requests.get(profile_classes_url, timeout=30)
        response.raise_for_status()
        json_classes = response.json()

        response = requests.get(profile_properties_url, timeout=30)
        response.raise_for_status()
        json_properties = response.json()
    except (requests.RequestException, ValueError) as e:
        # Skip this profile: carrying on would reuse the data fetched for the
        # previous profile (or hit an undefined name on the first iteration).
        print(e)
        continue

    # '@prefix' declarations of every namespace used by classes and properties;
    # sorted so that the generated file is identical from one run to the next.
    classes_prefixes = sorted({'@prefix ' + cla["namespacePrefix"] + ': <' + cla["namespaceURI"] + '>.'
                               for cla in json_classes})
    properties_prefixes = sorted({'@prefix ' + prop["namespacePrefix"] + ': <' + prop["namespaceURI"] + '>.'
                                  for prop in json_properties})
    ns_list = sorted(set(classes_prefixes + properties_prefixes))
    ns_list_txt = '\n'.join(ns_list)

    # Classes dataframe. Column names are passed to the constructor so that an
    # empty API answer yields an empty frame instead of a length-mismatch error.
    df_lc = pd.DataFrame(
        [[cla['namespacePrefix'], cla["namespaceURI"], cla["classIdentifierInNamespace"],
          cla['classID'], cla["classLabel"], cla["entityBasicType"], cla["ancestorClasses"]]
         for cla in json_classes],
        columns=['pref', 'ns_uri', 'uri', 'id', 'label', 'type', 'ancestors'])

    # Properties dataframe (same remark about the column names).
    df_lp = pd.DataFrame(
        [[prop["propertyDomain"], prop["domainInstancesMinQuantifier"], prop["domainInstancesMaxQuantifier"],
          prop['namespacePrefix'], prop['namespaceURI'],
          prop["propertyIdentifierInNamespace"], prop['propertyLabel'],
          prop["propertyRange"], prop["rangeInstancesMinQuantifier"], prop["rangeInstancesMaxQuantifier"]]
         for prop in json_properties],
        columns=['id_s', 'min_s', 'max_s', 'ns_prefix', 'ns_uri', 'uri', 'label', 'id_t', 'min_t', 'max_t'])

    ## Enrich the properties with the prefix/identifier of their classes.
    # Both merges are inner joins, so a property whose domain or range class
    # is not among the profile's classes is dropped.

    # Domain classes: only 'uri' overlaps -> 'uri_ps' (property), 'uri_so' (domain class)
    df_ms = df_lp.merge(df_lc[['pref', 'uri', 'id']], left_on='id_s', right_on='id', suffixes=('_ps', '_so'))
    # Range classes: 'pref' and 'id' overlap -> 'pref_pt' (domain), 'pref_ta' (range);
    # the range class identifier arrives as plain 'uri'.
    df_mt = df_ms.merge(df_lc[['pref', 'uri', 'id']], left_on='id_t', right_on='id', suffixes=('_pt', '_ta'))

    # New properties dataframe with explicit subject / property / object names
    # (copied so that renaming the columns does not touch `df_mt`).
    p_df = df_mt[['pref_pt', 'uri_so', 'min_s', 'max_s', 'ns_prefix', 'ns_uri', 'uri_ps', 'label', 'pref_ta', 'uri', 'min_t', 'max_t']].copy()
    p_df.columns = ['pref_sub', 'uri_sub', 'min_sub', 'max_sub',
                    'ns_prefix', 'ns_pp', 'uri_pp', 'pp_label',
                    'pref_ob', 'uri_ob', 'min_ob', 'max_ob']

    ### Assemble the SHACL profile text.
    # The pieces are collected in a list and joined once at the end.
    # NOTE(review): the `sdh-short` namespace below ends in "P1", so
    # `sdh-short:P1` expands to ".../shortcuts/P1P1" and `sdh-short:P2` to
    # ".../shortcuts/P1P2" — confirm whether the namespace should end at
    # ".../shortcuts/".
    shacl_parts = ["""### SHACL OntoME Profile {} \n\n

    @prefix sdh-shacl: <https://sdhss.org/shacl/profiles/>.
    @prefix sh: <http://www.w3.org/ns/shacl#> .
    @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
    @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix sdh-short: <https://sdhss.org/ontology/shortcuts/P1>.
    """.format(profile_id) + ns_list_txt]

    ## One node shape per class
    for _, row in df_lc.iterrows():

        # Classes whose entityBasicType is 9 get the "Standard Date-Time"
        # property and no minCount on label/comment (presumably temporal
        # entities — confirm against the OntoME API).
        if row['type'] == 9:
            min_count_label_comment = ''
            temporal_properties_standard_time = """
            sh:property [
            sh:path sdh-short:P1 ;
            sh:name "Standard Date-Time";
            sh:datatype xsd:string;
            sh:order 3;
            ];
            """
        else:
            min_count_label_comment = 'sh:minCount 1;'
            temporal_properties_standard_time = ''

        # The class crm:E21 additionally gets a birth-date property.
        if row['pref'] + ':' + row['uri'] == 'crm:E21':
            person_birth_date = """
        sh:property [
            sh:path sdh-short:P2 ;
            sh:name "has birth date";
            sh:datatype xsd:string;
            sh:order 3;
            ];
            """
        else:
            person_birth_date = ''

        shacl_parts.append(""" \n
    sdh-shacl:{0}_{1}_Shape a sh:NodeShape ;
        sh:targetClass {0}:{1} ;
        sh:name "{2}" ;

        sh:property [
            sh:path rdfs:label ;
            sh:name "Label";
            sh:datatype xsd:string;
            sh:order 1;
            {3}
            sh:maxCount 1;
            ];

        sh:property [
            sh:path rdfs:comment ;
            sh:name "Description";
            sh:datatype rdf:HTML;
            sh:order 2;
            {3}
            ];
        {5}
        {4}
        """.format(row['pref'], row['uri'], escape_ttl(row['label']),
                   min_count_label_comment,
                   temporal_properties_standard_time, person_birth_date))

        ## Outgoing properties: those whose domain is the current class
        outgoing = p_df[(p_df['pref_sub'] == row['pref']) & (p_df['uri_sub'] == row['uri'])]
        for _, row_p in outgoing.iterrows():

            # A range-side maximum quantifier of 1 becomes sh:maxCount 1.
            max_count = 'sh:maxCount 1 ;' if row_p['max_ob'] == 1 else ''

            shacl_parts.append("""
    sh:property [
            sh:path {0}:{1} ;
            sh:name "{2}";
            sh:class {3}:{4} ;
            {5}
            ];
            """.format(row_p['ns_prefix'], row_p['uri_pp'],
                       escape_ttl(row_p['pp_label']), row_p['pref_ob'], row_p['uri_ob'], max_count))

        # Close the node shape statement.
        shacl_parts.append('.')

    shacl_prof = ''.join(shacl_parts)

    # print(shacl_prof)

    ## Write the file (Turtle documents are UTF-8 encoded).
    file_address = "../geovistory_profiles/shacl-profile-{}.ttl".format(profile_id)
    with open(file_address, "w", encoding="utf-8") as text_file:
        text_file.write(shacl_prof)







## Fonction unique

In [48]:
# Turtle header of every generated file; `{}` receives the profile id.
# NOTE(review): the `sdh-short` namespace ends in "P1", so `sdh-short:P1`
# expands to ".../shortcuts/P1P1" and `sdh-short:P2` to ".../shortcuts/P1P2" —
# confirm whether the namespace should end at ".../shortcuts/".
_SHACL_HEADER = """### SHACL OntoME Profile {} \n\n

@prefix sdh-shacl: <https://sdhss.org/shacl/profiles/>.
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sdh-short: <https://sdhss.org/ontology/shortcuts/P1>.
"""

# Node shape of one class: {0} prefix, {1} identifier, {2} label,
# {3} optional minCount, {4} date-time property, {5} birth-date property.
_NODE_SHAPE = """ \n
sdh-shacl:{0}_{1}_Shape a sh:NodeShape ;
    sh:targetClass {0}:{1} ;
    sh:name "{2}" ;

    sh:property [
        sh:path rdfs:label ;
        sh:name "Label";
        sh:datatype xsd:string;
        sh:order 1;
        {3}
        sh:maxCount 1;
        ];

    sh:property [
        sh:path rdfs:comment ;
        sh:name "Description";
        sh:datatype rdf:HTML;
        sh:order 2;
        {3}
        ];
    {5}
    {4}
"""

# Property emitted for classes whose entityBasicType is 9.
_STANDARD_TIME_PROPERTY = """
    sh:property [
        sh:path sdh-short:P1 ;
        sh:name "Standard Date-Time";
        sh:datatype xsd:string;
        sh:order 3;
        ];
"""

# Property emitted for the class crm:E21.
_BIRTH_DATE_PROPERTY = """
    sh:property [
        sh:path sdh-short:P2 ;
        sh:name "has birth date";
        sh:datatype xsd:string;
        sh:order 3;
        ];
"""

# Outgoing property of a class: {0}:{1} property, {2} label,
# {3}:{4} range class, {5} optional maxCount.
_OUTGOING_PROPERTY = """
    sh:property [
        sh:path {0}:{1} ;
        sh:name "{2}";
        sh:class {3}:{4} ;
        {5}
        ];
"""


def _fetch_ontome_json(url):
    """Return the decoded JSON body of *url*, or None (error printed) on failure."""
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        print(e)
        return None


def _turtle_literal(value):
    """Escape *value* for use inside a double-quoted Turtle string literal."""
    return (str(value)
            .replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\n', '\\n')
            .replace('\r', '\\r'))


def _prefix_declarations(items):
    """Return the set of '@prefix' lines for the namespaces used by *items*."""
    return {'@prefix ' + item["namespacePrefix"] + ': <' + item["namespaceURI"] + '>.'
            for item in items}


def _build_profile_frames(json_classes, json_properties):
    """Return (classes dataframe, properties dataframe enriched with class names)."""
    # Column names go to the constructor so an empty API answer yields an
    # empty frame instead of a length-mismatch error.
    df_lc = pd.DataFrame(
        [[cla['namespacePrefix'], cla["namespaceURI"], cla["classIdentifierInNamespace"],
          cla['classID'], cla["classLabel"], cla["entityBasicType"], cla["ancestorClasses"]]
         for cla in json_classes],
        columns=['pref', 'ns_uri', 'uri', 'id', 'label', 'type', 'ancestors'])

    df_lp = pd.DataFrame(
        [[prop["propertyDomain"], prop["domainInstancesMinQuantifier"], prop["domainInstancesMaxQuantifier"],
          prop['namespacePrefix'], prop['namespaceURI'],
          prop["propertyIdentifierInNamespace"], prop['propertyLabel'],
          prop["propertyRange"], prop["rangeInstancesMinQuantifier"], prop["rangeInstancesMaxQuantifier"]]
         for prop in json_properties],
        columns=['id_s', 'min_s', 'max_s', 'ns_prefix', 'ns_uri', 'uri', 'label', 'id_t', 'min_t', 'max_t'])

    class_keys = df_lc[['pref', 'uri', 'id']]
    # Domain classes: only 'uri' overlaps -> 'uri_ps' (property), 'uri_so' (domain class).
    df_ms = df_lp.merge(class_keys, left_on='id_s', right_on='id', suffixes=('_ps', '_so'))
    # Range classes: 'pref' and 'id' overlap -> 'pref_pt' (domain), 'pref_ta' (range);
    # the range class identifier arrives as plain 'uri'.
    # Both merges are inner joins: properties whose domain or range class is
    # not among the profile's classes are dropped.
    df_mt = df_ms.merge(class_keys, left_on='id_t', right_on='id', suffixes=('_pt', '_ta'))

    # Copy before renaming so the column assignment never touches `df_mt`.
    p_df = df_mt[['pref_pt', 'uri_so', 'min_s', 'max_s', 'ns_prefix', 'ns_uri',
                  'uri_ps', 'label', 'pref_ta', 'uri', 'min_t', 'max_t']].copy()
    p_df.columns = ['pref_sub', 'uri_sub', 'min_sub', 'max_sub',
                    'ns_prefix', 'ns_pp', 'uri_pp', 'pp_label',
                    'pref_ob', 'uri_ob', 'min_ob', 'max_ob']
    return df_lc, p_df


def _class_shape(row, p_df):
    """Return the Turtle node shape of one class row, outgoing properties included."""
    # entityBasicType 9: no minCount on label/comment, plus the date-time
    # property (presumably temporal entities — confirm against the OntoME API).
    if row['type'] == 9:
        min_count_label_comment = ''
        standard_time = _STANDARD_TIME_PROPERTY
    else:
        min_count_label_comment = 'sh:minCount 1;'
        standard_time = ''

    birth_date = _BIRTH_DATE_PROPERTY if row['pref'] + ':' + row['uri'] == 'crm:E21' else ''

    parts = [_NODE_SHAPE.format(row['pref'], row['uri'], _turtle_literal(row['label']),
                                min_count_label_comment, standard_time, birth_date)]

    # Outgoing properties: those whose domain is the current class.
    outgoing = p_df[(p_df['pref_sub'] == row['pref']) & (p_df['uri_sub'] == row['uri'])]
    for _, row_p in outgoing.iterrows():
        # A range-side maximum quantifier of 1 becomes sh:maxCount 1.
        max_count = 'sh:maxCount 1 ;' if row_p['max_ob'] == 1 else ''
        parts.append(_OUTGOING_PROPERTY.format(
            row_p['ns_prefix'], row_p['uri_pp'], _turtle_literal(row_p['pp_label']),
            row_p['pref_ob'], row_p['uri_ob'], max_count))

    parts.append('.')  # closes the node shape statement
    return ''.join(parts)


def _build_shacl_profile(profile_id, json_classes, json_properties):
    """Return the complete Turtle document of one profile."""
    # Sorted so the generated file is identical from one run to the next.
    ns_list_txt = '\n'.join(sorted(_prefix_declarations(json_classes)
                                   | _prefix_declarations(json_properties)))
    df_lc, p_df = _build_profile_frames(json_classes, json_properties)

    parts = [_SHACL_HEADER.format(profile_id), ns_list_txt]
    parts.extend(_class_shape(row, p_df) for _, row in df_lc.iterrows())
    return ''.join(parts)


def write_profile_shapes_for_project(id_project, max_profiles=7,
                                     output_dir="../geovistory_profiles"):
    """Write one SHACL (Turtle) file per OntoME profile of a project.

    The profile ids come from ``get_project_profiles_list``. For each profile
    the classes and properties are fetched from the OntoME API and turned into
    a file ``shacl-profile-<profile_id>.ttl`` holding one ``sh:NodeShape`` per
    class. A profile whose data cannot be fetched is skipped (the error is
    printed) instead of being written with the previous profile's data.

    Args:
        id_project: OntoME project identifier.
        max_profiles: number of profiles, taken from the start of the list,
            to process; ``None`` processes all of them. Defaults to 7, the
            limit that used to be hard-coded.
        output_dir: existing directory receiving the files. Defaults to the
            path that used to be hard-coded.

    Returns:
        None. The result is the set of files written to ``output_dir``.
    """
    json_profiles_list = get_project_profiles_list(id_project)
    if max_profiles is not None:
        json_profiles_list = json_profiles_list[:max_profiles]

    for profile_id in json_profiles_list:
        json_classes = _fetch_ontome_json(
            'https://ontome.net/api/classes-profile.json?lang=en&available-in-profile=' + str(profile_id))
        if json_classes is None:
            continue

        json_properties = _fetch_ontome_json(
            'https://ontome.net/api/properties-profile.json?lang=en&available-in-profile=' + str(profile_id))
        if json_properties is None:
            continue

        shacl_prof = _build_shacl_profile(profile_id, json_classes, json_properties)

        # Turtle documents are UTF-8 encoded; do not rely on the platform default.
        file_address = "{}/shacl-profile-{}.ttl".format(output_dir, profile_id)
        with open(file_address, "w", encoding="utf-8") as text_file:
            text_file.write(shacl_prof)







In [49]:
# Generate the SHACL profile files for the project stored in `id_project`.
write_profile_shapes_for_project(id_project)