In [1]:
import os 
import json 
import zipfile
import rocrate.utils as utils
import rocrate.rocrate as rocrate
from datetime import datetime
import pytest

# Checking RO-Crate Metadata Root Data Entity

### RO-Crate Self Descriptor

In [26]:
def self_descriptor_check(tar_file, extension):
    """\
    Check the self descriptor (metadata file descriptor) of an RO-Crate.

    The ``@graph`` of ``ro-crate-metadata.json`` must contain an entity that
    - has ``CreativeWork`` among its ``@type`` values,
    - has ``@id`` ``ro-crate-metadata.json`` or ``ro-crate-metadata.jsonld``,
    - has ``about`` pointing to ``./``, and
    - has a first ``conformsTo`` value starting with
      ``https://w3id.org/ro/crate/``.

    Please check the requirements details in:
    <https://www.researchobject.org/ro-crate/1.1/root-data-entity.html>

    :param tar_file: directory that contains ``ro-crate-metadata.json``
    :param extension: not used by this check
    :return: ``(NAME, True)`` when a valid descriptor is found, otherwise
        ``(NAME, error_message, False)``
    :raises OSError: if the metadata file cannot be opened
    :raises KeyError: if the metadata has no ``@graph`` key
    """
    NAME = "Self descriptor check"
    error_message = "self descriptor is not provided"
    descriptor_ids = (['ro-crate-metadata.json'], ['ro-crate-metadata.jsonld'])

    metadata_path = os.path.join(tar_file, "ro-crate-metadata.json")
    with open(metadata_path, 'r', encoding="utf-8") as file:
        graph = json.load(file)['@graph']

    for entity in graph:
        # JSON-LD @type may be a single value or an unordered list of values.
        if 'CreativeWork' not in utils.get_norm_value(entity, "@type"):
            continue
        if utils.get_norm_value(entity, "@id") not in descriptor_ids:
            continue
        if utils.get_norm_value(entity, "about") != ['./']:
            continue
        conforms_to = utils.get_norm_value(entity, "conformsTo")
        # A descriptor without conformsTo is invalid, not a reason to crash.
        if conforms_to and conforms_to[0].startswith("https://w3id.org/ro/crate/"):
            return NAME, True
    return NAME, error_message, False

### Direct property of Root Data Entity

In [55]:
def datetime_valid(dt_str):
    """Return True if *dt_str* is an ISO 8601 date/datetime string.

    Validation is delegated to ``datetime.fromisoformat``. A trailing ``Z``
    (UTC designator) is rewritten to ``+00:00`` first, because
    ``fromisoformat`` only accepts the ``Z`` suffix from Python 3.11 onwards.

    :param dt_str: value to validate; anything that is not a string is
        reported as invalid
    :return: ``True`` when the value parses, ``False`` otherwise
    """
    if isinstance(dt_str, str) and dt_str.endswith("Z"):
        dt_str = dt_str[:-1] + "+00:00"
    try:
        datetime.fromisoformat(dt_str)
    except (TypeError, ValueError):
        # TypeError: not a string; ValueError: not a parseable ISO string.
        return False
    return True


def direct_property_check(tar_file, extension):
    """\
    A valid RO-Crate MUST meet the direct property requirements.

    Scans the ``@graph`` of ``ro-crate-metadata.json`` for the first entity
    that has ``Dataset`` among its ``@type`` values, whose ``@id`` ends with
    ``/`` and which carries a ``datePublished``. The first ``datePublished``
    value of that entity must be an ISO 8601 date.

    Please check the requirements details in:
    <https://www.researchobject.org/ro-crate/1.1/root-data-entity.html>

    :param tar_file: directory that contains ``ro-crate-metadata.json``
    :param extension: not used by this check
    :return: ``(NAME, True)`` when the date is valid,
        ``(NAME, error_message[0], False)`` when it is not, and
        ``(NAME, error_message[1], False)`` when no matching entity exists
    :raises OSError: if the metadata file cannot be opened
    :raises KeyError: if the metadata has no ``@graph`` key
    """
    NAME = "Direct property check"
    error_message = ["datePublished is not in ISO 8601 date format", "Directory property of RO-Crate is wrong"]

    metadata_path = os.path.join(tar_file, "ro-crate-metadata.json")
    with open(metadata_path, 'r', encoding="utf-8") as file:
        graph = json.load(file)['@graph']

    # NOTE(review): any Dataset whose @id ends with '/' is considered, not
    # only the entity referenced by the descriptor's `about`.
    for entity in graph:
        # Entities without @type or @id are skipped instead of raising.
        if 'Dataset' not in utils.get_norm_value(entity, '@type'):
            continue
        ids = utils.get_norm_value(entity, '@id')
        if not ids or not ids[0].endswith('/'):
            continue
        dates = utils.get_norm_value(entity, 'datePublished')
        if not dates:
            continue
        # Only the first datePublished value decides the outcome.
        if datetime_valid(dates[0]):
            return NAME, True
        return NAME, error_message[0], False
    return NAME, error_message[1], False
                

# Checking RO-Crate Data Entity

### Referencing file or folder from root data entity

In [56]:
def referencing_check(tar_file, extension):
    """\
    Check that files and folders listed in ``hasPart`` are proper Data Entities.

    Where files or folders are represented as Data Entities in the RO-Crate
    JSON-LD, they MUST be linked to, directly or indirectly, from ``hasPart``
    in the Root Data Entity. This check takes the ``hasPart`` list of the
    first entity that declares one and verifies, for every listed ``@id``:

    - an id ending with ``/`` must resolve to an entity typed ``Dataset``;
    - an id with a file extension must resolve to an entity typed ``File``;
    - ids with neither a trailing ``/`` nor an extension are not checked.

    For more information, please check:
    <https://www.researchobject.org/ro-crate/1.1/root-data-entity.html>

    :param tar_file: directory that contains ``ro-crate-metadata.json``
    :param extension: not used by this check
    :return: ``(NAME, True)`` when every checked part is valid, otherwise
        ``(NAME, message, False)`` naming the first offending part (or
        ``hasPart`` itself when nothing could be checked)
    """
    NAME = "Referencing check"
    error_message = "The referencing {} is wrong"

    _, metadata = rocrate.read_metadata(os.path.join(tar_file, "ro-crate-metadata.json"))

    # Initialised up front so empty metadata cannot leave the name unbound.
    has_part = []
    for entity in metadata.values():
        has_part = utils.get_norm_value(entity, "hasPart")
        if has_part:
            break

    checked = 0
    for part_id in has_part:
        # A trailing '/' always yields an empty extension, so it alone
        # identifies a folder reference.
        if part_id.endswith('/'):
            expected_type = 'Dataset'
        elif os.path.splitext(part_id)[1] != "":
            expected_type = 'File'
        else:
            continue
        checked += 1
        part = metadata.get(part_id)
        # A dangling reference or a missing/incorrect @type is a failure of
        # the check, not a reason to raise.
        if part is None or expected_type not in utils.get_norm_value(part, "@type"):
            return NAME, error_message.format(part_id), False

    if checked:
        return NAME, True
    # Nothing to verify: no hasPart found, or none of its entries was checkable.
    return NAME, error_message.format("hasPart"), False

### Detailed Descriptions of Encodings

In [None]:
def encoding_check(tar_file, extension):
    """
    The details of encoding should meet the requirements.

    For every entity whose ``encodingFormat`` has at least two values, the
    second value is treated as a reference to another entity in the crate and
    that entity's ``@type`` is verified:

    - a URL reference must be typed ``WebSite``;
    - a reference ending with ``/`` must be typed ``Dataset``;
    - a reference with a file extension must be typed ``File``;
    - anything else, including a reference that does not resolve to an
      entity, is reported as wrong.

    Please check more information at:
    <https://www.researchobject.org/ro-crate/1.1/data-entities.html>

    :param tar_file: directory that contains ``ro-crate-metadata.json``
    :param extension: not used by this check
    :return: ``(NAME, True)`` when every reference is valid, otherwise
        ``(NAME, message, False)`` naming the first offending reference
    """
    NAME = "Encoding check"
    error_message = "Encoding in {} is wrong"

    _, metadata = rocrate.read_metadata(os.path.join(tar_file, "ro-crate-metadata.json"))

    for entity in metadata.values():
        encoding = utils.get_norm_value(entity, "encodingFormat")
        if len(encoding) < 2:
            continue
        target_id = encoding[1]
        target = metadata.get(target_id)
        # An unresolved reference or an entity without @type yields an empty
        # list, which fails every membership test below instead of raising.
        target_types = utils.get_norm_value(target, "@type") if target is not None else []

        if utils.is_url(target_id):
            valid = "WebSite" in target_types
        elif target_id.endswith("/"):
            valid = "Dataset" in target_types
        elif os.path.splitext(target_id)[1] != "":
            valid = "File" in target_types
        else:
            valid = False

        if not valid:
            return NAME, error_message.format(target_id), False

    return NAME, True
                
        

# Checking Scripts and Workflows

In [None]:
def scripts_and_workflow_check(tar_file, extension):
    """
    Check the entities typed ``ComputationalWorkflow`` in the RO-Crate.

    Every such entity must also be typed ``File`` and ``SoftwareSourceCode``;
    the first one that is not makes the check fail immediately. The file
    extension of each workflow ``@id`` is compared against the lines of
    ``workflow_extension.txt`` (resolved against the current working
    directory). An unknown extension is appended to that file and produces a
    warning instead of a failure.

    :param tar_file: directory that contains ``ro-crate-metadata.json``
    :param extension: not used by this check
    :return: ``(NAME, True)`` when all workflows have a recognised extension,
        ``(NAME, warning)`` with the first warning when an extension had to
        be added, and ``(NAME, error_message, False)`` when a workflow has
        the wrong types or the crate declares no workflow at all
    :raises OSError: if ``workflow_extension.txt`` cannot be read or written
    """
    NAME = "Scripts and workflow check"
    error_message = "scripts and workflow is wrong"
    warning_message = "Warning: {} is an extension which is not in the recognised workflow extension file. {} has been added to the extension file."
    extension_file = "workflow_extension.txt"

    _, metadata = rocrate.read_metadata(os.path.join(tar_file, "ro-crate-metadata.json"))

    known_extensions = None  # loaded on first use, reloaded after each append
    first_warning = None
    found_workflow = False

    for entity in metadata.values():
        types = utils.get_norm_value(entity, "@type")
        if "ComputationalWorkflow" not in types:
            continue
        if "File" not in types or "SoftwareSourceCode" not in types:
            return NAME, error_message, False
        found_workflow = True

        workflow_ext = os.path.splitext(utils.get_norm_value(entity, "@id")[0])[1]
        if known_extensions is None:
            with open(extension_file, "r") as file:
                known_extensions = file.read().splitlines()
        if workflow_ext in known_extensions:
            continue

        # Only open the file for writing when there is something to add.
        with open(extension_file, "a") as file:
            file.write("\n%s" % workflow_ext)
        # Force a re-read so later workflows see the file as it is on disk.
        known_extensions = None
        if first_warning is None:
            first_warning = warning_message.format(workflow_ext, workflow_ext)

    if first_warning is not None:
        return NAME, first_warning
    if found_workflow:
        return NAME, True
    return NAME, error_message, False