In [None]:
from google.colab import drive
import json
import pandas as pd
import re

# Attach Google Drive to the Colab runtime; the metadata paths used in this
# notebook live below this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

# Human-readable labels of the DCAT schema properties (no namespace URLs).
# An entry of the form "a or b" lists alternative names for one property.
# Every entry must end with a comma: adjacent string literals without one are
# silently concatenated by Python into a single bogus entry.
dcat_schema_properties = [
    "access rights",
    "conforms to",
    "contact point",
    "creator",
    "description",
    "has policy",
    "identifier",
    "is referenced by",
    "theme or category",  # either theme or category
    "title",
    "keyword or tag or keywords",  # either keyword, tag, or keywords
    "landing page",
    "license",
    "language",
    "relation",
    "rights",
    "qualified relation",
    "publisher",
    "release date",
    "type or genre",  # either type or genre
    "update or modification date",  # either update or modification date
    "qualified attribution",
    "has current version",
    "has version",
    "previous version",
    "replaces",
    "status",
    "version",
    "version notes",
    "first",
    "last",
    "previous",
    "frequency",
    "distribution",
    "in series",
    "spatial coverage",
    "geographic coverage",
    "spatial resolution",
    "temporal coverage",
    "temporal resolution",
    "was generated by",
]

# JSON metadata files on Google Drive to check (up to 10 files), identified
# by the numeric suffix of their DCAT_<id>.json file name.
# NOTE(review): id 38201 is listed twice, so that file is processed twice and
# produces two rows - confirm whether a different file was intended.
_thesis_dir = "/content/drive/My Drive/Thesis_23"
_dataset_ids = (38201, 35507, 3504, 174, 176, 2344, 30261, 35546, 38201, 38385)
metadata_paths = [
    f"{_thesis_dir}/DCAT_{dataset_id}.json" for dataset_id in _dataset_ids
]

def _normalize(name):
    """Return *name* reduced to its lower-cased ASCII letters.

    Spaces, digits and punctuation are dropped so that e.g. "contactPoint",
    "contact_point" and "contact point" all compare equal.
    """
    return re.sub(r'[^a-zA-Z]', '', name).lower()


# Lookup from normalised name -> schema entry, built once instead of
# re-normalising every schema entry for every metadata key.
# Entries written as "a or b" are alternatives, so each alternative is
# registered separately; the full label is registered as well. setdefault
# keeps the first schema entry when two entries normalise to the same key.
schema_lookup = {}
for dcat_property in dcat_schema_properties:
    for label in dcat_property.split(" or ") + [dcat_property]:
        normalized_label = _normalize(label)
        if normalized_label:
            schema_lookup.setdefault(normalized_label, dcat_property)

results = []

for metadata_path in metadata_paths:
    # Load the JSON metadata file (JSON text is UTF-8 by specification)
    with open(metadata_path, 'r', encoding='utf-8') as json_file:
        metadata = json.load(json_file)

    # Get the properties (top-level keys) from the JSON metadata
    metadata_properties = list(metadata.keys())

    # Schema entries that at least one metadata key matches
    matched_terms = set()
    for prop in metadata_properties:
        # Keep only the local name: strip a URL path ("/"), a URL fragment
        # ("#") or a compact-IRI prefix ("dct:title").
        prop_without_url = re.split(r'[/#:]', prop)[-1]
        dcat_property = schema_lookup.get(_normalize(prop_without_url))
        if dcat_property is not None:
            matched_terms.add(dcat_property)

    # Present / missing schema entries for this file, in schema order
    present_terms = [term for term in dcat_schema_properties if term in matched_terms]
    missing_terms = [term for term in dcat_schema_properties if term not in matched_terms]

    # One row per file: a check mark for every present term, a cross otherwise
    results.append({
        'Metadata File': metadata_path,
        **{term: '✓' if term in matched_terms else '❌' for term in dcat_schema_properties},
    })

# Create a Pandas DataFrame from the results
df = pd.DataFrame(results)

# Display the DataFrame
df


Mounted at /content/drive


Unnamed: 0,Metadata File,access rights,conforms to,contact point,creator,description,has policy,identifier,is referenced by,theme or category,...,has current version,has version,previous version,replaces,status,version,version notes,first,last,previousfrequencydistributionin seriesspatial coveragegeographic coveragespatial resolutiontemporal coveragetemporal resolutionwas generated by
0,/content/drive/My Drive/Thesis_23/DCAT_38201.json,❌,❌,❌,✓,✓,❌,✓,❌,❌,...,❌,❌,❌,❌,❌,❌,❌,❌,❌,❌
1,/content/drive/My Drive/Thesis_23/DCAT_35507.json,❌,❌,❌,✓,✓,❌,✓,❌,❌,...,❌,❌,❌,❌,❌,❌,❌,❌,❌,❌
2,/content/drive/My Drive/Thesis_23/DCAT_3504.json,❌,❌,❌,✓,✓,❌,✓,❌,❌,...,❌,❌,❌,❌,❌,❌,❌,❌,❌,❌
3,/content/drive/My Drive/Thesis_23/DCAT_174.json,❌,❌,❌,✓,✓,❌,✓,❌,❌,...,❌,❌,❌,❌,❌,❌,❌,❌,❌,❌
4,/content/drive/My Drive/Thesis_23/DCAT_176.json,❌,❌,❌,✓,✓,❌,✓,❌,❌,...,❌,❌,❌,❌,❌,❌,❌,❌,❌,❌
5,/content/drive/My Drive/Thesis_23/DCAT_2344.json,❌,❌,❌,✓,✓,❌,✓,❌,❌,...,❌,❌,❌,❌,❌,❌,❌,❌,❌,❌
6,/content/drive/My Drive/Thesis_23/DCAT_30261.json,❌,❌,❌,✓,✓,❌,✓,❌,❌,...,❌,❌,❌,❌,❌,❌,❌,❌,❌,❌
7,/content/drive/My Drive/Thesis_23/DCAT_35546.json,❌,❌,❌,✓,✓,❌,✓,❌,❌,...,❌,❌,❌,❌,❌,❌,❌,❌,❌,❌
8,/content/drive/My Drive/Thesis_23/DCAT_38201.json,❌,❌,❌,✓,✓,❌,✓,❌,❌,...,❌,❌,❌,❌,❌,❌,❌,❌,❌,❌
9,/content/drive/My Drive/Thesis_23/DCAT_38385.json,❌,❌,❌,✓,✓,❌,✓,❌,❌,...,❌,❌,❌,❌,❌,❌,❌,❌,❌,❌
