In [27]:
import os

# Resolve the base directory used for every relative path in this notebook.
# It starts as the current working directory (not the notebook's own location).
base_dir = os.getcwd()
try:
    # `folder` is an optional variable that may have been defined before this
    # cell runs; when it is set and truthy, paths are resolved beneath it.
    if folder:
        base_dir = os.path.join(base_dir, folder)
except NameError:
    # `folder` was never defined: keep the working directory as the base.
    # Only this case is ignored so that genuine errors (e.g. a non-string
    # `folder`) are no longer silently swallowed.
    pass

In [28]:
!pip install rdflib
!pip install pydicom
from rdflib import *
from hashlib import sha256

In [1]:
import uuid

def generate_unique_uri(base_uri):
    """Build a fresh URIRef by appending a random UUID4 to ``base_uri``.

    Args:
        base_uri: Prefix of the resulting URI; it is rendered with its
            default string formatting and placed directly before the UUID,
            with no separator added.

    Returns:
        A ``URIRef`` of the form ``<base_uri><uuid4>``. Every call yields a
        different value because the UUID is random.
    """
    random_suffix = uuid.uuid4()
    return URIRef("{}{}".format(base_uri, random_suffix))

# Create Source Graph

In [29]:
# Project ontology namespaces: `tbox` holds the terminology (classes and
# properties), `abox` holds the individuals created in this notebook.
tbox = Namespace('http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/tbox#')
abox = Namespace('http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#')

# External vocabularies referenced by the triples below.
# NOTE(review): W3C publishes DCAT under http://www.w3.org/ns/dcat# (http, not
# https). The value is left unchanged here because other graphs/queries may
# rely on it -- confirm and align across the project.
dcat = Namespace('https://www.w3.org/ns/dcat#')
dcterms = Namespace('http://purl.org/dc/terms/')
dqv = Namespace('http://www.w3.org/ns/dqv#')

# Graph that accumulates the source description; the project namespaces get
# the short prefixes `tb` / `ab` when the graph is serialized.
schema = Graph()
schema.bind('tb', tbox)
schema.bind('ab', abox)

## Load Data

In [30]:
import pandas as pd

In [31]:
import pydicom

In [32]:
# Read the DICOM file located under DataProductLayer/DataProduct2/Data.
dicom_file = os.path.join(base_dir, '../../DataProductLayer/DataProduct2/Data/0002.DCM')
ds = pydicom.dcmread(dicom_file)

# Print a few header fields as a quick check that the file was parsed.
# NOTE(review): these fields may identify a patient -- clear the cell output
# before sharing the notebook.
for header_label, header_keyword in (
    ("Patient's Name: ", 'PatientName'),
    ("Patient ID: ", 'PatientID'),
    ("Modality: ", 'Modality'),
    ("Study Date: ", 'StudyDate'),
):
    print(header_label, getattr(ds, header_keyword))

## Data Product Profile

### OWNER

In [33]:
# Location of the metadata CSV, expressed relative to `base_dir`.
metadata_rel_path = '../../DataProductLayer/DataProduct/Metadata/explotaiton.csv'
metadata_path = os.path.join(base_dir, metadata_rel_path)

In [34]:
# Load the metadata table and select the row(s) describing the data product.
# NOTE(review): the row is looked up under the name
# "UPENN-GBM_clinical_info_v2" while the dataset itself is registered further
# down as "dicomimage" -- confirm this is the intended profile row.
meta_df = pd.read_csv(metadata_path)
meta_dp = meta_df.loc[meta_df['name'] == "UPENN-GBM_clinical_info_v2"]

# Fail with an explicit message when no row matches, instead of the opaque
# "index 0 is out of bounds" that `.values[0]` raises on an empty selection.
if meta_dp.empty:
    raise IndexError(
        f"No row named 'UPENN-GBM_clinical_info_v2' found in {metadata_path}"
    )

# Owner of the data product, taken from the first matching row.
owner = meta_dp.owner.values[0]


### NAME

In [35]:
# Local name of the dataset: later cells use it to build the dataset's IRI in
# the `abox` namespace and to derive its SHA-256 identifier.
dataset_name = "dicomimage"
# Echo the value as the cell output.
dataset_name

Add the dataset triples (type, identifier and owner)

In [36]:
# Identifier of the dataset: SHA-256 hex digest of its UTF-8 encoded name, so
# the same name always produces the same identifier.
identifier = sha256(dataset_name.encode('utf-8')).hexdigest()

# Individual representing the dataset in the abox namespace.
dataset_node = abox[dataset_name]

# Declare the individual as a dcat:Dataset.
schema.add((dataset_node, RDF.type, dcat.Dataset))
# Attach the identifier as a literal.
schema.add((dataset_node, dcterms.identifier, Literal(identifier)))
# Attach the owner read from the metadata table as a literal.
schema.add((dataset_node, tbox.owner, Literal(owner)))




### ATTRIBUTES


In [37]:
# Register names exposed by the DICOM dataset object as attributes of the
# dataset individual.
# Only the first `max_inspected_names` entries of dir(ds) are looked at.
# NOTE(review): the cap of 50 is a magic number and dir(ds) also lists Python
# members of the object; pydicom's `Dataset.dir()` lists element keywords
# only -- confirm whether the cap is intentional before changing it.
max_inspected_names = 50
for candidate_name in dir(ds)[:max_inspected_names]:
    # Skip names that do not start with a letter (e.g. underscore-prefixed
    # Python internals) and names that `hasattr` reports as unavailable.
    if not (candidate_name[0].isalpha() and hasattr(ds, candidate_name)):
        continue

    attribute_node = abox[candidate_name]
    # Declare the attribute individual.
    schema.add((attribute_node, RDF.type, tbox.Attribute))
    # Link the dataset to the attribute.
    schema.add((abox[dataset_name], tbox.hasAttribute, attribute_node))
    # Store the attribute's name as a literal data property.
    schema.add((attribute_node, tbox.attribute, Literal(candidate_name)))



## DatasetTypeTemplate

### Format

In [38]:
# Format label recorded for the dataset type template in the next cell.
# NOTE(review): this name shadows Python's built-in `format()` for the rest of
# the session; it is read by the following cell, so rename both together.
format = 'dicom'

In [39]:
# Individual describing the dataset type template used for image data.
image_template = abox.Image

# Declare it as a DatasetTypeTemplate.
schema.add((image_template, RDF.type, tbox.DatasetTypeTemplate))

# Record the format. Item access is used because rdflib's Namespace subclasses
# str, so attribute access `dcterms.format` would resolve to the built-in
# `str.format` method rather than to the term.
schema.add((image_template, dcterms['format'], Literal(format)))

# Link the dataset to its type template.
schema.add((abox[dataset_name], tbox.hasDTT, image_template))


### TechnologyAspects

In [40]:
# Technology-aspects individual of this dataset.
# The type assertion, the link from the dataset and the access descriptor must
# all refer to the same node (abox.TA2); typing a different node would leave
# the node the dataset points to without a class.
technology_aspects = abox.TA2
schema.add((technology_aspects, RDF.type, tbox.TechnologyAspects))

# Link the dataset to its technology aspects.
schema.add((abox[dataset_name], tbox.hasTA, technology_aspects))

# Access descriptor: a fresh, randomly generated IRI in the abox namespace, so
# its value differs on every run of this cell.
acces_uri = generate_unique_uri(abox)
schema.add((technology_aspects, tbox.typeAcces, acces_uri))
schema.add((acces_uri, RDF.type, tbox.Acces))
schema.add((acces_uri, RDFS.label, abox.Static))

# Location of the underlying DICOM file, stored as a literal path built from
# `base_dir` (the '..' segments are kept as written, not normalized).
data_path = os.path.join(base_dir, '../../DataProductLayer/DataProduct2/Data/0002.DCM')
schema.add((acces_uri, tbox.path, Literal(data_path)))



# SAVE SOURCE_GRAPH

In [41]:
# Write the source graph to `Source_Graph_DP2.ttl` inside `base_dir`, using
# the Turtle serialization.
rdf_format = "turtle"
output_path = os.path.join(base_dir, 'Source_Graph_DP2.ttl')
schema.serialize(destination=output_path, format=rdf_format)