In [121]:
from google.colab import drive
import numpy as np
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [122]:
!pip install rdflib




In [123]:
import pandas as pd
from rdflib import Graph, Literal, Namespace, URIRef

# Create an RDF graph
g = Graph()

# Define a namespace for your data
ns = Namespace("http://example.org/your_namespace#")

# Specify the directory path where your files are located in Google Drive
directory_path = '/content/drive/My Drive/Hippa part2/data/'

# Specify the CSV file name (replace 'your_data.csv' with your actual file name)
csv_file_name = 'Data.csv'

# Read your CSV data into a DataFrame with the 'latin-1' encoding
df = pd.read_csv(directory_path + 'Data.csv', encoding='latin-1')

# Iterate through the DataFrame rows and convert to RDF triples
for index, row in df.iterrows():
    subject = URIRef(ns + row['Code'])
    description = Literal(row['Description'])
    code_system = Literal(row['CodeSystem'])
    code_type = Literal(row['CodeType'])
    hipaa_value_set_name = Literal(row['HIPPAValueSetName'])

    # Add RDF triples using human-readable predicate names
    g.add((subject, URIRef("CODE_IS_A_SUBTYPE_OF"), code_system))
    g.add((subject, URIRef("REPRESENTING_A"), code_type))
    g.add((subject, URIRef("RELATED_TO"), hipaa_value_set_name))
    g.add((subject, URIRef("DESCRIBES"), description))

# Serialize the RDF graph to Turtle format
rdf_data = g.serialize(format='turtle')

# Save the RDF data to a file
with open('output.ttl', 'w', encoding='utf-8') as f:
    f.write(rdf_data)


In [124]:
import os

# Get the current working directory
current_directory = os.getcwd()

# List the files in the current directory
files_in_directory = os.listdir(current_directory)

files_in_directory


['.config', 'output.ttl', 'drive', 'sample_data']

In [125]:
# Specify a custom directory path and file name
output_file_path = '/content/drive/My Drive/Hippa part2/data/output2.ttl'

# Save the RDF data to the custom file path
with open(output_file_path, 'w', encoding='utf-8') as    f:
    f.write(rdf_data)

In [126]:
try:
    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(rdf_data)
except Exception as e:
    print(f"Error: {e}")


In [127]:
from rdflib.plugins.sparql import prepareQuery

# Define the SPARQL query to retrieve all predicates
query = prepareQuery(
    """
    SELECT DISTINCT ?predicate
    WHERE {
        ?s ?predicate ?o .
    }
    """,
    initNs={"ns": ns}  # Replace "ns" with your namespace object
)

# Execute the query and print the results
for row in g.query(query):
    print(row.predicate)


DESCRIBES
RELATED_TO
REPRESENTING_A
CODE_IS_A_SUBTYPE_OF
