In [None]:
from pathlib import Path

import kuzu
import polars as pl

from ddl import snomed


snomed_data = Path("./data/external/snomed-ips/Snapshot/Terminology/")
kuzu_path = Path("./data/internal/kuzu-db/")

# Reset the on-disk database directory so every run starts from an empty
# database.
if kuzu_path.exists():  # must be called: the bare bound method is always truthy
    for file in kuzu_path.glob("*"):
        # missing_ok=True keeps the tolerant behaviour the original call had
        # (it passed the path itself as a truthy `missing_ok` argument).
        file.unlink(missing_ok=True)
    kuzu_path.rmdir()
# Recreate unconditionally, parents included, so a first run on a fresh
# checkout works the same way as a re-run.
kuzu_path.mkdir(parents=True, exist_ok=True)

db = kuzu.Database(kuzu_path)
conn = kuzu.Connection(db)

# for file in snomed_data.glob("*.txt"):
#     if file.stem.split("_")[1].lower() in ["concept", "relationship"]:
#         print(file.stem)


# Load concepts: read the tab-separated snapshot, parse the YYYYMMDD
# effectiveTime into a date and cast the active flag to boolean, then run the
# statements stored in snomed["concept"] followed by a COPY from the frame.
concept_file = snomed_data / "sct2_Concept_IPSSnapshot_IPST_20240701.txt"
concept_casts = [
    pl.col("effectiveTime").cast(pl.String).str.to_date("%Y%m%d"),
    pl.col("active").cast(pl.Boolean),
]
# The COPY statement refers to the frame by the name `df`, so keep that name.
df = pl.read_csv(concept_file, separator="\t").with_columns(concept_casts)
conn.execute(snomed["concept"] + "COPY concept FROM df;")


# Load relationships. The columns are reordered (sourceId and destinationId
# first) because the Kuzu load below needs them in this order.
relationship_file = snomed_data / "sct2_Relationship_IPSSnapshot_IPST_20240701.txt"
relationship_column_order = [
    "sourceId",
    "destinationId",
    "id",
    "effectiveTime",
    "active",
    "moduleId",
    "relationshipGroup",
    "typeId",
    "characteristicTypeId",
    "modifierId",
]

# The COPY statement refers to the frame by the name `df`, so keep that name.
df = pl.read_csv(relationship_file, separator="\t")
df = df.with_columns(
    [
        pl.col("effectiveTime").cast(pl.String).str.to_date("%Y%m%d"),
        pl.col("active").cast(pl.Boolean),
    ]
)
df = df.select(pl.col(relationship_column_order))

# Drop any previous table, run the statements stored in snomed["relationship"],
# then bulk-load the frame; the three pieces are sent as one query string.
conn.execute(
    "".join(
        [
            "DROP TABLE IF EXISTS relationship;",
            snomed["relationship"],
            "COPY relationship FROM df;",
        ]
    )
)

[<kuzu.query_result.QueryResult at 0x11c2d1110>,
 <kuzu.query_result.QueryResult at 0x11c2d3e50>,
 <kuzu.query_result.QueryResult at 0x10e139850>]

In [23]:
# Standalone DDL and load for the concept table.
#
# The COPY statement below reads the notebook variable `df`. The preceding cell
# leaves `df` bound to the ten-column relationship frame, which does not match
# this table's five columns, so rebuild the concept frame here. This lets the
# cell run correctly in top-to-bottom order.
df = pl.read_csv(
    snomed_data / "sct2_Concept_IPSSnapshot_IPST_20240701.txt", separator="\t"
).with_columns(
    pl.col("effectiveTime").cast(pl.String).str.to_date("%Y%m%d"),
    pl.col("active").cast(pl.Boolean),
)

ddl_concept = """
DROP TABLE IF EXISTS concept;
CREATE NODE TABLE concept(
    id INT64,
    effectiveTime DATE,
    active BOOLEAN,
    moduleId INT64,
    definitionStatusId INT64,
    PRIMARY KEY (id)
);
COPY concept FROM df;
"""

conn.execute(ddl_concept)

[<kuzu.query_result.QueryResult at 0x110759f10>,
 <kuzu.query_result.QueryResult at 0x10ee63290>,
 <kuzu.query_result.QueryResult at 0x10ee60b10>]

In [None]:
ddl_relationship = """
DROP TABLE IF EXISTS relationship;
CREATE NODE TABLE relationship(
id INT64,
effectiveTime DATE,
active BOOLEAN,
moduleId INT64,
sourceId INT64,
destinationId INT64,
relationshipGroup INT64,
typeId INT64,
characteristicTypeId INT64,
modifierId INT64,
"""