# Example Cypher Read of RTX-KG2 with Kuzu

Related example found online at: [https://colab.research.google.com/drive/1XGZf2xhFwvNOFCXVCi8pAPvpu0NOMmzi](https://colab.research.google.com/drive/1XGZf2xhFwvNOFCXVCi8pAPvpu0NOMmzi)

In [1]:
import os
import pathlib
import tarfile

import kuzu

from notebooks.rtx_kg2_functions import download_file, extract_tar_gz

In [2]:
# Locations used by the rest of the notebook: the local data directory, the
# remote archive to download, and the path of the database inside the local
# data directory.
target_dir = "../data"
source_data_url = "https://github.com/CU-DBMI/rtx-kg2-gateway/releases/download/v0.0.1/kg2c_lite_2.8.4.full.with-metanames.dataset.kuzu.tar.gz"
target_database_path = f"{target_dir}/kg2c_lite_2.8.4.full.with-metanames.dataset.kuzu"

In [3]:
# Make sure the local data directory exists.
# parents=True also creates any missing intermediate directories, so the cell
# keeps working if target_dir is pointed at a nested path; exist_ok=True makes
# re-running the cell harmless when the directory is already there.
pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)

In [4]:
# Naive existence check: the archive is only downloaded and extracted when the
# database directory is not already present, so repeat runs can skip that work.
database_dir = pathlib.Path(target_database_path)
if not database_dir.is_dir():
    downloaded_file = download_file(url=source_data_url, download_dir=target_dir)
    archive_path = f"{target_dir}/{downloaded_file}"
    extract_dir = extract_tar_gz(tar_gz_path=archive_path, output_dir=target_dir)

In [5]:
# Open the Kuzu database located at target_database_path and create a
# connection to it; kz_conn is the handle the query cells use to run Cypher.
db = kuzu.Database(target_database_path)
kz_conn = kuzu.Connection(db)

In [6]:
# List the tables defined in the database and display them as a DataFrame.
show_tables_result = kz_conn.execute("CALL SHOW_TABLES() RETURN *;")
show_tables_result.get_as_df()

Unnamed: 0,name,type,comment
0,produces_Gene_NucleicAcidEntity,REL,
1,treats_Protein_Cell,REL,
2,overlaps_Gene_GeneFamily,REL,
3,affects_CellularComponent_Cell,REL,
4,treats_ClinicalAttribute_PopulationOfIndividua...,REL,
...,...,...,...
12594,disrupts_Drug_IndividualOrganism,REL,
12595,has_part_MolecularActivity_MolecularActivity,REL,
12596,affects_Phenomenon_Cell,REL,
12597,causes_Drug_InformationContentEntity,REL,


In [7]:
# Example lookup: return the id, name and categories of the Disease node
# whose name is "Down syndrome", displayed as a DataFrame.
disease_query = """
    MATCH (d:Disease)
    WHERE d.name = "Down syndrome"
    RETURN d.id, d.name, d.all_categories;
    """
kz_conn.execute(disease_query).get_as_df()

Unnamed: 0,d.id,d.name,d.all_categories
0,MONDO:0008608,Down syndrome,[biolink:Disease]
