# Create a Database of Oncological Entities Based on Unstructured Notes
In this notebook, we build a database of entities out of the terms that the previous notebooks extracted from the notes. 
This database can be used for dashboarding using [Databricks SQL](https://databricks.com/product/databricks-sql). 

In [None]:
# Root folder that the later cells read datasets from and store tables under.
# The trailing slash matters: the database path is built by appending to it directly.
delta_path = "/FileStore/HLS/nlp/delta/jsl/"

## 1. Create Temporary Views

In [None]:
# Register every upstream dataset as a temporary view so the SQL cells below
# can query it by name.
#
# delta_path is defined with a trailing "/", so joining it with "/silver/..."
# produced ".../jsl//silver/..." paths. Strip the trailing slash once and join
# explicitly; this works whether or not delta_path ends with "/".
_delta_root = delta_path.rstrip('/')

# temp view name -> dataset location relative to the delta root
_temp_views = {
    'icd10Hcc': 'silver/icd10-hcc-df',
    'bestIcdMapped': 'gold/best-icd-mapped',
    'rxnormRes': 'gold/rxnorm-res-cleaned',
    'rxnormCodeGreedy': 'silver/rxnorm-code-greedy-res',
    'temporalRe': 'silver/temporal-re',
    'bodypartRelationships': 'silver/bodypart-relationships',
    'cpt': 'silver/cpt',
    'assertion': 'silver/assertion',
}

for _view_name, _relative_path in _temp_views.items():
    # load() uses the session's default data source format.
    spark.read.load(f"{_delta_root}/{_relative_path}").createOrReplaceTempView(_view_name)

## 2. Create the Database

In [None]:
# Snake-case database identifier; also used as the folder name for its tables.
database_name = 'jsl_onc'

# Same identifier with each underscore-separated part capitalised and joined
# ('jsl_onc' -> 'JslOnc'); this is the name used in the SQL statements.
DatabaseName = ''.join(map(str.capitalize, database_name.split('_')))

# Storage location of the database, directly under the delta root
# (delta_path is expected to end with "/").
database_path = f"{delta_path}tables/{database_name}"

print(f"{DatabaseName} database tables will be stored in {database_path}")

In [None]:
# Rebuild the database from scratch, then make it the current database so the
# table-creation cells below can use unqualified table names.
# NOTE: DROP ... CASCADE also drops every table inside the database.
_drop_statement = f"DROP DATABASE IF EXISTS {DatabaseName} CASCADE;"
_create_statement = f"CREATE DATABASE IF NOT EXISTS {DatabaseName} LOCATION '{database_path}'"
_use_statement = f"USE {DatabaseName};"

sql(_drop_statement)
sql(_create_statement)
sql(_use_statement)

## 3. Create Tables

In [None]:
-- Persist the rxnormRes temp view as the Rxnorm_Res table.
-- note_id is the MD5 hash of the note path; the source column `drugs` is exposed as `drug`.
CREATE OR REPLACE TABLE Rxnorm_Res AS (
  SELECT
    md5(path) AS note_id,
    path,
    confidence,
    drug_chunk,
    rxnorm_code,
    drugs AS drug
  FROM rxnormRes
);

In [None]:
-- Persist the cpt temp view as the CPT table, keyed by note_id (MD5 hash of the note path).
-- NOTE(review): the table and the temp view share the name `cpt`; the unqualified
-- name in FROM is expected to resolve to the temp view -- confirm if either is renamed.
CREATE OR REPLACE TABLE CPT AS (
  SELECT
    md5(path) AS note_id,
    path,
    confidence,
    chunks,
    entity,
    cpt_code,
    cpt
  FROM cpt
)

In [None]:
-- Persist the assertion temp view as the ASSERTION table, keyed by note_id
-- (MD5 hash of the note path).
-- NOTE(review): the table and the temp view share the name `assertion`; the unqualified
-- name in FROM is expected to resolve to the temp view -- confirm if either is renamed.
CREATE OR REPLACE TABLE ASSERTION AS (
  SELECT
    md5(path) AS note_id,
    path,
    chunk,
    entity,
    assertion
  FROM assertion
)

In [None]:
-- Persist the temporalRe temp view as the TEMPORAL_RE table: every source column,
-- preceded by note_id (MD5 hash of the note path).
CREATE OR REPLACE TABLE TEMPORAL_RE AS (
  SELECT
    md5(path) AS note_id,
    *
  FROM temporalRe
)

In [None]:
-- Persist the bestIcdMapped temp view unchanged as the BEST_ICD table
-- (no note_id column is added here, unlike the other tables).
CREATE OR REPLACE TABLE BEST_ICD AS (
  SELECT *
  FROM bestIcdMapped
)

In [None]:
-- Persist the icd10Hcc temp view as the ICD10_HCC table, keyed by note_id
-- (MD5 hash of the note path).
CREATE OR REPLACE TABLE ICD10_HCC AS (
  SELECT
    md5(path) AS note_id,
    path,
    confidence,
    final_chunk,
    entity,
    icd10_code,
    icd_codes_names,
    icd_code_billable
  FROM icd10Hcc
)

In [None]:
-- Sanity check: display the contents of the ICD10_HCC table.
SELECT *
FROM ICD10_HCC

Copyright / License info of the notebook. Copyright Databricks, Inc. [2021].  The source in this notebook is provided subject to the [Databricks License](https://databricks.com/db-license-source).