In [30]:
import pandas as pd
import os
import datetime
from dotenv import dotenv_values
import snowflake.connector as sc
import snowflake.connector.pandas_tools as snp
import re

In [31]:
config = dotenv_values(os.path.join(os.getcwd(),'env','sandbox','.env'))

con = sc.connect(
    user = config["USERNAME"],
    password = config["PASSWORD"],
    account = config["ACCOUNT"],
    warehouse = config["WAREHOUSE"],
    role = config["ROLE"],
    database = config["TARGET_DB"],
    schema = config["TARGET_SCHEMA"]
)
# Create a cursor object
cur = con.cursor()

In [32]:
# Fetch the column names
query = f"""
SELECT column_name from DEIDENTIFIED_PCORNET_CDM.information_schema.columns
    where 
        table_name = 'DEID_TUMOR' 
        and table_schema = 'CDM'
        and lower(column_name) like lower('%_N%') 
        and lower(column_name) not like lower('RAW_%')
        and data_type = 'TEXT';
"""
cur.execute(query)
columns = cur.fetchall()
columns_to_exclude = {'DATE_CASE_INITIATED_N2085','DATE_CASE_COMPLETED_N2090', 'PATID'}
column_list = [col[0] for col in columns if col[0] not in columns_to_exclude]

# # Construct the list of column names
column_list_str = ", ".join(column_list)
column_list_str

'SECONDARY_DIAGNOSIS8_N3794, HISTOLOGY_ICD_O2_N420, RX_SUMM_SURG_PRIM_SITE_N1290, AJCC_TNM_POST_THERAPY_T_N1033, MEDICAL_RECORD_NUMBER_N2300, OVER_RIDE_SS_TNM_M_N1983, LN_HEAD_AND_NECK_LEVELS_N3877, MULTIGENE_SIGNATURE_RES_N3895, OVER_RIDE_HOSPSEQ_DXCON_N1986, CENSUS_TRACT2010_N135, AJCC_TNM_POST_THERAPY_S_N1024, RX_SUMM_SYSTEMIC_SUR_SE_N1639, PATH_REPORT_TYPE2_N7481, HER2_ISH_SUMMARY_N3854, PHASE1_RADIATION_TO_DRA_N1505, RAD_TREATMENT_VOLUME_N1540, LONGITUDE_N2354, TNM_EDITION_NUMBER_N1060, TNM_CLIN_DESCRIPTOR_N980, TUMOR_MARKER2_N1160, TNM_CLIN_M_N960, TEXT_HISTOLOGY_TITLE_N2590, RX_SUMM_RAD_TO_CNS_N1370, PERITONEAL_CYTOLOGY_N3911, GRADE_CLINICAL_N3843, PSA_LAB_VALUE_N3920, SECONDARY_DIAGNOSIS2_N3782, NUMBER_OF_POSITIVE_PARA_N3901, SCHEMA_DISCRIMINATOR2_N3927, CS_SITE_SPECIFIC_FACTOR_N2879, SEPARATE_TUMOR_NODULES_N3929, OVER_RIDE_CS14_N3763, CENSUS_BLOCK_GRP197090_N368, LN_DISTANT_MEDIASTINAL__N3875, RX_HOSP_SURG_APP2010_N668, ADDR_CURRENT_POSTAL_COD_N1830, SENTINEL_LYMPH_NODES_PO_N8

In [33]:
# Construct the unpivot SQL query

# Table name
table_name = 'TUMOR_FACT'
# source = 'DEIDENTIFIED_PCORNET_CDM.CDM.DEID_TUMOR'
source = 'CDM_DATALAKE.GPC.GPC_DEID_TUMOR'

unpivot_query = f"""
CREATE OR REPLACE VIEW {table_name} AS
(
    select 
        -1 :: NUMBER(38, 0)                                                                              as ENCOUNTER_NUM
        ,PATID :: NUMBER(38, 0)                                                                          as PATIENT_NUM
        ,concat('NAACCR|', split_part(concept, '_N', -1), ':', coalesce(concept_cd, ''))                 as CONCEPT_CD
        ,'@'                                                                                             as PROVIDER_ID
        ,DATE_CASE_INITIATED_N2085 :: TIMESTAMP                                                          as START_DATE
        ,'@'                                                                                             as MODIFIER_CD
        ,1                                                                                               as INSTANCE_NUM
        ,''                                                                                              as VALTYPE_CD
        ,''                                                                                              as TVAL_CHAR
        ,cast(null as  integer)                                                                          as NVAL_NUM
        ,''                                                                                              as VALUEFLAG_CD
        ,cast(null as  integer)                                                                          as QUANTITY_NUM
        ,'@'                                                                                             as UNITS_CD
        ,cast(DATE_CASE_COMPLETED_N2090 as TIMESTAMP)                                                    as END_DATE
        ,'@'                                                                                             as LOCATION_CD
        ,cast(null as  text)                                                                             as OBSERVATION_BLOB
        ,cast(null as  integer)                                                                          as CONFIDENCE_NUM
        ,CURRENT_TIMESTAMP                                                                               as UPDATE_DATE
        ,CURRENT_TIMESTAMP                                                                               as DOWNLOAD_DATE
        ,CURRENT_TIMESTAMP                                                                               as IMPORT_DATE
        ,cast(null as VARCHAR(50))                                                                       as SOURCESYSTEM_CD
        ,cast(null as  integer)                                                                          as UPLOAD_ID
    from DEIDENTIFIED_PCORNET_CDM.CDM.DEID_TUMOR
    unpivot (
        concept_cd for concept IN ({column_list_str})
    )
    order by patid
)
union all
(
    select 
        -1 :: NUMBER(38, 0)                                                                              as ENCOUNTER_NUM
        ,PATID :: NUMBER(38, 0)                                                                          as PATIENT_NUM
        ,concat('NAACCR|MORPH:',histologic_type_icd_o3_n522, '/',behavior_code_icd_o3_n523)              as CONCEPT_CD
        ,'@'                                                                                             as PROVIDER_ID
        ,DATE_CASE_INITIATED_N2085 :: TIMESTAMP                                                          as START_DATE
        ,'@'                                                                                             as MODIFIER_CD
        ,1                                                                                               as INSTANCE_NUM
        ,''                                                                                              as VALTYPE_CD
        ,''                                                                                              as TVAL_CHAR
        ,cast(null as  integer)                                                                          as NVAL_NUM
        ,''                                                                                              as VALUEFLAG_CD
        ,cast(null as  integer)                                                                          as QUANTITY_NUM
        ,'@'                                                                                             as UNITS_CD
        ,cast(DATE_CASE_COMPLETED_N2090 as TIMESTAMP)                                                    as END_DATE
        ,'@'                                                                                             as LOCATION_CD
        ,cast(null as  text)                                                                             as OBSERVATION_BLOB
        ,cast(null as  integer)                                                                          as CONFIDENCE_NUM
        ,CURRENT_TIMESTAMP                                                                               as UPDATE_DATE
        ,CURRENT_TIMESTAMP                                                                               as DOWNLOAD_DATE
        ,CURRENT_TIMESTAMP                                                                               as IMPORT_DATE
        ,cast(null as VARCHAR(50))                                                                       as SOURCESYSTEM_CD
        ,cast(null as  integer)                                                                          as UPLOAD_ID
    from {source}
)
;
"""
unpivot_query

"\nCREATE OR REPLACE VIEW TUMOR_FACT AS\n(\n    select \n        -1 :: NUMBER(38, 0)                                                                              as ENCOUNTER_NUM\n        ,PATID :: NUMBER(38, 0)                                                                          as PATIENT_NUM\n        ,concat('NAACCR|', split_part(concept, '_N', -1), ':', coalesce(concept_cd, ''))                 as CONCEPT_CD\n        ,'@'                                                                                             as PROVIDER_ID\n        ,DATE_CASE_INITIATED_N2085 :: TIMESTAMP                                                          as START_DATE\n        ,'@'                                                                                             as MODIFIER_CD\n        ,1                                                                                               as INSTANCE_NUM\n        ,''                                                                                    

In [None]:
# inner join {target_schema}.patient_crosswalk as pc
# using (patid)
# inner join {target_schema}.encounter_crosswalk as ec
# using (ENCOUNTERID);  

In [34]:
# Execute the unpivot SQL query
# cur.execute(f"DROP TABLE IF EXISTS {database_name}.{schema_name}.{table_name}")
cur.execute(unpivot_query)

# Close the cursor and connection
cur.close()
con.close()

print("View created successfully.")

View created successfully.


In [35]:
run_con = sc.connect(
    user = config["USERNAME"],
    password = config["PASSWORD"],
    account = config["ACCOUNT"],
    warehouse = config["WAREHOUSE"],
    role = config["ROLE"],
    database = config["TARGET_DB"],
    schema = config["METADATA_SCHEMA"]
)
# Create a cursor object
run_cur = run_con.cursor()

run_cur.execute(f"call RUNTOTALNUM( 'TUMOR_FACT', 'I2B2DATA', 'NAACCR_ONTOLOGY')")

<snowflake.connector.cursor.SnowflakeCursor at 0x137cf10d0>