In [None]:
import pandas as pd
import os
import datetime
from dotenv import dotenv_values
import snowflake.connector as sc
import snowflake.connector.pandas_tools as snp
import re

In [None]:
# Load the Snowflake credentials / settings from the local .env file
config = dotenv_values('env/.env')

# Every connection argument comes from the .env file except the schema,
# which is fixed to I2B2DATA for this connection.
connection_settings = {
    key: config[key]
    for key in ("user", "password", "account", "warehouse", "role", "database")
}
connection_settings["schema"] = 'I2B2DATA'

con = sc.connect(**connection_settings)

# Cursor used by the following cells to run SQL
cur = con.cursor()

In [None]:
# Fetch the names of the TEXT columns of CDM.DEID_TUMOR whose name contains a
# literal "_N" (e.g. DATE_CASE_INITIATED_N2085) and does not start with "RAW_".
#
# "_" is a single-character wildcard in LIKE, so an unescaped '%_N%' would match
# any column with an "N" after its first character. The underscores are escaped
# with "^" (declared via ESCAPE) so they only match a real underscore.
query = """
SELECT column_name from DEIDENTIFIED_PCORNET_CDM.information_schema.columns
    where 
        table_name = 'DEID_TUMOR' 
        and table_schema = 'CDM'
        and lower(column_name) like '%^_n%' escape '^'
        and lower(column_name) not like 'raw^_%' escape '^'
        and data_type = 'TEXT';
"""
cur.execute(query)
columns = cur.fetchall()

# These columns are left out of the generated list (the two date columns would
# otherwise match the "_N" pattern if they are stored as TEXT).
columns_to_exclude = {'DATE_CASE_INITIATED_N2085','DATE_CASE_COMPLETED_N2090', 'PATID'}
column_list = [col[0] for col in columns if col[0] not in columns_to_exclude]

# An empty list would render as "IN ()" in the generated SQL, which is invalid;
# fail here with a clear message instead of a SQL compilation error later.
if not column_list:
    raise ValueError(
        "No matching '_N' TEXT columns found in "
        "DEIDENTIFIED_PCORNET_CDM.CDM.DEID_TUMOR; cannot build the column list."
    )

# Comma-separated column names, ready to be interpolated into a SQL statement
column_list_str = ", ".join(column_list)
column_list_str

In [None]:
# Construct the unpivot SQL query

# Fully qualified name of the view to create: database.schema.table
table_name = 'TUMOR_FACT'
schema_name = 'I2B2DATA'
database_name = 'I2B2_DEV'

# Table both branches of the view read from
source_table = 'DEIDENTIFIED_PCORNET_CDM.CDM.DEID_TUMOR'


def fact_select_list(concept_cd_expr):
    """Return the select list shared by both branches of the view.

    The two branches differ only in how CONCEPT_CD is computed, so that SQL
    expression is passed in as ``concept_cd_expr``. Every other column is a
    constant, a cast of PATID or of one of the two case-date columns, a
    typed NULL, or CURRENT_TIMESTAMP.
    """
    return f"""
        -1 :: NUMBER(38, 0)                              as ENCOUNTER_NUM
        ,PATID :: NUMBER(38, 0)                          as PATIENT_NUM
        ,{concept_cd_expr}                               as CONCEPT_CD
        ,'@'                                             as PROVIDER_ID
        ,DATE_CASE_INITIATED_N2085 :: TIMESTAMP          as START_DATE
        ,'@'                                             as MODIFIER_CD
        ,1                                               as INSTANCE_NUM
        ,''                                              as VALTYPE_CD
        ,''                                              as TVAL_CHAR
        ,cast(null as  integer)                          as NVAL_NUM
        ,''                                              as VALUEFLAG_CD
        ,cast(null as  integer)                          as QUANTITY_NUM
        ,'@'                                             as UNITS_CD
        ,cast(DATE_CASE_COMPLETED_N2090 as TIMESTAMP)    as END_DATE
        ,'@'                                             as LOCATION_CD
        ,cast(null as  text)                             as OBSERVATION_BLOB
        ,cast(null as  integer)                          as CONFIDENCE_NUM
        ,CURRENT_TIMESTAMP                               as UPDATE_DATE
        ,CURRENT_TIMESTAMP                               as DOWNLOAD_DATE
        ,CURRENT_TIMESTAMP                               as IMPORT_DATE
        ,cast(null as VARCHAR(50))                       as SOURCESYSTEM_CD
        ,cast(null as  integer)                          as UPLOAD_ID"""


# Branch 1: one row per (tumor row, unpivoted column). The code is
# 'NAACCR|<text after the last "_N" in the column name>:<column value>'.
unpivot_columns = fact_select_list(
    "concat('NAACCR|', split_part(concept, '_N', -1), ':', coalesce(concept_cd, ''))"
)

# Branch 2: one row per tumor row, coded as 'NAACCR|MORPH:<histology>/<behavior>'.
morph_columns = fact_select_list(
    "concat('NAACCR|MORPH:',histologic_type_icd_o3_n522, '/',behavior_code_icd_o3_n523)"
)

# Notes on the statement below:
# * No ORDER BY inside the view: a sort in one UNION ALL branch does not
#   guarantee any order to consumers and only adds cost to every query.
# * The MORPH branch skips rows where either input column is NULL, because
#   concat() would otherwise yield a NULL CONCEPT_CD for them.
unpivot_query = f"""
CREATE OR REPLACE VIEW {database_name}.{schema_name}.{table_name} AS
(
    select {unpivot_columns}
    from {source_table}
    unpivot (
        concept_cd for concept IN ({column_list_str})
    )
)
union all
(
    select {morph_columns}
    from {source_table}
    where histologic_type_icd_o3_n522 is not null
        and behavior_code_icd_o3_n523 is not null
)
;
"""
unpivot_query

In [None]:
# Execute the unpivot SQL query.
# cur.execute(f"DROP TABLE IF EXISTS {database_name}.{schema_name}.{table_name}")
try:
    cur.execute(unpivot_query)
finally:
    # Release the cursor and connection even when the statement fails, so a
    # failed run does not leave an open Snowflake session behind.
    cur.close()
    con.close()

# Only reached when execute() did not raise
print("View created successfully.")

In [None]:
# Re-read the settings and open a fresh connection (the previous one was closed);
# this time the schema also comes from the .env file.
config = dotenv_values('env/.env')

run_connection_settings = {
    key: config[key]
    for key in ("user", "password", "account", "warehouse", "role", "database", "schema")
}
run_con = sc.connect(**run_connection_settings)

# Cursor for the stored-procedure call
run_cur = run_con.cursor()

# Call the RUNTOTALNUM stored procedure with the fact view name, its schema,
# and the NAACCR_ONTOLOGY argument.
totalnum_call = "call RUNTOTALNUM( 'TUMOR_FACT', 'I2B2DATA', 'NAACCR_ONTOLOGY')"
run_cur.execute(totalnum_call)