# Create ReadV2 and ICD10 look-up tables 

To do before running this notebook in SAIL: 
1. import the processed .csv files 
2. save them in CONC_IMP
3. move any old version into the old version folder 
4. change v_n in the code to the version number imported
5. change the name of the folder in MELDB_CONC_READ and MELDB_CONC_ICD10
6. check whether the sensitive code list needs to be updated

In [None]:
#libraries 
import SAIL_python
import pandas as pd 
import numpy as np 
import glob
import os
import re
from unidecode import unidecode

# Create the SAIL connection object and connect; the later cells issue all
# their queries and table uploads through `con`.
con = SAIL_python.SAILConnection()
con.connect()

## Count(s)
Check how many readv2 and ICD10 concepts are imported vs the number of concepts already available in the database 

Note: Manually modify the version number on the imported folder

In [None]:
# Folder holding the imported files, relative to the notebook.
conc_imp_dir = os.path.join('./', 'CONC_IMP')

# Sensitive Read codes: only the 'code' column is loaded.
READ_SENSITIVE = os.path.join(conc_imp_dir, 'SAIL_sensitive_READ_2023_11_18.csv')
read_sensitive = pd.read_csv(READ_SENSITIVE, usecols=['code'])

# Sensitive ICD10 codes: the whole file is loaded.
ICD10_SENSITIVE = os.path.join(conc_imp_dir, 'SAIL_sensitive_ICD10_2023_11_18.csv')
icd10_sensitive = pd.read_csv(ICD10_SENSITIVE)

# CONCEPTS classified as sensitive as a whole (every code of these concepts is
# flagged); the list can be modified manually as needed.
sens_conc = ['HIVAIDS', 'VIRAL_HEPATITIS']

In [None]:
# Check the new/updated concepts imported from csv.
# v_n is the version of the imported files. The import folders below are built
# from it, so the table names (which also use v_n) and the folders that are
# read can no longer point at different versions.
v_n = '2_2_3'

MELDB_CONC_ICD10 = os.path.join('./', 'CONC_IMP/concepts-output-main-icd10_{}/icd10'.format(v_n))
MELDB_CONC_READ = os.path.join('./', 'CONC_IMP/concepts-output-main-readv2_{}/readv2'.format(v_n))

# Concept name = csv file name without its extension. Entries that are not
# .csv files (e.g. notebook checkpoint folders) are skipped so they are neither
# counted nor mistaken for concepts.
new_read_concepts = [
    os.path.splitext(entry)[0]
    for entry in os.listdir(MELDB_CONC_READ)
    if entry.lower().endswith('.csv')
]
new_icd10_concepts = [
    os.path.splitext(entry)[0]
    for entry in os.listdir(MELDB_CONC_ICD10)
    if entry.lower().endswith('.csv')
]

# Number of imported concepts per coding system.
print('New/updated concepts READ:', len(new_read_concepts))
print('New/updated concepts ICD10:', len(new_icd10_concepts))

In [None]:
# Check the concepts already available in the database.

# The views of schema SAILW1377V whose name matches CONC_READ_% / CONC_ICD10_%
# give the list of the concepts already available in SAIL.
q_read = """
        SELECT * FROM SYSCAT.TABLES WHERE TABSCHEMA = 'SAILW1377V' AND TABNAME LIKE 'CONC_READ_%' AND type = 'V'
        """
q_icd10 = """
        SELECT * FROM SYSCAT.TABLES WHERE TABSCHEMA = 'SAILW1377V' AND TABNAME LIKE 'CONC_ICD10_%' AND type = 'V'
         """

# Concept name = view name with the leading 'CONC_READ_' (10 characters) or
# 'CONC_ICD10_' (11 characters) removed.
read = con.query(q_read, echo_level=0)['TABNAME']
read_SAIL = []
for view in read.to_list():
    read_SAIL.append(view[len('CONC_READ_'):])

icd10 = con.query(q_icd10, echo_level=0)['TABNAME']
icd10_SAIL = []
for view in icd10.to_list():
    icd10_SAIL.append(view[len('CONC_ICD10_'):])

print('concepts in SAIL (READ):', len(read_SAIL))
print('concepts in SAIL (ICD10):', len(icd10_SAIL))

In [None]:
# Split the concepts already in SAIL by whether a new/updated csv was imported
# for them (*_update) or not (*_not_update).
sail_read, sail_icd10 = set(read_SAIL), set(icd10_SAIL)
imported_read, imported_icd10 = set(new_read_concepts), set(new_icd10_concepts)

read_update = sail_read & imported_read
icd10_update = sail_icd10 & imported_icd10

read_not_update = sail_read - imported_read
icd10_not_update = sail_icd10 - imported_icd10

In [None]:
# Report, per coding system, which SAIL concepts were left untouched and which
# ones have a new/updated csv.
for label, concepts in (
    ('Concepts not updated (READ):', read_not_update),
    ('Concepts not updated (ICD10):', icd10_not_update),
    ('Concepts updated (READ):', read_update),
    ('Concepts updated (ICD10):', icd10_update),
):
    print(label, concepts)

## Check changes
We check whether the newly imported csv for each concept differs from the concept's look-up table/view available in SAIL. 
If there are no differences, the concept is flagged, as it should not have been imported into the TRE. 

### READ V2 

In [None]:
# Concepts that need to be flagged: their imported csv is identical to the
# view already in SAIL.
error_read = []

read_renames = {"code": "READ_CODE", 'CONCEPT TYPE': 'CONCEPT_TYPE', 'MELDB_concept': 'MELDB_CONCEPT'}
read_columns = ['READ_CODE', 'MELDB_CONCEPT', 'CONCEPT_TYPE']

for concept in read_update:
    # Imported csv, reduced to the compared columns and ordered by code.
    imported = pd.read_csv(os.path.join(MELDB_CONC_READ, concept + '.csv'))
    imported = imported.rename(columns=read_renames)[read_columns]
    imported = imported.sort_values(by=['READ_CODE'], ascending=True)
    imported = imported.reset_index(drop=True)

    # Same columns from the concept's current view in SAIL, ordered the same way.
    view_query = """
        SELECT READ_CODE, MELDB_CONCEPT, CONCEPT_TYPE FROM SAILW1377V.CONC_READ_%s
        """ % (concept)
    current = con.query(view_query, echo_level=0)
    current = current.sort_values(by=['READ_CODE'], ascending=True)
    current = current.reset_index(drop=True)

    # Only concepts with identical content are reported.
    if not current.equals(imported):
        continue
    error_read.append(concept)
    print('Error: ', concept, ' does not have any change')

print('The following concepts are the same:', error_read)

### ICD10

In [None]:
# Concepts that need to be flagged: their imported csv is identical to the
# view already in SAIL.
error_icd10 = []

icd10_renames = {"code": "ICD10_CODE", 'CONCEPT TYPE': 'CONCEPT_TYPE', 'MELDB_concept': 'MELDB_CONCEPT'}
icd10_columns = ['ICD10_CODE', 'MELDB_CONCEPT', 'CONCEPT_TYPE']

for concept in icd10_update:
    # Imported csv, reduced to the compared columns and ordered by code.
    imported = pd.read_csv(os.path.join(MELDB_CONC_ICD10, concept + '.csv'))
    imported = imported.rename(columns=icd10_renames)[icd10_columns]
    imported = imported.sort_values(by=['ICD10_CODE'], ascending=True)
    imported = imported.reset_index(drop=True)

    # Same columns from the concept's current view in SAIL, ordered the same way.
    view_query = """
        SELECT ICD10_CODE, MELDB_CONCEPT, CONCEPT_TYPE FROM SAILW1377V.CONC_ICD10_%s
        """ % (concept)
    current = con.query(view_query, echo_level=0)
    current = current.sort_values(by=['ICD10_CODE'], ascending=True)
    current = current.reset_index(drop=True)

    # Only concepts with identical content are reported.
    if not current.equals(imported):
        continue
    error_icd10.append(concept)
    print('Error: ', concept, ' does not have any change')

print('The following concepts are the same:', error_icd10)

## Create new tables with version 

We create new tables for the new/updated concepts (both READ and ICD10), adding the version number at the end of the table name. 
We also create a new .xlsx file in the CONC_FIN folder for each new concept .csv. 

#### READ tables

In [None]:
# For every concept: upload the csv to a temporary table, build the versioned
# table CONC_READ_<concept>_<v_n> from it (adding the SAIL description), point
# the view CONC_READ_<concept> at the new table and export the data to xlsx.
#
# NOTE(review): read_update is the intersection of the imported concepts with
# read_SAIL, so imported concepts that have no view in SAIL yet never reach
# this loop - confirm whether iterating over new_read_concepts was intended.
for i in read_update:
    print(i)
    temp_table_name = 'TEMP_CONC_READ_' + str(i)
    # versioned table that stores the data / unversioned view that exposes it
    table_name = 'CONC_READ_' + str(i) + '_' + str(v_n)
    view_name = 'CONC_READ_' + str(i)

    # csv of the updated/new concept to dataframe
    file = os.path.join(MELDB_CONC_READ, i + '.csv')
    conc_i = pd.read_csv(file)
    conc_i = conc_i.drop('description', axis=1)
    conc_i = conc_i.rename(columns={"code": "READ_CODE", 'CONCEPT TYPE': 'CONCEPT_TYPE', 'MELDB_concept': 'MELDB_CONCEPT'})

    # A concept listed in sens_conc is sensitive as a whole; otherwise only the
    # codes found in the sensitive Read code list are flagged.
    if i in sens_conc:
        conc_i['SENSITIVE_CODE'] = 1
    else:
        conc_i['SENSITIVE_CODE'] = np.where(conc_i['READ_CODE'].isin(read_sensitive['code']), '1', '0')
    # every column is uploaded as text
    conc_i = conc_i.astype(str)

    # temp table in the database, joined below to SAILUKHDV.READ_CD_CV2_SCD
    con.dataframe_to_table('SAILW1377V', temp_table_name, conc_i)

    if i == 'ALL_MEDICATIONS': #slightly different because we have more columns 
        print('ALL_MED')
        
        q1 = """
            CREATE TABLE SAILW1377V.%s (
            READ_CODE VARCHAR(5), 
            DESCRIPTION_SAIL VARCHAR(200), 
            WILK_READCODE VARCHAR(5), 
            DRUG_CATEGORY VARCHAR(100), 
            CONCEPT_TYPE VARCHAR(100), 
            SENSITIVE_CODE INTEGER,
            MELDB_CONCEPT VARCHAR(100)
            ); 
            """  % (table_name) 

        # description = longest preferred term available (198, then 60, then 30)
        q2 = """
             INSERT INTO SAILW1377V.%s
             SELECT DISTINCT
                 A.READ_CODE,
                 CASE WHEN B.PREF_TERM_198 IS NOT NULL THEN B.PREF_TERM_198
                      WHEN B.PREF_TERM_198 IS NULL AND B.PREF_TERM_60 IS NOT NULL THEN B.PREF_TERM_60 
                      WHEN B.PREF_TERM_198 IS NULL AND B.PREF_TERM_60 IS NULL THEN B.PREF_TERM_30 
                 END, 
                 A.WILK_READCODE, A.DRUG_CATEGORY, A.CONCEPT_TYPE, A.SENSITIVE_CODE, A.MELDB_CONCEPT
             FROM 
                 SAILW1377V.%s A
             JOIN 
                 SAILUKHDV.READ_CD_CV2_SCD B 
             ON 
                 A.READ_CODE = B.READ_CODE 
             WHERE 
                EFFECTIVE_TO IS NULL
             ORDER BY 
                READ_CODE;
            """ % (table_name, temp_table_name)
    
    else: 
        print('NOT all MED')
        q1 = """
            CREATE TABLE SAILW1377V.%s (
            READ_CODE VARCHAR(10),  
            DESCRIPTION_SAIL VARCHAR(198), 
            SENSITIVE_CODE SMALLINT, 
            MELDB_CONCEPT VARCHAR(100), 
            CONCEPT_TYPE VARCHAR(300)
            ); 
            """  % (table_name) 

        # description = longest preferred term available (198, then 60, then 30)
        q2 = """
             INSERT INTO SAILW1377V.%s
             SELECT DISTINCT
                 A.READ_CODE,
                 CASE WHEN B.PREF_TERM_198 IS NOT NULL THEN B.PREF_TERM_198
                      WHEN B.PREF_TERM_198 IS NULL AND B.PREF_TERM_60 IS NOT NULL THEN B.PREF_TERM_60 
                      WHEN B.PREF_TERM_198 IS NULL AND B.PREF_TERM_60 IS NULL THEN B.PREF_TERM_30 
                 END, 
                 A.SENSITIVE_CODE, A.MELDB_CONCEPT, A.CONCEPT_TYPE
             FROM 
                 SAILW1377V.%s A
             JOIN 
                 SAILUKHDV.READ_CD_CV2_SCD B 
             ON 
                 A.READ_CODE = B.READ_CODE 
             WHERE 
                EFFECTIVE_TO IS NULL
             ORDER BY 
                READ_CODE;
            """ % (table_name,  temp_table_name)

    q3 = """     
         DROP TABLE SAILW1377V.%s
         """ % (temp_table_name)

    # Create and fill the final table; the temp table is dropped even when one
    # of these statements fails, so a failed run does not leave it behind.
    try:
        con.query(q1, echo_level=0)
        con.query(q2, echo_level=0)
    finally:
        con.query(q3, echo_level=0)

    # Point the concept's view at the new table: drop the old view when the
    # concept already exists in SAIL, then (re)create it under the view name.
    if i in read_SAIL:
        print('view drop and create:', i)
        q5 = """
            DROP VIEW SAILW1377V.%s
            """ % (view_name)
        con.query(q5, echo_level=0)
    else:
        print('view create:', i)

    q4 = """
            CREATE VIEW SAILW1377V.%s AS SELECT * FROM SAILW1377V.%s
            """ % (view_name, table_name)
    con.query(q4, echo_level=0)

    # create a xlsx file for each i
    file_name = 'CONC_READ_{}_{}.xlsx'.format(i, v_n)
    conc_i.to_excel(os.path.join('./', 'CONC_FIN', file_name), index=False)

#### ICD10 tables

In [None]:
# For every concept: upload the csv to a temporary table, build the versioned
# table CONC_ICD10_<concept>_<v_n> from it (adding the SAIL descriptions),
# point the view CONC_ICD10_<concept> at the new table and export to xlsx.
#
# NOTE(review): icd10_update is the intersection of the imported concepts with
# icd10_SAIL, so imported concepts that have no view in SAIL yet never reach
# this loop - confirm whether iterating over new_icd10_concepts was intended.
for i in icd10_update:
    print(i)
    temp_table_name = 'TEMP_CONC_ICD10_' + str(i)
    # versioned table that stores the data / unversioned view that exposes it
    table_name = 'CONC_ICD10_' + str(i) + '_' + str(v_n)
    view_name = 'CONC_ICD10_' + str(i)

    # csv of the updated/new concept to dataframe
    file = os.path.join(MELDB_CONC_ICD10, i + '.csv')
    conc_i = pd.read_csv(file)
    conc_i = conc_i.drop('description', axis=1)
    conc_i = conc_i.rename(columns={"code": "ICD10_CODE", 'CONCEPT TYPE': 'CONCEPT_TYPE', 'MELDB_concept': 'MELDB_CONCEPT'})
    # first three characters of the code
    conc_i['ICD10_3'] = conc_i['ICD10_CODE'].str[0:3]

    # A concept listed in sens_conc is sensitive as a whole; otherwise only the
    # codes found in the sensitive ICD10 code list are flagged. The ICD10 list
    # is used here, not the Read one.
    # NOTE(review): assumes the ICD10 sensitive file has a 'code' column like
    # the Read file - confirm against the csv.
    if i in sens_conc:
        conc_i['SENSITIVE_CODE'] = 1
    else:
        conc_i['SENSITIVE_CODE'] = np.where(conc_i['ICD10_CODE'].isin(icd10_sensitive['code']), '1', '0')
    # every column is uploaded as text
    conc_i = conc_i.astype(str)

    # temp table in the database, joined below to
    # SAILUKHDV.ICD10_CODES_AND_TITLES_AND_METADATA
    con.dataframe_to_table('SAILW1377V', temp_table_name, conc_i)

    q1 = """
            CREATE TABLE SAILW1377V.%s (
            ICD10_CODE VARCHAR(5), 
            DESCRIPTION_SAIL VARCHAR(200), 
            ICD10_3 VARCHAR(5),
            DESCRIPTION_SAIL_3DIGITS VARCHAR(200), 
            CODE_LENGTH SMALLINT, 
            SENSITIVE_CODE SMALLINT, 
            MELDB_CONCEPT VARCHAR(100), 
            CONCEPT_TYPE VARCHAR(300)
            ); 
            """  % (table_name)

    # T1 holds the description of the 3-character parent of every code.
    # First SELECT: codes of length >= 4, matched on ALT_CODE, with the parent
    # description taken from T1. Second SELECT: 3-character codes, matched on
    # CODE, whose own description is also used as the 3-digit description.
    q2 = """
        INSERT INTO SAILW1377V.%s
        WITH T1 AS (
            SELECT DISTINCT 
                C.CODE, C.DESCRIPTION AS DES
            FROM 
                SAILUKHDV.ICD10_CODES_AND_TITLES_AND_METADATA C
            JOIN 
                SAILW1377V.%s D 
            ON 
                C.CODE = LEFT(D.ICD10_CODE , 3)
            WHERE 
                EFFECTIVE_TO IS NULL 
         )
        (SELECT DISTINCT
            A.ICD10_CODE,
            B.DESCRIPTION AS DESCRIPTION_SAIL, 
            A.ICD10_3, 
            (SELECT DISTINCT T1.DES FROM T1 WHERE T1.CODE = LEFT(A.ICD10_CODE,3)) AS DESCRIPTION_SAIL_3DIGITS, 
            LENGTH(RTRIM(A.ICD10_CODE)) AS CODE_LENGTH, 
            A.SENSITIVE_CODE, A.MELDB_CONCEPT, A.CONCEPT_TYPE
        FROM 
            SAILW1377V.%s A
        LEFT JOIN 
            T1 
        ON 
            T1.CODE = A.ICD10_CODE
        JOIN 
            SAILUKHDV.ICD10_CODES_AND_TITLES_AND_METADATA B
        ON
            A.ICD10_CODE = B.ALT_CODE
        WHERE 
            EFFECTIVE_TO IS NULL 
            AND LENGTH(RTRIM(A.ICD10_CODE)) >= 4
        ORDER BY 
            ICD10_CODE
        )
        UNION
        (SELECT DISTINCT
            A.ICD10_CODE,
            B.DESCRIPTION AS DESCRIPTION_SAIL, 
            A.ICD10_3, 
            B.DESCRIPTION AS DESCRIPTION_SAIL_3DIGITS, 
            LENGTH(RTRIM(A.ICD10_CODE)) AS CODE_LENGTH, 
            A.SENSITIVE_CODE, A.MELDB_CONCEPT, A.CONCEPT_TYPE
        FROM 
            SAILW1377V.%s A
        JOIN 
            SAILUKHDV.ICD10_CODES_AND_TITLES_AND_METADATA B
        ON
            A.ICD10_CODE = B.code
        WHERE 
            EFFECTIVE_TO IS NULL
            AND length(a.ICD10_CODE) = 3 
        ORDER BY 
            ICD10_CODE
        );
            """ % (table_name, temp_table_name, temp_table_name, temp_table_name)

    q3 = """     
         DROP TABLE SAILW1377V.%s
         """ % (temp_table_name)

    # Create and fill the final table; the temp table is dropped even when one
    # of these statements fails, so a failed run does not leave it behind.
    try:
        con.query(q1, echo_level=0)
        con.query(q2, echo_level=0)
    finally:
        con.query(q3, echo_level=0)

    # Point the concept's view at the new table: drop the old view when the
    # concept already exists in SAIL, then (re)create it under the view name.
    if i in icd10_SAIL:
        print('view drop and create:', i)
        q5 = """
            DROP VIEW SAILW1377V.%s
            """ % (view_name)
        con.query(q5, echo_level=0)
    else:
        print('view create:', i)

    q4 = """
            CREATE VIEW SAILW1377V.%s AS SELECT * FROM SAILW1377V.%s
            """ % (view_name, table_name)
    con.query(q4, echo_level=0)

    # create a xlsx file for each i
    file_name = 'CONC_ICD10_{}_{}.xlsx'.format(i, v_n)
    conc_i.to_excel(os.path.join('./', 'CONC_FIN', file_name), index=False)