From 044935c6493b62bebf479760adc78172123cc090 Mon Sep 17 00:00:00 2001 From: Irina Date: Mon, 1 Aug 2022 18:52:54 +0300 Subject: [PATCH 01/21] ICDs refresh scripts update --- ICD10/manual_work/create_manual_table.sql | 4 + ICD10/manual_work/crm_changes.sql | 8 +- ICD10CM/manual_work/create_manual_table.sql | 5 +- ICD10CM/manual_work/crm_changes.sql | 33 ++- ICD10CM/manual_work/mapping_refresh.sql | 2 + ICD10CN/manual_work/create_manual_table.sql | 31 +++ ICD10CN/manual_work/crm_changes.sql | 101 ++++++++ ICD10CN/manual_work/mapping_refresh.sql | 254 ++++++++++++++++++++ ICD10CN/manual_work/readme.md | 69 ++++++ ICD9CM/manual_work/create_manual_table.sql | 31 +++ 10 files changed, 534 insertions(+), 4 deletions(-) create mode 100644 ICD10CN/manual_work/create_manual_table.sql create mode 100644 ICD10CN/manual_work/crm_changes.sql create mode 100644 ICD10CN/manual_work/mapping_refresh.sql create mode 100644 ICD10CN/manual_work/readme.md create mode 100644 ICD9CM/manual_work/create_manual_table.sql diff --git a/ICD10/manual_work/create_manual_table.sql b/ICD10/manual_work/create_manual_table.sql index 740655987..29fcea2cd 100644 --- a/ICD10/manual_work/create_manual_table.sql +++ b/ICD10/manual_work/create_manual_table.sql @@ -18,6 +18,8 @@ * Date: 2021 **************************************************************************/ +DROP TABLE IF EXISTS refresh_lookup_done; +-- TRUNCATE removed: it fails on the table just dropped above; CREATE TABLE below already yields an empty table CREATE TABLE refresh_lookup_done ( icd_code VARCHAR, icd_name VARCHAR, @@ -27,3 +29,5 @@ repl_by_code VARCHAR, repl_by_name VARCHAR, repl_by_domain VARCHAR, repl_by_vocabulary VARCHAR); + +SELECT*FROM refresh_lookup_done; diff --git a/ICD10/manual_work/crm_changes.sql b/ICD10/manual_work/crm_changes.sql index 897bd3bee..d3c4b2091 100644 --- a/ICD10/manual_work/crm_changes.sql +++ b/ICD10/manual_work/crm_changes.sql @@ -30,6 +30,10 @@ $body$ END $body$; +/*TRUNCATE TABLE dev_icd10.concept_relationship_manual; --restore from backup: run it only if something went wrong +INSERT INTO dev_icd10.concept_relationship_manual 
+SELECT*FROM dev_icd10.concept_relationship_manual_backup_2022_04_25;*/ + -- deprecate previous inaccurate mapping UPDATE concept_relationship_manual crm SET invalid_reason = 'D', @@ -87,4 +91,6 @@ INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabula vocabulary_id_2, relationship_id FROM concept_relationship_manual) ) -; \ No newline at end of file +; + +SELECT * FROM concept_relationship_manual; \ No newline at end of file diff --git a/ICD10CM/manual_work/create_manual_table.sql b/ICD10CM/manual_work/create_manual_table.sql index 576bb3eca..682582424 100644 --- a/ICD10CM/manual_work/create_manual_table.sql +++ b/ICD10CM/manual_work/create_manual_table.sql @@ -17,6 +17,7 @@ * Date: 2021 **************************************************************************/ DROP TABLE IF EXISTS refresh_lookup_done; +-- TRUNCATE removed: it fails on the table just dropped above; CREATE TABLE below already yields an empty table CREATE TABLE refresh_lookup_done ( icd_code VARCHAR, icd_name VARCHAR, @@ -25,4 +26,6 @@ repl_by_id INT, repl_by_code VARCHAR, repl_by_name VARCHAR, repl_by_domain VARCHAR, -repl_by_vocabulary VARCHAR); \ No newline at end of file +repl_by_vocabulary VARCHAR); + +SELECT*FROM refresh_lookup_done; \ No newline at end of file diff --git a/ICD10CM/manual_work/crm_changes.sql b/ICD10CM/manual_work/crm_changes.sql index a12885420..88b1fdfd9 100644 --- a/ICD10CM/manual_work/crm_changes.sql +++ b/ICD10CM/manual_work/crm_changes.sql @@ -30,6 +30,30 @@ $body$ END $body$; +--restore concept_relationship_manual table (run it only if something went wrong) +/*TRUNCATE TABLE dev_icd10cm.concept_relationship_manual; +INSERT INTO dev_icd10cm.concept_relationship_manual +SELECT * FROM dev_icd10cm.concept_relationship_manual_backup_2022_04_21;*/ + +DO +$body$ + DECLARE + update text; + BEGIN + SELECT TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD') + INTO update; + EXECUTE FORMAT('create table %I as select * from concept_manual', + 'concept_manual_backup_' || update); + + END +$body$; + +--restore concept_manual table (run it only if 
something went wrong) +/*TRUNCATE TABLE dev_icd10cm.concept_manual; +INSERT INTO dev_icd10cm.concept_manual +SELECT * FROM dev_icd10cm.concept_manual_backup_2022_06_01;*/ + + -- deprecate previous inaccurate mapping UPDATE concept_relationship_manual crm SET invalid_reason = 'D', @@ -80,12 +104,17 @@ INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabula concept_code_2, --to the same concept_code vocabulary_id_1, vocabulary_id_2, --of the same vocabulary - relationship_id) --with the same relationship + relationship_id, --with the same relationship + invalid_reason) NOT IN (SELECT concept_code_1, concept_code_2, vocabulary_id_1, vocabulary_id_2, - relationship_id FROM concept_relationship_manual) + relationship_id, + invalid_reason FROM concept_relationship_manual + ) ) ; + + diff --git a/ICD10CM/manual_work/mapping_refresh.sql b/ICD10CM/manual_work/mapping_refresh.sql index 5a1bcdd91..869bb709e 100644 --- a/ICD10CM/manual_work/mapping_refresh.sql +++ b/ICD10CM/manual_work/mapping_refresh.sql @@ -252,3 +252,5 @@ SELECT * FROM p_map UNION SELECT * FROM t4 ORDER BY icd_code; + +SELECT*FROM refresh_lookup; diff --git a/ICD10CN/manual_work/create_manual_table.sql b/ICD10CN/manual_work/create_manual_table.sql new file mode 100644 index 000000000..c8a409d0a --- /dev/null +++ b/ICD10CN/manual_work/create_manual_table.sql @@ -0,0 +1,31 @@ +/************************************************************************** +* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +* +* Authors: Darina Ivakhnenko, Dmitry Dymshyts +* Date: 2021 +**************************************************************************/ +DROP TABLE IF EXISTS refresh_lookup_done; +-- TRUNCATE removed: it fails on the table just dropped above; CREATE TABLE below already yields an empty table +CREATE TABLE refresh_lookup_done ( +icd_code VARCHAR, +icd_name VARCHAR, +repl_by_relationship VARCHAR, +repl_by_id INT, +repl_by_code VARCHAR, +repl_by_name VARCHAR, +repl_by_domain VARCHAR, +repl_by_vocabulary VARCHAR); + +SELECT*FROM refresh_lookup_done; \ No newline at end of file diff --git a/ICD10CN/manual_work/crm_changes.sql b/ICD10CN/manual_work/crm_changes.sql new file mode 100644 index 000000000..86477b353 --- /dev/null +++ b/ICD10CN/manual_work/crm_changes.sql @@ -0,0 +1,101 @@ +/************************************************************************** +* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts +* Date: 2021 +**************************************************************************/ +-- create current date backup of concept_relationship_manual table +DO +$body$ + DECLARE + update text; + BEGIN + SELECT TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD') + INTO update; + EXECUTE format('create table %I as select * from concept_relationship_manual', + 'concept_relationship_manual_backup_' || update); + + END +$body$; + +/*TRUNCATE TABLE dev_icd10cn.concept_relationship_manual; --restore from backup: run it only if something went wrong +INSERT INTO dev_icd10cn.concept_relationship_manual +SELECT*FROM dev_icd10cn.concept_relationship_manual_backup_2022_05_16;*/ + + +-- deprecate previous inaccurate mapping +UPDATE concept_relationship_manual crm +SET invalid_reason = 'D', + valid_end_date = current_date + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order to preserve the original deprecation date + + AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes presented in the manual file of the current vocabulary refresh + + AND NOT EXISTS (SELECT 1 --don't deprecate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + +-- insert new mapping +with mapping AS -- select all new codes with their mappings from manual file + ( + SELECT DISTINCT icd_code AS concept_code_1, + repl_by_code AS concept_code_2, + 'ICD10CN' AS vocabulary_id_1, -- set current vocabulary name as vocabulary_id_1 + repl_by_vocabulary AS vocabulary_id_2, + repl_by_relationship AS relationship_id, + current_date AS valid_start_date, -- set 
the date of the refresh as valid_start_date + to_date('20991231','yyyymmdd') AS valid_end_date, + NULL AS invalid_reason -- make all new mappings valid + FROM refresh_lookup_done + WHERE repl_by_id != 0 -- select only codes with mapping to standard concepts + ) +-- insert new mappings into concept_relationship_manual table +INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabulary_id_1, vocabulary_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason) +( + SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + vocabulary_id_2, + relationship_id, + valid_start_date, + valid_end_date, + invalid_reason + FROM mapping m + -- don't insert codes with mapping if the same exists in the current manual file + WHERE (concept_code_1, --the same source_code is mapped + concept_code_2, --to the same concept_code + vocabulary_id_1, + vocabulary_id_2, --of the same vocabulary + relationship_id, --with the same relationship + invalid_reason) + NOT IN (SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + vocabulary_id_2, + relationship_id, + invalid_reason FROM concept_relationship_manual + ) + ) +; + + + diff --git a/ICD10CN/manual_work/mapping_refresh.sql b/ICD10CN/manual_work/mapping_refresh.sql new file mode 100644 index 000000000..6e4ef64b7 --- /dev/null +++ b/ICD10CN/manual_work/mapping_refresh.sql @@ -0,0 +1,254 @@ +/************************************************************************** +* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +* +* Authors: Dmitry Dymshyts, Polina Talapova, Daryna Ivakhnenko +* Date: 2021 +**************************************************************************/ +DROP TABLE IF EXISTS refresh_lookup; +CREATE TABLE refresh_lookup AS WITH miss_map +AS +( + -- 'deprecated mapping' + SELECT c.concept_code AS icd_code, + c.concept_name AS icd_name, + a.relationship_id AS current_relationship, + b.concept_id AS current_id, + b.concept_code AS current_code, + b.concept_name AS current_name, + b.domain_id AS current_domain, + b.vocabulary_id AS current_vocabulary, + 'deprecated mapping' AS reason +FROM concept_relationship_stage a + JOIN concept b + ON a.concept_code_2 = b.concept_code + AND b.vocabulary_id = a.vocabulary_id_2 + AND a.relationship_id IN ('Maps to', 'Maps to value') + AND b.invalid_reason IN ('D', 'U') + JOIN concept_stage c + ON a.concept_code_1 = c.concept_code + AND c.concept_class_id NOT IN ('ICD10 Chapter','ICD10 SubChapter') +AND a.invalid_reason IS NULL +UNION +-- 'non-standard mapping' +SELECT c.concept_code, + c.concept_name, + a.relationship_id AS current_relationship, + b.concept_id AS current_id, + b.concept_code AS current_code, + b.concept_name AS current_name, + b.domain_id AS current_domain, + b.vocabulary_id AS current_vocabulary, + 'non-standard mapping' AS reason +FROM concept_relationship_stage a + JOIN concept b + ON a.concept_code_2 = b.concept_code + AND b.vocabulary_id = a.vocabulary_id_2 + AND a.relationship_id IN ('Maps to', 'Maps to value') + AND b.invalid_reason IS NULL + AND b.standard_concept IS NULL + JOIN concept_stage c + ON a.concept_code_1 = c.concept_code + AND c.concept_class_id NOT IN ('ICD10 Chapter','ICD10 SubChapter') +UNION +-- 'without mapping' +SELECT a.concept_code, + a.concept_name, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + 'without mapping' AS reason +FROM concept_stage a + LEFT JOIN 
concept_relationship_stage r + ON a.concept_code = concept_code_1 + AND r.relationship_id IN ('Maps to') + AND r.invalid_reason IS NULL + LEFT JOIN concept b + ON b.concept_code = concept_code_2 + AND b.vocabulary_id = vocabulary_id_2 +WHERE a.vocabulary_id = 'ICD10CN' +AND a.invalid_reason IS NULL +AND b.concept_id IS NULL +AND a.concept_class_id NOT IN ('ICD10 Chapter','ICD10 SubChapter')), +--brothers of depracted concepts: for cases when source concept has 1-to-many mapping and one of the target concepts is dead, we should see all other target concepts to create an accurate mapping +miss_map_brother AS ( SELECT + a.icd_code, + a.icd_name, + c.relationship_id, + b.concept_id AS current_id, + b.concept_code AS current_code, + b.concept_name AS current_name, + b.domain_id AS current_domain, + b.vocabulary_id AS current_vocabulary, + b.concept_id, + b.concept_code, + b.concept_name, + b.domain_id, + b.vocabulary_id, + 'brother of deprecated mapping' AS reason +FROM miss_map a +JOIN concept_relationship_stage c ON c.concept_code_1 = a.icd_code +JOIN concept b + ON c.concept_code_2 = b.concept_code + AND b.vocabulary_id = c.vocabulary_id_2 + AND c.relationship_id IN ('Maps to', 'Maps to value') + AND b.invalid_reason IS NULL), +-- concepts which mapping can be replaced through 'Maps to' relationship +t1 AS (SELECT d.concept_code AS icd_code, + d.concept_name AS icd_name, + d.domain_id AS icd_domain, + j.concept_id AS repl_by_id, + j.concept_code AS repl_by_code, + j.concept_name AS repl_by_name, + j.domain_id AS repl_by_domain, + j.vocabulary_id AS repl_by_vocabulary, + NULL + FROM concept_relationship_stage a + JOIN concept b + ON a.concept_code_2 = b.concept_code + AND b.vocabulary_id = a.vocabulary_id_2 + AND a.relationship_id = 'Maps to' + AND b.invalid_reason IN ('D', 'U') + JOIN concept_stage d + ON a.concept_code_1 = d.concept_code + JOIN concept_relationship r2 ON b.concept_id = r2.concept_id_1 + JOIN concept j + ON j.concept_id = r2.concept_id_2 + AND 
j.vocabulary_id = 'SNOMED'-- place for target vocabulary + AND j.standard_concept = 'S' + AND r2.relationship_id = 'Maps to' + WHERE a.concept_code_1 IN (SELECT icd_code FROM miss_map)), +-- concepts which mapping can be replaced through 'Concept replaces' relationship +t2 AS (SELECT d.concept_code AS icd_code, + d.concept_name AS icd_name, + d.domain_id AS icd_domain, + j.concept_id AS repl_by_id, + j.concept_code AS repl_by_code, + j.concept_name AS repl_by_name, + j.domain_id AS repl_by_domain, + j.vocabulary_id AS repl_by_vocabulary, + NULL + FROM concept_relationship_stage a + JOIN concept b + ON a.concept_code_2 = b.concept_code + AND b.vocabulary_id = a.vocabulary_id_2 + AND a.relationship_id = 'Maps to' + AND b.invalid_reason IN ('D', 'U') + JOIN concept_stage d + ON a.concept_code_1 = d.concept_code + JOIN concept_relationship r2 ON b.concept_id = r2.concept_id_1 + JOIN concept j + ON j.concept_id = r2.concept_id_2 + AND j.vocabulary_id = 'SNOMED' -- place for target vocabulary + AND j.standard_concept = 'S' + AND r2.relationship_id = 'Concept replaced by' + WHERE a.concept_code_1 IN (SELECT icd_code FROM miss_map)), +-- all concepts which can be remapped autimatically (however, should be reviewed) +t3 AS (SELECT * FROM t1 UNION SELECT * FROM t2), +-- look-up table with all concepts with deprecated mapping + automatically remapped +t4 AS (SELECT miss_map.icd_code, + miss_map.icd_name, + miss_map.current_relationship, + miss_map.current_id, + miss_map.current_code, + miss_map.current_name, + miss_map.current_domain, + miss_map.current_vocabulary, + t3.repl_by_id, + t3.repl_by_code, + t3.repl_by_name, + t3.repl_by_domain, + t3.repl_by_vocabulary, + miss_map.reason + FROM miss_map + LEFT JOIN t3 ON miss_map.icd_code = t3.icd_code + UNION + SELECT * FROM miss_map_brother), +-- improve_map - automatically detected mapping improvements. Look carefully! Target vocabulary could have the same names of concepts with different domain_ids. 
Also, ICD10 chapter means a lot and should be taken into account for choosing appropriate mapping +improve_map +AS +(SELECT DISTINCT +a.concept_code AS icd_code, + a.concept_name AS icd_name, + r.relationship_id AS current_relationship, + d.concept_id AS current_id, + d.concept_code AS current_code, + d.concept_name AS current_name, + d.domain_id AS current_domain, + d.vocabulary_id AS current_vocabulary, + c.concept_id AS repl_by_id, + code AS repl_by_code, + str AS repl_by_name, + c.domain_id AS repl_by_domain, + c.vocabulary_id AS repl_by_vocabulary, + 'improve_map' AS reason +FROM concept a +JOIN concept_relationship r ON r.concept_id_1 = a.concept_id AND a.vocabulary_id = 'ICD10CN' +JOIN concept d ON d.concept_id = r.concept_id_2 AND r.invalid_reason IS NULL AND d.standard_concept = 'S' AND r.relationship_id IN ('Maps to', 'Maps to value') + JOIN sources.mrconso + ON lower (a.concept_name) = lower (str) + AND sab = 'SNOMEDCT_US' + AND suppress = 'N' + AND tty = 'PT' + JOIN concept c + ON c.concept_code = code + AND c.vocabulary_id = 'SNOMED' + AND c.standard_concept = 'S' + AND c.concept_class_id IN ('Procedure', 'Context-dependent', 'Clinical Finding', 'Event', 'Social Context', 'Observable Entity')), +t5 as ( +SELECT * FROM improve_map +WHERE icd_code IN (SELECT icd_code + FROM improve_map + WHERE repl_by_code != current_code) +AND icd_code NOT IN (SELECT icd_code FROM improve_map WHERE icd_name ~ '\s+and\s+' GROUP BY icd_code HAVING COUNT(icd_code)=1) + ), +t6 AS ( +SELECT DISTINCT +a.concept_code AS icd_code, + a.concept_name AS icd_name, + r.relationship_id AS current_relationship, + d.concept_id AS current_id, + d.concept_code AS current_code, + d.concept_name AS current_name, + d.domain_id AS current_domain, + d.vocabulary_id AS current_vocabulary, + c.concept_id AS repl_by_id, + c.concept_code AS repl_by_code, + c.concept_name AS repl_by_name, + c.domain_id AS repl_by_domain, + c.vocabulary_id AS repl_by_vocabulary, + 'improve_map' AS reason +FROM 
concept a +JOIN concept_relationship r ON r.concept_id_1 = a.concept_id and a.vocabulary_id = 'ICD10CN' +JOIN concept d ON d.concept_id = r.concept_id_2 AND r.invalid_reason IS NULL AND d.standard_concept = 'S' AND r.relationship_id IN ('Maps to', 'Maps to value') + JOIN concept_synonym cs ON lower (a.concept_name) = lower (cs.concept_synonym_name) AND a.vocabulary_id = 'ICD10CN' + JOIN concept c + ON cs.concept_id = c.concept_id + AND c.vocabulary_id = 'SNOMED' + AND c.standard_concept = 'S' + AND c.concept_class_id IN ('Procedure', 'Context-dependent', 'Clinical Finding', 'Event', 'Social Context', 'Observable Entity') +AND c.concept_id NOT IN (SELECT descendant_concept_id FROM devv5.concept_ancestor WHERE ancestor_concept_id = 40485423 )), -- concept Unilateral clinical finding has weak hierarchy +p_map AS ( + SELECT * FROM t5 + UNION +--exclude the cases 1 to 1 mapping with the current_id = repl_by_id, if there's multiple mapping and current_id = repl_by_id, the additional mapping serves as a hiearchy connector, so these are included into the comparison + SELECT * FROM t6 WHERE icd_code NOT IN (SELECT icd_code FROM ( +SELECT *, COUNT(1) over (partition BY icd_code) AS cnt FROM t6) a WHERE a.cnt =1 AND current_id = repl_by_id)) +SELECT * FROM p_map +UNION +SELECT * FROM t4 +ORDER BY icd_code; diff --git a/ICD10CN/manual_work/readme.md b/ICD10CN/manual_work/readme.md new file mode 100644 index 000000000..def467c4e --- /dev/null +++ b/ICD10CN/manual_work/readme.md @@ -0,0 +1,69 @@ +### STEP 6 of the refresh: work with manual staging tables (skip this step if implementing on the Pallas vocabulary server) +6.1.Extract the [respective csv file](https://drive.google.com/file/d/1iCmdHud7Y296SpeOan0vNNJEsg4qBsin/view?usp=sharing) into the concept_manual table. 
The file was generated using the query: +```sql +SELECT concept_name, + domain_id, + vocabulary_id, + concept_class_id, + standard_concept, + concept_code, + valid_start_date, + valid_end_date, + invalid_reason +FROM concept_manual +ORDER BY vocabulary_id, concept_code, invalid_reason, valid_start_date, valid_end_date, concept_name; +``` +6.2.Extract the [respective csv file](https://drive.google.com/file/d/1C9qVJwR369y9Jk02iS-qK45Gn5iGGRJr/view?usp=sharing) into the concept_synonym_manual table. The file was generated using the query: +```sql +SELECT synonym_name, + synonym_concept_code, + synonym_vocabulary_id, + language_concept_id +FROM concept_synonym_manual +ORDER BY synonym_vocabulary_id, synonym_concept_code, language_concept_id, synonym_name; +``` +6.3.Extract the [respective csv file](https://drive.google.com/file/d/1uUrkcknqIogs4Os0Hqu51WjSPo0C5mvH/view?usp=sharing) into the concept_relationship_manual table. The file was generated using the query: +```sql +SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + vocabulary_id_2, + relationship_id, + valid_start_date, + valid_end_date, + invalid_reason +FROM concept_relationship_manual +ORDER BY vocabulary_id_1, vocabulary_id_2, relationship_id, concept_code_1, concept_code_2, invalid_reason, valid_start_date, valid_end_date; +``` +##### csv format: +- delimiter: ',' +- encoding: 'UTF8' +- header: ON +- decimal symbol: '.' +- quote escape: with backslash \ +- quote always: FALSE +- NULL string: empty + +### STEP 8 of the refresh: solving problems which are defined during the first load_stage run +8.1. Run mapping_refresh.sql. Table refresh_lookup will be created. It contains the list with mappings to outdated, deprecated or updated Standard concepts, as well as automatically improved mapping. +8.2. Download this table and open it in a spreadsheet editor. 
Columns icd_ represent ICD10CN concepts with uncertain mapping, columns current_ refer to mapping which currently exists in concept_relationship_stage and columns repl_by_ suggest automatically created mapping; the reason a concept appears in this table is given in column reason (e.g., 'improve_map','without mapping'). +8.3. Perform manual review and mapping. Note, if you think that current mapping is better than suggested replacement, delete rows with these concepts from Excel table. Add column repl_by_relationship and put there necessary relationship_id following the recommendations described below. Then, delete current_ and reason columns. +8.4. Save table as refresh_lookup_done.csv and upload it into your schema using script create_manual_table.sql +8.5. Run manual_mapping_qa.sql to check whether refresh mapping meets the ICD10CM logic +8.6. If everything is OK, deprecate old mappings for the ICD10CN codes of interest and add fresh mappings to the concept_relationship_manual using crm_changes.sql script + +### Recommendations for relationship_ids + * **"Maps to"** is used for 1-to-1 FULL equivalent mapping only + * **"Maps to" + "Maps to value"** is used for Observations and Measurements with results + * **"Is a"** is a temporary relationship used for this check only and applicable for 1-to-1 PARTIAL equivalent AND 1-to-many mappings. +Preserve a manual table with 'Is a' relationships, but change 'Is a' to 'Maps to' during the insertion into the concept_relationship_manual (e.g. using CASE WHEN). 
+ +#### Required fields in a manual table +- icd_code VARCHAR, +- icd_name VARCHAR, +- repl_by_relationship VARCHAR, +- repl_by_id INT, +- repl_by_code VARCHAR, +- repl_by_name VARCHAR, +- repl_by_domain VARCHAR, +- repl_by_vocabulary VARCHAR diff --git a/ICD9CM/manual_work/create_manual_table.sql b/ICD9CM/manual_work/create_manual_table.sql new file mode 100644 index 000000000..f76c5b520 --- /dev/null +++ b/ICD9CM/manual_work/create_manual_table.sql @@ -0,0 +1,31 @@ +/************************************************************************** +* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts +* Date: 2021 +**************************************************************************/ +DROP TABLE IF EXISTS refresh_lookup_done; +-- TRUNCATE removed: it fails on the table just dropped above; CREATE TABLE below already yields an empty table +CREATE TABLE refresh_lookup_done ( +icd_code VARCHAR, +icd_name VARCHAR, +repl_by_relationship VARCHAR, +repl_by_id INT, +repl_by_code VARCHAR, +repl_by_name VARCHAR, +repl_by_domain VARCHAR, +repl_by_vocabulary VARCHAR); + +SELECT*FROM refresh_lookup_done; \ No newline at end of file From 150175595e227aaa66a99636a8a7f7a28d53a1d4 Mon Sep 17 00:00:00 2001 From: Irina Date: Tue, 9 Aug 2022 18:52:38 +0300 Subject: [PATCH 02/21] ICD9CM crm changes --- ICD9CM/manual_work/crm_changes.sql | 140 +++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 ICD9CM/manual_work/crm_changes.sql diff --git a/ICD9CM/manual_work/crm_changes.sql b/ICD9CM/manual_work/crm_changes.sql new file mode 100644 index 000000000..6496d747b --- /dev/null +++ b/ICD9CM/manual_work/crm_changes.sql @@ -0,0 +1,140 @@ +/************************************************************************** +* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts +* Date: 2021 +**************************************************************************/ +-- create current date backup of concept_relationship_manual table +DO +$body$ + DECLARE + update text; + BEGIN + SELECT TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD') + INTO update; + EXECUTE format('create table %I as select * from concept_relationship_manual', + 'concept_relationship_manual_backup_' || update); + + END +$body$; + +--restore concept_relationship_manual table (run it only if something went wrong) +/*TRUNCATE TABLE dev_icd9cm.concept_relationship_manual; +INSERT INTO dev_icd9cm.concept_relationship_manual +SELECT * FROM dev_icd9cm.concept_relationship_manual_backup_2022_04_26;*/ + +DO +$body$ + DECLARE + update text; + BEGIN + SELECT TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD') + INTO update; + EXECUTE FORMAT('create table %I as select * from concept_manual', + 'concept_manual_backup_' || update); + + END +$body$; + +--restore concept_manual table (run it only if something went wrong) +/*TRUNCATE TABLE dev_icd9cm.concept_manual; +INSERT INTO dev_icd9cm.concept_manual +SELECT * FROM dev_icd9cm.concept_manual_backup_2022_06_01;*/ + + +-- deprecate previous inaccurate mapping +UPDATE concept_relationship_manual crm +SET invalid_reason = 'D', + valid_end_date = current_date + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order to preserve the original deprecation date + + AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes presented in the manual file of the current vocabulary refresh + + AND NOT EXISTS (SELECT 1 --don't deprecate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary 
= crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + +-- activate mapping, that became valid again +UPDATE concept_relationship_manual crm +SET invalid_reason = null, + valid_end_date = to_date('20991231','yyyymmdd') + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason = 'D' -- activate only deprecated mappings + + AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes presented in the manual file of the current vocabulary refresh + + AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + + +-- insert new mapping +with mapping AS -- select all new codes with their mappings from manual file + ( + SELECT DISTINCT icd_code AS concept_code_1, + repl_by_code AS concept_code_2, + 'ICD9CM' AS vocabulary_id_1, -- set current vocabulary name as vocabulary_id_1 + repl_by_vocabulary AS vocabulary_id_2, + repl_by_relationship AS relationship_id, + current_date AS valid_start_date, -- set the date of the refresh as valid_start_date + to_date('20991231','yyyymmdd') AS valid_end_date, + NULL AS invalid_reason -- make all new mappings valid + FROM refresh_lookup_done + WHERE repl_by_id != 0 -- select only codes with mapping to standard concepts + ) +-- insert new mappings into concept_relationship_manual table +INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabulary_id_1, vocabulary_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason) +( + SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + 
vocabulary_id_2, + relationship_id, + valid_start_date, + valid_end_date, + invalid_reason + FROM mapping m + -- don't insert codes with mapping if the same exists in the current manual file + WHERE (concept_code_1, --the same source_code is mapped + concept_code_2, --to the same concept_code + vocabulary_id_1, + vocabulary_id_2, --of the same vocabulary + relationship_id, --with the same relationship + invalid_reason) + NOT IN (SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + vocabulary_id_2, + relationship_id, + invalid_reason FROM concept_relationship_manual + ) + ) +; + + + From d93e30fc8aeb664eccc74e4a7fcdf839ec6c5c0d Mon Sep 17 00:00:00 2001 From: Irina Date: Thu, 11 Aug 2022 01:56:09 +0300 Subject: [PATCH 03/21] KCD7, CIM10, ICD10CN create_manual table update --- CIM10/manual_work/create_manual_table.sql | 34 ++++++ CIM10/manual_work/crm_changes.sql | 120 ++++++++++++++++++++++ KCD7/manual_work/create_manual_table.sql | 34 ++++++ KCD7/manual_work/crm_changes.sql | 120 ++++++++++++++++++++++ 4 files changed, 308 insertions(+) create mode 100644 CIM10/manual_work/create_manual_table.sql create mode 100644 CIM10/manual_work/crm_changes.sql create mode 100644 KCD7/manual_work/create_manual_table.sql create mode 100644 KCD7/manual_work/crm_changes.sql diff --git a/CIM10/manual_work/create_manual_table.sql b/CIM10/manual_work/create_manual_table.sql new file mode 100644 index 000000000..4eb812729 --- /dev/null +++ b/CIM10/manual_work/create_manual_table.sql @@ -0,0 +1,34 @@ + +/************************************************************************** +* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts +* Date: 2021 +**************************************************************************/ + +DROP TABLE IF EXISTS refresh_lookup_done; +TRUNCATE TABLE refresh_lookup_done; +CREATE TABLE refresh_lookup_done ( +id serial primary key , +icd_code VARCHAR, +icd_name VARCHAR, +repl_by_relationship VARCHAR, +repl_by_id INT, +repl_by_code VARCHAR, +repl_by_name VARCHAR, +repl_by_domain VARCHAR, +repl_by_vocabulary VARCHAR); + +SELECT*FROM refresh_lookup_done; diff --git a/CIM10/manual_work/crm_changes.sql b/CIM10/manual_work/crm_changes.sql new file mode 100644 index 000000000..84613c2d3 --- /dev/null +++ b/CIM10/manual_work/crm_changes.sql @@ -0,0 +1,120 @@ +/************************************************************************** +* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts +* Date: 2021 +**************************************************************************/ +-- create current date backup of concept_relationship_manual table +DO +$body$ + DECLARE + update text; + BEGIN + SELECT TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD') + INTO update; + EXECUTE format('create table %I as select * from concept_relationship_manual', + 'concept_relationship_manual_backup_' || update); + + END +$body$; + +TRUNCATE TABLE dev_cim10.concept_relationship_manual; +INSERT INTO dev_cim10.concept_relationship_manual +SELECT*FROM dev_cim10.concept_relationship_manual_backup_2022_04_25; + +-- deprecate previous inaccurate mapping +UPDATE concept_relationship_manual crm +SET invalid_reason = 'D', + valid_end_date = current_date + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order to preserve the original deprecation date + + AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes presented in the manual file of the current vocabulary refresh + + AND NOT EXISTS (SELECT 1 --don't deprecate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + +-- activate mapping, that became valid again +UPDATE concept_relationship_manual crm +SET invalid_reason = null, + valid_end_date = to_date('20991231','yyyymmdd') + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason = 'D' -- activate only deprecated mappings + + AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes 
presented in the manual file of the current vocabulary refresh + + AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + +-- insert new mapping +with mapping AS -- select all new codes with their mappings from manual file + ( + SELECT DISTINCT icd_code AS concept_code_1, + repl_by_code AS concept_code_2, + 'cim10' AS vocabulary_id_1, -- set current vocabulary name as vocabulary_id_1 + repl_by_vocabulary AS vocabulary_id_2, + repl_by_relationship AS relationship_id, + current_date AS valid_start_date, -- set the date of the refresh as valid_start_date + to_date('20991231','yyyymmdd') AS valid_end_date, + NULL AS invalid_reason -- make all new mappings valid + FROM refresh_lookup_done + WHERE repl_by_id != 0 -- select only codes with mapping to standard concepts + ) +-- insert new mappings into concept_relationship_manual table +INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabulary_id_1, vocabulary_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason) +( + SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + vocabulary_id_2, + relationship_id, + valid_start_date, + valid_end_date, + invalid_reason + FROM mapping m + -- don't insert codes with mapping if the same exists in the current manual file + WHERE (concept_code_1, --the same source_code is mapped + concept_code_2, --to the same concept_code + vocabulary_id_1, + vocabulary_id_2, --of the same vocabulary + relationship_id) --with the same relationship + NOT IN (SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + vocabulary_id_2, + relationship_id FROM concept_relationship_manual) + 
) +; + +SELECT * FROM concept_relationship_manual; + + +SELECT * FROM concept_relationship_manual; + +SELECT * FROM concept_relationship_manual; \ No newline at end of file diff --git a/KCD7/manual_work/create_manual_table.sql b/KCD7/manual_work/create_manual_table.sql new file mode 100644 index 000000000..4eb812729 --- /dev/null +++ b/KCD7/manual_work/create_manual_table.sql @@ -0,0 +1,34 @@ + +/************************************************************************** +* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts +* Date: 2021 +**************************************************************************/ + +DROP TABLE IF EXISTS refresh_lookup_done; +TRUNCATE TABLE refresh_lookup_done; +CREATE TABLE refresh_lookup_done ( +id serial primary key , +icd_code VARCHAR, +icd_name VARCHAR, +repl_by_relationship VARCHAR, +repl_by_id INT, +repl_by_code VARCHAR, +repl_by_name VARCHAR, +repl_by_domain VARCHAR, +repl_by_vocabulary VARCHAR); + +SELECT*FROM refresh_lookup_done; diff --git a/KCD7/manual_work/crm_changes.sql b/KCD7/manual_work/crm_changes.sql new file mode 100644 index 000000000..0d154075b --- /dev/null +++ b/KCD7/manual_work/crm_changes.sql @@ -0,0 +1,120 @@ +/************************************************************************** +* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts +* Date: 2021 +**************************************************************************/ +-- create current date backup of concept_relationship_manual table +DO +$body$ + DECLARE + update text; + BEGIN + SELECT TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD') + INTO update; + EXECUTE format('create table %I as select * from concept_relationship_manual', + 'concept_relationship_manual_backup_' || update); + + END +$body$; + +TRUNCATE TABLE dev_cim10.concept_relationship_manual; +INSERT INTO dev_cim10.concept_relationship_manual +SELECT*FROM dev_cim10.concept_relationship_manual_backup_2022_05_18; + +-- deprecate previous inaccurate mapping +UPDATE concept_relationship_manual crm +SET invalid_reason = 'D', + valid_end_date = current_date + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order to preserve the original deprecation date + + AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes presented in the manual file of the current vocabulary refresh + + AND NOT EXISTS (SELECT 1 --don't deprecate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + +-- activate mapping, that became valid again +UPDATE concept_relationship_manual crm +SET invalid_reason = null, + valid_end_date = to_date('20991231','yyyymmdd') + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason = 'D' -- activate only deprecated mappings + + AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes 
presented in the manual file of the current vocabulary refresh + + AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + +-- insert new mapping +with mapping AS -- select all new codes with their mappings from manual file + ( + SELECT DISTINCT icd_code AS concept_code_1, + repl_by_code AS concept_code_2, + 'CIM10' AS vocabulary_id_1, -- set current vocabulary name as vocabulary_id_1 + repl_by_vocabulary AS vocabulary_id_2, + repl_by_relationship AS relationship_id, + current_date AS valid_start_date, -- set the date of the refresh as valid_start_date + to_date('20991231','yyyymmdd') AS valid_end_date, + NULL AS invalid_reason -- make all new mappings valid + FROM refresh_lookup_done + WHERE repl_by_id != 0 -- select only codes with mapping to standard concepts + ) +-- insert new mappings into concept_relationship_manual table +INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabulary_id_1, vocabulary_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason) +( + SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + vocabulary_id_2, + relationship_id, + valid_start_date, + valid_end_date, + invalid_reason + FROM mapping m + -- don't insert codes with mapping if the same exists in the current manual file + WHERE (concept_code_1, --the same source_code is mapped + concept_code_2, --to the same concept_code + vocabulary_id_1, + vocabulary_id_2, --of the same vocabulary + relationship_id) --with the same relationship + NOT IN (SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + vocabulary_id_2, + relationship_id FROM concept_relationship_manual) + 
) +; + +SELECT * FROM concept_relationship_manual; + + +SELECT * FROM concept_relationship_manual; + +SELECT * FROM concept_relationship_manual; \ No newline at end of file From 2ffe85887df10ef4ffd60eff3fca36ad436f2375 Mon Sep 17 00:00:00 2001 From: Irina Date: Thu, 11 Aug 2022 02:40:34 +0300 Subject: [PATCH 04/21] KCD7, CIM10, ICD10CN create_manual table update --- CIM10/manual_work/crm_changes.sql | 9 ++------- ICD10CN/manual_work/create_manual_table.sql | 1 + ICD10CN/manual_work/crm_changes.sql | 19 +++++++++++++++++++ KCD7/manual_work/crm_changes.sql | 13 ++++--------- 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/CIM10/manual_work/crm_changes.sql b/CIM10/manual_work/crm_changes.sql index 84613c2d3..ff83bd6eb 100644 --- a/CIM10/manual_work/crm_changes.sql +++ b/CIM10/manual_work/crm_changes.sql @@ -32,7 +32,7 @@ $body$; TRUNCATE TABLE dev_cim10.concept_relationship_manual; INSERT INTO dev_cim10.concept_relationship_manual -SELECT*FROM dev_cim10.concept_relationship_manual_backup_2022_04_25; +SELECT*FROM dev_cim10.concept_relationship_manual_backup_2022_05_18; -- deprecate previous inaccurate mapping UPDATE concept_relationship_manual crm @@ -77,7 +77,7 @@ with mapping AS -- select all new codes with their mappings from manual file ( SELECT DISTINCT icd_code AS concept_code_1, repl_by_code AS concept_code_2, - 'cim10' AS vocabulary_id_1, -- set current vocabulary name as vocabulary_id_1 + 'CIM10' AS vocabulary_id_1, -- set current vocabulary name as vocabulary_id_1 repl_by_vocabulary AS vocabulary_id_2, repl_by_relationship AS relationship_id, current_date AS valid_start_date, -- set the date of the refresh as valid_start_date @@ -113,8 +113,3 @@ INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabula ; SELECT * FROM concept_relationship_manual; - - -SELECT * FROM concept_relationship_manual; - -SELECT * FROM concept_relationship_manual; \ No newline at end of file diff --git 
a/ICD10CN/manual_work/create_manual_table.sql b/ICD10CN/manual_work/create_manual_table.sql index c8a409d0a..7dd262215 100644 --- a/ICD10CN/manual_work/create_manual_table.sql +++ b/ICD10CN/manual_work/create_manual_table.sql @@ -19,6 +19,7 @@ DROP TABLE IF EXISTS refresh_lookup_done; TRUNCATE TABLE refresh_lookup_done; CREATE TABLE refresh_lookup_done ( +id serial primary key, icd_code VARCHAR, icd_name VARCHAR, repl_by_relationship VARCHAR, diff --git a/ICD10CN/manual_work/crm_changes.sql b/ICD10CN/manual_work/crm_changes.sql index 86477b353..e64cb3f66 100644 --- a/ICD10CN/manual_work/crm_changes.sql +++ b/ICD10CN/manual_work/crm_changes.sql @@ -54,6 +54,25 @@ WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order ) ; +-- activate mapping, that became valid again +UPDATE concept_relationship_manual crm +SET invalid_reason = null, + valid_end_date = to_date('20991231','yyyymmdd') + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason = 'D' -- activate only deprecated mappings + + AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes presented in the manual file of the current vocabulary refresh + + AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + -- insert new mapping with mapping AS -- select all new codes with their mappings from manual file ( diff --git a/KCD7/manual_work/crm_changes.sql b/KCD7/manual_work/crm_changes.sql index 0d154075b..4c40079b0 100644 --- a/KCD7/manual_work/crm_changes.sql +++ b/KCD7/manual_work/crm_changes.sql @@ -30,9 +30,9 @@ $body$ END $body$; -TRUNCATE TABLE 
dev_cim10.concept_relationship_manual; -INSERT INTO dev_cim10.concept_relationship_manual -SELECT*FROM dev_cim10.concept_relationship_manual_backup_2022_05_18; +TRUNCATE TABLE dev_kcd7.concept_relationship_manual; +INSERT INTO dev_kcd7.concept_relationship_manual +SELECT*FROM dev_kcd7.concept_relationship_manual_backup_2022_; -- deprecate previous inaccurate mapping UPDATE concept_relationship_manual crm @@ -77,7 +77,7 @@ with mapping AS -- select all new codes with their mappings from manual file ( SELECT DISTINCT icd_code AS concept_code_1, repl_by_code AS concept_code_2, - 'CIM10' AS vocabulary_id_1, -- set current vocabulary name as vocabulary_id_1 + 'KCD7' AS vocabulary_id_1, -- set current vocabulary name as vocabulary_id_1 repl_by_vocabulary AS vocabulary_id_2, repl_by_relationship AS relationship_id, current_date AS valid_start_date, -- set the date of the refresh as valid_start_date @@ -113,8 +113,3 @@ INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabula ; SELECT * FROM concept_relationship_manual; - - -SELECT * FROM concept_relationship_manual; - -SELECT * FROM concept_relationship_manual; \ No newline at end of file From b716e328c0fdc34dd763db8cb1481810568a4eb0 Mon Sep 17 00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Thu, 11 Aug 2022 21:17:36 +0300 Subject: [PATCH 05/21] Added ID for refresh_lookup_done --- ICD10CM/manual_work/create_manual_table.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/ICD10CM/manual_work/create_manual_table.sql b/ICD10CM/manual_work/create_manual_table.sql index 682582424..35ed4284f 100644 --- a/ICD10CM/manual_work/create_manual_table.sql +++ b/ICD10CM/manual_work/create_manual_table.sql @@ -19,6 +19,7 @@ DROP TABLE IF EXISTS refresh_lookup_done; TRUNCATE TABLE refresh_lookup_done; CREATE TABLE refresh_lookup_done ( +id serial primary key, icd_code VARCHAR, icd_name VARCHAR, repl_by_relationship VARCHAR, From 86a00b9e99520a184279de83846e0071e601bd72 Mon Sep 17 00:00:00 2001 From:
"Vlad.korsik@odysseusinc.com" Date: Fri, 12 Aug 2022 17:02:41 +0300 Subject: [PATCH 06/21] CRM changes are assigned to ICD10-like vocabs and Read to incorporate an Update Structure to set valid status of Maps to relationship preexisting in CRM and refresh_lookup_done tables. Load stage of ICD10CM is affected by Update statement to set the Domain signature for U% ICD10CM codes, as the domains for them were not assigned (possible reason - no Valid Maps to in CRS). --- ICD10/manual_work/crm_changes.sql | 18 ++++++++++++++++++ ICD10CM/load_stage.sql | 6 ++++++ ICD10CM/manual_work/crm_changes.sql | 20 +++++++++++++++++++- ICD10CN/manual_work/crm_changes.sql | 5 ++--- ICD10GM/manual_work/crm_changes.sql | 18 ++++++++++++++++++ ICD9CM/manual_work/crm_changes.sql | 2 -- Read/manual_work/crm_changes.sql | 18 ++++++++++++++++++ 7 files changed, 81 insertions(+), 6 deletions(-) diff --git a/ICD10/manual_work/crm_changes.sql b/ICD10/manual_work/crm_changes.sql index d3c4b2091..07d2b96ed 100644 --- a/ICD10/manual_work/crm_changes.sql +++ b/ICD10/manual_work/crm_changes.sql @@ -53,6 +53,24 @@ WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order ) ; +-- activate mapping, that became valid again +UPDATE concept_relationship_manual crm +SET invalid_reason = null, + valid_end_date = to_date('20991231','yyyymmdd'), + valid_start_date =current_date + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason = 'D' -- activate only deprecated mappings + + AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + -- insert new mapping with mapping AS -- select all 
new codes with their mappings from manual file ( diff --git a/ICD10CM/load_stage.sql b/ICD10CM/load_stage.sql index 410f251e7..5529f5e3f 100644 --- a/ICD10CM/load_stage.sql +++ b/ICD10CM/load_stage.sql @@ -220,6 +220,12 @@ FROM ( WHERE i.concept_code = cs.concept_code AND cs.vocabulary_id = 'ICD10CM'; +--TODO: check why the actual U* code limitation is not used. +--Only unassigned Emergency use codes (starting with U) don't have mappings to SNOMED, put Observation as closest meaning to Unknown domain +UPDATE concept_stage +SET domain_id = 'Observation' +WHERE domain_id IS NULL; + --13. Check for NULL in domain_id ALTER TABLE concept_stage ALTER COLUMN domain_id SET NOT NULL; ALTER TABLE concept_stage ALTER COLUMN domain_id DROP NOT NULL; diff --git a/ICD10CM/manual_work/crm_changes.sql b/ICD10CM/manual_work/crm_changes.sql index 88b1fdfd9..e69f54b28 100644 --- a/ICD10CM/manual_work/crm_changes.sql +++ b/ICD10CM/manual_work/crm_changes.sql @@ -33,7 +33,7 @@ $body$; --restore concept_relationship_manual table (run it only if something went wrong) TRUNCATE TABLE dev_icd10cm.concept_relationship_manual; INSERT INTO dev_icd10cm.concept_relationship_manual -SELECT * FROM dev_icd10cm.concept_relationship_manual_backup_2022_04_21; +SELECT * FROM dev_icd10cm.concept_relationship_manual_backup_2022_04_21 where concept_code_1='U07'; DO $body$ @@ -73,6 +73,24 @@ WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order ) ; +-- activate mapping, that became valid again +UPDATE concept_relationship_manual crm +SET invalid_reason = null, + valid_end_date = to_date('20991231','yyyymmdd'), + valid_start_date =current_date + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason = 'D' -- activate only deprecated mappings + + AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND 
rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + -- insert new mapping with mapping AS -- select all new codes with their mappings from manual file ( diff --git a/ICD10CN/manual_work/crm_changes.sql b/ICD10CN/manual_work/crm_changes.sql index e64cb3f66..3c1cdb964 100644 --- a/ICD10CN/manual_work/crm_changes.sql +++ b/ICD10CN/manual_work/crm_changes.sql @@ -57,13 +57,12 @@ WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order -- activate mapping, that became valid again UPDATE concept_relationship_manual crm SET invalid_reason = null, - valid_end_date = to_date('20991231','yyyymmdd') + valid_end_date = to_date('20991231','yyyymmdd'), + valid_start_date =current_date --SELECT * FROM concept_relationship_manual crm --use this SELECT for QA WHERE invalid_reason = 'D' -- activate only deprecated mappings - AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes presented in the manual file of the current vocabulary refresh - AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file FROM refresh_lookup_done rl WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped diff --git a/ICD10GM/manual_work/crm_changes.sql b/ICD10GM/manual_work/crm_changes.sql index cb6dc8721..fad7e5fbc 100644 --- a/ICD10GM/manual_work/crm_changes.sql +++ b/ICD10GM/manual_work/crm_changes.sql @@ -48,6 +48,24 @@ WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order ) ; +-- activate mapping, that became valid again +UPDATE concept_relationship_manual crm +SET invalid_reason = null, + valid_end_date = to_date('20991231','yyyymmdd'), + valid_start_date = current_date + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason = 'D' -- activate only 
deprecated mappings + + AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + -- insert new mapping with mapping AS -- select all new codes with their mappings from manual file ( diff --git a/ICD9CM/manual_work/crm_changes.sql b/ICD9CM/manual_work/crm_changes.sql index 6496d747b..6c0918239 100644 --- a/ICD9CM/manual_work/crm_changes.sql +++ b/ICD9CM/manual_work/crm_changes.sql @@ -81,8 +81,6 @@ SET invalid_reason = null, --SELECT * FROM concept_relationship_manual crm --use this SELECT for QA WHERE invalid_reason = 'D' -- activate only deprecated mappings - AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes presented in the manual file of the current vocabulary refresh - AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file FROM refresh_lookup_done rl WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped diff --git a/Read/manual_work/crm_changes.sql b/Read/manual_work/crm_changes.sql index 38a145352..84e4369e9 100644 --- a/Read/manual_work/crm_changes.sql +++ b/Read/manual_work/crm_changes.sql @@ -73,6 +73,24 @@ WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order ) ; +-- activate mapping, that became valid again +UPDATE concept_relationship_manual crm +SET invalid_reason = null, + valid_end_date = to_date('20991231','yyyymmdd'), + valid_start_date =current_date + +--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA +WHERE invalid_reason = 'D' -- activate only deprecated mappings + + AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current 
manual file + FROM refresh_lookup_done rl + WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped + AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code + AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary + AND rl.repl_by_relationship = crm.relationship_id --with the same relationship + ) +; + -- insert new mapping with mapping AS -- select all new codes with their mappings from manual file ( From 06f42cdd4a8af7f8c07447b722098ea256548319 Mon Sep 17 00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Fri, 12 Aug 2022 19:29:23 +0300 Subject: [PATCH 07/21] CRM changes are assigned to ICD10-like vocabs and Read to incorporate an Update Structure to set valid status of Maps to relationship preexisting in CRM and refresh_lookup_done tables. Load stage of ICD10CM is affected by Update statement to set the Domain signature for U% ICD10CM codes, as the domains for them were not assigned (possible reason - no Valid Maps to in CRS). --resolved filtering for _backup table --- ICD10CM/manual_work/crm_changes.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ICD10CM/manual_work/crm_changes.sql b/ICD10CM/manual_work/crm_changes.sql index e69f54b28..3c6d741d8 100644 --- a/ICD10CM/manual_work/crm_changes.sql +++ b/ICD10CM/manual_work/crm_changes.sql @@ -33,7 +33,7 @@ $body$; --restore concept_relationship_manual table (run it only if something went wrong) TRUNCATE TABLE dev_icd10cm.concept_relationship_manual; INSERT INTO dev_icd10cm.concept_relationship_manual -SELECT * FROM dev_icd10cm.concept_relationship_manual_backup_2022_04_21 where concept_code_1='U07'; +SELECT * FROM dev_icd10cm.concept_relationship_manual_backup_2022_04_21; DO $body$ @@ -136,3 +136,5 @@ INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabula +SELECT * FROm concept_relationship_manual +where concept_code_1 IN () \ No newline at end of file From e3e478ccf3dbeb9a277baa50b73ff928b32e2c83 Mon Sep 17 00:00:00 
2001 From: "Vlad.korsik@odysseusinc.com" Date: Fri, 12 Aug 2022 19:29:45 +0300 Subject: [PATCH 08/21] CRM changes are assigned to ICD10-like vocabs and Read to incorporate an Update Structure to set valid status of Maps to relationship preexisting in CRM and refresh_lookup_done tables. Load stage of ICD10CM is affected by Update statement to set the Domain signature for U% ICD10CM codes, as the domains for them were not assigned (possible reason - no Valid Maps to in CRS). --resolved filtering for _backup table (ICD10CM) --- ICD10CM/manual_work/crm_changes.sql | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ICD10CM/manual_work/crm_changes.sql b/ICD10CM/manual_work/crm_changes.sql index 3c6d741d8..87999bca8 100644 --- a/ICD10CM/manual_work/crm_changes.sql +++ b/ICD10CM/manual_work/crm_changes.sql @@ -133,8 +133,3 @@ INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabula ) ) ; - - - -SELECT * FROm concept_relationship_manual -where concept_code_1 IN () \ No newline at end of file From e3d18e10d8111a5b8cebd238e802778a4a9a4e14 Mon Sep 17 00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Mon, 15 Aug 2022 21:05:02 +0300 Subject: [PATCH 09/21] --Script for detection of existing ICD10 mapping in refreshlookup_table --Script for detection of mapping discrepancies between ICD10-like vocabs (sequence dependent) --- ICD10CN/manual_work/inter_icd10_integrity.sql | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 ICD10CN/manual_work/inter_icd10_integrity.sql diff --git a/ICD10CN/manual_work/inter_icd10_integrity.sql b/ICD10CN/manual_work/inter_icd10_integrity.sql new file mode 100644 index 000000000..2403b9924 --- /dev/null +++ b/ICD10CN/manual_work/inter_icd10_integrity.sql @@ -0,0 +1,77 @@ +--Run the ICD10 +--Run and check ICD10CM +--1 Upload the refresh_lookup_done +--2 Run the script +--Compare number of rows in uploaded csv and script output +--Drop flagged rows from G-frive +--Manually asses Qualitu of Discr 
tagged mappings + + +--Detect codes with no mapping in ICD10 +with no_map_by_icd10 as (SELECT distinct r.*,cc.* +FROM refresh_lookup_done r +left JOIN devv5.concept c +on trim(lower(r.icd_code)) = trim(lower(c.concept_code)) +and c.vocabulary_id ='ICD10' + LEFT JOIN devv5.concept_relationship cr + on c.concept_id = cr.concept_id_1 + and cr.relationship_id in ( 'Maps to','Maps to value') + and cr.invalid_reason is null + left JOIN devv5.concept cc + on cr.concept_id_2 = cc.concept_id + and cr.invalid_reason is null + and cr.relationship_id in ( 'Maps to','Maps to value') +where cc.concept_id is null) +, +to_be_dropped as ( + SELECT distinct b.id, + b.icd_code, + b.icd_name, + b.repl_by_relationship, + b.repl_by_id, + b.repl_by_code, + b.repl_by_name, + b.repl_by_domain, + b.repl_by_vocabulary, + case when a.icd_code is null then 'drop' else null end as flag -- drop rows where mapping will come from ICD10 + from no_map_by_icd10 a + RIGHT JOIN refresh_lookup_done b + on a.id=b.id/*a.icd_code = b.icd_code + and a.repl_by_id = b.repl_by_id*/ +) +, +discr as ( +SELECT distinct aa.*, + case when aa.icd_code=r.icd_code and r.repl_by_id<>aa.repl_by_id and aa.repl_by_relationship=r.repl_by_relationship then 'discr' else null end as dicrep --detect rows where possible micctargeting occur (when code exists in several ICD10 like vocabs) +FROM to_be_dropped aa +LEFT JOIN dev_icd10cm.refresh_lookup_done r +ON aa.icd_code=r.icd_code + and aa.repl_by_relationship=r.repl_by_relationship +order by aa.id) +SELECT id, + icd_code, + repl_by_id, + flag, + string_agg(distinct dicrep,'X') as dicrep, + icd_name, + repl_by_relationship, + repl_by_id, + repl_by_code, + repl_by_name, + repl_by_domain, + repl_by_vocabulary + + +FROM discr +group by id, + icd_code, + icd_name, + repl_by_relationship, + repl_by_id, + repl_by_code, + repl_by_name, + repl_by_domain, + repl_by_vocabulary, + flag +order by id +; \ No newline at end of file From 833786c48814495f9995f78b17b61d176dae305b Mon Sep 17 
00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Tue, 16 Aug 2022 21:21:46 +0300 Subject: [PATCH 10/21] CIM10 is updated SOME names are affected --- CIM10/manual_work/crm_changes.sql | 20 ++++++ CIM10/manual_work/inter_icd10_integrity.sql | 77 +++++++++++++++++++++ CIM10/manual_work/readme.md | 70 +++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 CIM10/manual_work/inter_icd10_integrity.sql create mode 100644 CIM10/manual_work/readme.md diff --git a/CIM10/manual_work/crm_changes.sql b/CIM10/manual_work/crm_changes.sql index ff83bd6eb..f53a1fd7d 100644 --- a/CIM10/manual_work/crm_changes.sql +++ b/CIM10/manual_work/crm_changes.sql @@ -30,6 +30,26 @@ $body$ END $body$; +--create current date backup of concept_manual table +DO +$body$ + DECLARE + update text; + BEGIN + SELECT TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD') + INTO update; + EXECUTE format('create table %I as select * from concept_manual', + 'concept_manual_backup_' || update); + + END +$body$; +--Backup without new NON-translated codes - concept_manual_backup_2022_08_16 +--SELECT*FROM concept_manual_backup_2022_08_16; +SELECT distinct * +FROM concept_manual; + + + TRUNCATE TABLE dev_cim10.concept_relationship_manual; INSERT INTO dev_cim10.concept_relationship_manual SELECT*FROM dev_cim10.concept_relationship_manual_backup_2022_05_18; diff --git a/CIM10/manual_work/inter_icd10_integrity.sql b/CIM10/manual_work/inter_icd10_integrity.sql new file mode 100644 index 000000000..2403b9924 --- /dev/null +++ b/CIM10/manual_work/inter_icd10_integrity.sql @@ -0,0 +1,77 @@ +--Run the ICD10 +--Run and check ICD10CM +--1 Upload the refresh_lookup_done +--2 Run the script +--Compare number of rows in uploaded csv and script output +--Drop flagged rows from G-frive +--Manually asses Qualitu of Discr tagged mappings + + +--Detect codes with no mapping in ICD10 +with no_map_by_icd10 as (SELECT distinct r.*,cc.* +FROM refresh_lookup_done r +left JOIN devv5.concept c +on trim(lower(r.icd_code)) = 
trim(lower(c.concept_code)) +and c.vocabulary_id ='ICD10' + LEFT JOIN devv5.concept_relationship cr + on c.concept_id = cr.concept_id_1 + and cr.relationship_id in ( 'Maps to','Maps to value') + and cr.invalid_reason is null + left JOIN devv5.concept cc + on cr.concept_id_2 = cc.concept_id + and cr.invalid_reason is null + and cr.relationship_id in ( 'Maps to','Maps to value') +where cc.concept_id is null) +, +to_be_dropped as ( + SELECT distinct b.id, + b.icd_code, + b.icd_name, + b.repl_by_relationship, + b.repl_by_id, + b.repl_by_code, + b.repl_by_name, + b.repl_by_domain, + b.repl_by_vocabulary, + case when a.icd_code is null then 'drop' else null end as flag -- drop rows where mapping will come from ICD10 + from no_map_by_icd10 a + RIGHT JOIN refresh_lookup_done b + on a.id=b.id/*a.icd_code = b.icd_code + and a.repl_by_id = b.repl_by_id*/ +) +, +discr as ( +SELECT distinct aa.*, + case when aa.icd_code=r.icd_code and r.repl_by_id<>aa.repl_by_id and aa.repl_by_relationship=r.repl_by_relationship then 'discr' else null end as dicrep --detect rows where possible micctargeting occur (when code exists in several ICD10 like vocabs) +FROM to_be_dropped aa +LEFT JOIN dev_icd10cm.refresh_lookup_done r +ON aa.icd_code=r.icd_code + and aa.repl_by_relationship=r.repl_by_relationship +order by aa.id) +SELECT id, + icd_code, + repl_by_id, + flag, + string_agg(distinct dicrep,'X') as dicrep, + icd_name, + repl_by_relationship, + repl_by_id, + repl_by_code, + repl_by_name, + repl_by_domain, + repl_by_vocabulary + + +FROM discr +group by id, + icd_code, + icd_name, + repl_by_relationship, + repl_by_id, + repl_by_code, + repl_by_name, + repl_by_domain, + repl_by_vocabulary, + flag +order by id +; \ No newline at end of file diff --git a/CIM10/manual_work/readme.md b/CIM10/manual_work/readme.md new file mode 100644 index 000000000..6e9489194 --- /dev/null +++ b/CIM10/manual_work/readme.md @@ -0,0 +1,70 @@ +### STEP 6 of the refresh: work with manual staging tables (skip this 
step if implementing on the Pallas vocabulary server) +6.1. Extract the [respective csv file](https://drive.google.com/file/d/1mwDnNYb7fWHLm7lgNHwoUyMfWuJ2GfLb/view?usp=sharing) into the concept_manual table. The file was generated using the query: +```sql +SELECT concept_name, + domain_id, + vocabulary_id, + concept_class_id, + standard_concept, + concept_code, + valid_start_date, + valid_end_date, + invalid_reason +FROM concept_manual +ORDER BY vocabulary_id, concept_code, invalid_reason, valid_start_date, valid_end_date, concept_name; +-- ADD new codes with translation here +``` +6.2. Extract the [respective csv file](https://drive.google.com/file/d/1C9qVJwR369y9Jk02iS-qK45Gn5iGGRJr/view?usp=sharing) into the concept_synonym_manual table. The file was generated using the query: +```sql +SELECT synonym_name, + synonym_concept_code, + synonym_vocabulary_id, + language_concept_id +FROM concept_synonym_manual +ORDER BY synonym_vocabulary_id, synonym_concept_code, language_concept_id, synonym_name; +``` +6.3. Extract the [respective csv file](https://drive.google.com/drive/u/0/folders/1_nY1eDu0RfXmvaipJ1tEGZldnGuOwi2B) into the concept_relationship_manual table. The file was generated using the query: +```sql +SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + vocabulary_id_2, + relationship_id, + valid_start_date, + valid_end_date, + invalid_reason +FROM concept_relationship_manual +ORDER BY vocabulary_id_1, vocabulary_id_2, relationship_id, concept_code_1, concept_code_2, invalid_reason, valid_start_date, valid_end_date; +``` +##### csv format: +- delimiter: ',' +- encoding: 'UTF8' +- header: ON +- decimal symbol: '.' +- quote escape: with backslash \ +- quote always: FALSE +- NULL string: empty + +### STEP 8 of the refresh: solving problems which are identified during the first load_stage run +8.1. Run [mapping_refresh.sql]. Table refresh_lookup will be created. 
It contains the list of mappings to outdated, deprecated or updated Standard concepts, as well as automatically improved mappings. +8.2. Download this table and open it in a spreadsheet editor. Columns icd_ represent ICD10CM concepts with uncertain mapping, columns current_ refer to the mapping which currently exists in concept_relationship_stage, and columns repl_by_ suggest an automatically created mapping. The reason a concept appears in this table is given in the column reason (e.g., 'improve_map','without mapping'). +8.3. Perform manual review and mapping. Note: if you think that the current mapping is better than the suggested replacement, delete the rows with these concepts from the Excel table. Add a column repl_by_relationship and put the necessary relationship_id there, following the recommendations described below. Then delete the current_ and reason columns. +8.4. Save the table as refresh_lookup_done.csv and upload it into your schema using the script [create_manual_table.sql] +8.5. Run [manual_mapping_qa.sql] to check whether the refreshed mapping meets the ICD10CM logic +8.6. If everything is OK, deprecate old mappings for the ICD10CM codes of interest and add fresh mappings to the concept_relationship_manual using the [crm_changes.sql] script + +### Recommendations for relationship_ids + * **"Maps to"** is used for 1-to-1 FULL equivalent mapping only + * **"Maps to" + "Maps to value"** is used for Observations and Measurements with results + * **"Is a"** is a temporary relationship used for this check only and applicable for 1-to-1 PARTIAL equivalent AND 1-to-many mappings. +Preserve a manual table with 'Is a' relationships, but change 'Is a' to 'Maps to' during the insertion into the concept_relationship_manual (e.g. using CASE WHEN). 
+ +#### Required fields in a manual table +- icd_code VARCHAR, +- icd_name VARCHAR, +- repl_by_relationship VARCHAR, +- repl_by_id INT, +- repl_by_code VARCHAR, +- repl_by_name VARCHAR, +- repl_by_domain VARCHAR, +- repl_by_vocabulary VARCHAR From 82a737718dfcfa5c3dd01d6422d05cc81d984068 Mon Sep 17 00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Wed, 17 Aug 2022 16:59:12 +0300 Subject: [PATCH 11/21] KCD7: correct _backup table naming restored LOAD stage - substring function was added to permit LS run - Update for incorrect combined domains was added --- KCD7/load_stage.sql | 12 ++++++++++-- KCD7/manual_work/crm_changes.sql | 8 +++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/KCD7/load_stage.sql b/KCD7/load_stage.sql index 92aee8101..38a880241 100644 --- a/KCD7/load_stage.sql +++ b/KCD7/load_stage.sql @@ -216,8 +216,8 @@ FROM ( AND next_domain IS NOT NULL THEN CASE WHEN prev_domain < next_domain - THEN prev_domain || '/' || next_domain - ELSE next_domain || '/' || prev_domain + THEN substring((prev_domain || '/' || next_domain),1,20) -- essential due to length constraints + ELSE substring((next_domain || '/' || prev_domain),1,20) -- essential due to length constraints END -- prev and next domain are not same and not null both, with order by name ELSE coalesce(prev_domain, next_domain, 'Condition') END @@ -238,6 +238,14 @@ WHERE rd.concept_code = c.concept_code AND c.vocabulary_id = 'KCD7' AND c.domain_id IS NULL; + +--11.1 Detect and Update misclassified domains to Condition +UPDATE concept_stage c +SET domain_id = 'Condition' +where domain_id ='Condition/Observatio' +; + + --12. 
Manual name fix UPDATE concept_stage SET concept_name = 'Emergency use of U07.1 | Disease caused by severe acute respiratory syndrome coronavirus 2' diff --git a/KCD7/manual_work/crm_changes.sql b/KCD7/manual_work/crm_changes.sql index 4c40079b0..37630dd16 100644 --- a/KCD7/manual_work/crm_changes.sql +++ b/KCD7/manual_work/crm_changes.sql @@ -32,8 +32,9 @@ $body$; TRUNCATE TABLE dev_kcd7.concept_relationship_manual; INSERT INTO dev_kcd7.concept_relationship_manual -SELECT*FROM dev_kcd7.concept_relationship_manual_backup_2022_; - +SELECT * +FROM dev_kcd7.concept_relationship_manual_backup_2022_05_18 + ; -- deprecate previous inaccurate mapping UPDATE concept_relationship_manual crm SET invalid_reason = 'D', @@ -56,7 +57,8 @@ WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order -- activate mapping, that became valid again UPDATE concept_relationship_manual crm SET invalid_reason = null, - valid_end_date = to_date('20991231','yyyymmdd') + valid_end_date = to_date('20991231','yyyymmdd'), + valid_start_date =current_date --SELECT * FROM concept_relationship_manual crm --use this SELECT for QA WHERE invalid_reason = 'D' -- activate only deprecated mappings From e76d8917ba6b48e1d7bd9f5cabfc4cb9bb05d7ec Mon Sep 17 00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Wed, 17 Aug 2022 19:18:05 +0300 Subject: [PATCH 12/21] ICD10CN: Refactoring OF LS to build mappings for histologies correctly --- ICD10CN/load_stage.sql | 66 ++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/ICD10CN/load_stage.sql b/ICD10CN/load_stage.sql index 3ed52e211..8483772d0 100644 --- a/ICD10CN/load_stage.sql +++ b/ICD10CN/load_stage.sql @@ -316,7 +316,31 @@ JOIN sources.icd10cn_concept ic2 ON ic2.concept_id = r.concept_id_2 WHERE r.relationship_id = 'Is a' AND ic1.concept_code_clean <> ic2.concept_code_clean; ---9. Find parents among ICD10 and ICDO3 to inherit mapping relationships from +--9. 
Append resulting file from Medical Coder (in concept_relationship_stage format) to concept_relationship_stage +DO $_$ +BEGIN + PERFORM VOCABULARY_PACK.ProcessManualRelationships(); +END $_$; + +--10. Add mapping from deprecated to fresh concepts +DO $_$ +BEGIN + PERFORM VOCABULARY_PACK.AddFreshMAPSTO(); +END $_$; + +--11. Deprecate 'Maps to' mappings to deprecated and upgraded concepts +DO $_$ +BEGIN + PERFORM VOCABULARY_PACK.DeprecateWrongMAPSTO(); +END $_$; + +--12. Update Domains +--ICD10 Histologies are always Condition +UPDATE concept_stage +SET domain_id = 'Condition' +WHERE concept_class_id = 'ICD10 Histology'; + +--13 Find parents among ICD10 and ICDO3 to inherit mapping relationships from CREATE INDEX IF NOT EXISTS trgm_idx ON concept_stage USING GIN (concept_code devv5.gin_trgm_ops); --For LIKE patterns ANALYZE concept_stage; @@ -361,9 +385,9 @@ FROM ( AND c2.concept_code = c1.concept_code || '-NULL' --Commented since we allow fuzzy match uphill for this iteration -- where substring (c.concept_code from 6 for 1) = '0' --Exact match to ICDO is MXXXX0/X - + UNION ALL - + SELECT DISTINCT cs.concept_code, FIRST_VALUE(c.concept_id) OVER ( PARTITION BY cs.concept_code ORDER BY LENGTH(c.concept_code) DESC --Longest matching code for best results @@ -378,9 +402,6 @@ FROM ( 'ICD10 code', 'ICD10 Hierarchy' ) - --Exclude ICD10CN COVID-19 specific stuff that is mapped in CRM - AND cs.concept_code NOT LIKE 'U07.100%' - AND cs.concept_code <> 'Z03.800x001' ) i JOIN concept_relationship r ON r.concept_id_1 = i.concept_id AND r.invalid_reason IS NULL @@ -388,31 +409,14 @@ JOIN concept_relationship r ON r.concept_id_1 = i.concept_id 'Maps to', 'Maps to value' ) -JOIN concept c ON c.concept_id = r.concept_id_2; - ---10. Append resulting file from Medical Coder (in concept_relationship_stage format) to concept_relationship_stage -DO $_$ -BEGIN - PERFORM VOCABULARY_PACK.ProcessManualRelationships(); -END $_$; - ---11. 
Add mapping from deprecated to fresh concepts -DO $_$ -BEGIN - PERFORM VOCABULARY_PACK.AddFreshMAPSTO(); -END $_$; - ---12. Deprecate 'Maps to' mappings to deprecated and upgraded concepts -DO $_$ -BEGIN - PERFORM VOCABULARY_PACK.DeprecateWrongMAPSTO(); -END $_$; - ---13. Update Domains ---ICD10 Histologies are always Condition -UPDATE concept_stage -SET domain_id = 'Condition' -WHERE concept_class_id = 'ICD10 Histology'; +JOIN concept c ON c.concept_id = r.concept_id_2 + +where NOT EXISTS (SELECT 1 + from concept_relationship_stage crs + where crs.concept_code_1 = i.concept_code + and crs.invalid_reason is null + and crs.relationship_id in ('Maps to') + ); --From mapping target UPDATE concept_stage cs From 2ba36dcba986d489a8715dbcbcdc64dde0ff7155 Mon Sep 17 00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Thu, 18 Aug 2022 21:21:52 +0300 Subject: [PATCH 13/21] ICD10CN: Refactoring of LS to build mappings for histologies correctly KCD7: Refactoring of LS to build Maps to Correctly (TODO refreshmapsto may fully subsctitute --5) --- ICD10CN/load_stage.sql | 24 ++++++- KCD7/load_stage.sql | 3 +- KCD7/manual_work/inter_icd10_integrity.sql | 77 ++++++++++++++++++++++ 3 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 KCD7/manual_work/inter_icd10_integrity.sql diff --git a/ICD10CN/load_stage.sql b/ICD10CN/load_stage.sql index 8483772d0..a2a58a2fb 100644 --- a/ICD10CN/load_stage.sql +++ b/ICD10CN/load_stage.sql @@ -486,7 +486,27 @@ UPDATE concept_stage SET domain_id = 'Observation' WHERE domain_id = 'Undefined'; ---14. Add "subsumes" relationship between concepts where the concept_code is like of another +--14. Add mapping from deprecated to fresh concepts +DO $_$ +BEGIN + PERFORM VOCABULARY_PACK.AddFreshMAPSTO(); +END $_$; + +--14.1 BRAND NEW Add mapping from deprecated to fresh concepts (value level) +/*DO $_$ +BEGIN + PERFORM VOCABULARY_PACK.AddFreshMapsToValue(); +END $_$;*/ + +--15. 
Deprecate +-- 'Maps to' mappings to deprecated and upgraded concepts +DO $_$ +BEGIN + PERFORM VOCABULARY_PACK.DeprecateWrongMAPSTO(); +END $_$; + + +--16. Add "subsumes" relationship between concepts where the concept_code is like of another -- Although 'Is a' relations exist, it is done to differentiate between "true" source-provided hierarchy and convenient "jump" links we build now INSERT INTO concept_relationship_stage ( concept_code_1, @@ -603,7 +623,7 @@ JOIN concept_stage c2 ON LEFT(c2.concept_code, 3) BETWEEN c1.start_code AND r_int.relationship_id = 'Subsumes' ); ---15. Cleanup +--17. Cleanup DROP INDEX trgm_idx; DROP TABLE icd10cn_chapters, name_source, intervals; diff --git a/KCD7/load_stage.sql b/KCD7/load_stage.sql index 38a880241..4bafa758b 100644 --- a/KCD7/load_stage.sql +++ b/KCD7/load_stage.sql @@ -119,7 +119,8 @@ JOIN concept c ON c.concept_code = cs.concept_code JOIN concept_relationship cr ON cr.concept_id_1 = c.concept_id AND cr.invalid_reason IS NULL JOIN concept c2 ON c2.concept_id = cr.concept_id_2 - AND c2.vocabulary_id = 'SNOMED'; +and cr.relationship_id IN ('Maps to', 'Maps to value') +; --6. 
Add "Subsumes" relationship between concepts where the concept_code is like of another CREATE INDEX IF NOT EXISTS trgm_idx ON concept_stage USING GIN (concept_code devv5.gin_trgm_ops); --for LIKE patterns diff --git a/KCD7/manual_work/inter_icd10_integrity.sql b/KCD7/manual_work/inter_icd10_integrity.sql new file mode 100644 index 000000000..2403b9924 --- /dev/null +++ b/KCD7/manual_work/inter_icd10_integrity.sql @@ -0,0 +1,77 @@ +--Run the ICD10 +--Run and check ICD10CM +--1 Upload the refresh_lookup_done +--2 Run the script +--Compare number of rows in uploaded csv and script output +--Drop flagged rows from G-frive +--Manually asses Qualitu of Discr tagged mappings + + +--Detect codes with no mapping in ICD10 +with no_map_by_icd10 as (SELECT distinct r.*,cc.* +FROM refresh_lookup_done r +left JOIN devv5.concept c +on trim(lower(r.icd_code)) = trim(lower(c.concept_code)) +and c.vocabulary_id ='ICD10' + LEFT JOIN devv5.concept_relationship cr + on c.concept_id = cr.concept_id_1 + and cr.relationship_id in ( 'Maps to','Maps to value') + and cr.invalid_reason is null + left JOIN devv5.concept cc + on cr.concept_id_2 = cc.concept_id + and cr.invalid_reason is null + and cr.relationship_id in ( 'Maps to','Maps to value') +where cc.concept_id is null) +, +to_be_dropped as ( + SELECT distinct b.id, + b.icd_code, + b.icd_name, + b.repl_by_relationship, + b.repl_by_id, + b.repl_by_code, + b.repl_by_name, + b.repl_by_domain, + b.repl_by_vocabulary, + case when a.icd_code is null then 'drop' else null end as flag -- drop rows where mapping will come from ICD10 + from no_map_by_icd10 a + RIGHT JOIN refresh_lookup_done b + on a.id=b.id/*a.icd_code = b.icd_code + and a.repl_by_id = b.repl_by_id*/ +) +, +discr as ( +SELECT distinct aa.*, + case when aa.icd_code=r.icd_code and r.repl_by_id<>aa.repl_by_id and aa.repl_by_relationship=r.repl_by_relationship then 'discr' else null end as dicrep --detect rows where possible micctargeting occur (when code exists in several ICD10 like 
vocabs) +FROM to_be_dropped aa +LEFT JOIN dev_icd10cm.refresh_lookup_done r +ON aa.icd_code=r.icd_code + and aa.repl_by_relationship=r.repl_by_relationship +order by aa.id) +SELECT id, + icd_code, + repl_by_id, + flag, + string_agg(distinct dicrep,'X') as dicrep, + icd_name, + repl_by_relationship, + repl_by_id, + repl_by_code, + repl_by_name, + repl_by_domain, + repl_by_vocabulary + + +FROM discr +group by id, + icd_code, + icd_name, + repl_by_relationship, + repl_by_id, + repl_by_code, + repl_by_name, + repl_by_domain, + repl_by_vocabulary, + flag +order by id +; \ No newline at end of file From 1c2840800bb8c9badb0f0597eb91baaafca617da Mon Sep 17 00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Mon, 22 Aug 2022 21:19:17 +0300 Subject: [PATCH 14/21] ICD10CN: Refactoring of LS to build mappings for histologies correctly KCD7: Refactoring of LS to build Maps to Correctly (TODO refreshmapsto may fully subsctitute --5) ICD0GM new code added to _manual --- ICD10CN/load_stage.sql | 4 +- ICD10GM/load_stage.sql | 6 +- ICD10GM/manual_work/create_manual_table.sql | 7 +- ICD10GM/manual_work/crm_changes.sql | 4 + ICD10GM/manual_work/inter_icd10_integrity.sql | 77 +++++++++++++++++++ ICD10GM/manual_work/readme.md | 4 +- KCD7/load_stage.sql | 5 +- 7 files changed, 98 insertions(+), 9 deletions(-) create mode 100644 ICD10GM/manual_work/inter_icd10_integrity.sql diff --git a/ICD10CN/load_stage.sql b/ICD10CN/load_stage.sql index a2a58a2fb..0eb7cfd7a 100644 --- a/ICD10CN/load_stage.sql +++ b/ICD10CN/load_stage.sql @@ -493,10 +493,10 @@ BEGIN END $_$; --14.1 BRAND NEW Add mapping from deprecated to fresh concepts (value level) -/*DO $_$ +DO $_$ BEGIN PERFORM VOCABULARY_PACK.AddFreshMapsToValue(); -END $_$;*/ +END $_$; --15. 
Deprecate -- 'Maps to' mappings to deprecated and upgraded concepts diff --git a/ICD10GM/load_stage.sql b/ICD10GM/load_stage.sql index 2727436fb..6eb561706 100644 --- a/ICD10GM/load_stage.sql +++ b/ICD10GM/load_stage.sql @@ -62,7 +62,11 @@ LEFT JOIN concept c ON c.concept_code = g.concept_code AND c.vocabulary_id = 'ICD10' AND c.concept_class_id NOT LIKE '%Chapter%'; ---4. Append concept corrections -- COVID concepts added and English translation +-- 4.0 Per readme file Manual Cleaun-Up +DELETE FROM concept_manual +WHERE concept_code NOT IN (SELECT concept_code FROM sources.icd10gm); + +--4.1 Append concept corrections -- COVID concepts added and English translation DO $_$ BEGIN PERFORM VOCABULARY_PACK.ProcessManualConcepts(); diff --git a/ICD10GM/manual_work/create_manual_table.sql b/ICD10GM/manual_work/create_manual_table.sql index bcab40471..b5a1cc6b2 100644 --- a/ICD10GM/manual_work/create_manual_table.sql +++ b/ICD10GM/manual_work/create_manual_table.sql @@ -16,8 +16,10 @@ * Authors: Darina Ivakhnenko, Dmitry Dymshyts * Date: 2021 **************************************************************************/ - +DROP TABLE refresh_lookup_done; +TRUNCATE TABLE refresh_lookup_done CREATE TABLE refresh_lookup_done ( +id serial primary key , icd_code VARCHAR, icd_name VARCHAR, repl_by_relationship VARCHAR, @@ -25,4 +27,5 @@ repl_by_id INT, repl_by_code VARCHAR, repl_by_name VARCHAR, repl_by_domain VARCHAR, -repl_by_vocabulary VARCHAR); +repl_by_vocabulary VARCHAR + ); diff --git a/ICD10GM/manual_work/crm_changes.sql b/ICD10GM/manual_work/crm_changes.sql index fad7e5fbc..dfa3d254c 100644 --- a/ICD10GM/manual_work/crm_changes.sql +++ b/ICD10GM/manual_work/crm_changes.sql @@ -29,6 +29,10 @@ $body$ END $body$; +TRUNCATE TABLE concept_relationship_manual; +INSERT INTO concept_relationship_manual +SELECT*FROM concept_relationship_manual_backup_2022_05_21; + -- deprecate previous inaccurate mapping UPDATE concept_relationship_manual crm SET invalid_reason = 'D', diff --git 
a/ICD10GM/manual_work/inter_icd10_integrity.sql b/ICD10GM/manual_work/inter_icd10_integrity.sql new file mode 100644 index 000000000..2403b9924 --- /dev/null +++ b/ICD10GM/manual_work/inter_icd10_integrity.sql @@ -0,0 +1,77 @@ +--Run the ICD10 +--Run and check ICD10CM +--1 Upload the refresh_lookup_done +--2 Run the script +--Compare number of rows in uploaded csv and script output +--Drop flagged rows from G-frive +--Manually asses Qualitu of Discr tagged mappings + + +--Detect codes with no mapping in ICD10 +with no_map_by_icd10 as (SELECT distinct r.*,cc.* +FROM refresh_lookup_done r +left JOIN devv5.concept c +on trim(lower(r.icd_code)) = trim(lower(c.concept_code)) +and c.vocabulary_id ='ICD10' + LEFT JOIN devv5.concept_relationship cr + on c.concept_id = cr.concept_id_1 + and cr.relationship_id in ( 'Maps to','Maps to value') + and cr.invalid_reason is null + left JOIN devv5.concept cc + on cr.concept_id_2 = cc.concept_id + and cr.invalid_reason is null + and cr.relationship_id in ( 'Maps to','Maps to value') +where cc.concept_id is null) +, +to_be_dropped as ( + SELECT distinct b.id, + b.icd_code, + b.icd_name, + b.repl_by_relationship, + b.repl_by_id, + b.repl_by_code, + b.repl_by_name, + b.repl_by_domain, + b.repl_by_vocabulary, + case when a.icd_code is null then 'drop' else null end as flag -- drop rows where mapping will come from ICD10 + from no_map_by_icd10 a + RIGHT JOIN refresh_lookup_done b + on a.id=b.id/*a.icd_code = b.icd_code + and a.repl_by_id = b.repl_by_id*/ +) +, +discr as ( +SELECT distinct aa.*, + case when aa.icd_code=r.icd_code and r.repl_by_id<>aa.repl_by_id and aa.repl_by_relationship=r.repl_by_relationship then 'discr' else null end as dicrep --detect rows where possible micctargeting occur (when code exists in several ICD10 like vocabs) +FROM to_be_dropped aa +LEFT JOIN dev_icd10cm.refresh_lookup_done r +ON aa.icd_code=r.icd_code + and aa.repl_by_relationship=r.repl_by_relationship +order by aa.id) +SELECT id, + icd_code, + 
repl_by_id, + flag, + string_agg(distinct dicrep,'X') as dicrep, + icd_name, + repl_by_relationship, + repl_by_id, + repl_by_code, + repl_by_name, + repl_by_domain, + repl_by_vocabulary + + +FROM discr +group by id, + icd_code, + icd_name, + repl_by_relationship, + repl_by_id, + repl_by_code, + repl_by_name, + repl_by_domain, + repl_by_vocabulary, + flag +order by id +; \ No newline at end of file diff --git a/ICD10GM/manual_work/readme.md b/ICD10GM/manual_work/readme.md index bc0f27490..ea1e8e47c 100644 --- a/ICD10GM/manual_work/readme.md +++ b/ICD10GM/manual_work/readme.md @@ -19,7 +19,7 @@ ORDER BY vocabulary_id, concept_code, invalid_reason, valid_start_date, valid_en DELETE FROM concept_manual WHERE concept_code NOT IN (SELECT concept_code FROM sources.icd10gm); ``` -6.3.Extract the [respective csv file](https://drive.google.com/file/d/1oPJtaUuhhU7uDSQ6y2QwwFwmps_rRm5x/view?usp=sharing) into the concept_relationship_manual table. The file was generated using the query: +6.3.Extract the [respective csv file](https://drive.google.com/file/d/1UaM9OEGXj7D5xvJHKj-JHiG1cGxX3Jm6/view?usp=sharing) into the concept_relationship_manual table. The file was generated using the query: ```sql SELECT concept_code_1, concept_code_2, @@ -64,4 +64,4 @@ Preserve a manual table with 'Is a' relationships, but change 'Is a' to 'Maps to - decimal symbol: '.' 
- quote escape: with backslash \ - quote always: FALSE -- NULL string: empty \ No newline at end of file +- NULL string: empty diff --git a/KCD7/load_stage.sql b/KCD7/load_stage.sql index 4bafa758b..775ea4d92 100644 --- a/KCD7/load_stage.sql +++ b/KCD7/load_stage.sql @@ -195,13 +195,14 @@ FROM ( AND cs1.vocabulary_id = 'KCD7' JOIN concept c2 ON c2.concept_code = crs.concept_code_2 AND c2.vocabulary_id = crs.vocabulary_id_2 - AND c2.vocabulary_id = 'SNOMED' + --AND c2.vocabulary_id = 'SNOMED' WHERE crs.relationship_id = 'Maps to' AND crs.invalid_reason IS NULL ) i WHERE i.concept_code = cs.concept_code AND cs.vocabulary_id = 'KCD7'; + --11. If domain_id is empty we use previous and next domain_id UPDATE concept_stage c SET domain_id = rd.domain_id @@ -239,7 +240,7 @@ WHERE rd.concept_code = c.concept_code AND c.vocabulary_id = 'KCD7' AND c.domain_id IS NULL; - +; --11.1 Detect and Update misclassified domains to Condition UPDATE concept_stage c SET domain_id = 'Condition' From 9ccf25560c2332bf6d96a7ba783cbc4dd61231c5 Mon Sep 17 00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Tue, 23 Aug 2022 15:18:05 +0300 Subject: [PATCH 15/21] KCD7: crm_changes.sql is modified (Use the patch AFTER GENERIC UPDATE TP DETECT INNACURATE VALUE MAPPING SHOULD BE INVALIDATED) --- KCD7/manual_work/crm_changes.sql | 54 +++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/KCD7/manual_work/crm_changes.sql b/KCD7/manual_work/crm_changes.sql index 37630dd16..0461748bc 100644 --- a/KCD7/manual_work/crm_changes.sql +++ b/KCD7/manual_work/crm_changes.sql @@ -114,4 +114,56 @@ INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabula ) ; -SELECT * FROM concept_relationship_manual; +-- 23/08/2022 patch used after generic to detect Values to be invalidated +INSERT INTO concept_relationship_manual(vocabulary_id_1, concept_code_1, relationship_id, valid_start_date, invalid_reason, valid_end_date, vocabulary_id_2, concept_code_2) 
+SELECT +distinct + con.vocabulary_id as vocabulary_id_1, + con.concept_code as concept_code_1, + crm.relationship_id, + crm.valid_start_date, + 'D' as invalid_reason, + current_date as valid_end_date, + con2.vocabulary_id as vocabulary_id_2, + con2.concept_code as concept_code_2 +FROM concept_relationship crm +JOIN concept con +on con.concept_id=crm.concept_id_1 +and con.vocabulary_id='KCD7' +and crm.invalid_reason is null + and crm.relationship_id ilike 'Maps%' +JOIN concept con2 +on crm.concept_id_2=con2.concept_id +where exists(SELECT 1 + from concept c + JOIN concept_relationship cr + on cr.concept_id_1 = c.concept_id + and cr.relationship_id = 'Maps to value' + and cr.invalid_reason is null + and crm.concept_id_1=c.concept_id + and c.vocabulary_id='KCD7' + ) +and not exists(SELECT 1 + from devv5.concept c1 + JOIN devv5.concept_relationship cr1 + on cr1.concept_id_1 = c1.concept_id + and cr1.relationship_id = 'Maps to value' + and cr1.invalid_reason is null + and con.concept_code=c1.concept_code + and 'ICD10'=c1.vocabulary_id + ) +and crm.relationship_id ='Maps to value' + and (con.concept_code, --the same source_code is mapped + con2.concept_code, --to the same concept_code + con.vocabulary_id, + con2.vocabulary_id, --of the same vocabulary + crm.relationship_id) --with the same relationship + NOT IN (SELECT concept_code_1, + concept_code_2, + vocabulary_id_1, + vocabulary_id_2, + relationship_id FROM concept_relationship_manual) +; + + + From 694a6d87be601e4595c6db36569aa563d1d1025d Mon Sep 17 00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Thu, 25 Aug 2022 16:04:14 +0300 Subject: [PATCH 16/21] readme changes --- ICD10GM/manual_work/readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ICD10GM/manual_work/readme.md b/ICD10GM/manual_work/readme.md index ea1e8e47c..f87163fb7 100644 --- a/ICD10GM/manual_work/readme.md +++ b/ICD10GM/manual_work/readme.md @@ -1,6 +1,6 @@ ### STEP 6 of the refresh: work with manual staging tables 
(skip this step if implementing on the Pallas vocabulary server) - -6.1.Extract the [respective csv file](https://drive.google.com/file/d/1ZjYCykojpUyxljZ4v1Qs3Yz72TiXWvKC/view?usp=sharing) into the concept_manual table. The file was generated using the query: +Detected newly codes should be uploaded to concept manual (name translation via Google translate) +6.1.Extract the [respective csv file](https://docs.google.com/spreadsheets/d/1nk66P2seuEcXrdcWxAzJePko6RgJ5_5CJHynIZKU7Xo/edit#gid=1634989798) into the concept_manual table. The file was generated using the query: ```sql SELECT concept_name, domain_id, From 9623d35ebb0b111827a208043f02acc1805c4238 Mon Sep 17 00:00:00 2001 From: "Vlad.korsik@odysseusinc.com" Date: Thu, 25 Aug 2022 23:20:23 +0300 Subject: [PATCH 17/21] ICD9CM Load stage update (commented /not delted ros with SNOMED as taret for DOmain detection) --- ICD9CM/load_stage.sql | 4 ++-- ICD9CM/manual_work/crm_changes.sql | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ICD9CM/load_stage.sql b/ICD9CM/load_stage.sql index b5499045d..5d9a0dc66 100644 --- a/ICD9CM/load_stage.sql +++ b/ICD9CM/load_stage.sql @@ -317,7 +317,7 @@ FROM ( AND cs1.vocabulary_id = 'ICD9CM' JOIN concept c2 ON c2.concept_code = crs.concept_code_2 AND c2.vocabulary_id = crs.vocabulary_id_2 - AND c2.vocabulary_id = 'SNOMED' + --AND c2.vocabulary_id = 'SNOMED' WHERE crs.relationship_id = 'Maps to' AND crs.invalid_reason IS NULL @@ -343,7 +343,7 @@ FROM ( JOIN concept c1 ON c1.concept_id = cr.concept_id_1 AND c1.vocabulary_id = 'ICD9CM' JOIN concept c2 ON c2.concept_id = cr.concept_id_2 - AND c2.vocabulary_id = 'SNOMED' + -- AND c2.vocabulary_id = 'SNOMED' JOIN concept_stage cs1 ON cs1.concept_code = c1.concept_code AND cs1.vocabulary_id = c1.vocabulary_id WHERE cr.relationship_id = 'Maps to' diff --git a/ICD9CM/manual_work/crm_changes.sql b/ICD9CM/manual_work/crm_changes.sql index 6c0918239..aa0d9ec98 100644 --- a/ICD9CM/manual_work/crm_changes.sql +++ 
b/ICD9CM/manual_work/crm_changes.sql @@ -76,7 +76,8 @@ WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order -- activate mapping, that became valid again UPDATE concept_relationship_manual crm SET invalid_reason = null, - valid_end_date = to_date('20991231','yyyymmdd') + valid_end_date = to_date('20991231','yyyymmdd'), + valid_start_date = current_date --SELECT * FROM concept_relationship_manual crm --use this SELECT for QA WHERE invalid_reason = 'D' -- activate only deprecated mappings From 83302073dbe2b7b7d8f6f6ac2ca53ed9f2ea08ad Mon Sep 17 00:00:00 2001 From: Alexander <32357692+Alexdavv@users.noreply.github.com> Date: Fri, 26 Aug 2022 14:10:43 +0300 Subject: [PATCH 18/21] Update load_stage.sql --- ICD10GM/load_stage.sql | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ICD10GM/load_stage.sql b/ICD10GM/load_stage.sql index 6eb561706..7131d087a 100644 --- a/ICD10GM/load_stage.sql +++ b/ICD10GM/load_stage.sql @@ -62,11 +62,7 @@ LEFT JOIN concept c ON c.concept_code = g.concept_code AND c.vocabulary_id = 'ICD10' AND c.concept_class_id NOT LIKE '%Chapter%'; --- 4.0 Per readme file Manual Cleaun-Up -DELETE FROM concept_manual -WHERE concept_code NOT IN (SELECT concept_code FROM sources.icd10gm); - ---4.1 Append concept corrections -- COVID concepts added and English translation +--4 Append concept corrections -- COVID concepts added and English translation DO $_$ BEGIN PERFORM VOCABULARY_PACK.ProcessManualConcepts(); From e6e64485fe04a5f96be22c697201ae22e48bbbd8 Mon Sep 17 00:00:00 2001 From: Alexander <32357692+Alexdavv@users.noreply.github.com> Date: Fri, 26 Aug 2022 14:11:08 +0300 Subject: [PATCH 19/21] Update load_stage.sql --- ICD10GM/load_stage.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ICD10GM/load_stage.sql b/ICD10GM/load_stage.sql index 7131d087a..2727436fb 100644 --- a/ICD10GM/load_stage.sql +++ b/ICD10GM/load_stage.sql @@ -62,7 +62,7 @@ LEFT JOIN concept c ON c.concept_code = 
g.concept_code AND c.vocabulary_id = 'ICD10' AND c.concept_class_id NOT LIKE '%Chapter%'; ---4 Append concept corrections -- COVID concepts added and English translation +--4. Append concept corrections -- COVID concepts added and English translation DO $_$ BEGIN PERFORM VOCABULARY_PACK.ProcessManualConcepts(); From 6d2864e8ddeb7be4d2a3499831647cc12c0b23ef Mon Sep 17 00:00:00 2001 From: Timur Date: Fri, 26 Aug 2022 16:22:51 +0300 Subject: [PATCH 20/21] ICD10CN refactoring --- ICD10CN/load_stage.sql | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/ICD10CN/load_stage.sql b/ICD10CN/load_stage.sql index 0eb7cfd7a..a4f9f6a4d 100644 --- a/ICD10CN/load_stage.sql +++ b/ICD10CN/load_stage.sql @@ -384,10 +384,10 @@ FROM ( AND c2.concept_class_id = 'ICDO Condition' AND c2.concept_code = c1.concept_code || '-NULL' --Commented since we allow fuzzy match uphill for this iteration - -- where substring (c.concept_code from 6 for 1) = '0' --Exact match to ICDO is MXXXX0/X - + --where substring (c.concept_code from 6 for 1) = '0' --Exact match to ICDO is MXXXX0/X + UNION ALL - + SELECT DISTINCT cs.concept_code, FIRST_VALUE(c.concept_id) OVER ( PARTITION BY cs.concept_code ORDER BY LENGTH(c.concept_code) DESC --Longest matching code for best results @@ -410,13 +410,13 @@ JOIN concept_relationship r ON r.concept_id_1 = i.concept_id 'Maps to value' ) JOIN concept c ON c.concept_id = r.concept_id_2 - -where NOT EXISTS (SELECT 1 - from concept_relationship_stage crs - where crs.concept_code_1 = i.concept_code - and crs.invalid_reason is null - and crs.relationship_id in ('Maps to') - ); +WHERE NOT EXISTS ( + SELECT 1 + FROM concept_relationship_stage crs + WHERE crs.concept_code_1 = i.concept_code + AND crs.invalid_reason IS NULL + AND crs.relationship_id = 'Maps to' + ); --From mapping target UPDATE concept_stage cs @@ -505,7 +505,6 @@ BEGIN PERFORM VOCABULARY_PACK.DeprecateWrongMAPSTO(); END $_$; - --16. 
Add "subsumes" relationship between concepts where the concept_code is like of another -- Although 'Is a' relations exist, it is done to differentiate between "true" source-provided hierarchy and convenient "jump" links we build now INSERT INTO concept_relationship_stage ( From 11ab2ea57da3fb8cc68b626903e11f70b5ccc796 Mon Sep 17 00:00:00 2001 From: Timur Date: Mon, 29 Aug 2022 12:53:46 +0300 Subject: [PATCH 21/21] refactoring --- ICD9CM/load_stage.sql | 2 +- KCD7/load_stage.sql | 21 +++++++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/ICD9CM/load_stage.sql b/ICD9CM/load_stage.sql index 5d9a0dc66..4f27be404 100644 --- a/ICD9CM/load_stage.sql +++ b/ICD9CM/load_stage.sql @@ -343,7 +343,7 @@ FROM ( JOIN concept c1 ON c1.concept_id = cr.concept_id_1 AND c1.vocabulary_id = 'ICD9CM' JOIN concept c2 ON c2.concept_id = cr.concept_id_2 - -- AND c2.vocabulary_id = 'SNOMED' + --AND c2.vocabulary_id = 'SNOMED' JOIN concept_stage cs1 ON cs1.concept_code = c1.concept_code AND cs1.vocabulary_id = c1.vocabulary_id WHERE cr.relationship_id = 'Maps to' diff --git a/KCD7/load_stage.sql b/KCD7/load_stage.sql index 775ea4d92..351705b75 100644 --- a/KCD7/load_stage.sql +++ b/KCD7/load_stage.sql @@ -118,9 +118,11 @@ JOIN concept c ON c.concept_code = cs.concept_code AND c.vocabulary_id = 'ICD10' JOIN concept_relationship cr ON cr.concept_id_1 = c.concept_id AND cr.invalid_reason IS NULL -JOIN concept c2 ON c2.concept_id = cr.concept_id_2 -and cr.relationship_id IN ('Maps to', 'Maps to value') -; + AND cr.relationship_id IN ( + 'Maps to', + 'Maps to value' + ) +JOIN concept c2 ON c2.concept_id = cr.concept_id_2; --6. Add "Subsumes" relationship between concepts where the concept_code is like of another CREATE INDEX IF NOT EXISTS trgm_idx ON concept_stage USING GIN (concept_code devv5.gin_trgm_ops); --for LIKE patterns @@ -202,7 +204,6 @@ FROM ( WHERE i.concept_code = cs.concept_code AND cs.vocabulary_id = 'KCD7'; - --11. 
If domain_id is empty we use previous and next domain_id UPDATE concept_stage c SET domain_id = rd.domain_id @@ -218,10 +219,10 @@ FROM ( AND next_domain IS NOT NULL THEN CASE WHEN prev_domain < next_domain - THEN substring((prev_domain || '/' || next_domain),1,20) -- essential due to length constraints - ELSE substring((next_domain || '/' || prev_domain),1,20) -- essential due to length constraints + THEN LEFT((prev_domain || '/' || next_domain),20) -- essential due to length constraints + ELSE LEFT((next_domain || '/' || prev_domain),20) -- essential due to length constraints END -- prev and next domain are not same and not null both, with order by name - ELSE coalesce(prev_domain, next_domain, 'Condition') + ELSE COALESCE(prev_domain, next_domain, 'Condition') END END domain_id FROM ( @@ -240,13 +241,10 @@ WHERE rd.concept_code = c.concept_code AND c.vocabulary_id = 'KCD7' AND c.domain_id IS NULL; -; --11.1 Detect and Update misclassified domains to Condition UPDATE concept_stage c SET domain_id = 'Condition' -where domain_id ='Condition/Observatio' -; - +WHERE domain_id = 'Condition/Observatio'; --12. Manual name fix UPDATE concept_stage @@ -257,5 +255,4 @@ UPDATE concept_synonym_stage SET synonym_name = '코로나바이러스질환2019[코로나-19]' WHERE synonym_concept_code = 'U07.1' and language_concept_id=4175771; - -- At the end, the three tables concept_stage, concept_relationship_stage and concept_synonym_stage should be ready to be fed into the generic_update.sql script \ No newline at end of file