Skip to content

Commit

Permalink
Merge pull request #658 from OHDSI/ICDs_to_SNOMED_refresh_05_2022
Browse files Browse the repository at this point in the history
ICDs scripts update
  • Loading branch information
hardhouse committed Aug 29, 2022
2 parents 3194a2a + 11ab2ea commit 7099634
Show file tree
Hide file tree
Showing 28 changed files with 1,603 additions and 52 deletions.
34 changes: 34 additions & 0 deletions CIM10/manual_work/create_manual_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@

/**************************************************************************
* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts
* Date: 2021
**************************************************************************/

DROP TABLE IF EXISTS refresh_lookup_done;
TRUNCATE TABLE refresh_lookup_done;
CREATE TABLE refresh_lookup_done (
id serial primary key ,
icd_code VARCHAR,
icd_name VARCHAR,
repl_by_relationship VARCHAR,
repl_by_id INT,
repl_by_code VARCHAR,
repl_by_name VARCHAR,
repl_by_domain VARCHAR,
repl_by_vocabulary VARCHAR);

SELECT*FROM refresh_lookup_done;
135 changes: 135 additions & 0 deletions CIM10/manual_work/crm_changes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/**************************************************************************
* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts
* Date: 2021
**************************************************************************/
-- create current date backup of concept_relationship_manual table
DO
$body$
DECLARE
update text;
BEGIN
SELECT TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD')
INTO update;
EXECUTE format('create table %I as select * from concept_relationship_manual',
'concept_relationship_manual_backup_' || update);

END
$body$;

--create current date backup of concept_manual table
DO
$body$
DECLARE
update text;
BEGIN
SELECT TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD')
INTO update;
EXECUTE format('create table %I as select * from concept_manual',
'concept_manual_backup_' || update);

END
$body$;
--Backup without new NON-translated codes - concept_manual_backup_2022_08_16
--SELECT*FROM concept_manual_backup_2022_08_16;
SELECT distinct *
FROM concept_manual;



TRUNCATE TABLE dev_cim10.concept_relationship_manual;
INSERT INTO dev_cim10.concept_relationship_manual
SELECT*FROM dev_cim10.concept_relationship_manual_backup_2022_05_18;

-- deprecate previous inaccurate mapping
UPDATE concept_relationship_manual crm
SET invalid_reason = 'D',
valid_end_date = current_date

--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA
WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order to preserve the original deprecation date

AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes presented in the manual file of the current vocabulary refresh

AND NOT EXISTS (SELECT 1 --don't deprecate mapping if the same exists in the current manual file
FROM refresh_lookup_done rl
WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped
AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code
AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary
AND rl.repl_by_relationship = crm.relationship_id --with the same relationship
)
;

-- activate mapping, that became valid again
UPDATE concept_relationship_manual crm
SET invalid_reason = null,
valid_end_date = to_date('20991231','yyyymmdd')

--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA
WHERE invalid_reason = 'D' -- activate only deprecated mappings

AND concept_code_1 IN (SELECT icd_code FROM refresh_lookup_done) --work only with the codes presented in the manual file of the current vocabulary refresh

AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file
FROM refresh_lookup_done rl
WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped
AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code
AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary
AND rl.repl_by_relationship = crm.relationship_id --with the same relationship
)
;

-- insert new mapping
with mapping AS -- select all new codes with their mappings from manual file
(
SELECT DISTINCT icd_code AS concept_code_1,
repl_by_code AS concept_code_2,
'CIM10' AS vocabulary_id_1, -- set current vocabulary name as vocabulary_id_1
repl_by_vocabulary AS vocabulary_id_2,
repl_by_relationship AS relationship_id,
current_date AS valid_start_date, -- set the date of the refresh as valid_start_date
to_date('20991231','yyyymmdd') AS valid_end_date,
NULL AS invalid_reason -- make all new mappings valid
FROM refresh_lookup_done
WHERE repl_by_id != 0 -- select only codes with mapping to standard concepts
)
-- insert new mappings into concept_relationship_manual table
INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabulary_id_1, vocabulary_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason)
(
SELECT concept_code_1,
concept_code_2,
vocabulary_id_1,
vocabulary_id_2,
relationship_id,
valid_start_date,
valid_end_date,
invalid_reason
FROM mapping m
-- don't insert codes with mapping if the same exists in the current manual file
WHERE (concept_code_1, --the same source_code is mapped
concept_code_2, --to the same concept_code
vocabulary_id_1,
vocabulary_id_2, --of the same vocabulary
relationship_id) --with the same relationship
NOT IN (SELECT concept_code_1,
concept_code_2,
vocabulary_id_1,
vocabulary_id_2,
relationship_id FROM concept_relationship_manual)
)
;

SELECT * FROM concept_relationship_manual;
77 changes: 77 additions & 0 deletions CIM10/manual_work/inter_icd10_integrity.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
--Run the ICD10
--Run and check ICD10CM
--1 Upload the refresh_lookup_done
--2 Run the script
--Compare number of rows in uploaded csv and script output
--Drop flagged rows from G-frive
--Manually asses Qualitu of Discr tagged mappings


--Detect codes with no mapping in ICD10
with no_map_by_icd10 as (SELECT distinct r.*,cc.*
FROM refresh_lookup_done r
left JOIN devv5.concept c
on trim(lower(r.icd_code)) = trim(lower(c.concept_code))
and c.vocabulary_id ='ICD10'
LEFT JOIN devv5.concept_relationship cr
on c.concept_id = cr.concept_id_1
and cr.relationship_id in ( 'Maps to','Maps to value')
and cr.invalid_reason is null
left JOIN devv5.concept cc
on cr.concept_id_2 = cc.concept_id
and cr.invalid_reason is null
and cr.relationship_id in ( 'Maps to','Maps to value')
where cc.concept_id is null)
,
to_be_dropped as (
SELECT distinct b.id,
b.icd_code,
b.icd_name,
b.repl_by_relationship,
b.repl_by_id,
b.repl_by_code,
b.repl_by_name,
b.repl_by_domain,
b.repl_by_vocabulary,
case when a.icd_code is null then 'drop' else null end as flag -- drop rows where mapping will come from ICD10
from no_map_by_icd10 a
RIGHT JOIN refresh_lookup_done b
on a.id=b.id/*a.icd_code = b.icd_code
and a.repl_by_id = b.repl_by_id*/
)
,
discr as (
SELECT distinct aa.*,
case when aa.icd_code=r.icd_code and r.repl_by_id<>aa.repl_by_id and aa.repl_by_relationship=r.repl_by_relationship then 'discr' else null end as dicrep --detect rows where possible micctargeting occur (when code exists in several ICD10 like vocabs)
FROM to_be_dropped aa
LEFT JOIN dev_icd10cm.refresh_lookup_done r
ON aa.icd_code=r.icd_code
and aa.repl_by_relationship=r.repl_by_relationship
order by aa.id)
SELECT id,
icd_code,
repl_by_id,
flag,
string_agg(distinct dicrep,'X') as dicrep,
icd_name,
repl_by_relationship,
repl_by_id,
repl_by_code,
repl_by_name,
repl_by_domain,
repl_by_vocabulary


FROM discr
group by id,
icd_code,
icd_name,
repl_by_relationship,
repl_by_id,
repl_by_code,
repl_by_name,
repl_by_domain,
repl_by_vocabulary,
flag
order by id
;
70 changes: 70 additions & 0 deletions CIM10/manual_work/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
### STEP 6 of the refresh: work with manual staging tables (skip this step if implementing on the Pallas vocabulary server)
6.1.Extract the [respective csv file](https://drive.google.com/file/d/1mwDnNYb7fWHLm7lgNHwoUyMfWuJ2GfLb/view?usp=sharing) into the concept_manual table. The file was generated using the query:
```sql
SELECT concept_name,
domain_id,
vocabulary_id,
concept_class_id,
standard_concept,
concept_code,
valid_start_date,
valid_end_date,
invalid_reason
FROM concept_manual
ORDER BY vocabulary_id, concept_code, invalid_reason, valid_start_date, valid_end_date, concept_name;
ADD new codes with translation here
```
6.2.Extract the [respective csv file](https://drive.google.com/file/d/1C9qVJwR369y9Jk02iS-qK45Gn5iGGRJr/view?usp=sharing) into the concept_synonym_manual table. The file was generated using the query:
```sql
SELECT synonym_name,
synonym_concept_code,
synonym_vocabulary_id,
language_concept_id
FROM concept_synonym_manual
ORDER BY synonym_vocabulary_id, synonym_concept_code, language_concept_id, synonym_name;`
```
6.3.Extract the [respective csv file](https://drive.google.com/drive/u/0/folders/1_nY1eDu0RfXmvaipJ1tEGZldnGuOwi2B) into the concept_relationship_manual table. The file was generated using the query:
```sql
SELECT concept_code_1,
concept_code_2,
vocabulary_id_1,
vocabulary_id_2,
relationship_id,
valid_start_date,
valid_end_date,
invalid_reason
FROM concept_relationship_manual
ORDER BY vocabulary_id_1, vocabulary_id_2, relationship_id, concept_code_1, concept_code_2, invalid_reason, valid_start_date, valid_end_date;
```
##### csv format:
- delimiter: ','
- encoding: 'UTF8'
- header: ON
- decimal symbol: '.'
- quote escape: with backslash \
- quote always: FALSE
- NULL string: empty

### STEP 8 of the refresh: solving problems which are difened during the first load_stage run
8.1. Run [mapping_refresh.sql]. Table refresh_lookup will be created. It contains the list with mappings to outdated, deprecated or updated Standard concepts, as well as automaticaly improved mapping.
8.2. Download this table and open it in spreadsheet editor. Columns icd_ represent ICD10CM concepts with uncertain mapping, columns current_ refer to mapping which currently exists in concept_relationship_stage and columns repl_by_ suggest automatically created mapping, the reason for concepts appearing in this table you can see in column reason (e.g., 'improve_map','without mapping').
8.3. Perform manual review and mapping. Note, if you think that current mapping is better than suggested replacement, delete rows with these concepts from Excel table. Add column repl_by_relationship and put there necessary relationship_id following the recommendations described below. Then, delete current_ and reason columns.
8.4. Save table as refresh_lookup_done.csv and upload it into your schema using script [create_manual_table.sql]
8.5. Run [manual_mapping_qa.sql] to check whether refresh mapping meets the ICD10CM logic
8.6. If everything is OK, deprecate old mappings for the ICD10CM codes of interest and add fresh mappings to the concept_relationship_manual using [crm_changes.sql]) script

### Recomanditions for relationship_ids
* **"Maps to"** is used for 1-to-1 FULL equivalent mapping only
* **"Maps to" + "Maps to value"** is used for for Observations and Measurements with results
* **"Is a"** is a temporary relationship used for this check only and applicable for 1-to-1 PARTIAL equivalent AND 1-to-many mappings.
Preserve a manual table with 'Is a' relationships, but change 'Is a' to 'Maps to' during the insertion into the concept_relatioship_manual (e.g. using CASE WHEN).

#### Required fields in a manual table
- icd_code VARHCAR,
- icd_name VARHCAR,
- repl_by_relationship VARCHAR,
- repl_by_id INT,
- repl_by_code VARCHAR,
- repl_by_name VARCHAR,
- repl_by_domain VARCHAR,
- repl_by_vocabulary VARCHAR
4 changes: 4 additions & 0 deletions ICD10/manual_work/create_manual_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
* Date: 2021
**************************************************************************/

DROP TABLE IF EXISTS refresh_lookup_done;
TRUNCATE TABLE refresh_lookup_done;
CREATE TABLE refresh_lookup_done (
icd_code VARCHAR,
icd_name VARCHAR,
Expand All @@ -27,3 +29,5 @@ repl_by_code VARCHAR,
repl_by_name VARCHAR,
repl_by_domain VARCHAR,
repl_by_vocabulary VARCHAR);

SELECT*FROM refresh_lookup_done;
26 changes: 25 additions & 1 deletion ICD10/manual_work/crm_changes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ $body$
END
$body$;

TRUNCATE TABLE dev_icd10.concept_relationship_manual;
INSERT INTO dev_icd10.concept_relationship_manual
SELECT*FROM dev_icd10.concept_relationship_manual_backup_2022_04_25;

-- deprecate previous inaccurate mapping
UPDATE concept_relationship_manual crm
SET invalid_reason = 'D',
Expand All @@ -49,6 +53,24 @@ WHERE invalid_reason IS NULL --deprecate only what's not yet deprecated in order
)
;

-- activate mapping, that became valid again
UPDATE concept_relationship_manual crm
SET invalid_reason = null,
valid_end_date = to_date('20991231','yyyymmdd'),
valid_start_date =current_date

--SELECT * FROM concept_relationship_manual crm --use this SELECT for QA
WHERE invalid_reason = 'D' -- activate only deprecated mappings

AND EXISTS (SELECT 1 -- activate mapping if the same exists in the current manual file
FROM refresh_lookup_done rl
WHERE rl.icd_code = crm.concept_code_1 --the same source_code is mapped
AND rl.repl_by_code = crm.concept_code_2 --to the same concept_code
AND rl.repl_by_vocabulary = crm.vocabulary_id_2 --of the same vocabulary
AND rl.repl_by_relationship = crm.relationship_id --with the same relationship
)
;

-- insert new mapping
with mapping AS -- select all new codes with their mappings from manual file
(
Expand Down Expand Up @@ -87,4 +109,6 @@ INSERT INTO concept_relationship_manual(concept_code_1, concept_code_2, vocabula
vocabulary_id_2,
relationship_id FROM concept_relationship_manual)
)
;
;

SELECT * FROM concept_relationship_manual;
6 changes: 6 additions & 0 deletions ICD10CM/load_stage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,12 @@ FROM (
WHERE i.concept_code = cs.concept_code
AND cs.vocabulary_id = 'ICD10CM';

--TODO: check why the actual U* code limitation is not used.
--Only unassigned Emergency use codes (starting with U) don't have mappings to SNOMED, put Observation as closest meaning to Unknown domain
UPDATE concept_stage
SET domain_id = 'Observation'
WHERE domain_id IS NULL;

--13. Check for NULL in domain_id
ALTER TABLE concept_stage ALTER COLUMN domain_id SET NOT NULL;
ALTER TABLE concept_stage ALTER COLUMN domain_id DROP NOT NULL;
Expand Down
Loading

0 comments on commit 7099634

Please sign in to comment.