/
load_stage.sql
2625 lines (2539 loc) · 134 KB
/
load_stage.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**************************************************************************
* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Authors: Eduard Korchmar, Alexander Davydov, Timur Vakhitov, Christian Reich, Oleg Zhuk
* Date: 2022
**************************************************************************/
--1. Extract each component (International, UK & US) versions to properly date the combined source in next step
CREATE OR REPLACE VIEW module_date AS
SELECT DISTINCT ON (m.id) m.moduleid,
TO_CHAR(m.sourceeffectivetime, 'yyyy-mm-dd') AS version
FROM sources.der2_ssrefset_moduledependency_merged m
WHERE m.active = 1
AND m.referencedcomponentid = 900000000000012004
AND --Model component module; Synthetic target, contains source version in each row
m.moduleid IN (
900000000000207008, --Core (international) module
999000011000000103, --UK edition
731000124108 --US edition
)
ORDER BY m.id,
m.effectivetime DESC;
--2. Update latest_update field to new date
--Use the latest of the release dates of all source versions. Usually, the UK is the latest.
DO $_$
BEGIN
PERFORM VOCABULARY_PACK.SetLatestUpdate(
pVocabularyName => 'SNOMED',
pVocabularyDate => (SELECT vocabulary_date FROM sources.sct2_concept_full_merged LIMIT 1),
pVocabularyVersion =>
(SELECT version FROM module_date where moduleid = 900000000000207008) || ' SNOMED CT International Edition; ' ||
(SELECT version FROM module_date where moduleid = 731000124108) || ' SNOMED CT US Edition; ' ||
(SELECT version FROM module_date where moduleid = 999000011000000103) || ' SNOMED CT UK Edition',
pVocabularyDevSchema => 'DEV_SNOMED'
);
END $_$;
--3. Truncate all working tables
TRUNCATE TABLE concept_stage;
TRUNCATE TABLE concept_relationship_stage;
TRUNCATE TABLE concept_synonym_stage;
TRUNCATE TABLE pack_content_stage;
TRUNCATE TABLE drug_strength_stage;
--4. Create core version of SNOMED without concept_id, domain_id, concept_class_id, standard_concept
INSERT INTO concept_stage (
concept_name,
vocabulary_id,
concept_code,
valid_start_date,
valid_end_date,
invalid_reason
)
SELECT sct2.concept_name,
'SNOMED' AS vocabulary_id,
sct2.concept_code,
TO_DATE(effectivestart, 'yyyymmdd') AS valid_start_date,
TO_DATE('20991231', 'yyyymmdd') AS valid_end_date,
NULL AS invalid_reason
FROM (
SELECT vocabulary_pack.CutConceptName(d.term) AS concept_name,
d.conceptid::TEXT AS concept_code,
c.active,
FIRST_VALUE(c.effectivetime) OVER (
PARTITION BY c.id ORDER BY c.active DESC, c.effectivetime --if there ever were active versions of the concept, take the earliest one
) AS effectivestart,
ROW_NUMBER() OVER (
PARTITION BY d.conceptid
-- Order of preference:
-- Active descriptions first, characterised as Preferred Synonym, prefer SNOMED Int, then US, then UK, then take the latest term
ORDER BY c.active DESC,
d.active DESC,
l.active DESC,
CASE l.acceptabilityid
WHEN 900000000000548007
THEN 1 --Preferred
WHEN 900000000000549004
THEN 2 --Acceptable
ELSE 99
END ASC,
CASE d.typeid
WHEN 900000000000013009
THEN 1 --Synonym (PT)
WHEN 900000000000003001
THEN 2 --Fully specified name
ELSE 99
END ASC,
CASE l.refsetid
WHEN 900000000000509007
THEN 1 --US English language reference set
WHEN 900000000000508004
THEN 2 --UK English language reference set
ELSE 99 -- Various UK specific refsets
END,
CASE l.source_file_id
WHEN 'INT'
THEN 1 -- International release
WHEN 'US'
THEN 2 -- SNOMED US
WHEN 'GB_DE'
THEN 3 -- SNOMED UK Drug extension, updated more often
WHEN 'UK'
THEN 4 -- SNOMED UK
ELSE 99
END ASC,
l.effectivetime DESC,
d.term
) AS rn
FROM sources.sct2_concept_full_merged c
JOIN sources.sct2_desc_full_merged d ON d.conceptid = c.id
JOIN sources.der2_crefset_language_merged l ON l.referencedcomponentid = d.id
) sct2
WHERE sct2.rn = 1;
--4.1 For concepts with latest entry in sct2_concept having active = 0, preserve invalid_reason and valid_end date
WITH inactive
AS (
SELECT c.id,
MAX(c.effectivetime) AS effectiveend
FROM sources.sct2_concept_full_merged c
LEFT JOIN sources.sct2_concept_full_merged c2 ON --ignore all entries before latest one with active = 1
c2.active = 1
AND c.id = c2.id
AND c.effectivetime < c2.effectivetime
WHERE c2.id IS NULL
AND c.active = 0
GROUP BY c.id
)
UPDATE concept_stage cs
SET invalid_reason = 'D',
valid_end_date = TO_DATE(i.effectiveend, 'yyyymmdd')
FROM inactive i
WHERE i.id::TEXT = cs.concept_code;
--4.2 Some concepts were never alive; we don't know what their valid_start_date would be, so we set it to default minimum
UPDATE concept_stage
SET valid_start_date = TO_DATE('19700101', 'yyyymmdd')
WHERE valid_start_date = valid_end_date;
--4.3 Fix concept names: change vitamin B>12< deficiency to vitamin B-12 deficiency; NAD(P)^+^ to NAD(P)+
UPDATE concept_stage
SET concept_name = vocabulary_pack.CutConceptName(TRANSLATE(concept_name, '>,<,^', '-'))
WHERE (
(
concept_name LIKE '%>%'
AND concept_name LIKE '%<%'
)
OR (concept_name LIKE '%^%^%')
)
AND LENGTH(concept_name) > 5;
--5. Update concept_class_id from extracted hierarchy tag information and terms ordered by description table precedence
UPDATE concept_stage cs
SET concept_class_id = i.concept_class_id
FROM (
WITH tmp_concept_class AS (
SELECT *
FROM (
SELECT concept_code,
f7, -- SNOMED hierarchy tag
ROW_NUMBER() OVER (
PARTITION BY concept_code
-- order of precedence: active, by class relevance
-- Might be redundant, as normally concepts will never have more than 1 hierarchy tag, but we have concurrent sources, so this may prevent problems and breaks nothing
ORDER BY active DESC,
rnb,
CASE f7
WHEN 'disorder'
THEN 1
WHEN 'finding'
THEN 2
WHEN 'procedure'
THEN 3
WHEN 'regime/therapy'
THEN 4
WHEN 'qualifier value'
THEN 5
WHEN 'contextual qualifier'
THEN 6
WHEN 'body structure'
THEN 7
WHEN 'cell'
THEN 8
WHEN 'cell structure'
THEN 9
WHEN 'external anatomical feature'
THEN 10
WHEN 'organ component'
THEN 11
WHEN 'organism'
THEN 12
WHEN 'living organism'
THEN 13
WHEN 'physical object'
THEN 14
WHEN 'physical device'
THEN 15
WHEN 'physical force'
THEN 16
WHEN 'occupation'
THEN 17
WHEN 'person'
THEN 18
WHEN 'ethnic group'
THEN 19
WHEN 'religion/philosophy'
THEN 20
WHEN 'life style'
THEN 21
WHEN 'social concept'
THEN 22
WHEN 'racial group'
THEN 23
WHEN 'event'
THEN 24
WHEN 'life event - finding'
THEN 25
WHEN 'product'
THEN 26
WHEN 'substance'
THEN 27
WHEN 'assessment scale'
THEN 28
WHEN 'tumor staging'
THEN 29
WHEN 'staging scale'
THEN 30
WHEN 'specimen'
THEN 31
WHEN 'special concept'
THEN 32
WHEN 'observable entity'
THEN 33
WHEN 'namespace concept'
THEN 34
WHEN 'morphologic abnormality'
THEN 35
WHEN 'foundation metadata concept'
THEN 36
WHEN 'core metadata concept'
THEN 37
WHEN 'metadata'
THEN 38
WHEN 'environment'
THEN 39
WHEN 'geographic location'
THEN 40
WHEN 'situation'
THEN 41
WHEN 'situation'
THEN 42
WHEN 'context-dependent category'
THEN 43
WHEN 'biological function'
THEN 44
WHEN 'attribute'
THEN 45
WHEN 'administrative concept'
THEN 46
WHEN 'record artifact'
THEN 47
WHEN 'navigational concept'
THEN 48
WHEN 'inactive concept'
THEN 49
WHEN 'linkage concept'
THEN 50
WHEN 'link assertion'
THEN 51
WHEN 'environment / location'
THEN 52
ELSE 99
END
) AS rnc
FROM (
SELECT concept_code,
active,
SUBSTRING(term, '\(([^(]+)\)$') AS f7,
rna AS rnb -- row number in sct2_desc_full_merged
FROM (
SELECT c.concept_code,
d.term,
d.active,
ROW_NUMBER() OVER (
PARTITION BY c.concept_code ORDER
BY
d.active DESC, -- active ones
d.effectivetime DESC -- latest active ones
) rna -- row number in sct2_desc_full_merged
FROM concept_stage c
JOIN sources.sct2_desc_full_merged d ON d.conceptid::TEXT = c.concept_code
WHERE
c.vocabulary_id = 'SNOMED' AND
d.typeid = 900000000000003001 -- only Fully Specified Names
) AS s0
) AS s1
) AS s2
WHERE rnc = 1
)
SELECT concept_code,
CASE
WHEN F7 = 'disorder'
THEN 'Clinical Finding'
WHEN F7 = 'procedure'
THEN 'Procedure'
WHEN F7 = 'finding'
THEN 'Clinical Finding'
WHEN F7 = 'organism'
THEN 'Organism'
WHEN F7 = 'body structure'
THEN 'Body Structure'
WHEN F7 = 'substance'
THEN 'Substance'
WHEN F7 = 'product'
THEN 'Pharma/Biol Product'
WHEN F7 = 'event'
THEN 'Event'
WHEN F7 = 'qualifier value'
THEN 'Qualifier Value'
WHEN F7 = 'observable entity'
THEN 'Observable Entity'
WHEN F7 = 'situation'
THEN 'Context-dependent'
WHEN F7 = 'occupation'
THEN 'Social Context'
WHEN F7 = 'regime/therapy'
THEN 'Procedure'
WHEN F7 = 'morphologic abnormality'
THEN 'Morph Abnormality'
WHEN F7 = 'physical object'
THEN 'Physical Object'
WHEN F7 = 'specimen'
THEN 'Specimen'
WHEN F7 = 'environment'
THEN 'Location'
WHEN F7 = 'environment / location'
THEN 'Location'
WHEN F7 = 'context-dependent category'
THEN 'Context-dependent'
WHEN F7 = 'attribute'
THEN 'Attribute'
WHEN F7 = 'linkage concept'
THEN 'Linkage Concept'
WHEN F7 = 'assessment scale'
THEN 'Staging / Scales'
WHEN F7 = 'person'
THEN 'Social Context'
WHEN F7 = 'cell'
THEN 'Body Structure'
WHEN F7 = 'geographic location'
THEN 'Location'
WHEN F7 = 'cell structure'
THEN 'Body Structure'
WHEN F7 = 'ethnic group'
THEN 'Social Context'
WHEN F7 = 'tumor staging'
THEN 'Staging / Scales'
WHEN F7 = 'religion/philosophy'
THEN 'Social Context'
WHEN F7 = 'record artifact'
THEN 'Record Artifact'
WHEN F7 = 'physical force'
THEN 'Physical Force'
WHEN F7 = 'foundation metadata concept'
THEN 'Model Comp'
WHEN F7 = 'namespace concept'
THEN 'Namespace Concept'
WHEN F7 = 'administrative concept'
THEN 'Admin Concept'
WHEN F7 = 'biological function'
THEN 'Biological Function'
WHEN F7 = 'living organism'
THEN 'Organism'
WHEN F7 = 'life style'
THEN 'Social Context'
WHEN F7 = 'contextual qualifier'
THEN 'Qualifier Value'
WHEN F7 = 'staging scale'
THEN 'Staging / Scales'
WHEN F7 = 'life event - finding'
THEN 'Event'
WHEN F7 = 'social concept'
THEN 'Social Context'
WHEN F7 = 'core metadata concept'
THEN 'Model Comp'
WHEN F7 = 'special concept'
THEN 'Special Concept'
WHEN F7 = 'racial group'
THEN 'Social Context'
WHEN F7 = 'therapy'
THEN 'Procedure'
WHEN F7 = 'external anatomical feature'
THEN 'Body Structure'
WHEN F7 = 'organ component'
THEN 'Body Structure'
WHEN F7 = 'physical device'
THEN 'Physical Object'
WHEN F7 = 'linkage concept'
THEN 'Linkage Concept'
WHEN F7 = 'link assertion'
THEN 'Linkage Assertion'
WHEN F7 = 'metadata'
THEN 'Model Comp'
WHEN F7 = 'navigational concept'
THEN 'Navi Concept'
WHEN F7 = 'inactive concept'
THEN 'Inactive Concept'
--added 20190109 (AVOF-1369)
WHEN F7 = 'administration method'
THEN 'Qualifier Value'
WHEN F7 = 'basic dose form'
THEN 'Dose Form'
WHEN F7 = 'clinical drug'
THEN 'Clinical Drug'
WHEN F7 = 'disposition'
THEN 'Disposition'
WHEN F7 = 'dose form'
THEN 'Dose Form'
WHEN F7 = 'intended site'
THEN 'Qualifier Value'
WHEN F7 = 'medicinal product'
THEN 'Pharma/Biol Product'
WHEN F7 = 'medicinal product form'
THEN 'Clinical Drug Form'
WHEN F7 = 'number'
THEN 'Qualifier Value'
WHEN F7 = 'release characteristic'
THEN 'Qualifier Value'
WHEN F7 = 'role'
THEN 'Qualifier Value'
WHEN F7 = 'state of matter'
THEN 'Qualifier Value'
WHEN F7 = 'transformation'
THEN 'Qualifier Value'
WHEN F7 = 'unit of presentation'
THEN 'Qualifier Value'
--Metadata concepts
WHEN F7 = 'OWL metadata concept'
THEN 'Model Comp'
--Specific drug qualifiers
WHEN F7 = 'supplier'
THEN 'Qualifier Value'
WHEN F7 = 'product name'
THEN 'Qualifier Value'
ELSE 'Undefined'
END AS concept_class_id
FROM tmp_concept_class
) i
WHERE i.concept_code = cs.concept_code;
--Assign top SNOMED concept
UPDATE concept_stage
SET concept_class_id = 'Model Comp'
WHERE concept_code = '138875005'
AND vocabulary_id = 'SNOMED';
--Deprecated Concepts with broken fully specified name
UPDATE concept_stage
SET concept_class_id = 'Procedure'
WHERE vocabulary_id = 'SNOMED'
AND concept_code IN (
'712611000000106', --Assessment using childhood health assessment questionnaire
'193371000000106' --Fluoroscopic angioplasty of carotid artery
);
--6. --Some old deprecated concepts from UK drug extension module never have had correct FSN, so we can't get explicit hierarchy tag and keep them as Context-dependent class
UPDATE concept_stage c
SET concept_class_id = 'Context-dependent'
WHERE c.concept_class_id = 'Undefined'
AND c.invalid_reason IS NOT NULL
AND --Make sure we only affect old concepts and not mask new classes additions
EXISTS (
SELECT 1
FROM sources.sct2_concept_full_merged m
WHERE m.id::TEXT = c.concept_code
AND m.moduleid = 999000011000001104 --SNOMED CT United Kingdom drug extension module
);
--7. Get all the synonyms from UMLS ('PT', 'PTGB', 'SY', 'SYGB', 'MTH_PT', 'FN', 'MTH_SY', 'SB') into concept_synonym_stage
INSERT INTO concept_synonym_stage (
synonym_concept_code,
synonym_vocabulary_id,
synonym_name,
language_concept_id
)
SELECT DISTINCT m.code,
'SNOMED',
vocabulary_pack.CutConceptSynonymName(m.str),
4180186 -- English
FROM sources.mrconso m
JOIN concept_stage s ON s.concept_code = m.code
WHERE m.sab = 'SNOMEDCT_US'
AND m.tty IN (
'PT',
'PTGB',
'SY',
'SYGB',
'MTH_PT',
'FN',
'MTH_SY',
'SB'
);
--8. Add active synonyms from merged descriptions list
INSERT INTO concept_synonym_stage (
synonym_concept_code,
synonym_vocabulary_id,
synonym_name,
language_concept_id
)
SELECT DISTINCT d.conceptid,
'SNOMED',
vocabulary_pack.CutConceptSynonymName(d.term),
4180186 -- English
FROM (
SELECT m.id,
m.conceptid::TEXT,
m.term,
FIRST_VALUE(active) OVER (
PARTITION BY id ORDER BY effectivetime DESC
) AS active_status
FROM sources.sct2_desc_full_merged m
) d
JOIN concept_stage s ON s.concept_code = d.conceptid
WHERE d.active_status = 1
AND NOT EXISTS (
SELECT 1
FROM concept_synonym_stage css_int
WHERE css_int.synonym_concept_code = d.conceptid
AND css_int.synonym_name = vocabulary_pack.CutConceptSynonymName(d.term)
);
--9. Fill concept_relationship_stage from merged SNOMED source
INSERT INTO concept_relationship_stage (
concept_code_1,
concept_code_2,
vocabulary_id_1,
vocabulary_id_2,
relationship_id,
valid_start_date,
valid_end_date,
invalid_reason
)
WITH tmp_rel AS (
-- get relationships from latest records that are active
SELECT sourceid::TEXT,
destinationid::TEXT,
REPLACE(term, ' (attribute)', '') AS term
FROM (
SELECT r.sourceid,
r.destinationid,
d.term,
ROW_NUMBER() OVER (
PARTITION BY r.id ORDER BY r.effectivetime DESC,
d.id DESC -- fix for AVOF-650
) AS rn, -- get the latest in a sequence of relationships, to decide whether it is still active
r.active
FROM sources.sct2_rela_full_merged r
JOIN sources.sct2_desc_full_merged d ON d.conceptid = r.typeid
) AS s0
WHERE rn = 1
AND active = 1
AND sourceid IS NOT NULL
AND destinationid IS NOT NULL
AND term <> 'PBCL flag true'
UNION ALL
--add relationships from concept to module
SELECT cs.concept_code::TEXT,
moduleid::TEXT,
'Has Module' AS term
FROM sources.sct2_concept_full_merged c
JOIN concept_stage cs ON cs.concept_code = c.id::TEXT
AND cs.vocabulary_id = 'SNOMED'
WHERE c.moduleid IN (
900000000000207008, --Core (international) module
999000011000000103, --UK edition
731000124108, --US edition
999000011000001104, --SNOMED CT United Kingdom drug extension module
900000000000012004, --SNOMED CT model component
999000021000001108 --SNOMED CT United Kingdom drug extension reference set module
)
UNION ALL
--add relationship from concept to status
SELECT st.concept_code::TEXT,
st.statusid::TEXT,
'Has status'
FROM (
SELECT cs.concept_code,
statusid::TEXT,
ROW_NUMBER() OVER (
PARTITION BY id ORDER BY TO_DATE(effectivetime, 'YYYYMMDD') DESC
) rn
FROM sources.sct2_concept_full_merged c
JOIN concept_stage cs ON cs.concept_code = c.id::TEXT
AND cs.vocabulary_id = 'SNOMED'
WHERE c.statusid IN (
900000000000073002, --Defined
900000000000074008 --Primitive
)
) st
WHERE st.rn = 1
)
SELECT concept_code_1,
concept_code_2,
vocabulary_id_1,
vocabulary_id_2,
relationship_id,
valid_start_date,
valid_end_date,
invalid_reason
FROM (
--convert SNOMED to OMOP-type relationship_id
--TODO: this deserves a massive overhaul using raw typeid instead of extracted terms; however, it works in current state with no reported issues
SELECT DISTINCT sourceid AS concept_code_1,
destinationid AS concept_code_2,
'SNOMED' AS vocabulary_id_1,
'SNOMED' AS vocabulary_id_2,
CASE
WHEN term = 'Access'
THEN 'Has access'
WHEN term = 'Associated aetiologic finding'
THEN 'Has etiology'
WHEN term = 'After'
THEN 'Followed by'
WHEN term = 'Approach'
THEN 'Has surgical appr' -- looks like old version
WHEN term = 'Associated finding'
THEN 'Has asso finding'
WHEN term = 'Associated morphology'
THEN 'Has asso morph'
WHEN term = 'Associated procedure'
THEN 'Has asso proc'
WHEN term = 'Associated with'
THEN 'Finding asso with'
WHEN term = 'AW'
THEN 'Finding asso with'
WHEN term = 'Causative agent'
THEN 'Has causative agent'
WHEN term = 'Clinical course'
THEN 'Has clinical course'
WHEN term = 'Component'
THEN 'Has component'
WHEN term = 'Direct device'
THEN 'Has dir device'
WHEN term = 'Direct morphology'
THEN 'Has dir morph'
WHEN term = 'Direct substance'
THEN 'Has dir subst'
WHEN term = 'Due to'
THEN 'Has due to'
WHEN term = 'Episodicity'
THEN 'Has episodicity'
WHEN term = 'Extent'
THEN 'Has extent'
WHEN term = 'Finding context'
THEN 'Has finding context'
WHEN term = 'Finding informer'
THEN 'Using finding inform'
WHEN term = 'Finding method'
THEN 'Using finding method'
WHEN term = 'Finding site'
THEN 'Has finding site'
WHEN term = 'Has active ingredient'
THEN 'Has active ing'
WHEN term = 'Has definitional manifestation'
THEN 'Has manifestation'
WHEN term = 'Has dose form'
THEN 'Has dose form'
WHEN term = 'Has focus'
THEN 'Has focus'
WHEN term = 'Has interpretation'
THEN 'Has interpretation'
WHEN term = 'Has measured component'
THEN 'Has meas component'
WHEN term = 'Has specimen'
THEN 'Has specimen'
WHEN term = 'Stage'
THEN 'Has stage'
WHEN term = 'Indirect device'
THEN 'Has indir device'
WHEN term = 'Indirect morphology'
THEN 'Has indir morph'
WHEN term = 'Instrumentation'
THEN 'Using device' -- looks like an old version
WHEN term IN (
'Intent',
'Has intent'
)
THEN 'Has intent'
WHEN term = 'Interprets'
THEN 'Has interprets'
WHEN term = 'Is a'
THEN 'Is a'
WHEN term = 'Laterality'
THEN 'Has laterality'
WHEN term = 'Measurement method'
THEN 'Has measurement'
WHEN term = 'Measurement Method'
THEN 'Has measurement' -- looks like misspelling
WHEN term = 'Method'
THEN 'Has method'
WHEN term = 'Morphology'
THEN 'Has asso morph' -- changed to the same thing as 'Has Morphology'
WHEN term = 'Occurrence'
THEN 'Has occurrence'
WHEN term = 'Onset'
THEN 'Has clinical course' -- looks like old version
WHEN term = 'Part of'
THEN 'Part of'
WHEN term = 'Pathological process'
THEN 'Has pathology'
WHEN term = 'Pathological process (qualifier value)'
THEN 'Has pathology'
WHEN term = 'Priority'
THEN 'Has priority'
WHEN term = 'Procedure context'
THEN 'Has proc context'
WHEN term = 'Procedure device'
THEN 'Has proc device'
WHEN term = 'Procedure morphology'
THEN 'Has proc morph'
WHEN term = 'Procedure site - Direct'
THEN 'Has dir proc site'
WHEN term = 'Procedure site - Indirect'
THEN 'Has indir proc site'
WHEN term = 'Procedure site'
THEN 'Has proc site'
WHEN term = 'Property'
THEN 'Has property'
WHEN term = 'Recipient category'
THEN 'Has recipient cat'
WHEN term = 'Revision status'
THEN 'Has revision status'
WHEN term = 'Route of administration'
THEN 'Has route of admin'
WHEN term = 'Route of administration - attribute'
THEN 'Has route of admin'
WHEN term = 'Scale type'
THEN 'Has scale type'
WHEN term = 'Severity'
THEN 'Has severity'
WHEN term = 'Specimen procedure'
THEN 'Has specimen proc'
WHEN term = 'Specimen source identity'
THEN 'Has specimen source'
WHEN term = 'Specimen source morphology'
THEN 'Has specimen morph'
WHEN term = 'Specimen source topography'
THEN 'Has specimen topo'
WHEN term = 'Specimen substance'
THEN 'Has specimen subst'
WHEN term = 'Subject relationship context'
THEN 'Has relat context'
WHEN term = 'Surgical approach'
THEN 'Has surgical appr'
WHEN term = 'Temporal context'
THEN 'Has temporal context'
WHEN term = 'Temporally follows'
THEN 'Occurs after' -- looks like an old version
WHEN term = 'Time aspect'
THEN 'Has time aspect'
WHEN term = 'Using access device'
THEN 'Using acc device'
WHEN term = 'Using device'
THEN 'Using device'
WHEN term = 'Using energy'
THEN 'Using energy'
WHEN term = 'Using substance'
THEN 'Using subst'
WHEN term = 'Following'
THEN 'Followed by'
WHEN term = 'VMP non-availability indicator'
THEN 'Has non-avail ind'
WHEN term = 'Has ARP'
THEN 'Has ARP'
WHEN term = 'Has VRP'
THEN 'Has VRP'
WHEN term = 'Has trade family group'
THEN 'Has trade family grp'
WHEN term = 'Flavour'
THEN 'Has flavor'
WHEN term = 'Discontinued indicator'
THEN 'Has disc indicator'
WHEN term = 'VRP prescribing status'
THEN 'VRP has prescr stat'
WHEN term = 'Has specific active ingredient'
THEN 'Has spec active ing'
WHEN term = 'Has excipient'
THEN 'Has excipient'
WHEN term = 'Has basis of strength substance'
THEN 'Has basis str subst'
WHEN term = 'Has VMP'
THEN 'Has VMP'
WHEN term = 'Has AMP'
THEN 'Has AMP'
WHEN term = 'Has dispensed dose form'
THEN 'Has disp dose form'
WHEN term = 'VMP prescribing status'
THEN 'VMP has prescr stat'
WHEN term = 'Legal category'
THEN 'Has legal category'
WHEN term = 'Caused by'
THEN 'Caused by'
WHEN term = 'Precondition'
THEN 'Has precondition'
WHEN term = 'Inherent location'
THEN 'Has inherent loc'
WHEN term = 'Technique'
THEN 'Has technique'
WHEN term = 'Relative to part of'
THEN 'Has relative part'
WHEN term = 'Process output'
THEN 'Has process output'
WHEN term = 'Property type'
THEN 'Has property type'
WHEN term = 'Inheres in'
THEN 'Inheres in'
WHEN term = 'Direct site'
THEN 'Has direct site'
WHEN term = 'Characterizes'
THEN 'Characterizes'
--added 20171116
WHEN term = 'During'
THEN 'During'
WHEN term = 'Has BoSS'
THEN 'Has basis str subst' -- use existing relationship
WHEN term = 'Has manufactured dose form'
THEN 'Has dose form' -- use existing relationship
WHEN term = 'Has presentation strength denominator unit'
THEN 'Has denominator unit'
WHEN term = 'Has presentation strength denominator value'
THEN 'Has denomin value'
WHEN term = 'Has presentation strength numerator unit'
THEN 'Has numerator unit'
WHEN term = 'Has presentation strength numerator value'
THEN 'Has numerator value'
--added 20180205
WHEN term = 'Has basic dose form'
THEN 'Has basic dose form'
WHEN term = 'Has disposition'
THEN 'Has disposition'
WHEN term = 'Has dose form administration method'
THEN 'Has admin method'
WHEN term = 'Has dose form intended site'
THEN 'Has intended site'
WHEN term = 'Has dose form release characteristic'
THEN 'Has release charact'
WHEN term = 'Has dose form transformation'
THEN 'Has transformation'
WHEN term = 'Has state of matter'
THEN 'Has state of matter'
WHEN term = 'Temporally related to'
THEN 'Temp related to'
--added 20180622
WHEN term = 'Has NHS dm+d basis of strength substance'
THEN 'Has basis str subst'
WHEN term = 'Has unit of administration'
THEN 'Has unit of admin'
WHEN term = 'Has precise active ingredient'
THEN 'Has prec ingredient'
WHEN term = 'Has unit of presentation'
THEN 'Has unit of presen'
WHEN term = 'Has concentration strength numerator value'
THEN 'Has conc num val'
WHEN term = 'Has concentration strength denominator value'
THEN 'Has conc denom val'
WHEN term = 'Has concentration strength denominator unit'
THEN 'Has conc denom unit'
WHEN term = 'Has concentration strength numerator unit'
THEN 'Has conc num unit'
WHEN term = 'Is modification of'
THEN 'Modification of'
WHEN term = 'Count of base of active ingredient'
THEN 'Has count of ing'
--20190204
WHEN term = 'Has realization'
THEN 'Has pathology'
WHEN term = 'Plays role'
THEN 'Plays role'
--20190823
WHEN term = 'Has NHS dm+d (dictionary of medicines and devices) VMP (Virtual Medicinal Product) route of administration'
THEN 'Has route'
WHEN term = 'Has NHS dm+d (dictionary of medicines and devices) controlled drug category'
THEN 'Has CD category'
WHEN term = 'Has NHS dm+d (dictionary of medicines and devices) VMP (Virtual Medicinal Product) ontology form and route'
THEN 'Has ontological form'
WHEN term = 'VMP combination product indicator'
THEN 'Has combi prod ind'
WHEN term = 'Has NHS dm+d (dictionary of medicines and devices) dose form indicator'
THEN 'Has form continuity'
--20200312
WHEN term = 'Has NHS dm+d (dictionary of medicines and devices) additional monitoring indicator'
THEN 'Has add monitor ind'
WHEN term = 'Has NHS dm+d (dictionary of medicines and devices) AMP (actual medicinal product) availability restriction indicator'
THEN 'Has AMP restr ind'
WHEN term = 'Has NHS dm+d parallel import indicator'
THEN 'Paral imprt ind'
WHEN term = 'Has NHS dm+d freeness indicator'
THEN 'Has free indicator'
WHEN term = 'Units'
THEN 'Has unit'
WHEN term = 'Process duration'
THEN 'Has proc duration'
--20201023
WHEN term = 'Relative to'
THEN 'Relative to'
WHEN term = 'Count of active ingredient'
THEN 'Has count of act ing'
WHEN term = 'Has product characteristic'
THEN 'Has prod character'
WHEN term = 'Has ingredient characteristic'
THEN 'Has prod character'
WHEN term = 'Has surface characteristic'
THEN 'Surf character of'
WHEN term = 'Has device intended site'
THEN 'Has dev intend site'
WHEN term = 'Has device characteristic'
THEN 'Has prod character'
WHEN term = 'Has compositional material'
THEN 'Has comp material'
WHEN term = 'Has filling'
THEN 'Has filling'
--January 2022
WHEN term = 'Has coating material'
THEN 'Has coating material'
WHEN term = 'Has absorbability'
THEN 'Has absorbability'
WHEN term = 'Process extends to'
THEN 'Process extends to'
WHEN term = 'Has ingredient qualitative strength'
THEN 'Has strength'
WHEN term = 'Has surface texture'
THEN 'Has surface texture'
WHEN term = 'Is sterile'
THEN 'Is sterile'
WHEN term = 'Has target population'
THEN 'Has targ population'
WHEN term = 'Has Module'
THEN 'Has Module'
WHEN term = 'Has status'
THEN 'Has status'
ELSE term --'non-existing'
END AS relationship_id,
(
SELECT latest_update
FROM vocabulary
WHERE vocabulary_id = 'SNOMED'
) AS valid_start_date,
TO_DATE('20991231', 'yyyymmdd') AS valid_end_date,
NULL AS invalid_reason
FROM tmp_rel
) sn
WHERE NOT EXISTS (
SELECT 1
FROM concept_relationship_stage crs
WHERE crs.concept_code_1 = sn.concept_code_1
AND crs.concept_code_2 = sn.concept_code_2
AND crs.relationship_id = sn.relationship_id
);
--check for non-existing relationships
ALTER TABLE concept_relationship_stage ADD CONSTRAINT tmp_constraint_relid FOREIGN KEY (relationship_id) REFERENCES relationship (relationship_id);
ALTER TABLE concept_relationship_stage DROP CONSTRAINT tmp_constraint_relid;
--SELECT relationship_id FROM concept_relationship_stage EXCEPT SELECT relationship_id FROM relationship;
--10. Add replacement relationships. They are handled in a different SNOMED table
INSERT INTO concept_relationship_stage (
concept_code_1,
concept_code_2,
vocabulary_id_1,
vocabulary_id_2,
relationship_id,
valid_start_date,
valid_end_date,
invalid_reason
)
SELECT DISTINCT sn.concept_code_1,
sn.concept_code_2,
'SNOMED',
'SNOMED',
sn.relationship_id,
COALESCE(cs.valid_end_date, (
SELECT latest_update