/
generateCohort.sql
144 lines (117 loc) · 5 KB
/
generateCohort.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
@codesetQuery
SELECT event_id, person_id, start_date, end_date, op_start_date, op_end_date, visit_occurrence_id
INTO #qualified_events
FROM
(
select pe.event_id, pe.person_id, pe.start_date, pe.end_date, pe.op_start_date, pe.op_end_date, row_number() over (partition by pe.person_id order by pe.start_date @QualifiedEventSort) as ordinal, cast(pe.visit_occurrence_id as bigint) as visit_occurrence_id
FROM (@primaryEventsQuery) pe
@additionalCriteriaQuery
) QE
@QualifiedLimitFilter
;
--- Inclusion Rule Inserts
@inclusionCohortInserts
select event_id, person_id, start_date, end_date, op_start_date, op_end_date
into #included_events
FROM (
SELECT event_id, person_id, start_date, end_date, op_start_date, op_end_date, row_number() over (partition by person_id order by start_date @IncludedEventSort) as ordinal
from
(
select Q.event_id, Q.person_id, Q.start_date, Q.end_date, Q.op_start_date, Q.op_end_date, SUM(coalesce(POWER(cast(2 as bigint), I.inclusion_rule_id), 0)) as inclusion_rule_mask
from #qualified_events Q
LEFT JOIN #inclusion_events I on I.person_id = Q.person_id and I.event_id = Q.event_id
GROUP BY Q.event_id, Q.person_id, Q.start_date, Q.end_date, Q.op_start_date, Q.op_end_date
) MG -- matching groups
{@ruleTotal != 0}?{
-- the matching group with all bits set ( POWER(2,# of inclusion rules) - 1 = inclusion_rule_mask
WHERE (MG.inclusion_rule_mask = POWER(cast(2 as bigint),@ruleTotal)-1)
}
) Results
@ResultLimitFilter
;
@strategy_ends_temp_tables
-- generate cohort periods into #final_cohort
select person_id, start_date, end_date
INTO #cohort_rows
from ( -- first_ends
select F.person_id, F.start_date, F.end_date
FROM (
select I.event_id, I.person_id, I.start_date, CE.end_date, row_number() over (partition by I.person_id, I.event_id order by CE.end_date) as ordinal
from #included_events I
join ( -- cohort_ends
-- cohort exit dates
@cohort_end_unions
) CE on I.event_id = CE.event_id and I.person_id = CE.person_id and CE.end_date >= I.start_date
) F
WHERE F.ordinal = 1
) FE;
select person_id, min(start_date) as start_date, DATEADD(day,-1 * @eraconstructorpad, max(end_date)) as end_date
into #final_cohort
from (
select person_id, start_date, end_date, sum(is_start) over (partition by person_id order by start_date, is_start desc rows unbounded preceding) group_idx
from (
select person_id, start_date, end_date,
case when max(end_date) over (partition by person_id order by start_date rows between unbounded preceding and 1 preceding) >= start_date then 0 else 1 end is_start
from (
select person_id, start_date, DATEADD(day,@eraconstructorpad,end_date) as end_date
from #cohort_rows
) CR
) ST
) GR
group by person_id, group_idx;
DELETE FROM @target_database_schema.@target_cohort_table where @cohort_id_field_name = @target_cohort_id;
INSERT INTO @target_database_schema.@target_cohort_table (@cohort_id_field_name, subject_id, cohort_start_date, cohort_end_date)
@finalCohortQuery
;
{@generateStats != 0}?{
-- BEGIN: Censored Stats
delete from @results_database_schema.cohort_censor_stats where @cohort_id_field_name = @target_cohort_id;
@cohortCensoredStatsQuery
-- END: Censored Stats
}
{@generateStats != 0 & @ruleTotal != 0}?{
@inclusionRuleTable
-- Find the event that is the 'best match' per person.
-- the 'best match' is defined as the event that satisfies the most inclusion rules.
-- ties are solved by choosing the event that matches the earliest inclusion rule, and then earliest.
select q.person_id, q.event_id
into #best_events
from #qualified_events Q
join (
SELECT R.person_id, R.event_id, ROW_NUMBER() OVER (PARTITION BY R.person_id ORDER BY R.rule_count DESC,R.min_rule_id ASC, R.start_date ASC) AS rank_value
FROM (
SELECT Q.person_id, Q.event_id, COALESCE(COUNT(DISTINCT I.inclusion_rule_id), 0) AS rule_count, COALESCE(MIN(I.inclusion_rule_id), 0) AS min_rule_id, Q.start_date
FROM #qualified_events Q
LEFT JOIN #inclusion_events I ON q.person_id = i.person_id AND q.event_id = i.event_id
GROUP BY Q.person_id, Q.event_id, Q.start_date
) R
) ranked on Q.person_id = ranked.person_id and Q.event_id = ranked.event_id
WHERE ranked.rank_value = 1
;
-- modes of generation: (the same tables store the results for the different modes, identified by the mode_id column)
-- 0: all events
-- 1: best event
-- BEGIN: Inclusion Impact Analysis - event
@inclusionImpactAnalysisByEventQuery
-- END: Inclusion Impact Analysis - event
-- BEGIN: Inclusion Impact Analysis - person
@inclusionImpactAnalysisByPersonQuery
-- END: Inclusion Impact Analysis - person
TRUNCATE TABLE #best_events;
DROP TABLE #best_events;
TRUNCATE TABLE #inclusion_rules;
DROP TABLE #inclusion_rules;
}
@strategy_ends_cleanup
TRUNCATE TABLE #cohort_rows;
DROP TABLE #cohort_rows;
TRUNCATE TABLE #final_cohort;
DROP TABLE #final_cohort;
TRUNCATE TABLE #inclusion_events;
DROP TABLE #inclusion_events;
TRUNCATE TABLE #qualified_events;
DROP TABLE #qualified_events;
TRUNCATE TABLE #included_events;
DROP TABLE #included_events;
TRUNCATE TABLE #Codesets;
DROP TABLE #Codesets;