In [None]:
from src.data_structure import VisitData, Record, RecordKey, record_key_from_abbreviation
from src.database import read_sql

In [None]:
# Shared container for the loaded measurements; process_row() below fills it
# through data.append(visit_id, key, record).
data = VisitData()

In [None]:
# Load the observations of every visit that has a non-primary diagnosis whose
# code starts with "J18" (case-insensitive), ordered by minutes since start.
# The result columns (visit_id, value, minutes_since_start, finding) are the
# inputs of process_row() below.
# NOTE(review): the investigation-results query further down filters on
# "isPrimary" = TRUE while this one uses FALSE -- confirm that is intended.
df = read_sql('''
WITH observations AS (
    SELECT * FROM student_data.vwd_observations
    -- LIMIT 100000
),
cases AS (
    -- DISTINCT: a visit with several matching diagnosis rows must be listed
    -- once, otherwise each of its observations is returned once per
    -- diagnosis row and ends up duplicated in the result.
    SELECT DISTINCT f.visit_oid, f.startingvisitoid
    FROM student_data.vwd_diagnosen AS d
    JOIN student_data.vwd_faelle AS f ON d.visit_oid = f.visit_oid
    WHERE d."isPrimary" = FALSE
    AND d."Code" ilike('j18%')
)

SELECT
    cases.startingvisitoid AS visit_id,
    observations."Value" AS value,
    observations."StartDT_minutes_since_start" AS minutes_since_start,
    observations."FindingAbbr" AS finding
FROM cases JOIN observations ON cases.visit_oid = observations.patientvisit_oid
ORDER BY minutes_since_start
''')

In [None]:
def process_row(visit_id, value, minutes_since_start, finding):
    """Add one observation row to the module-level ``data`` container.

    ``finding`` is translated with ``record_key_from_abbreviation``; when
    that lookup yields a falsy key the row is skipped without any error.
    Otherwise a ``Record(minutes_since_start, value)`` is appended under
    ``visit_id`` and the resolved key.

    Returns:
        None in every case; the effect is the mutation of ``data``.
    """
    record_key = record_key_from_abbreviation(finding)
    if record_key:
        data.append(visit_id, record_key, Record(minutes_since_start, value))

# Feed every result row into process_row(). A plain loop is used because the
# call is made only for its side effect on `data`; collecting the None return
# values in a list would allocate one entry per row for nothing.
# Columns are selected by name so the call no longer depends on the column
# order of the query result.
for row in zip(
    df["visit_id"],
    df["value"],
    df["minutes_since_start"],
    df["finding"],
):
    process_row(*row)

In [None]:
# Sanity check: size of data.visits after the observation rows were appended
# (presumably the number of distinct visits -- verify against VisitData).
print(len(data.visits))

In [None]:
# Investigation results for visits whose primary diagnosis code starts with
# "J18", restricted to rows where the visit is its own starting visit
# (visit_oid = startingvisitoid) and to a fixed list of finding abbreviations.
# The abbreviations contain no LIKE wildcards (% or _), so the former chain of
# "LIKE ... OR LIKE ..." comparisons is written as a single IN list.
df = read_sql("""
WITH ir AS (
    SELECT *
    FROM student_data.vwd_investigationresults
    -- LIMIT 1000000
)
SELECT
    f.visit_oid,
    s.description,
    ir.findingabbreviation,
    ir.resultvalue,
    ir.resultdatetime_minutes_since_start
FROM ir
JOIN student_data.vwd_service AS s
    ON ir.service_oid = s.objectid
JOIN student_data.vwd_faelle AS f
    ON ir.patientvisit_oid = f.visit_oid
JOIN student_data.vwd_diagnosen AS d
    ON f.visit_oid = d.visit_oid
WHERE d."Code" LIKE 'J18%'
    AND d."isPrimary" = TRUE
    AND f.visit_oid = f.startingvisitoid
    AND ir.findingabbreviation IN (
        'Hämoglobin',
        'Thrombozyten',
        'HB POC',
        'Leukozyten3',
        'GFR',
        'Natrium',
        'Kalium',
        'C-reakt.Pro.',
        'GLUCP2',
        'PTT1-1',
        'BE(B) POC',
        'HCO3-std P',
        'O2-SAT POC'
    )
-- LIMIT 10000
;
""")
# Notebook cell output: display the query result.
df