In [1]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas deprecation/dtype warnings — confirm that is intended.
warnings.simplefilter("ignore")

# Folder prefix (with trailing slash) prepended to the input CSV file names below.
path = 'data/'

In [2]:
# Load the REDCap survey export.
# data\Deidentified REDCap Survey Data 2024.08.26.csv
redcap_csv = path + 'Deidentified REDCap Survey Data 2024.08.26.csv'
redcap = pd.read_csv(redcap_csv)

# In `redcap`, visit_date is stored as a 'YYYY-MM-DD' string (any time-of-day is dropped).
parsed_visit = pd.to_datetime(redcap['visit_date'])
redcap['visit_date'] = parsed_visit.dt.strftime('%Y-%m-%d')

# `temp` is the slim table used for the merge further down: only the columns
# listed here, visit_date parsed back to datetime, and a single row per
# encrypt_mrn (the first one in file order is kept).
survey_cols = ['encrypt_mrn', 'ed_screened', 'visit_date', 'new_diagnosis', 'pcp_followup']
temp = (
    redcap[survey_cols]
    .assign(visit_date=lambda frame: pd.to_datetime(frame['visit_date']))
    .drop_duplicates(subset='encrypt_mrn', keep='first')
)

In [3]:
# Import the historical blood pressure data.
# data\Historical Blood Pressure Data File 2024.08.23.csv
bp = pd.read_csv(path + 'Historical Blood Pressure Data File 2024.08.23.csv')

# Parse the reading date and truncate it to midnight. The REDCap visit_date is
# reduced to a plain 'YYYY-MM-DD' day, so the blood pressure side must be
# day-level too; otherwise any time-of-day in bp_date would split the per-day
# grouping and prevent the merge on visit_date from matching.
# For date-only input this normalisation changes nothing.
bp['bp_date'] = pd.to_datetime(bp['bp_date']).dt.normalize()

# Use the same column name as the REDCap table so both can be joined on it.
bp = bp.rename(columns={'bp_date': 'visit_date'})

# Difference between the sbp and dbp columns of each reading.
bp['bp_difference'] = bp['sbp'] - bp['dbp']

In [4]:
# Summarise the blood pressure readings per (encrypt_mrn, visit_date) pair.
# Each entry maps an output column to (source column, aggregation).
summary_specs = {
    'max_sbp': ('sbp', 'max'),
    'min_dbp': ('dbp', 'min'),
    'max_diff': ('bp_difference', 'max'),
    'avg_sbp': ('sbp', 'mean'),
    'avg_dbp': ('dbp', 'mean'),
    'avg_diff': ('bp_difference', 'mean'),
    # count of encrypt_mrn values in the group
    'test_nums': ('encrypt_mrn', 'count'),
}
group_keys = ['encrypt_mrn', 'visit_date']
bp_group = bp.groupby(group_keys).agg(**summary_specs).reset_index()

# Round every summary column except the count to two decimal places.
rounded_cols = [col for col in summary_specs if col != 'test_nums']
bp_group = bp_group.round(dict.fromkeys(rounded_cols, 2))

# Re-assert the datetime dtype on the key column used by the merge below.
bp_group['visit_date'] = pd.to_datetime(bp_group['visit_date'])

In [5]:
# Attach the blood pressure summary to the REDCap rows. A left join keeps every
# row of `temp`; rows without a matching (encrypt_mrn, visit_date) summary get
# missing values in the blood pressure columns.
merge_keys = ['encrypt_mrn', 'visit_date']
merged = temp.merge(bp_group, how='left', on=merge_keys)

In [6]:
# Write the merged table to disk without the index column. The output folder
# is created first so that running the notebook on a fresh checkout does not
# fail because 'processed_data/' is missing.
output_csv = Path('processed_data/bp_data.csv')
output_csv.parent.mkdir(parents=True, exist_ok=True)
merged.to_csv(output_csv, index=False)