# Evaluate COVID status of patients to establish subgroup

In [None]:
import pandas as pd
import os

In [None]:
# Directory scanned for the lab extraction files (absolute, machine-specific path).
data_path = '/Users/jk1/stroke_datasets/stroke_unit_dataset/per_value/Extraction_20220815'
# Only files in data_path whose name starts with this prefix are loaded as lab data.
lab_file_start = 'labo'

In [None]:
# Load every lab extraction file (name starting with `lab_file_start`) found in
# `data_path` and stack them into a single DataFrame. All columns are read as
# strings (dtype=str) so no value is reinterpreted at load time.
# os.listdir returns names in arbitrary order; sorting makes the row order of
# the concatenated frame reproducible between runs and machines.
lab_file_names = sorted(f for f in os.listdir(data_path) if f.startswith(lab_file_start))
if not lab_file_names:
    # pd.concat would otherwise fail with a bare "No objects to concatenate".
    raise ValueError(
        f"No file starting with {lab_file_start!r} found in {data_path!r}")
lab_files = [pd.read_csv(os.path.join(data_path, f), delimiter=';', encoding='utf-8', dtype=str)
             for f in lab_file_names]
# ignore_index=True gives the combined frame a fresh 0..n-1 index.
lab_df = pd.concat(lab_files, ignore_index=True)

In [None]:
from preprocessing.geneva_stroke_unit_preprocessing.utils import create_ehr_case_identification_column

# Attach a case/admission identifier column computed by the project helper.
# NOTE(review): how the helper derives the id from lab_df is not visible here — confirm in utils.
lab_df['case_admission_id'] = create_ehr_case_identification_column(lab_df)

In [None]:
# Preview the first rows of the combined lab table.
lab_df.head()

In [None]:
# material_label values accepted when selecting SARS tests in the filters below.
# NOTE(review): presumably respiratory specimen types (swabs, aspirates, LBA) — confirm with data owner.
accepted_materials = ['frot. nasophary.', 'frot. orophary.', 'LBA', 'aspir. bronch.', 'aspir. nasoph.']
# Result strings (`value` column) that are counted as a positive test.
positive_labels = ['POSITIF', 'PRESENT']

In [None]:
# Preview lab rows whose dosage_label mentions SARS and whose specimen type is accepted.
# na=False: rows with a missing dosage_label are explicitly non-matching instead of
# putting NaN into the boolean mask; regex=False: 'SARS' is a literal, not a pattern.
is_sars_test = lab_df['dosage_label'].str.contains('SARS', na=False, regex=False)
is_accepted_material = lab_df['material_label'].isin(accepted_materials)
lab_df[is_sars_test & is_accepted_material].head()

Number of patients with a positive SARS-CoV-2 test (at any time within the same admission)

In [None]:
# Count distinct case_admission_id values that have at least one SARS test, on an
# accepted specimen type, whose result is one of the positive labels.
# na=False: rows with a missing dosage_label are explicitly non-matching instead of
# putting NaN into the boolean mask; regex=False: 'SARS' is a literal, not a pattern.
is_sars_test = lab_df['dosage_label'].str.contains('SARS', na=False, regex=False)
is_accepted_material = lab_df['material_label'].isin(accepted_materials)
is_positive = lab_df['value'].isin(positive_labels)
lab_df.loc[is_sars_test & is_accepted_material & is_positive, 'case_admission_id'].nunique()

In [None]:
# Sanity check: count distinct case_admission_id values with a SARS test (accepted
# specimen type) whose result is a number rather than a text label.
# errors='coerce' turns every non-numeric result into NaN, so notna() below
# selects exactly the rows whose value parsed as a number.
lab_df['value_numeric'] = pd.to_numeric(lab_df['value'], errors='coerce')
# na=False: rows with a missing dosage_label are explicitly non-matching instead of
# putting NaN into the boolean mask; regex=False: 'SARS' is a literal, not a pattern.
is_sars_test = lab_df['dosage_label'].str.contains('SARS', na=False, regex=False)
is_accepted_material = lab_df['material_label'].isin(accepted_materials)
has_numeric_value = lab_df['value_numeric'].notna()
lab_df.loc[is_sars_test & is_accepted_material & has_numeric_value, 'case_admission_id'].nunique()

Same count, restricted to samples taken within the first 72 h after admission

In [None]:
# Timestamp layout shared by the sample_date and begin_date columns (day.month.year hour:minute).
datatime_format = '%d.%m.%Y %H:%M'

# Parse both timestamp columns, then store the elapsed time from begin_date to
# sample_date in hours (seconds / 3600).
sampled_at = pd.to_datetime(lab_df['sample_date'], format=datatime_format)
admitted_at = pd.to_datetime(lab_df['begin_date'], format=datatime_format)
lab_df['delta_admission_to_sample'] = (sampled_at - admitted_at).dt.total_seconds().div(3600)

In [None]:
# Count distinct case_admission_id values with a positive SARS test (accepted
# specimen type) sampled less than 72 hours after begin_date.
# na=False: rows with a missing dosage_label are explicitly non-matching instead of
# putting NaN into the boolean mask; regex=False: 'SARS' is a literal, not a pattern.
is_sars_test = lab_df['dosage_label'].str.contains('SARS', na=False, regex=False)
is_accepted_material = lab_df['material_label'].isin(accepted_materials)
is_positive = lab_df['value'].isin(positive_labels)
# Rows with a missing delta compare False and are excluded.
# NOTE(review): samples taken before begin_date (negative delta) also pass `< 72` — confirm intended.
is_within_72h = lab_df['delta_admission_to_sample'] < 72
lab_df.loc[is_sars_test & is_accepted_material & is_positive & is_within_72h, 'case_admission_id'].nunique()