In [None]:
import fiber
from fiber.cohort import Cohort
from fiber.condition import Patient, MRNs
from fiber.condition import Diagnosis
from fiber.condition import Measurement, Encounter, Drug
from fiber.storage import yaml as fiberyaml
import pandas as pd
import pyarrow.parquet as pq
import numpy as np
import os
from functools import reduce 
from fiber.utils import Timer
import pickle

In [None]:
# Cases with disease development: cohort extraction

# Load the hypertension (HT) cohort dataframe and give its onset columns an
# explicit HT prefix in a single rename pass (presumably so they stay distinct
# from the per-diagnosis "age_in_days" / "onset_year" columns used further down).
Cases_HT_beforeOnset2013 = pd.read_pickle("./Cases_HT_beforeOnset2013.pkl").rename(
    columns={
        "onset_year": "HT_onset_year",
        "age_in_days": "age_in_days_HT_onset",
    }
)

def df_to_cohort(df):
    """Build a fiber ``Cohort`` from the index values of ``df``.

    The index values of ``df`` are collected into a list and wrapped in an
    ``MRNs`` condition, which is then used to construct the cohort.
    (Assumes the index holds medical record numbers - confirm against the
    dataframe passed in.)

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe whose index values identify the patients of interest.

    Returns
    -------
    Cohort
        Cohort constructed from the ``MRNs`` condition.
    """
    return Cohort(MRNs(list(df.index.values)))

# Cohort restricted to the patients of the hypertension dataframe loaded above.
cohort = df_to_cohort(Cases_HT_beforeOnset2013)

# Renal diseases.
# NOTE(review): the patterns are wildcard matches on the diagnosis code. A
# pattern such as 'N19.%' requires a literal dot, but ICD-10 N19 and ICD-9 586
# have no sub-codes, and ICD-9 585 was a valid code on its own before it was
# split into 585.1-585.9. Those three therefore use a prefix pattern without
# the dot so that the bare codes are matched as well. This assumes fiber
# matches codes with SQL-LIKE semantics - confirm against the fiber docs.
condition = (Diagnosis('N17.%', 'ICD-10')|
              Diagnosis('N18.%', 'ICD-10')|
              Diagnosis('N19%', 'ICD-10')|
              Diagnosis('I12.%', 'ICD-10')| #hypertensive kidney
              Diagnosis('584.%', 'ICD-9')|
              Diagnosis('585%', 'ICD-9')|
              Diagnosis('403.%', 'ICD-9')| #hypertensive kidney
              Diagnosis('586%', 'ICD-9'))

# Fetch the matching diagnosis records and index them by medical record number.
# set_index returns a new frame here instead of mutating the query result in place.
Cases_HT_before2013_disease = cohort.get(condition).set_index('medical_record_number')

### Attach date of birth and HT onset information to every renal-disease record.
# Only these three cohort columns are carried over to the disease records.
Cases_HT_beforeOnset2013_DOB = Cases_HT_beforeOnset2013.loc[
    :, ['date_of_birth_actual', 'HT_onset_year', 'age_in_days_HT_onset']
]
# Index-on-index inner join: a disease record is kept only if its index value
# also appears in the hypertension cohort.
Cases_HT_before2013_disease_merged_DOB = pd.merge(
    Cases_HT_before2013_disease,
    Cases_HT_beforeOnset2013_DOB,
    how="inner",
    left_index=True,
    right_index=True,
)

#add onset year
def add_onset_year(df_main):
    """Add the calendar year of each record to ``df_main``.

    Two columns are written to ``df_main`` itself (the frame is modified in
    place and the same object is returned):

    * ``age_in_days_delta`` - ``age_in_days`` converted to a timedelta in days.
    * ``onset_year`` - year of ``date_of_birth_actual + age_in_days_delta``.

    Parameters
    ----------
    df_main : pandas.DataFrame
        Must contain ``age_in_days`` and a datetime ``date_of_birth_actual``.

    Returns
    -------
    pandas.DataFrame
        ``df_main`` with the two columns added.
    """
    age_offset = pd.to_timedelta(df_main['age_in_days'], 'd')
    df_main['age_in_days_delta'] = age_offset
    event_date = df_main['date_of_birth_actual'] + age_offset
    df_main['onset_year'] = event_date.dt.year
    return df_main

# Add the diagnosis onset year to every merged renal-disease record.
Cases_HT_before2013_disease_onset = add_onset_year(
    Cases_HT_before2013_disease_merged_DOB
)
# Collapse to one row per medical record number; count() stores, for each
# column, the number of non-null entries that patient has.
Cases_HT_before2013_disease_onset_unique = (
    Cases_HT_before2013_disease_onset
    .groupby("medical_record_number")
    .count()
)



In [None]:
# Inspect the renal-disease records together with the derived onset_year column.
Cases_HT_before2013_disease_onset

In [None]:
# Inspect the per-patient frame (one row per medical_record_number, values are counts).
Cases_HT_before2013_disease_onset_unique

In [None]:
# Save as pkl files.
# The paths are absolute (leading "/"): without it they would be resolved
# relative to the current working directory and would not match the
# "/home/..." locations these files are read back from.
Cases_HT_before2013_disease_onset.to_pickle("/home/kiwitn01/master_thesis_hypertension-complications/Cohort_Extraction/Complications/Renal_Diseases/All/Cases_HT_renal_disease_all.pkl")
Cases_HT_before2013_disease_onset_unique.to_pickle("/home/kiwitn01/master_thesis_hypertension-complications/Cohort_Extraction/Complications/Renal_Diseases/All/Cases_HT_renal_disease_all_unique.pkl")


In [None]:
# Load the previously saved pkl files: the full renal-disease records and the
# per-patient counts.
# NOTE: unpickling can execute arbitrary code - only load files written by this notebook.
Cases_HT_before2013_disease_onset = pd.read_pickle("/home/kiwitn01/master_thesis_hypertension-complications/Cohort_Extraction/Complications/Renal_Diseases/All/Cases_HT_renal_disease_all.pkl")
Cases_HT_before2013_disease_onset_unique= pd.read_pickle("/home/kiwitn01/master_thesis_hypertension-complications/Cohort_Extraction/Complications/Renal_Diseases/All/Cases_HT_renal_disease_all_unique.pkl")


In [None]:
# Inspect the per-patient frame after loading it from the pkl file.
Cases_HT_before2013_disease_onset_unique

In [None]:
# Controls: hypertension patients without a renal-disease diagnosis

In [None]:
# Control creation: hypertension patients that do not appear among the
# complication cases.

# Choose the complication file to exclude (one row per case MRN).
# NOTE(review): this is read from the working directory - confirm it is the
# same file as the "..._all_unique.pkl" written earlier in the notebook.
Cases_complication = pd.read_pickle("./Cases_HT_renal_disease_all_unique.pkl")

# Drop the listed data columns so that only the MRNs of the cases are left to merge on.
Cases_complication_OnlyMRNs = Cases_complication.drop(['age_in_days', 'context_name', 'context_diagnosis_code','date_of_birth_actual','age_in_days_delta', 'onset_year', 'HT_onset_year','age_in_days_HT_onset'], axis = 1)

# Merge the full HT cohort with the case MRNs; indicator=True adds a "_merge"
# column telling whether an MRN came from the cohort only ('left_only'), the
# case file only ('right_only') or both.
Controls_without_complication = Cases_HT_beforeOnset2013.merge(Cases_complication_OnlyMRNs, on ='medical_record_number', how='outer', indicator=True)

# Controls are the cohort patients without a complication record, i.e. exactly
# the 'left_only' rows. Filtering on != 'both' would also keep 'right_only'
# rows (case MRNs missing from the HT cohort), which carry no cohort data and
# are not controls. The "_merge" column is kept in the result.
Controls = Controls_without_complication[Controls_without_complication._merge == 'left_only']


In [None]:
# Inspect the control cohort (includes the "_merge" indicator column from the merge).
Controls

In [None]:
# Save the control cohort as a pkl file in the current working directory;
# adjust the file name to the complication the controls were built for.
Controls.to_pickle("./Controls_No_Renal_Diseases_Ever.pkl")
