# **Managing the Co-Morbidity Data**

The `icd` package and the usage shown here come from [https://jackwasey.github.io/icd/](https://jackwasey.github.io/icd/).

## Preamble

In [76]:
# List the library directories R searches for installed packages
.libPaths()

In [77]:
# One-time installation of the `icd` package from GitHub (uncomment to run)
#library(devtools)
#install_github("jackwasey/icd")

## Load Data

In [78]:
# Load the MIMIC diagnoses table.
# ICD9_CODE is forced to character so that codes consisting only of digits
# keep their leading zeros regardless of what else the column contains; all
# other columns are still type-guessed by read.csv().
df_comorb <- read.csv(
  file = '/Users/mathiasschindler/Library/Mobile Documents/com~apple~CloudDocs/BSE/_T2--CML2/Project1/Data/MIMIC_diagnoses.csv',
  colClasses = c(ICD9_CODE = "character")
)

# Keep only the subject identifier and the diagnosis code.
# Plain `[` indexing is used instead of subset(), which is documented as a
# convenience function for interactive use.
df_comorb <- df_comorb[, c("SUBJECT_ID", "ICD9_CODE")]

# Check data type
class(df_comorb)

# Print head
head(df_comorb)

Unnamed: 0_level_0,SUBJECT_ID,ICD9_CODE
Unnamed: 0_level_1,<int>,<chr>
1,256,53240
2,256,41071
3,256,53560
4,256,40390
5,256,5859
6,256,4186


## **Load `icd`-Library**

In [79]:
library(icd)

## **Transform Comorbidities Information**

### Co-Morbidities One-Hot Encoded

In [80]:
# Transform using the `icd` library: one row per subject, one logical column
# per Charlson comorbidity category.
# The identifier and code columns are named explicitly rather than left to
# icd's column-name guessing, so the result does not depend on column order
# or naming heuristics.
comorb_charlson <- comorbid_charlson(
  df_comorb,
  visit_name = "SUBJECT_ID",
  icd_name = "ICD9_CODE"
)
df_comorb_charlson <- as.data.frame(comorb_charlson)
head(df_comorb_charlson)

Unnamed: 0_level_0,MI,CHF,PVD,Stroke,Dementia,Pulmonary,Rheumatic,PUD,LiverMild,DM,DMcx,Paralysis,Renal,Cancer,LiverSevere,Mets,HIV
Unnamed: 0_level_1,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>
256,True,False,True,False,False,True,False,True,False,False,False,False,True,False,False,False,False
512,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
768,True,True,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False
1024,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False
66560,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False
1280,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [81]:
# Convert logicals to numeric (TRUE -> 1, FALSE -> 0).
# vapply() always returns a logical vector, unlike sapply() whose return type
# depends on its input. List-style indexing (`df[cols]`) keeps a data frame
# even when exactly one column is selected, whereas `df[, cols]` would drop to
# a bare vector and make lapply() iterate over elements instead of columns.
cols <- vapply(df_comorb_charlson, is.logical, logical(1))
df_comorb_charlson[cols] <- lapply(df_comorb_charlson[cols], as.numeric)
head(df_comorb_charlson)

Unnamed: 0_level_0,MI,CHF,PVD,Stroke,Dementia,Pulmonary,Rheumatic,PUD,LiverMild,DM,DMcx,Paralysis,Renal,Cancer,LiverSevere,Mets,HIV
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
256,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0
512,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
768,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
1024,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
66560,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0
1280,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Charlson Score

In [82]:
# Calculate the Charlson score for each subject, naming the identifier column
# explicitly instead of relying on icd's column-name guessing
charlson_score <- charlson(df_comorb, visit_name = "SUBJECT_ID")

# Save as dataframe; the single column is named after the variable
# (`charlson_score`) and the subject identifiers become the row names
df_charlson_score <- as.data.frame(charlson_score)
head(df_charlson_score, 15)

Unnamed: 0_level_0,charlson_score
Unnamed: 0_level_1,<dbl>
256,6
512,0
768,5
1024,3
66560,2
1280,1
67072,4
1792,2
2048,0
2304,1


### Joining Both DataFrames

In [83]:
# Peek at the first 15 distinct row names (the subject identifiers) of each
# data frame to confirm that both are keyed the same way
head(unique(row.names(df_comorb_charlson)), n = 15)
head(unique(row.names(df_charlson_score)), n = 15)

In [84]:
# Move the row names (the equivalent of a pandas index) into an explicit
# `subject_id` column so the two data frames can be joined on it
library(tibble)
df_comorb_charlson_idx <- tibble::rownames_to_column(
  df_comorb_charlson,
  var = "subject_id"
)
df_charlson_score_idx <- tibble::rownames_to_column(
  df_charlson_score,
  var = "subject_id"
)

head(df_comorb_charlson_idx)
head(df_charlson_score_idx)

Unnamed: 0_level_0,subject_id,MI,CHF,PVD,Stroke,Dementia,Pulmonary,Rheumatic,PUD,LiverMild,DM,DMcx,Paralysis,Renal,Cancer,LiverSevere,Mets,HIV
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,256,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0
2,512,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,768,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
4,1024,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
5,66560,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0
6,1280,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Unnamed: 0_level_0,subject_id,charlson_score
Unnamed: 0_level_1,<chr>,<dbl>
1,256,6
2,512,0
3,768,5
4,1024,3
5,66560,2
6,1280,1


In [85]:
# Inner-join the one-hot comorbidity flags with the Charlson score on
# `subject_id`. The key is a character column, so merge() returns the rows in
# lexicographic order ("10", "100", "1000", ...), not numeric order.
comorb_final <- merge(
  x = df_comorb_charlson_idx,
  y = df_charlson_score_idx,
  by = "subject_id"
)
head(comorb_final, n = 15)

Unnamed: 0_level_0,subject_id,MI,CHF,PVD,Stroke,Dementia,Pulmonary,Rheumatic,PUD,LiverMild,DM,DMcx,Paralysis,Renal,Cancer,LiverSevere,Mets,HIV,charlson_score
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,100,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,1000,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,5
4,10000,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,6
5,10001,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,10002,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,10003,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,10004,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1
9,10005,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10,10006,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,4


## Export `.csv`

In [86]:
# Building blocks of the output file name, later concatenated in the order
# path, filename, sep, date, filetype
path <- "/Users/mathiasschindler/Library/Mobile Documents/com~apple~CloudDocs/BSE/_T2--CML2/Project1/Data/out/"
filename <- "comorb_final"
# Timestamp with minute resolution, e.g. "2022-03-01-14-05".
# The former `sep = ""` argument was dropped: the date-time format() method
# has no such parameter and silently ignored it.
date <- format(Sys.time(), "%Y-%m-%d-%H-%M")
sep <- "-"
filetype <- ".csv"

In [87]:
# Concatenate the pieces with nothing between them; `sep` here is the "-"
# string variable placed between the file name and the timestamp
finalfilename <- paste0(path, filename, sep, date, filetype)
finalfilename

In [88]:
# Write the merged table to disk; row.names = TRUE also writes the row names
# as an unnamed first column
write.csv(x = comorb_final, file = finalfilename, row.names = TRUE)