# na_interpolation_linear

In [1]:
# Load the pipe-separated input table; header = TRUE takes the column names
# from the first line of the file.
df <- read.table(
  "p015036_sepsis_sofa_qsofa_with_nan.psv",
  sep = "|",
  header = TRUE
)

In [2]:
# Display the table that was just loaded.
df

Resp,SBP,Platelets,Bilirubin_total,Creatinine,ICULOS,SepsisLabel,qSOFA,SOFA
,,,,,1,0,0,0
20.0,,,,,2,0,0,0
19.0,,,,,3,0,0,0
24.0,,,2.3,3.4,4,0,1,1
25.0,,,,,5,0,1,0
26.0,,,,,6,0,1,0
28.5,,187.0,2.7,3.7,7,0,1,1
24.25,,,,,8,0,1,0
15.0,,,,,9,0,0,0
25.0,,,,,10,0,1,0


In [3]:
library(imputeTS)

In [4]:
# na_interpolation supports three interpolation options:
# 1: Linear interpolation (the default): na_interpolation(x)
# 2: Spline interpolation: na_interpolation(x, option = "spline")
# 3: Stineman interpolation: na_interpolation(x, option = "stine")

In [5]:
# Fill the gaps in Resp, SBP, Platelets, Bilirubin_total and Creatinine with
# na_interpolation's default option.
# na_interpolation requires at least 2 non-NA values in a column.
a <- na_interpolation(df["Resp"])
b <- na_interpolation(df["SBP"])
c <- na_interpolation(df["Platelets"])
d <- na_interpolation(df["Bilirubin_total"])
e <- na_interpolation(df["Creatinine"])
# ICULOS and SepsisLabel are taken from df as they are, without imputation.
f <- df["ICULOS"]
g <- df["SepsisLabel"]

In [6]:
# Concatenate the seven column objects with c() and wrap the combined result
# in a one-element list.
test1 <- list(c(a, b, c, d, e, f, g))

In [7]:
# Display the combined list.
test1

In [8]:
# Display the combined list converted to a data frame.
as.data.frame(test1)

Resp,SBP,Platelets,Bilirubin_total,Creatinine,ICULOS,SepsisLabel
20.0,101.0,187.0,2.3,3.4,1,0
20.0,101.0,187.0,2.3,3.4,2,0
19.0,101.0,187.0,2.3,3.4,3,0
24.0,101.0,187.0,2.3,3.4,4,0
25.0,101.0,187.0,2.433333,3.5,5,0
26.0,101.0,187.0,2.566667,3.6,6,0
28.5,101.0,187.0,2.7,3.7,7,0
24.25,101.0,181.2,2.76,3.72,8,0
15.0,101.0,175.4,2.82,3.74,9,0
25.0,101.0,169.6,2.88,3.76,10,0


In [9]:
# Convert the combined list to a data frame and save it as a pipe-separated
# file (all other write.table arguments keep their defaults).
data <- as.data.frame(test1)
write.table(
  data,
  file = "p015036_sepsis_na_interpolation_linear.psv",
  sep = "|"
)

# na_interpolation_spline

In [10]:
# Reload the pipe-separated input table; header = TRUE takes the column names
# from the first line of the file.
df <- read.table(
  "p015036_sepsis_sofa_qsofa_with_nan.psv",
  sep = "|",
  header = TRUE
)

# Columns whose missing values are imputed, and columns copied through as-is.
impute_cols <- c("Resp", "SBP", "Platelets", "Bilirubin_total", "Creatinine")
passthrough_cols <- c("ICULOS", "SepsisLabel")

# Select the output columns in their final order, then overwrite each imputed
# column with its spline-interpolated values.
# na_interpolation requires at least 2 non-NA values in a column.
data2 <- df[c(impute_cols, passthrough_cols)]
data2[impute_cols] <- lapply(
  df[impute_cols],
  na_interpolation,
  option = "spline"
)

# Save pipe-separated; all other write.table arguments keep their defaults.
write.table(
  data2,
  "p015036_sepsis_na_interpolation_spline.psv",
  sep = "|"
)

# na_interpolation_stine

In [11]:

# Columns whose missing values are imputed, and columns copied through as-is.
impute_cols <- c("Resp", "SBP", "Platelets", "Bilirubin_total", "Creatinine")
passthrough_cols <- c("ICULOS", "SepsisLabel")

# Select the output columns in their final order, then overwrite each imputed
# column with its Stineman-interpolated values (option = "stine").
# na_interpolation requires at least 2 non-NA values in a column.
data3 <- df[c(impute_cols, passthrough_cols)]
data3[impute_cols] <- lapply(
  df[impute_cols],
  na_interpolation,
  option = "stine"
)

# Save pipe-separated; all other write.table arguments keep their defaults.
write.table(
  data3,
  "p015036_sepsis_na_interpolation_stine.psv",
  sep = "|"
)

# na_kalman

In [18]:
# Imputation by Kalman smoothing on the state space representation of an
# ARIMA model: na_kalman(x) with its default arguments.

# Columns whose missing values are imputed, and columns copied through as-is.
impute_cols <- c("Resp", "SBP", "Platelets", "Bilirubin_total", "Creatinine")
passthrough_cols <- c("ICULOS", "SepsisLabel")

# Select the output columns in their final order, then overwrite each imputed
# column with the values returned by na_kalman.
# na_kalman requires at least 3 non-NA values in a column.
data4 <- df[c(impute_cols, passthrough_cols)]
data4[impute_cols] <- lapply(df[impute_cols], na_kalman)

# Save pipe-separated; all other write.table arguments keep their defaults.
write.table(
  data4,
  "p015036_sepsis_na_kalman_state_space.psv",
  sep = "|"
)

In [20]:
# Imputation by Kalman smoothing on a structural time series model:
# na_kalman(x, model = "StructTS", smooth = TRUE)

# Columns whose missing values are imputed, and columns copied through as-is.
impute_cols <- c("Resp", "SBP", "Platelets", "Bilirubin_total", "Creatinine")
passthrough_cols <- c("ICULOS", "SepsisLabel")

# Select the output columns in their final order, then overwrite each imputed
# column with the values returned by na_kalman.
# na_kalman requires at least 3 non-NA values in a column.
data5 <- df[c(impute_cols, passthrough_cols)]
data5[impute_cols] <- lapply(
  df[impute_cols],
  na_kalman,
  model = "StructTS",
  smooth = TRUE
)

# Save pipe-separated; all other write.table arguments keep their defaults.
write.table(
  data5,
  "p015036_sepsis_na_kalman_StructTS.psv",
  sep = "|"
)



# na_locf Missing Value Imputation by Last Observation Carried Forward

In [25]:
# na_locf(x)
# Last Observation Carried Forward

# Columns whose missing values are imputed, and columns copied through as-is.
impute_cols <- c("Resp", "SBP", "Platelets", "Bilirubin_total", "Creatinine")
passthrough_cols <- c("ICULOS", "SepsisLabel")

# Select the output columns in their final order, then overwrite each imputed
# column with the values returned by na_locf (default arguments).
data6 <- df[c(impute_cols, passthrough_cols)]
data6[impute_cols] <- lapply(df[impute_cols], na_locf)

# Save pipe-separated; all other write.table arguments keep their defaults.
write.table(data6, "p015036_sepsis_na_locf.psv", sep = "|")

In [26]:
# na_locf(x, option = "nocb")
# Next Observation Carried Backward

# Columns whose missing values are imputed, and columns copied through as-is.
impute_cols <- c("Resp", "SBP", "Platelets", "Bilirubin_total", "Creatinine")
passthrough_cols <- c("ICULOS", "SepsisLabel")

# Select the output columns in their final order, then overwrite each imputed
# column with the values returned by na_locf using option = "nocb".
data7 <- df[c(impute_cols, passthrough_cols)]
data7[impute_cols] <- lapply(df[impute_cols], na_locf, option = "nocb")

# Save pipe-separated; all other write.table arguments keep their defaults.
write.table(data7, "p015036_sepsis_na_nocb.psv", sep = "|")

# na_ma imputation using moving average

In [27]:
# Example 1: imputation with a simple moving average:
# na_ma(x, weighting = "simple")

# Columns whose missing values are imputed, and columns copied through as-is.
impute_cols <- c("Resp", "SBP", "Platelets", "Bilirubin_total", "Creatinine")
passthrough_cols <- c("ICULOS", "SepsisLabel")

# Select the output columns in their final order, then overwrite each imputed
# column with the values returned by na_ma using weighting = "simple".
data8 <- df[c(impute_cols, passthrough_cols)]
data8[impute_cols] <- lapply(df[impute_cols], na_ma, weighting = "simple")

# Save pipe-separated; all other write.table arguments keep their defaults.
write.table(data8, "p015036_sepsis_na_ma.psv", sep = "|")

In [28]:
# Example 2: imputation with an exponentially weighted moving average:
# na_ma(x) with its default arguments.

# Columns whose missing values are imputed, and columns copied through as-is.
impute_cols <- c("Resp", "SBP", "Platelets", "Bilirubin_total", "Creatinine")
passthrough_cols <- c("ICULOS", "SepsisLabel")

# Select the output columns in their final order, then overwrite each imputed
# column with the values returned by na_ma.
data9 <- df[c(impute_cols, passthrough_cols)]
data9[impute_cols] <- lapply(df[impute_cols], na_ma)

# Save pipe-separated; all other write.table arguments keep their defaults.
write.table(data9, "p015036_sepsis_na_ma_exponential.psv", sep = "|")

# na_mean Perform imputation with the overall mean

In [29]:
# Columns whose missing values are imputed, and columns copied through as-is.
impute_cols <- c("Resp", "SBP", "Platelets", "Bilirubin_total", "Creatinine")
passthrough_cols <- c("ICULOS", "SepsisLabel")

# Select the output columns in their final order, then overwrite each imputed
# column with the values returned by na_mean (default arguments).
data10 <- df[c(impute_cols, passthrough_cols)]
data10[impute_cols] <- lapply(df[impute_cols], na_mean)

# Save pipe-separated; all other write.table arguments keep their defaults.
write.table(data10, "p015036_sepsis_na_mean.psv", sep = "|")

# na_random Missing Value Imputation by Random Sample

In [30]:
# Columns whose missing values are imputed, and columns copied through as-is.
impute_cols <- c("Resp", "SBP", "Platelets", "Bilirubin_total", "Creatinine")
passthrough_cols <- c("ICULOS", "SepsisLabel")

# Select the output columns in their final order, then overwrite each imputed
# column with the values returned by na_random (default arguments).
# Columns are processed in the order listed in impute_cols.
data11 <- df[c(impute_cols, passthrough_cols)]
data11[impute_cols] <- lapply(df[impute_cols], na_random)

# Save pipe-separated; all other write.table arguments keep their defaults.
write.table(data11, "p015036_sepsis_na_random.psv", sep = "|")