In [6]:
# import relevant libraries and modules

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None) 

In [7]:
# Load the preprocessed ICU table (path is relative to the notebook's working directory).
df = pd.read_csv("Preprocessed ICU data.csv")
# Drop rows whose Gender is (approximately) 0.56192144.
# NOTE(review): presumably a mean-imputed placeholder for missing Gender - confirm against the preprocessing step.
# np.isclose also applies its default rtol, so the effective tolerance is wider than atol=1e-8 alone.
df = df[~np.isclose(df["Gender"].round(8), 0.56192144,atol=1e-8)]
# Positional column groups: 7 leading columns, then six consecutive groups of 37 columns,
# then everything from column 229 onward. These slices depend on the CSV column order.
gen = df.iloc[:,:7]
mean1 = df.iloc[:,7:44]
min1 = df.iloc[:,44:81]
max1 = df.iloc[:,81:118]
mean2 = df.iloc[:,118:155]
min2 = df.iloc[:,155:192]
max2 = df.iloc[:,192:229]
# NOTE(review): `outcome` is not referenced again in the visible cells; the target is taken from df by name instead.
outcome = df.iloc[:,229:]

In [30]:
# Inspect the 7 leading columns (RecordID, Age, Gender, Height, ICUType2-4).
gen

Unnamed: 0,RecordID,Age,Gender,Height,ICUType2,ICUType3,ICUType4
0,132543,68,1.0,180.300000,0,1,0
1,132545,88,0.0,169.787227,0,1,0
2,132547,64,1.0,180.300000,0,0,0
3,132551,78,0.0,162.600000,0,1,0
4,132554,64,0.0,169.787227,0,1,0
...,...,...,...,...,...,...,...
1469,142661,89,1.0,177.800000,0,0,1
1470,142662,86,1.0,162.600000,0,1,0
1471,142664,51,0.0,169.787227,0,0,1
1472,142665,70,0.0,169.787227,0,0,1


In [8]:
# Target first, then the feature matrix: every remaining column except the
# target itself and the RecordID identifier.
y = df["In.hospital_death"]
X = df.drop(columns=["In.hospital_death", "RecordID"])

In [9]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3, random_state = 42)



In [10]:
# Random forest used as the importance source for recursive feature elimination (RFE);
# RFE is configured to keep 10 features.
model = RandomForestClassifier(random_state=42)
rfe = RFE(estimator=model, n_features_to_select=10)



In [11]:
# Fit RFE on the training rows only, then read back its results.
rfe.fit(X_train,y_train)
# ranking_ holds one rank per column of X (selected features have rank 1).
ranking=rfe.ranking_
# support_ is the boolean mask of kept columns; use it to recover their names.
selected_features = X.columns[rfe.support_]


In [12]:
# Show the names of the features kept by the full-table RFE.
print("Selected Features: ", selected_features)

Selected Features:  Index(['Mean_HR.x', 'Max_HR.x', 'Mean_GCS.y', 'Mean_NISysABP.y',
       'Mean_Urine.y', 'Mean_BUN.y', 'Min_Weight.y', 'Min_Lactate.y',
       'Max_GCS.y', 'Max_WBC.y'],
      dtype='object')


In [13]:
# Reduce both splits to the columns RFE kept.
X_train_selected = rfe.transform(X_train)
X_test_selected = rfe.transform(X_test)

In [14]:
# Fit the standalone forest on the reduced training matrix.
model.fit(X_train_selected,y_train)

In [15]:
# Score the reduced-feature forest on the held-out rows.
y_pred=model.predict(X_test_selected)
accuracy=accuracy_score(y_test,y_pred)

In [16]:
# Report held-out accuracy of the forest trained on the RFE-selected features.
print("Model accuracy with Selected Features:", accuracy)

Model accuracy with Selected Features: 0.7171945701357466


In [17]:
# Repeat the RFE experiment on the mean1 column group only, keeping 5 features.
X2 = mean1
# NOTE(review): this rebinds X_train/X_test/y_train/y_test (and below, ranking/selected_features)
# that earlier cells created from the full feature table, so re-running those earlier
# transform/fit cells after this one would use the wrong data.
X_train, X_test, y_train, y_test = train_test_split(X2,y,test_size = 0.3, random_state = 42)
# `model` was already fitted above; RFE is documented to fit clones of its estimator - confirm for the installed version.
rfe2 = RFE(estimator=model, n_features_to_select=5)
rfe2.fit(X_train,y_train)
ranking=rfe2.ranking_
selected_features = X2.columns[rfe2.support_]

In [18]:
# Show the 5 features kept from mean1, then reduce both splits to those columns.
print("Selected Features: ", selected_features)
X_train_selected = rfe2.transform(X_train)
X_test_selected = rfe2.transform(X_test)

Selected Features:  Index(['Mean_GCS.x', 'Mean_HR.x', 'Mean_Urine.x', 'Mean_Platelets.x',
       'Mean_PaCO2.x'],
      dtype='object')


In [19]:
# Refit `model` on the current reduced training matrix; this replaces its earlier fit.
model.fit(X_train_selected,y_train)

In [20]:
# Score the refitted forest on the current held-out rows (overwrites y_pred/accuracy).
y_pred=model.predict(X_test_selected)
accuracy=accuracy_score(y_test,y_pred)

In [21]:
# Report the most recently computed held-out accuracy.
print("Model accuracy with Selected Features:", accuracy)

Model accuracy with Selected Features: 0.6606334841628959


In [68]:
def do_rfe(X, y, n_features_to_select, test_size=0.3, random_state=42, verbose=False):
    """Rank the columns of ``X`` by recursive feature elimination (RFE).

    A random-forest-driven RFE is fitted on the training part of a single
    train/test split. The test part is only used for the optional accuracy
    report.

    Parameters
    ----------
    X : pandas.DataFrame
        Candidate feature columns; ``X.columns`` supplies the feature names.
    y : array-like
        Target labels, one per row of ``X``.
    n_features_to_select : int
        Passed through to ``RFE``; the kept features receive ranking 1.
    test_size : float, default 0.3
        Fraction of rows held out by ``train_test_split``.
    random_state : int, default 42
        Seed used for the split and for the random forests.
    verbose : bool, default False
        If true, additionally fit a forest on the selected columns and print
        the selected feature names and the held-out accuracy. Off by default
        because the extra fit does not influence the returned table.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Feature'`` and ``'Ranking'``, one row per column of ``X``,
        in the original column order of ``X``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    rfe = RFE(
        estimator=RandomForestClassifier(random_state=random_state),
        n_features_to_select=n_features_to_select,
    )
    rfe.fit(X_train, y_train)

    if verbose:
        # Evaluate a fresh forest trained only on the columns RFE kept.
        model = RandomForestClassifier(random_state=random_state)
        model.fit(rfe.transform(X_train), y_train)
        y_pred = model.predict(rfe.transform(X_test))
        print("Selected Features: ", X.columns[rfe.support_])
        print("Model accuracy with Selected Features:", accuracy_score(y_test, y_pred))

    return pd.DataFrame({
        'Feature': X.columns,
        'Ranking': rfe.ranking_,
    })
    
    

In [150]:
# RFE ranking within the min1 column group (df columns 44:81), keeping 2 features.
min1_r = do_rfe(min1, y, 2)

In [151]:
# RFE ranking within the max1 column group (df columns 81:118), keeping 2 features.
max1_r = do_rfe(max1,y,2)

In [152]:
# RFE ranking within the mean1 column group (df columns 7:44), keeping 2 features.
mean1_r = do_rfe(mean1,y,2)

In [153]:
# RFE ranking within the mean2 column group (df columns 118:155), keeping 2 features.
mean2_r = do_rfe(mean2,y,2)

In [154]:
# RFE ranking within the max2 column group (df columns 192:229), keeping 2 features.
max2_r = do_rfe(max2,y,2)

In [155]:
# RFE ranking within the min2 column group (df columns 155:192), keeping 2 features.
min2_r = do_rfe(min2,y,2)

In [78]:
# RFE ranking within the leading `gen` columns, keeping 3 features.
# RecordID is dropped first so the identifier is not ranked as a feature.
gen_r = do_rfe(gen.drop("RecordID", axis =1),y,3)

In [156]:
# Order by rank, best (1) first. sort_values keeps the original index labels.
mean1_r = mean1_r.sort_values(by="Ranking", ascending=True)


In [157]:
# Order by rank, best (1) first. sort_values keeps the original index labels.
max1_r = max1_r.sort_values(by="Ranking", ascending=True)


In [158]:
# Same rank ordering for the remaining four group tables (index labels are kept).
min1_r = min1_r.sort_values(by="Ranking", ascending=True)
mean2_r = mean2_r.sort_values(by="Ranking", ascending=True)
max2_r = max2_r.sort_values(by="Ranking", ascending=True)
min2_r = min2_r.sort_values(by="Ranking", ascending=True)

In [159]:
# Side-by-side table of the six per-group rankings.
# pd.concat(axis=1) aligns rows on index labels, and the sorted frames still carry their
# original labels, so each row gathers the entries that share a label across the six groups.
# NOTE(review): this lines up one measurement per row only if every group lists its
# measurements in the same column order - the displayed output suggests so; confirm.
# The result has repeated column names ('Feature'/'Ranking' six times each).
all_ranks2 = pd.concat([mean1_r,max1_r,min1_r,mean2_r,max2_r,min2_r], axis =1)

In [139]:
# Side-by-side "leaderboard": each group's features listed in that group's own rank order.
# pd.concat(axis=1) aligns on index labels, so each sorted frame's index is reset first;
# without the reset the rows would be re-aligned by original label instead of by rank position.
all_ranks = pd.concat(
    [
        ranked.reset_index(drop=True)
        for ranked in (mean1_r, max1_r, min1_r, mean2_r, max2_r, min2_r)
    ],
    axis=1,
)

In [140]:
# Display the side-by-side ranking table.
all_ranks

Unnamed: 0,Feature,Ranking,Feature.1,Ranking.1,Feature.2,Ranking.2,Feature.3,Ranking.3,Feature.4,Ranking.4,Feature.5,Ranking.5
0,Mean_Urine.x,1,Max_Platelets.x,1,Min_Weight.x,1,Mean_GCS.y,1,Max_WBC.y,1,Min_WBC.y,1
1,Mean_HR.x,1,Max_Glucose.x,1,Min_Platelets.x,1,Mean_Urine.y,1,Max_GCT.y,1,Min_Platelets.y,1
2,Mean_Platelets.x,2,Max_WBC.x,2,Min_WBC.x,2,Mean_WBC.y,2,Max_Glucose.y,2,Min_Weight.y,2
3,Mean_GCS.x,3,Max_HR.x,3,Min_pH.x,3,Mean_NISysABP.y,3,Max_Urine.y,3,Min_GCT.y,3
4,Mean_PaCO2.x,4,Max_GCT.x,4,Min_Glucose.x,4,Mean_HR.y,4,Max_GCS.y,4,Min_GCS.y,4
5,Mean_GCT.x,5,Max_Urine.x,5,Min_BUN.x,5,Mean_BUN.y,5,Max_HR.y,5,Min_BUN.y,5
6,Mean_Temp.x,6,Max_PaCO2.x,6,Min_NIMAP.x,6,Mean_Glucose.y,6,Max_Platelets.y,6,Min_Glucose.y,6
7,Mean_SysABP.x,7,Max_NISysABP.x,7,Min_Lactate.x,7,Mean_GCT.y,7,Max_NISysABP.y,7,Min_Lactate.y,7
8,Mean_BUN.x,8,Max_Weight.x,8,Min_Temp.x,8,Mean_Lactate.y,8,Max_PaO2.y,8,Min_HR.y,8
9,Mean_HCO3.x,9,Max_GCS.x,9,Min_GCT.x,9,Mean_Temp.y,9,Max_SysABP.y,9,Min_Temp.y,9


In [160]:
# Display the label-aligned ranking table.
all_ranks2

Unnamed: 0,Feature,Ranking,Feature.1,Ranking.1,Feature.2,Ranking.2,Feature.3,Ranking.3,Feature.4,Ranking.4,Feature.5,Ranking.5
8,Mean_Urine.x,1,Max_Urine.x,5,Min_Urine.x,24,Mean_Urine.y,1,Max_Urine.y,3,Min_Urine.y,18
2,Mean_HR.x,1,Max_HR.x,3,Min_HR.x,10,Mean_HR.y,4,Max_HR.y,5,Min_HR.y,8
15,Mean_Platelets.x,2,Max_Platelets.x,1,Min_Platelets.x,1,Mean_Platelets.y,11,Max_Platelets.y,6,Min_Platelets.y,1
1,Mean_GCS.x,3,Max_GCS.x,9,Min_GCS.x,21,Mean_GCS.y,1,Max_GCS.y,4,Min_GCS.y,4
20,Mean_PaCO2.x,4,Max_PaCO2.x,6,Min_PaCO2.x,11,Mean_PaCO2.y,10,Max_PaCO2.y,15,Min_PaCO2.y,14
13,Mean_GCT.x,5,Max_GCT.x,4,Min_GCT.x,9,Mean_GCT.y,7,Max_GCT.y,1,Min_GCT.y,3
7,Mean_Temp.x,6,Max_Temp.x,14,Min_Temp.x,8,Mean_Temp.y,9,Max_Temp.y,19,Min_Temp.y,9
26,Mean_SysABP.x,7,Max_SysABP.x,11,Min_SysABP.x,29,Mean_SysABP.y,13,Max_SysABP.y,9,Min_SysABP.y,16
9,Mean_BUN.x,8,Max_BUN.x,10,Min_BUN.x,5,Mean_BUN.y,5,Max_BUN.y,10,Min_BUN.y,5
12,Mean_HCO3.x,9,Max_HCO3.x,16,Min_HCO3.x,12,Mean_HCO3.y,17,Max_HCO3.y,22,Min_HCO3.y,10


In [162]:
# Total rank per row across the six groups. Only the columns named "Ranking" are summed,
# so re-running this cell does not fold an existing 'Row Sum' column into the total.
is_ranking_col = all_ranks2.columns == "Ranking"
all_ranks2['Row Sum'] = all_ranks2.loc[:, is_ranking_col].sum(axis=1)

In [164]:
# Display rows ordered by total rank, smallest total first.
# The sorted frame is not assigned, so all_ranks2 itself keeps its current row order.
all_ranks2.sort_values(by="Row Sum", ascending=True)

Unnamed: 0,Feature,Ranking,Feature.1,Ranking.1,Feature.2,Ranking.2,Feature.3,Ranking.3,Feature.4,Ranking.4,Feature.5,Ranking.5,Row Sum
15,Mean_Platelets.x,2,Max_Platelets.x,1,Min_Platelets.x,1,Mean_Platelets.y,11,Max_Platelets.y,6,Min_Platelets.y,1,22
18,Mean_WBC.x,17,Max_WBC.x,2,Min_WBC.x,2,Mean_WBC.y,2,Max_WBC.y,1,Min_WBC.y,1,25
11,Mean_Glucose.x,10,Max_Glucose.x,1,Min_Glucose.x,4,Mean_Glucose.y,6,Max_Glucose.y,2,Min_Glucose.y,6,29
13,Mean_GCT.x,5,Max_GCT.x,4,Min_GCT.x,9,Mean_GCT.y,7,Max_GCT.y,1,Min_GCT.y,3,29
2,Mean_HR.x,1,Max_HR.x,3,Min_HR.x,10,Mean_HR.y,4,Max_HR.y,5,Min_HR.y,8,31
1,Mean_GCS.x,3,Max_GCS.x,9,Min_GCS.x,21,Mean_GCS.y,1,Max_GCS.y,4,Min_GCS.y,4,42
9,Mean_BUN.x,8,Max_BUN.x,10,Min_BUN.x,5,Mean_BUN.y,5,Max_BUN.y,10,Min_BUN.y,5,43
0,Mean_Weight.x,14,Max_Weight.x,8,Min_Weight.x,1,Mean_Weight.y,14,Max_Weight.y,11,Min_Weight.y,2,50
8,Mean_Urine.x,1,Max_Urine.x,5,Min_Urine.x,24,Mean_Urine.y,1,Max_Urine.y,3,Min_Urine.y,18,52
5,Mean_NISysABP.x,13,Max_NISysABP.x,7,Min_NISysABP.x,15,Mean_NISysABP.y,3,Max_NISysABP.y,7,Min_NISysABP.y,15,60


In [8]:
# Scratch cell, unrelated to the RFE analysis: strip(" ") removes leading/trailing spaces.
name = "poopoo head "
print(name.strip(" "))

poopoo head


In [9]:
# Scratch: strip() with no argument removes leading/trailing whitespace; rebinds `name`.
name = name.strip()

In [10]:
# Scratch: show the current value of `name`.
print(name)

poopoo head


In [12]:
# Scratch: character count of `name`.
length = len(name)
print(length)

11


In [13]:
# Scratch: replace(" ", "") removes every space, not just those at the ends.
name = name.replace(" ","")
print(name)

poopoohead


In [16]:
# Scratch: // is floor division, so 15 // 2 prints 7.
x = 15
print(x // 2)

7
